In [2]:
import datetime

import numpy as np
import pandas as pd
import wandb
import yaml
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from utils.tokenizer import get_tokenizer
from data_loader.data_loaders import TextDataLoader
from utils.util import set_seed
from model.model import STSModel
from utils.util import WandbCheckpointCallback

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Authenticate with Weights & Biases without embedding a key in the notebook.
# With no explicit `key`, wandb resolves credentials from the WANDB_API_KEY
# environment variable or previously stored credentials, and prompts otherwise.
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /data/ephemeral/home/.netrc


True

In [5]:
# Load the run configuration. The file is decoded explicitly as UTF-8 so the
# result does not depend on the platform's default text encoding.
with open('config.yml', 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Start the W&B run in the same project that the WandbLogger in main() targets.
# A WandbLogger created while a run is already active reuses that run, so the
# project chosen here is the one the training logs actually end up in.
wandb.init(project="Level1-STS", config=config)

[34m[1mwandb[0m: Currently logged in as: [33mkangjun205[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
def main(config):
    """Train and validate the STS model for one run.

    Loads the train/dev CSV files, builds the tokenizer, data loader and
    model, then fits with early stopping and top-k checkpointing while
    logging to Weights & Biases. Validation is run once more after fitting.

    Args:
        config: Mapping-like run configuration (a dict or ``wandb.config``).
            Keys read here: ``MODEL_NAME``, ``MAX_LEN``, ``BATCH_SIZE`` and
            ``EPOCH``. Optional keys ``TRAIN_PATH`` and ``DEV_PATH`` override
            the default ``data/train.csv`` / ``data/dev.csv`` locations.
            The whole object is also handed to ``STSModel``.

    Raises:
        FileNotFoundError: If the train or dev CSV file does not exist.
    """
    from pathlib import Path

    ## data
    # Paths are relative to the current working directory unless overridden
    # through the config; both are checked up front so a missing dev file is
    # reported before the train file is loaded.
    train_path = Path(config.get('TRAIN_PATH', 'data/train.csv'))
    dev_path = Path(config.get('DEV_PATH', 'data/dev.csv'))
    for csv_path in (train_path, dev_path):
        if not csv_path.is_file():
            raise FileNotFoundError(
                f"No such file or directory: '{csv_path}' "
                f"(resolved to '{csv_path.resolve()}')"
            )

    train = pd.read_csv(train_path)
    dev = pd.read_csv(dev_path)

    # Take one timestamp so the run name and the checkpoint name can never
    # disagree by straddling a minute/second boundary between two now() calls.
    started_at = datetime.datetime.now()
    now_min = started_at.strftime('%d%H%M')
    now_sec = started_at.strftime('%d%H%M%S')

    tokenizer = get_tokenizer(config['MODEL_NAME'])
    dataloader = TextDataLoader(
        tokenizer=tokenizer,
        max_len=config['MAX_LEN'],
        train_data=train,
        dev_data=dev,
        truncation=True,
        batch_size=config['BATCH_SIZE']
    )
    model = STSModel(config)

    # Stop once 'val_loss' has not improved for 3 consecutive validation checks.
    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        patience=3,
        mode='min'
    )

    # Keep the 3 checkpoints with the lowest 'val_loss' under ./saved.
    # NOTE(review): the filename has no epoch/metric placeholder, so the kept
    # checkpoints are presumably told apart only by Lightning's automatic
    # version suffix -- consider adding '{epoch}' to the template.
    checkpoint_callback = ModelCheckpoint(
        dirpath='saved',
        filename=f'best-model-{now_sec}',
        save_top_k=3,
        monitor='val_loss',
        mode='min'
    )

    wandb_checkpoint_callback = WandbCheckpointCallback(top_k=3)

    model_name = config['MODEL_NAME']
    run_name = f'{model_name}-{now_min}'
    # NOTE(review): if a wandb run is already active (e.g. started by an
    # earlier wandb.init), WandbLogger reuses it and `name`/`project` given
    # here may be ignored -- confirm against the W&B dashboard.
    wandb_logger = WandbLogger(name=run_name, project="Level1-STS")

    trainer = Trainer(
        accelerator="gpu",
        devices=1,
        max_epochs=config['EPOCH'],
        log_every_n_steps=1,
        callbacks=[early_stop_callback, checkpoint_callback, wandb_checkpoint_callback],
        logger=wandb_logger
    )

    trainer.fit(model, dataloader)
    # NOTE(review): this validates the weights held in memory after fit, which
    # may not be the best checkpoint; pass ckpt_path='best' if that is intended.
    trainer.validate(model, dataloader)

In [9]:
# Run the training entry point with the config held by the active W&B run.
main(config=wandb.config)

FileNotFoundError: [Errno 2] No such file or directory: 'data/train.csv'