In [1]:
import datetime

import numpy as np
import pandas as pd
import wandb
import yaml
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger
from utils.tokenizer import get_tokenizer
from data_loader.data_loaders import TextDataLoader
from utils.util import set_seed
from model.model import STSModel
from utils.util import WandbCheckpointCallback

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the train and dev splits from the local data/ directory, in that order.
train, dev = (pd.read_csv(f'data/{split}.csv') for split in ('train', 'dev'))

# Preview the first rows of the training split as the cell output.
train.head()

Unnamed: 0,id,source,sentence_1,sentence_2,label,binary-label
0,boostcamp-sts-v1-train-000,nsmc-sampled,스릴도있고 반전도 있고 여느 한국영화 쓰레기들하고는 차원이 다르네요~,"반전도 있고,사랑도 있고재미도있네요.",2.2,0.0
1,boostcamp-sts-v1-train-001,slack-rtt,앗 제가 접근권한이 없다고 뜹니다;;,"오, 액세스 권한이 없다고 합니다.",4.2,1.0
2,boostcamp-sts-v1-train-002,petition-sampled,주택청약조건 변경해주세요.,주택청약 무주택기준 변경해주세요.,2.4,0.0
3,boostcamp-sts-v1-train-003,slack-sampled,입사후 처음 대면으로 만나 반가웠습니다.,화상으로만 보다가 리얼로 만나니 정말 반가웠습니다.,3.0,1.0
4,boostcamp-sts-v1-train-004,slack-sampled,뿌듯뿌듯 하네요!!,꼬옥 실제로 한번 뵈어요 뿌뿌뿌~!~!,0.0,0.0


In [4]:
# Cast the target columns to float32 (pandas reads them from CSV as float64).
label_columns = ['label', 'binary-label']
train[label_columns] = train[label_columns].astype('float32')

# Apply the same cast to the dev split so both splits passed to the data loader
# carry labels of the same dtype. Columns missing from dev are skipped rather
# than raising, because the dev schema is not shown in this notebook.
dev_label_columns = [col for col in label_columns if col in dev.columns]
if dev_label_columns:
    dev[dev_label_columns] = dev[dev_label_columns].astype('float32')

In [5]:
# Hyperparameters and model identifier shared by the tokenizer, data loader,
# model, trainer and the wandb run.
# NOTE(review): LEARNING_RATE is passed to STSModel via this dict; how it is
# consumed (optimizer, scheduler) is defined outside this notebook.
config = dict(
    BATCH_SIZE=32,
    MAX_LEN=128,
    LEARNING_RATE=0.001,
    EPOCHS=10,
    MODEL_NAME='intfloat/multilingual-e5-small',
)

In [6]:
# Authenticate with Weights & Biases and start the run that tracks this experiment.
# No key is passed on purpose: wandb.login() resolves credentials from the
# WANDB_API_KEY environment variable or the stored netrc entry (and prompts
# otherwise), so the secret never lives in the notebook source.
wandb.login()

# The hyperparameter dict is attached to the run so it is recorded with the results.
wandb.init(project="Level1_STS", config=config)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33mkangjun205[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /data/ephemeral/home/.netrc


In [7]:
# Build the tokenizer, data loader, model, callbacks and logger, then fit and
# validate the model.

# Take a single timestamp so the run name (minute resolution) and the
# checkpoint filename (second resolution) always describe the same instant.
run_started = datetime.datetime.now()
now_min = run_started.strftime('%d%H%M')
now_sec = run_started.strftime('%d%H%M%S')

tokenizer = get_tokenizer(config['MODEL_NAME'])
dataloader = TextDataLoader(
    tokenizer=tokenizer,
    max_len=config['MAX_LEN'],
    train_data=train,
    dev_data=dev,
    truncation=True,
    batch_size=config['BATCH_SIZE']
)
model = STSModel(config)

# NOTE(review): the recorded run of this cell ended with
# "Early stopping conditioned on metric `val_loss` which is not available"
# after Lightning warned that no `val_dataloader` was found. Both callbacks
# below monitor `val_loss`, so TextDataLoader has to provide a validation
# loader and STSModel has to log `val_loss` -- confirm in
# data_loader/data_loaders.py and model/model.py before re-running.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=3,
    mode='min'
)

# Keep the three checkpoints with the lowest val_loss under saved/.
checkpoint_callback = ModelCheckpoint(
    dirpath='saved',
    filename=f'best-model-{now_sec}',
    save_top_k=3,
    monitor='val_loss',
    mode='min'
)

wandb_checkpoint_callback = WandbCheckpointCallback(top_k=3)

model_name = config['MODEL_NAME']
run_name = f'{model_name}-{now_min}'
# Use the same project name as the wandb.init() call earlier in the notebook;
# the logger attaches to that already-active run, so the two must agree.
wandb_logger = WandbLogger(name=run_name, project="Level1_STS")

trainer = Trainer(
    accelerator="gpu",
    devices=1,
    max_epochs=config['EPOCHS'],
    log_every_n_steps=1,
    callbacks=[early_stop_callback, checkpoint_callback, wandb_checkpoint_callback],
    logger=wandb_logger
)

trainer.fit(model, dataloader)
trainer.validate(model, dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/conda/envs/STS/lib/python3.11/site-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
/opt/conda/envs/STS/lib/python3.11/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type             | Params | Mode 
---------------------------------------------------------------
0 | mod               | BertModel        | 117 M  | eval 
1 | cosine_similarity | CosineSimilarity | 0      | train
---------------------------------------------------------------
117 M     Trainable params
0         Non-trainable params
117 M     Total 

Epoch 0:   0%|          | 0/292 [00:00<?, ?it/s] 

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0: 100%|██████████| 292/292 [00:20<00:00, 14.37it/s, v_num=8qyy]

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: Early stopping conditioned on metric `val_loss` which is not available. Pass in or modify your `EarlyStopping` callback to use any of the following: `train_loss`