In [1]:
import os

import pytorch_lightning as pl
import torch

import src.callback as callback
from src.config.data_loader_config import DATA_LOADER_CONFIG, OPTIMIZER_CONFIG
from src.data_loader.loader import Dataloader
from src.model.model import Model, Models, LossFunctions
from src.trainer.predict import save_result

# Hyperparameters, read from the project configuration dictionaries.
batch_size, shuffle = (
    DATA_LOADER_CONFIG['batch_size'],
    DATA_LOADER_CONFIG['shuffle'],
)
learning_rate, max_epoch = (
    OPTIMIZER_CONFIG['learning_rate'],
    OPTIMIZER_CONFIG['max_epoch'],
)

# Turn off tokenizer-level parallelism for this process
# (presumably to avoid the fork warning when DataLoader workers are used -- confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Optional setting: falls back to 4 when the config has no 'num_workers' entry.
num_workers: int = DATA_LOADER_CONFIG.get('num_workers', 4)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Model selection.
# Available choices: roberta_base, roberta_small, roberta_large, electra_base, electra_base_v3
# The choice is made once here; the matching `Models` member is looked up by name so the
# network, the dataloader and the checkpoint naming can never refer to different models.
model_name = 'electra_base_v3'
model_type = getattr(Models, model_name)
model = Model(model_type, learning_rate, LossFunctions.hu_loss)

# Dataloader, built for the same model type as the network above.
dataloader = Dataloader(batch_size, shuffle, model_type, check_aug=True)

# Callbacks handed to the trainer.
epoch_print_callback = callback.EpochPrintCallback()
# NOTE(review): in the recorded training log a val_loss of 0.88609 replaced 0.85244 as
# "best", while 0.8434 was reported as "not in top 1". It looks like the project's
# ModelCheckpoint wrapper ranks val_loss in the wrong direction -- confirm its monitor mode.
checkpoint_callback = callback.ModelCheckpoint(model_name=model_name)
early_stopping = callback.EarlyStopping()
lr_monitor = callback.LearningRateMonitor()

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# 학습

In [4]:
# Build the Lightning trainer: GPU accelerator with automatic device selection,
# 16-bit mixed precision, at most `max_epoch` epochs, and the callbacks created earlier.
trainer = pl.Trainer(
    accelerator='gpu',
    devices='auto',
    precision='16-mixed',
    max_epochs=max_epoch,
    callbacks=[
        lr_monitor,
        epoch_print_callback,
        checkpoint_callback,
        early_stopping,
    ],
)

# Train the model on the data module defined earlier in the notebook.
trainer.fit(model=model, datamodule=dataloader)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_zero_labels[['sentence_1', 'sentence_2']] = non_zero_labels[['sentence_2', 'sentence_1']]
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:639: Checkpoint directory /da

                                                                           

  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)


Epoch 0: 100%|██████████| 1033/1033 [00:57<00:00, 17.84it/s, v_num=8]

  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)


Epoch 0: 100%|██████████| 1033/1033 [00:58<00:00, 17.57it/s, v_num=8]Epoch 0 ended
Validation Loss: 0.8524
Training Loss: 1.1511
Validation Pearson Correlation: 0.5748
----------------------------------------
Epoch 0: 100%|██████████| 1033/1033 [00:58<00:00, 17.57it/s, v_num=8]

  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)
Metric val_loss improved. New best score: 0.852
Epoch 0, global step 1033: 'val_loss' reached 0.85244 (best 0.85244), saving model to '/data/ephemeral/home/level1-semantictextsimilarity-nlp-07/electra_base_v3/checkpoints/best-model_name=0-epoch=00-v1.ckpt' as top 1


Epoch 1: 100%|██████████| 1033/1033 [00:58<00:00, 17.51it/s, v_num=8]Epoch 1 ended
Validation Loss: 0.8434
Training Loss: 0.7482
Validation Pearson Correlation: 0.8259
----------------------------------------
Epoch 1: 100%|██████████| 1033/1033 [00:59<00:00, 17.51it/s, v_num=8]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.843
Epoch 1, global step 2066: 'val_loss' was not in top 1


Epoch 2: 100%|██████████| 1033/1033 [00:58<00:00, 17.54it/s, v_num=8]Epoch 2 ended
Validation Loss: 0.8861
Training Loss: 0.9342
Validation Pearson Correlation: 0.8678
----------------------------------------
Epoch 2: 100%|██████████| 1033/1033 [00:58<00:00, 17.54it/s, v_num=8]

Epoch 2, global step 3099: 'val_loss' reached 0.88609 (best 0.88609), saving model to '/data/ephemeral/home/level1-semantictextsimilarity-nlp-07/electra_base_v3/checkpoints/best-model_name=0-epoch=02-v1.ckpt' as top 1


Epoch 3: 100%|██████████| 1033/1033 [00:59<00:00, 17.48it/s, v_num=8]Epoch 3 ended
Validation Loss: 0.8737
Training Loss: 1.0542
Validation Pearson Correlation: 0.8755
----------------------------------------
Epoch 3: 100%|██████████| 1033/1033 [00:59<00:00, 17.48it/s, v_num=8]

Epoch 3, global step 4132: 'val_loss' was not in top 1


Epoch 4: 100%|██████████| 1033/1033 [00:58<00:00, 17.62it/s, v_num=8]Epoch 4 ended
Validation Loss: 0.8696
Training Loss: 0.7315
Validation Pearson Correlation: 0.8592
----------------------------------------
Epoch 4: 100%|██████████| 1033/1033 [00:58<00:00, 17.62it/s, v_num=8]

Epoch 4, global step 5165: 'val_loss' was not in top 1


Epoch 5: 100%|██████████| 1033/1033 [00:58<00:00, 17.62it/s, v_num=8]Epoch 5 ended
Validation Loss: 0.8654
Training Loss: 0.7945
Validation Pearson Correlation: 0.8590
----------------------------------------
Epoch 5: 100%|██████████| 1033/1033 [00:58<00:00, 17.62it/s, v_num=8]

Epoch 5, global step 6198: 'val_loss' was not in top 1


Epoch 6: 100%|██████████| 1033/1033 [00:59<00:00, 17.46it/s, v_num=8]Epoch 6 ended
Validation Loss: 0.8496
Training Loss: 1.1913
Validation Pearson Correlation: 0.8054
----------------------------------------
Epoch 6: 100%|██████████| 1033/1033 [00:59<00:00, 17.46it/s, v_num=8]

Epoch 6, global step 7231: 'val_loss' was not in top 1


Epoch 7: 100%|██████████| 1033/1033 [00:59<00:00, 17.47it/s, v_num=8]Epoch 7 ended
Validation Loss: 0.8399
Training Loss: 0.9407
Validation Pearson Correlation: 0.8257
----------------------------------------
Epoch 7: 100%|██████████| 1033/1033 [00:59<00:00, 17.47it/s, v_num=8]

Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.840
Epoch 7, global step 8264: 'val_loss' was not in top 1


Epoch 8: 100%|██████████| 1033/1033 [00:58<00:00, 17.61it/s, v_num=8]Epoch 8 ended
Validation Loss: 0.8591
Training Loss: 1.0255
Validation Pearson Correlation: 0.8374
----------------------------------------
Epoch 8: 100%|██████████| 1033/1033 [00:58<00:00, 17.61it/s, v_num=8]

Epoch 8, global step 9297: 'val_loss' was not in top 1


Epoch 9: 100%|██████████| 1033/1033 [00:59<00:00, 17.49it/s, v_num=8]Epoch 9 ended
Validation Loss: 0.8542
Training Loss: 0.9178
Validation Pearson Correlation: 0.8368
----------------------------------------
Epoch 9: 100%|██████████| 1033/1033 [00:59<00:00, 17.49it/s, v_num=8]

Epoch 9, global step 10330: 'val_loss' was not in top 1


Epoch 10: 100%|██████████| 1033/1033 [00:59<00:00, 17.51it/s, v_num=8]Epoch 10 ended
Validation Loss: 0.8686
Training Loss: 0.9667
Validation Pearson Correlation: 0.8404
----------------------------------------
Epoch 10: 100%|██████████| 1033/1033 [00:59<00:00, 17.51it/s, v_num=8]

Epoch 10, global step 11363: 'val_loss' was not in top 1


Epoch 11:  52%|█████▏    | 536/1033 [00:30<00:27, 17.78it/s, v_num=8] 

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


In [None]:
# Release cached, currently unused GPU memory held by PyTorch before running inference.
# `torch` is imported in the notebook's top import cell rather than here, so that all
# dependencies are declared in one place.
torch.cuda.empty_cache()

# 추론


In [None]:
# Display the best-checkpoint path recorded by the checkpoint callback.
# An empty string (as in the recorded output) means no best checkpoint path is available.
checkpoint_callback.best_model_path

''

In [None]:
# Load the best checkpoint and run evaluation + inference.
# This cell relies on `checkpoint_callback`, `trainer`, `dataloader`, `model_name` and
# `max_epoch` from the cells above; on a fresh kernel those cells must be run first
# (otherwise a NameError is raised, as seen in the recorded output).
best_model_path = checkpoint_callback.best_model_path

# `best_model_path` can be an empty string when no best checkpoint was recorded;
# fail early with a clear message instead of passing '' to the checkpoint loader.
if not best_model_path:
    raise RuntimeError(
        "checkpoint_callback.best_model_path is empty: no best checkpoint is available. "
        "Run training to completion (or point best_model_path at an existing .ckpt file) "
        "before running inference."
    )

model = Model.load_from_checkpoint(best_model_path, loss_func=LossFunctions.hu_loss)

# Evaluate the restored model on the data module's test split.
trainer.test(model=model, datamodule=dataloader)

# Inference
predictions = trainer.predict(model=model, datamodule=dataloader)

# Save the results
save_result(predictions, model_name, max_epoch)

NameError: name 'checkpoint_callback' is not defined