## 라이브러리 import


In [1]:
import os, torch
import pytorch_lightning as pl
from utils import tools
from utils.data_pipeline import Dataloader
from model.model import Model

import pandas as pd

2024-09-26 04:52:51.685562: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-26 04:52:51.691274: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-26 04:52:51.704616: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-26 04:52:51.726921: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-26 04:52:51.733509: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attemptin

## 실험 환경 정의


### Experiments Config


In [2]:
# Experiment configuration shared by the Dataloader, Model, callback and
# Trainer cells below. Keys and values are passed through unchanged; their
# interpretation happens inside the project modules (utils / model).
CFG = dict(
    admin="admin",
    seed=42,
    train=dict(
        model_name="snunlp/KR-ELECTRA-discriminator",
        batch_size=16,
        epoch=10,
        LR=3e-5,
        LossF="torch.nn.MSELoss",
        optim="torch.optim.AdamW",
        weight_decay=0.05,
        num_hiddens=1,
        dropout=0.2,
        num_workers=7,
    ),
    LR_scheduler=dict(num_warmup_rate=0.1, LR_step_type="step", LR_step_freq=1),
    early_stopping=dict(monitor="val_loss", patience=3, mode="min"),
    # Disabled entry, kept for reference:
    # inference=dict(model_path="./experiments/09-12_16_admin/model.pt"),
)

### Seed 초기화


In [3]:
# Seed the run from the shared config value.
# NOTE(review): init_seed comes from utils.tools; presumably it seeds the
# Python / NumPy / torch RNGs — confirm against that module.
tools.init_seed(CFG["seed"])

## 학습


### dataloader 설정


### Dataloader 입력 경로(Args) 정의

### 인스턴스 생성

In [4]:
# class Args:
#     train_path = "data/custom/train_v1.0.2_clean_spacing.csv"
#     dev_path = "data/custom/dev_v1.0.1_clean_spacing.csv"
#     test_path = "data/custom/dev_v1.0.1_clean_spacing.csv"
#     predict_path = "data/custom/dev_v1.0.1_clean_spacing.csv"

In [5]:
# Dataset path settings
class Args:
    """CSV paths that the next cell passes, in order, to ``Dataloader``.

    Note that ``dev_path`` and ``test_path`` point at the same file.
    """

    train_path = "data/custom/full_augmented_train_swap.csv"
    dev_path = "./data/raw/dev.csv"
    test_path = "./data/raw/dev.csv"  # same file as dev_path
    predict_path = "./data/raw/test.csv"

In [6]:
# Re-seed before building the datamodule (this notebook re-seeds at the top
# of most cells).
tools.init_seed(CFG["seed"])
args = Args()
# NOTE(review): presumably set to silence the Hugging Face tokenizers
# parallelism warning when DataLoader worker processes are used — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Positional arguments: config, then the train / dev / test / predict paths.
dataloader = Dataloader(
    CFG, args.train_path, args.dev_path, args.test_path, args.predict_path
)



### model 설정


In [8]:
# Re-seed, presumably so that the newly initialised classifier weights
# reported in the output below are the same on every run — confirm.
tools.init_seed(CFG["seed"])
# Model is the project class imported from model.model, built from the config.
model = Model(CFG)

  return self.fget.__get__(instance, owner)()
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at snunlp/KR-ELECTRA-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### early stopping setting


In [9]:
# Early-stopping callback built from CFG["early_stopping"]; only the three
# keys listed here are forwarded as keyword arguments.
# NOTE: the Trainer cell has its `callbacks=` line commented out, so this
# object is created but not attached to training.
early_stopping_callbacks = pl.callbacks.EarlyStopping(
    **{key: CFG["early_stopping"][key] for key in ("monitor", "patience", "mode")}
)

### Trainer


In [10]:
tools.init_seed(CFG["seed"])
# Create the trainer instance
trainer = pl.Trainer(
    accelerator="gpu",  # hard-coded: this cell expects a GPU to be available
    devices=1,
    # Early stopping is disabled; uncomment the next line to attach the
    # callback built in the previous cell.
    # callbacks=[early_stopping_callbacks],
    max_epochs=CFG["train"]["epoch"],
    log_every_n_steps=100,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [11]:
tools.init_seed(CFG["seed"])
# Training: the datamodule supplies the training and validation loaders.
trainer.fit(model=model, datamodule=dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | ElectraForSequenceClassification | 109 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
109 M     Trainable params
0         Non-trainable params
109 M     Total params
436.328   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [46]:
# Evaluate the trained model. trainer.test() pulls its batches from the
# datamodule's test_dataloader().
# NOTE(review): which CSV that loader reads is decided inside Dataloader
# (utils.data_pipeline) — presumably args.test_path (./data/raw/dev.csv),
# not predict_path; confirm there.
# The return value is the test result (metrics), so it is kept under its own
# name instead of `predictions`, which trainer.predict() assigns later.
test_metrics = trainer.test(model=model, datamodule=dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

## 모델 저장


In [13]:
# Create an experiment folder inside the experiments directory
# Folder name: experiment date - experiment time - admin
experiment_path = tools.create_experiment_folder(CFG)

In [14]:
# Save the trained model (stored as model.pt inside the experiment folder)
# NOTE(review): torch.save(model, ...) pickles the whole module object rather
# than a state_dict, so loading it later needs the same Model class to be
# importable — confirm this is intended.
torch.save(model, os.path.join(experiment_path, "model.pt"))
print(f"모델이 저장되었습니다: {experiment_path}")

모델이 저장되었습니다: ./experiments/09-26_01_hyejun


## test.csv 예측

In [15]:
# # inference에 쓸 모델 불러오기(CFG로 참조)
# model_path = CFG["inference"]["model_path"]

# model = torch.load(model_path)

In [16]:
# Inference: run the trained model through trainer.predict() on the datamodule.
# The result is passed to torch.cat in a later cell, i.e. it is treated as a
# sequence of per-batch tensors.
predictions = trainer.predict(model=model, datamodule=dataloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [17]:
# Load the sample submission as a template; its "target" column is
# overwritten with the model's predictions in a later cell.
output = pd.read_csv("./data/raw/sample_submission.csv")
output.head()

Unnamed: 0,id,target
0,boostcamp-sts-v1-test-000,3.5
1,boostcamp-sts-v1-test-001,2.1
2,boostcamp-sts-v1-test-002,2.3
3,boostcamp-sts-v1-test-003,1.0
4,boostcamp-sts-v1-test-004,4.0


In [18]:
# Join the per-batch prediction tensors, convert each element to a Python
# float and round it to one decimal place.
formatted_predictions = [
    round(float(score), 1) for score in torch.cat(predictions)
]

# Replace the template's "target" column with the rounded predictions and
# preview the first rows.
output["target"] = formatted_predictions
output.head()

Unnamed: 0,id,target
0,boostcamp-sts-v1-test-000,3.8
1,boostcamp-sts-v1-test-001,4.1
2,boostcamp-sts-v1-test-002,0.8
3,boostcamp-sts-v1-test-003,0.1
4,boostcamp-sts-v1-test-004,3.7


In [20]:
# Write the submission file. The target directory is created first so that a
# fresh checkout does not fail on a missing ./data/inference folder after the
# whole training run has already finished.
submission_path = "./data/inference/gpt_swap.csv"
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
output.to_csv(submission_path, index=False)

End.
