In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [9]:
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from datasets import Dataset
import pandas as pd
import torch

# 모델 및 토크나이저 로드
model_name = "microsoft/deberta-v3-base"  # DeBERTa 모델
tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 데이터 전처리 함수 정의
def preprocess_text(text):
    return tokenizer(text, truncation=True, padding="max_length", max_length=128)

# 데이터셋 로드 및 전처리
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# 데이터셋 분리
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_data['text'], train_data['target'], test_size=0.2, random_state=42
)

# 토큰화 및 변환
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(list(val_texts), truncation=True, padding=True, max_length=128)

# Hugging Face 데이터셋 포맷으로 변환
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'labels': list(train_labels)
})
val_dataset = Dataset.from_dict({
    'input_ids': val_encodings['input_ids'],
    'attention_mask': val_encodings['attention_mask'],
    'labels': list(val_labels)
})

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# 학습 설정
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=1,
    report_to="none"
)

# 평가 메트릭 함수 정의
def compute_metrics(pred):
    logits, labels = pred
    predictions = torch.argmax(torch.tensor(logits), axis=1)
    accuracy = (predictions == torch.tensor(labels)).float().mean()
    return {"accuracy": accuracy.item()}

# Trainer 설정
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# 모델 학습
trainer.train()

# 검증 데이터 예측
results = trainer.evaluate()
print("Validation Accuracy:", results["eval_accuracy"])

# 테스트 데이터 예측 및 제출 파일 생성
test_encodings = tokenizer(list(test_data['text']), truncation=True, padding=True, max_length=128)
test_dataset = Dataset.from_dict({
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask']
})

# 예측
predictions = trainer.predict(test_dataset)
test_data['target'] = torch.argmax(torch.tensor(predictions.predictions), axis=1).numpy()

# 제출 파일 생성
submission = test_data[['id', 'target']]
submission.to_csv('submission_v5_DeBERTa_v3.csv', index=False)
print("Submission file saved as 'submission.csv'")

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3674,0.381325,0.83585
2,0.3359,0.432938,0.83585
3,0.371,0.456798,0.838477


Validation Accuracy: 0.8384767174720764
Submission file saved as 'submission.csv'


In [2]:
# 그리드 서치

from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from datasets import Dataset
import pandas as pd
import torch

# 모델 및 토크나이저 로드
model_name = "microsoft/deberta-v3-base"
tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)

# 데이터셋 로드
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# 데이터셋 분리
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_data["text"], train_data["target"], test_size=0.2, random_state=42
)

# 데이터 전처리 함수 정의
def preprocess_data(texts, labels=None):
    encodings = tokenizer(
        list(texts),
        truncation=True,
        padding=True,
        max_length=128,
    )
    if labels is not None:
        encodings["labels"] = list(labels)
    return encodings

# 데이터 토큰화
train_encodings = preprocess_data(train_texts, train_labels)
val_encodings = preprocess_data(val_texts, val_labels)

# Hugging Face 데이터셋 포맷으로 변환
train_dataset = Dataset.from_dict(train_encodings)
val_dataset = Dataset.from_dict(val_encodings)

# 평가 메트릭 함수 정의
def compute_metrics(pred):
    logits, labels = pred
    predictions = torch.argmax(torch.tensor(logits), axis=1)
    accuracy = (predictions == torch.tensor(labels)).float().mean()
    return {"accuracy": accuracy.item()}

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

In [3]:
# 하이퍼파라미터 그리드 정의
grid = {
    "learning_rate": [1e-5, 2e-5, 3e-5, 4e-5, 5e-5],
    "per_device_train_batch_size": [8, 16],
    "num_train_epochs": [2, 3, 4, 5]
}

In [4]:
# 최적의 하이퍼파라미터를 저장할 변수
best_accuracy = 0
best_params = {}

# 그리드 서치 실행
for learning_rate in grid["learning_rate"]:
    for batch_size in grid["per_device_train_batch_size"]:
        for num_epochs in grid["num_train_epochs"]:
            print(f"Testing config: lr={learning_rate}, batch_size={batch_size}, epochs={num_epochs}")

            # 학습 설정
            training_args = TrainingArguments(
                output_dir="./results",
                evaluation_strategy="epoch",
                save_strategy="no",  # 모델 저장 생략
                num_train_epochs=num_epochs,
                per_device_train_batch_size=batch_size,
                per_device_eval_batch_size=batch_size,
                learning_rate=learning_rate,
                logging_dir="./logs",
                logging_steps=10,
                report_to="none"
            )

            # 모델 초기화
            model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=2)

            # Trainer 설정
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
                eval_dataset=val_dataset,
                tokenizer=tokenizer,
                compute_metrics=compute_metrics,
            )

            # 모델 학습
            trainer.train()

            # 검증 데이터 평가
            results = trainer.evaluate()
            accuracy = results["eval_accuracy"]
            print(f"Accuracy: {accuracy}")

            # 최적의 하이퍼파라미터 업데이트
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_params = {
                    "learning_rate": learning_rate,
                    "batch_size": batch_size,
                    "num_train_epochs": num_epochs,
                }

# 최적의 하이퍼파라미터 출력
print("Best Hyperparameters:")
print(best_params)
print(f"Best Validation Accuracy: {best_accuracy}")

Testing config: lr=1e-05, batch_size=8, epochs=2




pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.339,0.383132,0.846356
2,0.3441,0.443096,0.847012


Accuracy: 0.847012460231781
Testing config: lr=1e-05, batch_size=8, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3715,0.379628,0.847669
2,0.3405,0.443084,0.847012
3,0.4399,0.499839,0.845043


Accuracy: 0.8450427055358887
Testing config: lr=1e-05, batch_size=8, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4124,0.381628,0.843073
2,0.3511,0.459504,0.84176
3,0.3567,0.585765,0.83782
4,0.253,0.586763,0.843729


Accuracy: 0.8437294960021973
Testing config: lr=1e-05, batch_size=8, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3794,0.370413,0.851609
2,0.3281,0.437225,0.848982
3,0.3774,0.582649,0.83782
4,0.2342,0.682129,0.841103
5,0.2095,0.661984,0.839133


Accuracy: 0.8391332626342773
Testing config: lr=1e-05, batch_size=16, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3583,0.383203,0.83585
2,0.3121,0.396218,0.837164


Accuracy: 0.837163507938385
Testing config: lr=1e-05, batch_size=16, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3744,0.37465,0.850295
2,0.3037,0.391772,0.842416
3,0.3408,0.420414,0.83782


Accuracy: 0.8378201127052307
Testing config: lr=1e-05, batch_size=16, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3852,0.380493,0.839133
2,0.3248,0.393953,0.848982
3,0.3706,0.437051,0.843729
4,0.2481,0.439791,0.847012


Accuracy: 0.847012460231781
Testing config: lr=1e-05, batch_size=16, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3769,0.382547,0.843073
2,0.3179,0.392748,0.845043
3,0.3992,0.468397,0.838477
4,0.2646,0.537698,0.824688
5,0.3244,0.535053,0.83388


Accuracy: 0.8338804841041565
Testing config: lr=2e-05, batch_size=8, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3729,0.376127,0.846356
2,0.3824,0.452546,0.845699


Accuracy: 0.8456992506980896
Testing config: lr=2e-05, batch_size=8, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3704,0.397454,0.841103
2,0.3496,0.535998,0.84176
3,0.3357,0.573604,0.844386


Accuracy: 0.844386100769043
Testing config: lr=2e-05, batch_size=8, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3444,0.390098,0.845699
2,0.3487,0.48742,0.83585
3,0.3169,0.610941,0.836507
4,0.2196,0.656174,0.843073


Accuracy: 0.8430728912353516
Testing config: lr=2e-05, batch_size=8, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3485,0.391585,0.848326
2,0.3836,0.502943,0.839133
3,0.2926,0.655305,0.818779
4,0.2293,0.675167,0.84176
5,0.2051,0.765942,0.826001


Accuracy: 0.8260012865066528
Testing config: lr=2e-05, batch_size=16, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3396,0.379821,0.835194
2,0.3109,0.405255,0.843729


Accuracy: 0.8437294960021973
Testing config: lr=2e-05, batch_size=16, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3543,0.371601,0.845043
2,0.2945,0.40291,0.848326
3,0.2906,0.451165,0.837164


Accuracy: 0.837163507938385
Testing config: lr=2e-05, batch_size=16, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3601,0.371389,0.851609
2,0.3087,0.440515,0.845043
3,0.3134,0.558544,0.830598
4,0.2341,0.534169,0.84176


Accuracy: 0.8417596817016602
Testing config: lr=2e-05, batch_size=16, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3675,0.370463,0.850295
2,0.3107,0.41013,0.843073
3,0.3387,0.577623,0.809586
4,0.2119,0.652479,0.822062
5,0.2683,0.710879,0.818779


Accuracy: 0.8187787532806396
Testing config: lr=3e-05, batch_size=8, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3939,0.390825,0.834537
2,0.3672,0.48269,0.83585


Accuracy: 0.8358502984046936
Testing config: lr=3e-05, batch_size=8, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3491,0.424542,0.828628
2,0.3779,0.510934,0.838477
3,0.2826,0.565262,0.847012


Accuracy: 0.847012460231781
Testing config: lr=3e-05, batch_size=8, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4093,0.414817,0.83782
2,0.398,0.518188,0.83782
3,0.3746,0.693239,0.808273
4,0.227,0.716138,0.816152


Accuracy: 0.8161523342132568
Testing config: lr=3e-05, batch_size=8, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3859,0.420907,0.832567
2,0.3871,0.536928,0.826658
3,0.3788,0.68916,0.803677
4,0.2247,0.69798,0.823375
5,0.1875,0.835678,0.80893


Accuracy: 0.8089297413825989
Testing config: lr=3e-05, batch_size=16, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3422,0.384772,0.838477
2,0.3004,0.395071,0.842416


Accuracy: 0.8424162864685059
Testing config: lr=3e-05, batch_size=16, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3675,0.380285,0.84176
2,0.3115,0.45367,0.836507
3,0.3074,0.468139,0.840446


Accuracy: 0.8404464721679688
Testing config: lr=3e-05, batch_size=16, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3937,0.384026,0.84176
2,0.3122,0.478746,0.834537
3,0.3513,0.528018,0.823375
4,0.2019,0.562109,0.828628


Accuracy: 0.8286277055740356
Testing config: lr=3e-05, batch_size=16, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3537,0.392863,0.83782
2,0.3327,0.499581,0.825345
3,0.3594,0.560687,0.822718
4,0.1851,0.647335,0.804334
5,0.243,0.716225,0.820092


Accuracy: 0.8200919032096863
Testing config: lr=4e-05, batch_size=8, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3579,0.398941,0.830598
2,0.3777,0.467603,0.83782


Accuracy: 0.8378201127052307
Testing config: lr=4e-05, batch_size=8, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3366,0.414032,0.827971
2,0.3688,0.531941,0.83388
3,0.2805,0.541269,0.838477


Accuracy: 0.8384767174720764
Testing config: lr=4e-05, batch_size=8, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3528,0.404543,0.847669
2,0.4024,0.58271,0.824688
3,0.3236,0.669716,0.812869
4,0.1441,0.665291,0.826658


Accuracy: 0.8266578912734985
Testing config: lr=4e-05, batch_size=8, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4863,0.459741,0.826001
2,0.3951,0.511432,0.828628
3,0.3343,0.564715,0.832567
4,0.2279,0.626777,0.823375
5,0.252,0.6181,0.829941


Accuracy: 0.829940915107727
Testing config: lr=4e-05, batch_size=16, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3436,0.400326,0.826001
2,0.3075,0.415295,0.833224


Accuracy: 0.8332238793373108
Testing config: lr=4e-05, batch_size=16, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3579,0.380016,0.842416
2,0.3324,0.459814,0.83388
3,0.3554,0.471065,0.83782


Accuracy: 0.8378201127052307
Testing config: lr=4e-05, batch_size=16, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4269,0.467584,0.77807
2,0.3527,0.450112,0.83782
3,0.3702,0.448034,0.811556
4,0.2616,0.501343,0.83585


Accuracy: 0.8358502984046936
Testing config: lr=4e-05, batch_size=16, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3533,0.39549,0.815496
2,0.3388,0.493091,0.829941
3,0.3496,0.573358,0.810243
4,0.1877,0.608096,0.826658
5,0.1883,0.688791,0.824032


Accuracy: 0.8240315318107605
Testing config: lr=5e-05, batch_size=8, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3935,0.429772,0.830598
2,0.3825,0.444618,0.835194


Accuracy: 0.8351936936378479
Testing config: lr=5e-05, batch_size=8, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3328,0.4414,0.834537
2,0.3604,0.509477,0.83585
3,0.3004,0.528747,0.833224


Accuracy: 0.8332238793373108
Testing config: lr=5e-05, batch_size=8, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4615,0.461175,0.831254
2,0.3653,0.535543,0.827971
3,0.4753,0.580858,0.812213
4,0.1892,0.61505,0.829284


Accuracy: 0.8292843103408813
Testing config: lr=5e-05, batch_size=8, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4977,0.486944,0.807617
2,0.4146,0.511569,0.821405
3,0.4243,0.477815,0.831911
4,0.3022,0.552411,0.831911
5,0.2419,0.536125,0.831254


Accuracy: 0.8312541246414185
Testing config: lr=5e-05, batch_size=16, epochs=2


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3372,0.395529,0.835194
2,0.3091,0.397486,0.83782


Accuracy: 0.8378201127052307
Testing config: lr=5e-05, batch_size=16, epochs=3


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.4205,0.420178,0.821405
2,0.3545,0.448557,0.821405
3,0.3433,0.449238,0.83585


Accuracy: 0.8358502984046936
Testing config: lr=5e-05, batch_size=16, epochs=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3499,0.409006,0.830598
2,0.3572,0.461904,0.8109
3,0.3649,0.51954,0.832567
4,0.2574,0.49917,0.827314


Accuracy: 0.8273144960403442
Testing config: lr=5e-05, batch_size=16, epochs=5


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3458,0.416695,0.845043
2,0.3458,0.510324,0.816809
3,0.3657,0.629422,0.797768
4,0.2275,0.653553,0.814839
5,0.2171,0.676695,0.805647


Accuracy: 0.8056467771530151
Best Hyperparameters:
{'learning_rate': 1e-05, 'batch_size': 8, 'num_train_epochs': 2}
Best Validation Accuracy: 0.847012460231781


In [5]:
# 최적의 하이퍼파라미터로 모델 재학습
print("Training with Best Hyperparameters...")
best_learning_rate = best_params["learning_rate"]
best_batch_size = best_params["batch_size"]
best_num_epochs = best_params["num_train_epochs"]

# 최적의 하이퍼파라미터로 학습 설정
final_training_args = TrainingArguments(
    output_dir="./final_results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=best_num_epochs,
    per_device_train_batch_size=best_batch_size,
    per_device_eval_batch_size=best_batch_size,
    learning_rate=best_learning_rate,
    logging_dir="./final_logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=1,
    report_to="none"
)

# 최종 모델 초기화
final_model = DebertaV2ForSequenceClassification.from_pretrained(model_name, num_labels=2)

# 최종 Trainer 설정
final_trainer = Trainer(
    model=final_model,
    args=final_training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# 최종 모델 학습
final_trainer.train()

# 테스트 데이터 예측
print("Predicting on Test Data...")
test_encodings = preprocess_data(test_data["text"])
test_dataset = Dataset.from_dict(test_encodings)

test_predictions = final_trainer.predict(test_dataset)
test_data["target"] = torch.argmax(torch.tensor(test_predictions.predictions), axis=1).numpy()

# 제출 파일 생성
submission = test_data[["id", "target"]]
submission.to_csv("submission_v5_DeBERTa_GridSearch.csv", index=False)
print("Submission file saved as 'submission.csv'")

Training with Best Hyperparameters...


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  final_trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3858,0.390622,0.83979
2,0.365,0.440609,0.847012


Predicting on Test Data...


Submission file saved as 'submission.csv'
