<a href="https://colab.research.google.com/github/byeolbyeolbyeol/MainQuest07/blob/main/DKTC_ver2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive (Colab only); the paths configured below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip -q install -U transformers datasets accelerate evaluate sentencepiece scikit-learn

import gc
import inspect
import os
import random
import re

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback,
    set_seed
)

In [3]:
# ====== Paths ======
# All inputs and outputs sit under a single folder on the mounted Google Drive.
BASE_DIR = "/content/drive/MyDrive/Colab Notebooks/dktc"
TRAIN_PATH = f"{BASE_DIR}/train.csv"
NORMAL_PATH = f"{BASE_DIR}/normal_conversation.csv"
TEST_PATH = f"{BASE_DIR}/test.csv"
SUBMISSION_TEMPLATE_PATH = f"{BASE_DIR}/submission.csv"

# Root for the per-fold Trainer output directories; created here if it does not exist yet.
OUT_DIR = f"{BASE_DIR}/outputs"
os.makedirs(OUT_DIR, exist_ok=True)

# CSV file that append_log() adds one row to per experiment.
ABLATION_LOG_PATH = f"{BASE_DIR}/ablation_log.csv"

# ====== Label mapping (fixed by the competition) ======
label_map = {
    "협박 대화": 0,
    "갈취 대화": 1,
    "직장 내 괴롭힘 대화": 2,
    "기타 괴롭힘 대화": 3,
    "일반 대화": 4,
}
# Name -> id copy and id -> name inverse, both handed to the model when it is loaded.
label2id = dict(label_map)
id2label = {class_id: class_name for class_name, class_id in label_map.items()}

def clean_text(text: str) -> str:
    """
    Collapse every run of whitespace (including newlines) into a single space.

    Normalisation is deliberately minimal: heavier rewriting tends to hurt a
    pretrained language model. Anything that is not a ``str`` (e.g. NaN from
    pandas) becomes the empty string.
    """
    if isinstance(text, str):
        single_line = text.replace("\n", " ")
        return re.sub(r"\s+", " ", single_line).strip()
    return ""

def macro_f1(y_true, y_pred):
    """Return the macro-averaged F1 score of ``y_pred`` against ``y_true``."""
    score = f1_score(y_true, y_pred, average="macro")
    return score

def append_log(row: dict):
    """
    Append one experiment record to the ablation log CSV.

    The file is created with a header row the first time; afterwards rows are
    appended without a header.

    Args:
        row: mapping of column name -> value for a single experiment.
    """
    log_exists = os.path.exists(ABLATION_LOG_PATH)
    pd.DataFrame([row]).to_csv(
        ABLATION_LOG_PATH,
        mode="a" if log_exists else "w",
        header=not log_exists,
        index=False,
    )
    print(f"[LOG] appended -> {ABLATION_LOG_PATH}")

# Compatibility shim for the evaluation_strategy / eval_strategy rename across transformers versions.
def make_training_args(**kwargs):
    """
    Build a ``TrainingArguments`` that works with either spelling of the
    evaluation-strategy keyword.

    The keyword is called ``evaluation_strategy`` in older transformers releases
    and ``eval_strategy`` in newer ones. Rather than parsing the text of a
    ``TypeError`` after a failed attempt, the constructor's signature is
    inspected up front and the keyword is renamed only when the installed
    version does not accept the spelling that was passed but does accept the
    other one.

    Args:
        **kwargs: forwarded to ``TrainingArguments``.

    Returns:
        The constructed ``TrainingArguments`` instance.

    Raises:
        TypeError: propagated unchanged from ``TrainingArguments`` for any other
            unsupported keyword.
    """
    try:
        accepted = set(inspect.signature(TrainingArguments.__init__).parameters)
    except (TypeError, ValueError):
        # Signature not introspectable: pass everything through untouched.
        return TrainingArguments(**kwargs)

    for given, alternative in (
        ("evaluation_strategy", "eval_strategy"),
        ("eval_strategy", "evaluation_strategy"),
    ):
        if given in kwargs and given not in accepted and alternative in accepted:
            value = kwargs.pop(given)
            # Never overwrite a value the caller supplied under the other name.
            kwargs.setdefault(alternative, value)

    return TrainingArguments(**kwargs)

In [4]:
# Load the labelled training dialogues and the additional normal-conversation set.
train_df = pd.read_csv(TRAIN_PATH)
normal_df = pd.read_csv(NORMAL_PATH)

# Number the normal rows so their idx continues right after the largest train idx,
# then keep only the idx / class / conversation columns.
normal_df = normal_df.assign(
    idx=range(train_df["idx"].max() + 1,
              train_df["idx"].max() + 1 + len(normal_df))
)[["idx", "class", "conversation"]]

# Stack both sources, then derive the integer label and the cleaned text.
train_df = pd.concat([train_df, normal_df], ignore_index=True).assign(
    label=lambda df: df["class"].map(label_map),
    text=lambda df: df["conversation"].apply(clean_text),
)

# A NaN label means some class string is missing from label_map.
assert not train_df["label"].isna().any(), "라벨 매핑 실패 class 존재"

print("Train shape:", train_df.shape)
print(train_df["class"].value_counts())

test_df = pd.read_csv(TEST_PATH)
test_df = test_df.assign(text=test_df["text"].apply(clean_text))
print("Test shape:", test_df.shape)

# Submission template, ordered by idx with a fresh 0..n-1 index.
sub_template = pd.read_csv(SUBMISSION_TEMPLATE_PATH)
sub_template = sub_template.sort_values("idx").reset_index(drop=True)

Train shape: (5216, 5)
class
일반 대화          1266
기타 괴롭힘 대화      1094
갈취 대화           981
직장 내 괴롭힘 대화     979
협박 대화           896
Name: count, dtype: int64
Test shape: (500, 2)


In [5]:
def run_kfold_softvote(config: dict):
    """
    Fine-tune one classifier per stratified fold and soft-vote the folds on the test set.

    For every fold a fresh ``config["model_name"]`` checkpoint is fine-tuned on
    the training split and evaluated on the validation split once per epoch; the
    checkpoint with the best validation macro-F1 is restored at the end of
    training. Its softmax probabilities on the test set are accumulated, and the
    average over folds gives the final prediction. The prediction is merged into
    the submission template by ``idx`` and written to Drive, and one summary row
    is appended to the ablation log.

    Reads the notebook-level ``train_df``, ``test_df`` and ``sub_template`` frames.

    Example config:
    {
      "exp_name": "A_koelectra_256_3fold",
      "model_name": "monologg/koelectra-base-v3-discriminator",
      "folds": 3,
      "max_length": 256,
      "lr": 2e-5,
      "batch_size": 16,
      "epochs": 4,
      "weight_decay": 0.01,
      "warmup_ratio": 0.06,
      "seed": 42,
      "fp16": True
    }
    The optional keys ``normal_added_n``, ``threat_aug_type`` and
    ``threat_aug_mult`` are only copied into the log row.

    Returns:
        dict with ``fold_f1s`` (per-fold validation macro-F1), ``mean``, ``std``
        and ``submission_path``.

    Raises:
        AssertionError: if any ``idx`` in the submission template receives no
            prediction.
    """
    set_seed(config["seed"])
    random.seed(config["seed"])
    np.random.seed(config["seed"])
    torch.manual_seed(config["seed"])

    # Derive the class count from the mapping instead of repeating a literal.
    num_labels = len(label_map)
    # Hub ids contain "/", which would otherwise create an extra directory level.
    safe_model = config["model_name"].replace("/", "_")

    tokenizer = AutoTokenizer.from_pretrained(config["model_name"], use_fast=True)
    collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Newer transformers releases take the tokenizer as `processing_class` and
    # deprecate `tokenizer`; older ones only know `tokenizer`. Use whichever
    # keyword the installed Trainer accepts.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    tokenizer_kwarg = "processing_class" if "processing_class" in trainer_params else "tokenizer"

    def tok(batch):
        return tokenizer(batch["text"], truncation=True, max_length=config["max_length"])

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        pred = np.argmax(logits, axis=1)
        return {"macro_f1": macro_f1(labels, pred)}

    # Convert to Hugging Face datasets
    full_ds = Dataset.from_pandas(train_df[["text","label"]].reset_index(drop=True))
    test_ds = Dataset.from_pandas(test_df[["idx","text"]].reset_index(drop=True))
    # The test set is identical for every fold, so tokenise it once up front.
    te = test_ds.map(tok, batched=True, remove_columns=["idx","text"])

    y = train_df["label"].values
    skf = StratifiedKFold(n_splits=config["folds"], shuffle=True, random_state=config["seed"])

    test_proba_sum = np.zeros((len(test_df), num_labels), dtype=np.float64)
    fold_f1s = []

    for fold, (tr_idx, va_idx) in enumerate(skf.split(np.zeros(len(y)), y), start=1):
        print(f"\n===== Fold {fold}/{config['folds']} | {config['exp_name']} =====")

        tr = full_ds.select(tr_idx).map(tok, batched=True, remove_columns=["text"])
        va = full_ds.select(va_idx).map(tok, batched=True, remove_columns=["text"])

        model = AutoModelForSequenceClassification.from_pretrained(
            config["model_name"],
            num_labels=num_labels,
            id2label=id2label,
            label2id=label2id
        )

        fold_out = f"{OUT_DIR}/{safe_model}/{config['exp_name']}/fold_{fold}"
        os.makedirs(fold_out, exist_ok=True)

        args = make_training_args(
            output_dir=fold_out,
            learning_rate=config["lr"],
            per_device_train_batch_size=config["batch_size"],
            per_device_eval_batch_size=config["batch_size"],
            num_train_epochs=config["epochs"],
            weight_decay=config["weight_decay"],
            warmup_ratio=config["warmup_ratio"],
            evaluation_strategy="epoch",   # make_training_args adapts the name per version
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="eval_macro_f1",
            greater_is_better=True,
            logging_strategy="steps",
            save_total_limit=1,
            logging_steps=50,
            report_to="none",
            fp16=bool(config["fp16"]) and torch.cuda.is_available(),
            seed=config["seed"]
        )

        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=tr,
            eval_dataset=va,
            data_collator=collator,
            compute_metrics=compute_metrics,
            callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
            **{tokenizer_kwarg: tokenizer}
        )

        trainer.train()

        # Best checkpoint is already loaded (load_best_model_at_end), so this
        # reports the best epoch's validation score.
        ev = trainer.evaluate()
        fold_f1 = float(ev["eval_macro_f1"])
        fold_f1s.append(fold_f1)
        print(f"[Fold {fold}] val macro_f1 = {fold_f1:.5f}")

        pred = trainer.predict(te).predictions
        proba = torch.softmax(torch.tensor(pred), dim=1).numpy()
        test_proba_sum += proba

        # The trainer holds its own reference to the model; drop both before
        # collecting, otherwise the GPU memory stays allocated for the next fold.
        del trainer, model
        gc.collect()
        torch.cuda.empty_cache()

    # soft voting
    test_proba_avg = test_proba_sum / config["folds"]
    test_pred = np.argmax(test_proba_avg, axis=1)

    # Build the submission file (align on idx via merge)
    pred_df = pd.DataFrame({"idx": test_df["idx"].values, "target": test_pred})

    sub = sub_template.copy()
    if "target" in sub.columns:
        # Drop the template's own target so the merge does not produce target_x / target_y.
        sub = sub.drop(columns=["target"])

    sub = sub.merge(pred_df, on="idx", how="left")
    assert sub["target"].isna().sum() == 0, "idx 매칭 실패. test_df idx와 submission idx 확인!"

    save_path = f"{BASE_DIR}/submissions/{safe_model}/my_submission_{config['exp_name']}.csv"
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    sub.to_csv(save_path, index=False)
    print(f"\n[SAVED] submission -> {save_path}")

    # Write the log row
    mean_f1 = float(np.mean(fold_f1s))
    std_f1  = float(np.std(fold_f1s))

    row = {
        "exp_name": config["exp_name"],
        "model_name": config["model_name"],
        "folds": config["folds"],
        "max_length": config["max_length"],
        "lr": config["lr"],
        "batch_size": config["batch_size"],
        "epochs": config["epochs"],
        "weight_decay": config["weight_decay"],
        "warmup_ratio": config["warmup_ratio"],
        "seed": config["seed"],
        "fp16": int(bool(config["fp16"])),
        "normal_added_n": config.get("normal_added_n", ""),
        "threat_aug_type": config.get("threat_aug_type", ""),
        "threat_aug_mult": config.get("threat_aug_mult", ""),
        "val_macro_f1_mean": mean_f1,
        "val_macro_f1_std": std_f1,
        "lb_macro_f1": ""  # filled in by hand after submitting to the leaderboard
    }
    append_log(row)

    return {"fold_f1s": fold_f1s, "mean": mean_f1, "std": std_f1, "submission_path": save_path}

In [6]:
# Shared hyper-parameter set 1: max_length 256, 4 epochs, warm-up 0.1, weight decay 0.01.
base_config_1 = dict(
    max_length=256,
    lr=2e-5,
    batch_size=16,
    epochs=4,
    warmup_ratio=0.1,
    weight_decay=0.01,
    folds=3,
    seed=42,
    fp16=True,
    # Count of training rows with label id 4 ('일반 대화' in label_map); only recorded in the log.
    normal_added_n=int(train_df["label"].eq(4).sum()),
    threat_aug_type="none",
    threat_aug_mult=1.0,
)

In [7]:
# Shared hyper-parameter set 2: max_length 192, 3 epochs, no warm-up, no weight decay.
base_config_2 = dict(
    max_length=192,
    lr=2e-5,
    batch_size=16,
    epochs=3,
    warmup_ratio=0.0,
    weight_decay=0.0,
    folds=3,
    seed=42,
    fp16=True,
    # Count of training rows with label id 4 ('일반 대화' in label_map); only recorded in the log.
    normal_added_n=int(train_df["label"].eq(4).sum()),
    threat_aug_type="none",
    threat_aug_mult=1.0,
)

In [8]:
# KoELECTRA experiments: model identity first, then the shared hyper-parameters layered on top.
config_koelectra_1 = {
    "model_name": "monologg/koelectra-base-v3-discriminator",
    "exp_name": "koelectra_base_cfg1",
}
config_koelectra_1.update(base_config_1)

config_koelectra_2 = {
    "model_name": "monologg/koelectra-base-v3-discriminator",
    "exp_name": "koelectra_base_cfg2",
}
config_koelectra_2.update(base_config_2)

In [9]:
# KLUE RoBERTa experiments: model identity first, then the shared hyper-parameters layered on top.
config_roberta_1 = {
    "model_name": "klue/roberta-base",
    "exp_name": "roberta_base_cfg1",
}
config_roberta_1.update(base_config_1)

config_roberta_2 = {
    "model_name": "klue/roberta-base",
    "exp_name": "roberta_base_cfg2",
}
config_roberta_2.update(base_config_2)

In [21]:
# Experiment A1: KoELECTRA with base_config_1. The trailing bare name displays the returned summary dict.
res_A1 = run_kfold_softvote(config_koelectra_1)
res_A1


===== Fold 1/3 | koelectra_base_cfg1 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/452M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


model.safetensors:   0%|          | 0.00/452M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Macro F1
1,0.7529,0.54108,0.85453
2,0.3485,0.299267,0.907985
3,0.2185,0.29056,0.914836
4,0.1408,0.287034,0.917546


[Fold 1] val macro_f1 = 0.91755

===== Fold 2/3 | koelectra_base_cfg1 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.7364,0.56785,0.825396
2,0.3169,0.283042,0.91361
3,0.2143,0.279155,0.915947
4,0.1007,0.272436,0.92278


[Fold 2] val macro_f1 = 0.92278

===== Fold 3/3 | koelectra_base_cfg1 =====


Map:   0%|          | 0/3478 [00:00<?, ? examples/s]

Map:   0%|          | 0/1738 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.7189,0.483013,0.887588
2,0.3438,0.275247,0.919018
3,0.2196,0.286958,0.914296
4,0.1369,0.286454,0.915457


[Fold 3] val macro_f1 = 0.91902

[SAVED] submission -> /content/drive/MyDrive/Colab Notebooks/dktc/submissions/monologg_koelectra-base-v3-discriminator/my_submission_koelectra_base_cfg1.csv
[LOG] appended -> /content/drive/MyDrive/Colab Notebooks/dktc/ablation_log.csv


{'fold_f1s': [0.9175456679357324, 0.9227800730168161, 0.9190181533580104],
 'mean': 0.9197812981035197,
 'std': 0.0022040177805040992,
 'submission_path': '/content/drive/MyDrive/Colab Notebooks/dktc/submissions/monologg_koelectra-base-v3-discriminator/my_submission_koelectra_base_cfg1.csv'}

In [10]:
# Experiment A2: KoELECTRA with base_config_2. The trailing bare name displays the returned summary dict.
res_A2 = run_kfold_softvote(config_koelectra_2)
res_A2

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/467 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]


===== Fold 1/3 | koelectra_base_cfg2 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

pytorch_model.bin:   0%|          | 0.00/452M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/452M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.6275,0.49165,0.859645
2,0.3276,0.303112,0.909045
3,0.2143,0.287817,0.915083


[Fold 1] val macro_f1 = 0.91508

===== Fold 2/3 | koelectra_base_cfg2 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.66,0.568092,0.817623
2,0.3183,0.30747,0.910104
3,0.2322,0.293889,0.908512


[Fold 2] val macro_f1 = 0.91010

===== Fold 3/3 | koelectra_base_cfg2 =====


Map:   0%|          | 0/3478 [00:00<?, ? examples/s]

Map:   0%|          | 0/1738 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.6788,0.482443,0.877593
2,0.3537,0.298134,0.910375
3,0.2371,0.276597,0.916367


[Fold 3] val macro_f1 = 0.91637

[SAVED] submission -> /content/drive/MyDrive/Colab Notebooks/dktc/submissions/monologg_koelectra-base-v3-discriminator/my_submission_koelectra_base_cfg2.csv
[LOG] appended -> /content/drive/MyDrive/Colab Notebooks/dktc/ablation_log.csv


{'fold_f1s': [0.915082629632737, 0.9101042382621356, 0.9163674853789647],
 'mean': 0.9138514510912792,
 'std': 0.0027011005858509967,
 'submission_path': '/content/drive/MyDrive/Colab Notebooks/dktc/submissions/monologg_koelectra-base-v3-discriminator/my_submission_koelectra_base_cfg2.csv'}

In [11]:
# Experiment B1: KLUE RoBERTa with base_config_1. The trailing bare name displays the returned summary dict.
res_B1 = run_kfold_softvote(config_roberta_1)
res_B1

tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]


===== Fold 1/3 | roberta_base_cfg1 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/546 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3147,0.28402,0.910021
2,0.2401,0.293631,0.911426
3,0.1306,0.338043,0.91912
4,0.0577,0.331468,0.923138


[Fold 1] val macro_f1 = 0.92314

===== Fold 2/3 | roberta_base_cfg1 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3302,0.33823,0.886538
2,0.2184,0.295484,0.910808
3,0.1436,0.300127,0.920787
4,0.0836,0.285003,0.932115


[Fold 2] val macro_f1 = 0.93211

===== Fold 3/3 | roberta_base_cfg1 =====


Map:   0%|          | 0/3478 [00:00<?, ? examples/s]

Map:   0%|          | 0/1738 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3579,0.253353,0.91524
2,0.2127,0.232215,0.926448
3,0.1315,0.299118,0.921633
4,0.0542,0.315678,0.927587


[Fold 3] val macro_f1 = 0.92759

[SAVED] submission -> /content/drive/MyDrive/Colab Notebooks/dktc/submissions/klue_roberta-base/my_submission_roberta_base_cfg1.csv
[LOG] appended -> /content/drive/MyDrive/Colab Notebooks/dktc/ablation_log.csv


{'fold_f1s': [0.9231376745436164, 0.9321146716239277, 0.9275871039759579],
 'mean': 0.9276131500478341,
 'std': 0.003664889988651277,
 'submission_path': '/content/drive/MyDrive/Colab Notebooks/dktc/submissions/klue_roberta-base/my_submission_roberta_base_cfg1.csv'}

In [12]:
# Experiment B2: KLUE RoBERTa with base_config_2. The trailing bare name displays the returned summary dict.
res_B2 = run_kfold_softvote(config_roberta_2)
res_B2


===== Fold 1/3 | roberta_base_cfg2 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3081,0.288054,0.908478
2,0.2236,0.315764,0.909438
3,0.1216,0.305311,0.921006


[Fold 1] val macro_f1 = 0.92101

===== Fold 2/3 | roberta_base_cfg2 =====


Map:   0%|          | 0/3477 [00:00<?, ? examples/s]

Map:   0%|          | 0/1739 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.327,0.317139,0.891488
2,0.1976,0.257488,0.920903
3,0.1073,0.250246,0.9283


[Fold 2] val macro_f1 = 0.92830

===== Fold 3/3 | roberta_base_cfg2 =====


Map:   0%|          | 0/3478 [00:00<?, ? examples/s]

Map:   0%|          | 0/1738 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Macro F1
1,0.3357,0.273337,0.910719
2,0.2109,0.268854,0.917749
3,0.1157,0.291107,0.917232


[Fold 3] val macro_f1 = 0.91775

[SAVED] submission -> /content/drive/MyDrive/Colab Notebooks/dktc/submissions/klue_roberta-base/my_submission_roberta_base_cfg2.csv
[LOG] appended -> /content/drive/MyDrive/Colab Notebooks/dktc/ablation_log.csv


{'fold_f1s': [0.9210058760564834, 0.9283001197545147, 0.9177492246126292],
 'mean': 0.9223517401412091,
 'std': 0.004411262833148904,
 'submission_path': '/content/drive/MyDrive/Colab Notebooks/dktc/submissions/klue_roberta-base/my_submission_roberta_base_cfg2.csv'}