In [1]:
import os

os.chdir("..")

In [2]:
import datetime
from pathlib import Path

import evaluate
import numpy as np
import pandas as pd
from loguru import logger
from tqdm.auto import tqdm
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
)
from engine.data import prepare_data_for_fine_tuning
from engine.replace_persons import replace_ner

tqdm.pandas()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
DATA_PATH = Path("data/isot")
MODEL = "roberta-base"

In [4]:
train = pd.read_csv(DATA_PATH / "train.csv")
train["text_masked"] = train["text"].progress_apply(replace_ner)
valid = pd.read_csv(DATA_PATH / "valid.csv")
valid["text_masked"] = valid["text"].progress_apply(replace_ner)
test = pd.read_csv(DATA_PATH / "test.csv")
test["text_masked"] = test["text"].progress_apply(replace_ner)

100%|██████████| 31428/31428 [01:53<00:00, 277.13it/s]
100%|██████████| 4490/4490 [00:16<00:00, 279.94it/s]
100%|██████████| 8980/8980 [00:32<00:00, 276.17it/s]


In [5]:
metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
config = AutoConfig.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def get_metric_of_finetuning(
    train_df: pd.DataFrame, valid_df: pd.DataFrame, test_df: pd.DataFrame, seed: int
):
    print("STARTING TRAINING")
    print(datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
    train_dataset = prepare_data_for_fine_tuning(train_df, tokenizer)
    valid_dataset = prepare_data_for_fine_tuning(valid_df, tokenizer)
    test_dataset = prepare_data_for_fine_tuning(test_df, tokenizer)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL, config=config)
    timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    training_args = TrainingArguments(
        output_dir=f"output/{DATA_PATH.stem}/{MODEL}/{timestamp}",
        num_train_epochs=10,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=128,
        eval_strategy="epoch",
        logging_strategy="steps",
        logging_steps=10,
        learning_rate=5e-5,
        weight_decay=0.01,
        warmup_steps=500,
        save_strategy="epoch",
        load_best_model_at_end=True,
        save_total_limit=2,
        report_to="tensorboard",
        seed=seed,
        greater_is_better=True,
        metric_for_best_model="eval_accuracy",
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2, early_stopping_threshold=0.01)],
    )
    trainer.train()
    return trainer.evaluate(test_dataset)["eval_accuracy"]

In [10]:
basic_accuracies = [get_metric_of_finetuning(train, valid, test, seed=i) for i in range(3)]
renaming_mapper = {"text_masked": "text"}
masked_accuracies = [
    get_metric_of_finetuning(
        train.rename(renaming_mapper),
        valid.rename(renaming_mapper),
        test.rename(renaming_mapper),
        seed=i,
    )
    for i in range(3)
]

STARTING TRAINING
2024_12_08_20_49_02


Map: 100%|██████████| 31428/31428 [00:04<00:00, 6561.13 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 6444.85 examples/s]
Map: 100%|██████████| 8980/8980 [00:01<00:00, 5782.03 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0003,0.007595,0.999109
2,0.0006,0.00529,0.999332
3,0.0004,0.005472,0.999332


STARTING TRAINING
2024_12_08_21_40_59


Map: 100%|██████████| 31428/31428 [00:06<00:00, 5051.88 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 5317.48 examples/s]
Map: 100%|██████████| 8980/8980 [00:04<00:00, 2097.03 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.063,0.070213,0.985746
2,0.0007,0.005692,0.999332
3,0.0008,0.011313,0.998441
4,0.0424,0.005104,0.999332


STARTING TRAINING
2024_12_08_22_47_28


Map: 100%|██████████| 31428/31428 [00:05<00:00, 5551.60 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 5584.39 examples/s]
Map: 100%|██████████| 8980/8980 [00:01<00:00, 5845.50 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6951,0.717313,0.47706
2,0.6982,0.692624,0.52294
3,0.6309,0.920664,0.47706
4,0.5738,0.934932,0.47706


STARTING TRAINING
2024_12_08_23_55_06


Map: 100%|██████████| 31428/31428 [00:06<00:00, 4532.54 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 5368.24 examples/s]
Map: 100%|██████████| 8980/8980 [00:01<00:00, 5297.45 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0143,0.13016,0.970601
2,0.6054,0.857782,0.47706
3,0.6036,0.968171,0.47706


STARTING TRAINING
2024_12_09_00_44_29


Map: 100%|██████████| 31428/31428 [00:05<00:00, 6057.01 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 6306.21 examples/s]
Map: 100%|██████████| 8980/8980 [00:01<00:00, 6617.54 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5053,0.474952,0.743875
2,0.3309,0.326153,0.866147
3,0.6741,0.856251,0.47706
4,0.6355,0.859647,0.47706


STARTING TRAINING
2024_12_09_01_48_20


Map: 100%|██████████| 31428/31428 [00:05<00:00, 6052.61 examples/s]
Map: 100%|██████████| 4490/4490 [00:00<00:00, 6358.37 examples/s]
Map: 100%|██████████| 8980/8980 [00:01<00:00, 6383.16 examples/s]


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2113,0.195565,0.936971
2,0.1447,0.151026,0.95902
3,0.1194,0.123595,0.967929
4,0.6826,0.778821,0.47706


In [None]:
print(basic_accuracies)
print(masked_accuracies)

# LIAR
# [0.6641509433962264, 0.6515723270440251, 0.650314465408805]
# [0.6528301886792452, 0.6528301886792452, 0.6855345911949685]

# coaid
# [0.9798534798534798, 0.9716117216117216, 0.9844322344322345]
# [0.9816849816849816, 0.9844322344322345, 0.9798534798534798]

# isot
# [0.999332, 0.999332, 0.522940]
# [0.970601, 0.866147, 0.967929]

[0.9798534798534798, 0.9716117216117216, 0.9844322344322345]
[0.9816849816849816, 0.9844322344322345, 0.9798534798534798]


In [1]:
import numpy as np

In [None]:
print(
    f"LIAR unmasked = {np.mean([0.6641509433962264, 0.6515723270440251, 0.650314465408805]):.3f}, {np.std([0.6641509433962264, 0.6515723270440251, 0.650314465408805]):.3f}"
)
print(
    f"LIAR masked = {np.mean([0.6528301886792452, 0.6528301886792452, 0.6855345911949685]):.3f}, {np.std([0.6528301886792452, 0.6528301886792452, 0.6855345911949685]):.3f}"
)

print(
    f"COAID unmasked = {np.mean([0.9798534798534798, 0.9716117216117216, 0.9844322344322345]):.3f}, {np.std([0.9798534798534798, 0.9716117216117216, 0.9844322344322345]):.3f}"
)
print(
    f"COAID masked = {np.mean([0.9816849816849816, 0.9844322344322345, 0.9798534798534798]):.3f}, {np.std([0.9816849816849816, 0.9844322344322345, 0.9798534798534798]):.3f}"
)

print(
    f"ISOT unmasked = {np.mean([0.999332, 0.999332, 0.522940]):.3f}, {np.std([0.999332, 0.999332, 0.522940]):.3f}"
)
print(
    f"ISOT masked = {np.mean([0.970601, 0.866147, 0.967929]):.3f}, {np.std([0.970601, 0.866147, 0.967929]):.3f}"
)

LIAR unmasked = 0.655, 0.006
LIAR masked = 0.664, 0.015
COAID unmasked = 0.979, 0.005
COAID masked = 0.982, 0.002
ISOT unmasked = 0.841, 0.225
ISOT masked = 0.935, 0.049
