In [None]:
import evaluate
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    EvalPrediction,
    Trainer,
    TrainingArguments,
    set_seed,
)

In [None]:
# Target classes; a label's position in this list is its integer class id.
labels = ["pants-fire", "false", "barely-true", "half-true", "mostly-true", "true"]

# Seed the RNGs *before* the model is built. `ignore_mismatched_sizes=True`
# lets `from_pretrained` re-create weights whose shape does not match the
# checkpoint (presumably the classification head, since `num_labels` is
# overridden here), and that initialization is random. Without a seed set at
# this point, two fresh runs of the notebook start from different weights.
SEED = 42
set_seed(SEED)

# Load tokenizer and model
model_name = "facebook/bart-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    # Store the human-readable class names in the model config so that saved
    # checkpoints report these labels instead of generic placeholders.
    id2label=dict(enumerate(labels)),
    label2id={label: i for i, label in enumerate(labels)},
    ignore_mismatched_sizes=True,
)

In [None]:
# Load training data and validation data
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for i, label in enumerate(labels)}

# Column names for the header-less TSV files, in file order.
TSV_COLUMNS = [
    "id", "label", "text", "subject", "speaker", "speaker_job_title",
    "state", "party_affiliation", "barely_true_counts", "false_counts",
    "half_true_counts", "mostly_true_counts", "pants_fire_counts", "context",
]


def load_split(path):
    """Read one header-less TSV split and encode its labels as integer ids.

    Args:
        path: Location of the tab-separated file to read.

    Returns:
        A DataFrame with the columns in ``TSV_COLUMNS`` whose ``label``
        column holds the integer ids defined by ``label2id``.

    Raises:
        ValueError: If a row's label is missing or not a key of ``label2id``.
            Without this check such rows become NaN after ``.map`` and only
            fail later, inside ``astype(int)``, with an opaque cast error.
    """
    df = pd.read_csv(path, sep="\t", header=None, names=TSV_COLUMNS)

    label_ids = df["label"].map(label2id)
    unmapped = label_ids.isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "label"].astype(str).unique())
        raise ValueError(
            f"{path}: {int(unmapped.sum())} row(s) have labels not in label2id: {unknown}"
        )

    df["label"] = label_ids.astype(int)
    return df


train_df = load_split("./data/train.tsv")
val_df = load_split("./data/valid.tsv")

train_ds = Dataset.from_pandas(train_df)
val_ds = Dataset.from_pandas(val_df)

In [None]:
# Preprocess data
def preprocess(batch):
    """Tokenize the ``text`` field of a batch of examples.

    Truncation is enabled; no padding is applied here, it is left to the
    data collator created at the end of this cell.
    """
    encoded = tokenizer(batch["text"], truncation=True)
    return encoded


# Tokenize both splits in batched mode (train first, then validation).
train_ds, val_ds = (
    split.map(preprocess, batched=True) for split in (train_ds, val_ds)
)

# Collator that pads the examples of each batch using the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


In [None]:
# Metrics
# Accuracy metric object loaded through the `evaluate` library; `compute_metrics`
# below calls its `.compute(predictions=..., references=...)` method.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute accuracy for one set of evaluation outputs.

    Args:
        eval_pred: Object exposing ``predictions`` (an array of per-class
            scores, or a tuple whose first element is that array) and
            ``label_ids`` (the reference class ids).

    Returns:
        Whatever the module-level ``accuracy`` metric returns from
        ``compute(predictions=..., references=...)``.
    """
    scores = eval_pred.predictions
    # When several outputs are bundled in a tuple, only the first one is scored.
    scores = scores[0] if isinstance(scores, tuple) else scores
    predicted_ids = scores.argmax(axis=-1)
    return accuracy.compute(
        predictions=predicted_ids,
        references=eval_pred.label_ids,
    )


In [None]:
def keep_classification_logits(logits, labels):
    """Reduce the model outputs to the class logits before the Trainer caches them.

    The metrics code in this notebook only ever uses the first element when
    predictions arrive as a tuple. Dropping the remaining elements at each
    evaluation step avoids accumulating them over the whole evaluation set.

    Args:
        logits: Model outputs for one evaluation step; either a single
            tensor or a tuple whose first element is the class logits.
        labels: Reference labels for the step (unused; part of the callback
            signature expected by ``Trainer``).

    Returns:
        The class logits tensor.
    """
    return logits[0] if isinstance(logits, tuple) else logits


training_args = TrainingArguments(
    output_dir = "./bart_mnli_finetuned_1",
    # Evaluate and checkpoint on the same schedule so that
    # `load_best_model_at_end` can pair each checkpoint with a metric value.
    eval_strategy = "epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    load_best_model_at_end=True,
    # Matches the "accuracy" metric reported by `compute_metrics`.
    metric_for_best_model="accuracy",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # Keep only the class logits during evaluation; see the helper above.
    preprocess_logits_for_metrics=keep_classification_logits,
)

In [None]:
# Fine-tune, then run one more evaluation pass on the validation set.
# `load_best_model_at_end=True` is set in the training arguments, so this
# final pass is expected to score the best checkpoint rather than the last one.
trainer.train()
final_metrics = trainer.evaluate()
print(final_metrics)

In [None]:
import matplotlib.pyplot as plt

# 1. Grab the log history
logs = trainer.state.log_history

# 2. Extract one eval accuracy per epoch.
#    A standalone `trainer.evaluate()` call made after training also logs an
#    `eval_accuracy` entry stamped with the final epoch, which would put two
#    points at the last x position. Keeping only the first entry seen for each
#    epoch leaves exactly one point per epoch (the in-training evaluation).
accuracy_by_epoch = {}
for entry in logs:
    if "eval_accuracy" in entry:
        accuracy_by_epoch.setdefault(entry["epoch"], entry["eval_accuracy"])

epochs = list(accuracy_by_epoch)
accuracies = list(accuracy_by_epoch.values())

# 3. Plot
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(epochs, accuracies, marker="o", linestyle="-", color="tab:blue")
ax.set_title("Validation Accuracy per Epoch")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_xticks(epochs)        # ensure we label each epoch
ax.set_ylim(0, 1.0)          # accuracy ranges from 0 to 1
ax.grid(alpha=0.3)
fig.tight_layout()
plt.show()