In [None]:
import time, pathlib, numpy as np, pandas as pd, torch
from datasets import Dataset
from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
                          TrainingArguments, Trainer)
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

# Directory holding the already-split PhraseBank CSV files read by this cell.
DATA_DIR = pathlib.Path("data/processed")
train_df = pd.read_csv(DATA_DIR / "phrasebank_train.csv")
test_df = pd.read_csv(DATA_DIR / "phrasebank_test.csv")

# Class-index <-> class-name lookups: passed to the model config and reused
# later to turn predicted ids back into readable labels.
id2label = {0: 'negative', 1: 'neutral', 2: 'positive'}
label2id = {name: idx for idx, name in id2label.items()}

# Tokenizer loaded from the same checkpoint name as the model below.
tok = AutoTokenizer.from_pretrained("ProsusAI/finbert")


def tokenize(batch):
    """Tokenize the ``"sentence"`` entries of *batch*.

    Every sequence is truncated and padded to exactly 64 tokens, so all
    encoded examples have the same length.

    Args:
        batch: Mapping with a ``"sentence"`` key (as passed by
            ``Dataset.map(..., batched=True)``).

    Returns:
        Whatever the tokenizer call returns for those sentences.
    """
    sentences = batch["sentence"]
    return tok(
        sentences,
        max_length=64,
        padding="max_length",
        truncation=True,
    )

def _to_tokenized_dataset(frame):
    """Turn a dataframe with ``sentence``/``label`` columns into a tokenized Dataset.

    The ``label`` column is renamed to ``labels`` before conversion, and the
    resulting dataset is run through ``tokenize`` in batched mode.
    """
    selected = frame[['sentence', 'label']].rename(columns={'label': 'labels'})
    return Dataset.from_pandas(selected).map(tokenize, batched=True)


train_ds = _to_tokenized_dataset(train_df)
test_ds = _to_tokenized_dataset(test_df)

# Three-way sequence classifier initialised from the FinBERT checkpoint, with
# this file's label maps stored on its config.
# NOTE(review): the published checkpoint's classification head may order its
# classes differently from id2label — confirm against the checkpoint config;
# if so, fine-tuning starts from a permuted head.
model = AutoModelForSequenceClassification.from_pretrained(
    "ProsusAI/finbert",
    num_labels=3,
    label2id=label2id,
    id2label=id2label,
)

# Hyper-parameters for the fine-tuning run.
args = TrainingArguments(
    output_dir="finbert-ft",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    num_train_epochs=3,
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy` — confirm against the installed version.
    evaluation_strategy="epoch",
    # load_best_model_at_end requires checkpoints to be saved on the same
    # schedule as evaluation; the default save strategy is step-based, which
    # makes TrainingArguments raise a ValueError when combined with
    # per-epoch evaluation.
    save_strategy="epoch",
    load_best_model_at_end=True,
    # "f1" is the key returned by compute_metrics; higher is better.
    metric_for_best_model="f1",
    greater_is_better=True,
    save_total_limit=1,
    logging_steps=50,
)

def compute_metrics(eval_pred):
    """Score one evaluation pass.

    Args:
        eval_pred: Pair that unpacks into ``(logits, labels)``.

    Returns:
        Dict with ``"f1"`` (macro-averaged F1) and ``"acc"`` (accuracy),
        both computed from the arg-max over the last logits axis.
    """
    scores, gold = eval_pred
    predicted = scores.argmax(-1)
    macro = f1_score(gold, predicted, average="macro")
    accuracy = accuracy_score(gold, predicted)
    return {"f1": macro, "acc": accuracy}

# Bundle model, hyper-parameters, datasets and the metric callback.
# NOTE(review): eval_dataset is the test split and `args` asks for the best
# checkpoint by "f1", so checkpoint selection sees the same data that is
# reported on below — consider a separate validation split.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    compute_metrics=compute_metrics,
)

# Wall-clock the whole fine-tuning call; reported in minutes further down.
t0 = time.perf_counter()
trainer.train()
train_time_s = time.perf_counter() - t0

# Run the fine-tuned model over the test set and take the arg-max class id.
prediction_output = trainer.predict(test_ds)
pred_logits = prediction_output.predictions
pred_ids = pred_logits.argmax(axis=-1)
pred_labels = pd.Series(pred_ids).map(id2label)

# Save the test rows with an extra `pred` column holding the predicted
# class name.
out = test_df.assign(pred=pred_labels.to_numpy())
out.to_csv(DATA_DIR / "finbert_preds.csv", index=False)

# Headline numbers and confusion matrix, scored against the integer
# `label` column of the test dataframe.
y_true = test_df['label']
macro_f1 = f1_score(y_true, pred_ids, average='macro')
acc = accuracy_score(y_true, pred_ids)
print(f"FinBERT  macro-F1={macro_f1:.3f}  acc={acc:.3f}  train_time={train_time_s/60:.1f} min")
print(confusion_matrix(y_true, pred_ids, labels=[0, 1, 2]))
