# Inference and Batch Evaluation

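Load a fine-tuned checkpoint directory, run predictions on a test CSV (columns `text` and `label`), and report weighted F1 and accuracy. The confusion matrix plot and the metrics are saved to `outputs_eval/`.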

In [None]:

# !pip install -U transformers datasets scikit-learn pandas matplotlib --quiet
import os, pandas as pd, numpy as np, matplotlib.pyplot as plt
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, Trainer, TrainingArguments
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix

CHECKPOINT_DIR = "/content/outputs_distilbert_lora"   # change to your best checkpoint
TEST_CSV = "/content/test.csv"                        # CSV with text,label

df = pd.read_csv(TEST_CSV)

# Fail early with a readable message if the CSV does not have the expected schema.
missing_cols = [col for col in ("text", "label") if col not in df.columns]
if missing_cols:
    raise ValueError(f"{TEST_CSV} is missing required column(s): {missing_cols}")

# Rows with a missing text or label cannot be scored: astype(str) below would
# turn a NaN label into the bogus class "nan", and a missing text cannot be
# tokenized. Drop them and say how many were removed. The index is reset so the
# frame keeps a plain 0..n-1 index after the drop.
n_rows = len(df)
df = df.dropna(subset=["text", "label"]).reset_index(drop=True)
if len(df) != n_rows:
    print(f"Dropped {n_rows - len(df)} row(s) with missing text/label")

# Class ids are assigned by sorting the distinct label strings found in this CSV.
# NOTE(review): the mapping is rebuilt from the test data, so it only matches the
# ids used during training if training saw the same label set in the same
# order - confirm against the training notebook.
labels = sorted(df["label"].astype(str).unique().tolist())
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = {idx: name for name, idx in label2id.items()}
df["label_id"] = df["label"].astype(str).map(label2id)

# Wrap only the columns used for evaluation in a Dataset, and load the
# tokenizer from the checkpoint directory.
eval_columns = ["text", "label_id"]
ds = Dataset.from_pandas(df[eval_columns])
tok = AutoTokenizer.from_pretrained(CHECKPOINT_DIR)
def preprocess(ex):
    """Tokenize the ``text`` entries of ``ex`` and attach their class ids.

    ``ex`` is indexed by column name. ``ex["text"]`` is passed to the
    module-level tokenizer ``tok`` with truncation to at most 256 tokens, and
    ``ex["label_id"]`` is stored under the ``labels`` key of the returned
    encoding.
    """
    encoded = tok(ex["text"], truncation=True, max_length=256)
    encoded["labels"] = ex["label_id"]
    return encoded
# Tokenize the dataset in batches; the original columns are removed so only
# the fields returned by preprocess() remain.
ds = ds.map(
    preprocess,
    batched=True,
    remove_columns=ds.column_names,
)

# Collator that pads the examples of each batch using the tokenizer.
collator = DataCollatorWithPadding(tokenizer=tok)

# Load the classifier from the checkpoint, passing the label mapping built
# from the test CSV.
model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINT_DIR,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

# The Trainer is used here only as a batched prediction loop.
# report_to="none" keeps logging integrations from being set up for this
# one-off evaluation run.
args = TrainingArguments(
    "/content/_tmp_eval",
    per_device_eval_batch_size=32,
    report_to="none",
)

# The explicit data_collator already takes care of padding, so the tokenizer is
# not handed to the Trainer: its `tokenizer=` keyword is deprecated in recent
# transformers releases and is not needed for predict().
trainer = Trainer(model=model, args=args, data_collator=collator)

# Predicted class = index of the largest logit for each example.
logits = trainer.predict(ds).predictions
preds = np.argmax(logits, axis=-1)

# Materialise the reference ids once and reuse them for both metrics.
y_true = np.asarray(ds["labels"])
f1 = f1_score(y_true, preds, average="weighted")
acc = accuracy_score(y_true, preds)
print("F1:", f1, "Acc:", acc)

# Confusion matrix over every class id, including classes that were never
# predicted. Passing explicit labels fixes the row/column order to the class ids.
cm = confusion_matrix(ds["labels"], preds, labels=list(range(len(labels))))

fig, ax = plt.subplots()
im = ax.imshow(cm, interpolation='nearest')
ax.set_title('Confusion Matrix')
fig.colorbar(im, ax=ax)
ax.set_ylabel('True')
ax.set_xlabel('Pred')

# Put the class names on both axes; labels[i] is the class with id i.
tick_positions = list(range(len(labels)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_yticks(tick_positions)
ax.set_yticklabels(labels)

# tight_layout has to run after the axis labels and ticks exist; called
# earlier, it does not account for them and they can be clipped in the PNG.
fig.tight_layout()

os.makedirs("outputs_eval", exist_ok=True)
fig.savefig("outputs_eval/confusion_matrix.png", dpi=150)
with open("outputs_eval/metrics.txt", "w", encoding="utf-8") as f:
    f.write(f"F1={f1}\nACC={acc}\n")
print("Saved to outputs_eval")
