In [123]:
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
import pandas as pd
import os
from sklearn.metrics import classification_report, accuracy_score

In [124]:
# Label names listed in class-index order; run_evaluation looks a name up by
# the argmax index of the model's logits, so position here must equal class id.
_LABEL_NAMES = ['Drastic Fall', 'Drastic Rise', 'Fall', 'Rise', 'Stable']

# Class index -> label name.
ID2LABEL = dict(enumerate(_LABEL_NAMES))

# Label name -> class index (inverse of ID2LABEL, same insertion order).
LABEL2ID = {name: idx for idx, name in ID2LABEL.items()}

In [133]:
def _predict_labels(model, tokenizer, texts, device, batch_size, max_length):
    """Return the predicted label name (via ID2LABEL) for each text, in order.

    Texts are tokenized and scored `batch_size` at a time; padding brings the
    sequences of one batch to a common length so they fit in a single tensor.
    """
    predictions = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_length,
            ).to(device)
            logits = model(**inputs).logits
            # argmax over the class dimension gives one class id per text.
            class_ids = torch.argmax(logits, dim=1).tolist()
            predictions.extend(ID2LABEL[class_id] for class_id in class_ids)
    return predictions


def run_evaluation(model_path, dataset_path,
                   output_path='study_dataset_with_predictions.csv',
                   batch_size=32, max_length=256):
    """Score a saved sequence-classification model on a labelled CSV dataset.

    The dataset's 'Input_Text' column is classified, predictions are mapped to
    label names through ID2LABEL, the frame (with added 'AI_Prediction' and
    'Is_Correct' columns) is written to `output_path`, and a classification
    report plus overall accuracy are printed.

    Args:
        model_path: Directory passed to `from_pretrained` for both the model
            and the tokenizer (loaded with `local_files_only=True`).
        dataset_path: CSV file containing 'Input_Text' and 'Ground_Truth'
            columns.
        output_path: Where the dataset with predictions is saved as CSV.
            Defaults to the file name the function has always written.
        batch_size: Number of texts sent through the model per forward pass.
        max_length: Token limit at which inputs are truncated.

    Returns:
        None. Results are printed and written to `output_path`.

    Raises:
        KeyError: If a required column is missing from the dataset.
        ValueError: If `batch_size` is not positive, or a required column
            contains missing values.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Read and validate the data before loading the model so that a bad
    # dataset fails immediately instead of after an expensive model load.
    df = pd.read_csv(dataset_path)
    required_columns = ['Input_Text', 'Ground_Truth']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(
            f"Dataset {dataset_path!r} is missing required column(s): {missing_columns}"
        )
    null_counts = df[required_columns].isna().sum()
    if null_counts.any():
        raise ValueError(
            "Dataset contains missing values in required columns: "
            f"{null_counts[null_counts > 0].to_dict()}"
        )

    input_texts = df['Input_Text'].tolist()
    ground_truth_labels = df['Ground_Truth'].tolist()

    model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
    model.eval()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    print("Running predictions on the dataset...")
    ai_predictions = _predict_labels(
        model, tokenizer, input_texts, device, batch_size, max_length
    )
    print("Prediction complete.")

    df['AI_Prediction'] = ai_predictions
    df['Is_Correct'] = (df['Ground_Truth'] == df['AI_Prediction'])
    df.to_csv(output_path, index=False)

    # --- Generate and Print Report ---
    print("\n" + "="*50)
    print("          Final Classification Report")
    print("="*50 + "\n")

    # Pass every known label explicitly so classes absent from this dataset
    # still get a row in the report.
    all_labels = list(LABEL2ID.keys())

    report = classification_report(
        ground_truth_labels,
        ai_predictions,
        labels=all_labels,
        target_names=all_labels,
        zero_division=0
    )

    accuracy = accuracy_score(ground_truth_labels, ai_predictions)

    print(report)
    print("-" * 50)
    print(f"AI-Only Performance (Accuracy): {accuracy*100:.2f}%")
    print("="*50)

In [134]:
# Model checkpoint directory and the CSV dataset handed to run_evaluation.
saved_model_directory = '/results/checkpoint-10350'
study_dataset_path = '/study_dataset.csv'

# Keyword arguments keep the model/dataset pairing explicit at the call site.
run_evaluation(
    model_path=saved_model_directory,
    dataset_path=study_dataset_path,
)

Running predictions on the dataset...
Prediction complete.

          Final Classification Report

              precision    recall  f1-score   support

Drastic Fall       0.83      0.71      0.77         7
Drastic Rise       1.00      0.33      0.50         3
        Fall       0.12      0.25      0.17         8
        Rise       0.12      0.11      0.12         9
      Stable       0.91      0.89      0.90       173

    accuracy                           0.81       200
   macro avg       0.60      0.46      0.49       200
weighted avg       0.84      0.81      0.83       200

--------------------------------------------------
AI-Only Performance (Accuracy): 81.50%
