### Model Testing Script

This script evaluates the difference in performance between a pretrained model and its fine-tuned counterpart.


### Change Model name here to switch which one you want to test

In [None]:
# Set to 1 (truthy) to evaluate the model fine-tuned on the augmented dataset,
# or to 0 to evaluate the model fine-tuned on the original dataset.
augmented_dataset = 1
model_name = "unsloth/llama-3-8b-bnb-4bit"  # Change this to switch models

# Only the last component of the hub id is used in the output folder name.
model_dir = f'../finetuned_models/outputmodel_{model_name.split("/")[-1]}'
if augmented_dataset:
    # The branches were previously swapped, so enabling the flag selected the
    # non-augmented directory. NOTE(review): confirm the training notebook
    # writes the augmented run to the '_augmented_dataset' folder.
    model_dir += '_augmented_dataset'

### Step 1: Load dependencies

In [None]:
import os
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
from datasets import Dataset
import matplotlib.pyplot as plt


### Step 2: Clear Cuda cache

In [None]:
def clear_cuda_cache() -> None:
    """Call ``torch.cuda.empty_cache()`` to release unoccupied cached GPU memory."""
    torch.cuda.empty_cache()

# Run once up front, before the two models are loaded below.
clear_cuda_cache()

### Step 3: Load the Fine-tuned Model and Tokenizer

In [None]:

# The classifier weights and the tokenizer are both read from the
# fine-tuning output directory selected above.
finetuned_model = AutoModelForSequenceClassification.from_pretrained(
    model_dir,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

### Step 4: Load the pre-finetuned model for comparison

In [None]:
# Use the base checkpoint selected in `model_name` so that switching models at
# the top of the notebook also switches the baseline it is compared against
# (previously this was hard-coded and could silently drift out of sync).
pretrained_model_name = model_name
pretrained_model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name, low_cpu_mem_usage=True)

### Step 5: Load and Preprocess Validation Dataset

In [None]:
# The validation split is expected next to the fine-tuned model's files.
val_csv_path = os.path.join(model_dir, 'val_dataset.csv')
try:
    val_df = pd.read_csv(val_csv_path)
    print("Validation dataset loaded successfully.")
except FileNotFoundError as e:
    # Name the missing path and keep the original exception as the cause.
    raise FileNotFoundError(f"Error: The file was not found: {val_csv_path}") from e

try:
    val_dataset = Dataset.from_pandas(val_df)
    print("Validation dataset converted to Huggingface Dataset successfully.")
except Exception as e:
    # Re-raise with context while preserving the underlying traceback.
    raise RuntimeError(f"Error converting validation DataFrame to Dataset: {e}") from e

def tokenize_function(examples):
    """Tokenize the 'Quote' column of a batch with the module-level tokenizer.

    Raises:
        KeyError: If the batch has no 'Quote' column.
    """
    has_quote_column = 'Quote' in examples
    if has_quote_column:
        return tokenizer(examples['Quote'], truncation=True, padding='max_length')
    raise KeyError("Error: 'Quote' column not found in dataset.")

# Tokenize in batches, expose only the columns the evaluation loop reads as
# torch tensors, and serve them four examples at a time.
val_dataset = val_dataset.map(tokenize_function, batched=True)
val_dataset.set_format(
    type='torch',
    columns=['input_ids', 'attention_mask', 'Memorable'],
)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=4)

### Step 6: Define Evaluation Function and evaluate both models

In [None]:
def evaluate_model(model, dataloader):
    """Run ``model`` over ``dataloader`` and score its class predictions.

    Args:
        model: Module whose forward call accepts ``input_ids`` and an
            ``attention_mask`` keyword and returns an object with ``logits``.
        dataloader: Iterable of dict batches holding ``input_ids``,
            ``attention_mask`` and the ground-truth ``Memorable`` labels.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    model.eval()

    # Inputs must be on the same device as the weights; without this a model
    # placed on GPU fails with a device-mismatch error on CPU batches.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Evaluating Model'):
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            if device is not None:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            # Predicted class is the index of the largest logit.
            preds = torch.argmax(outputs.logits, dim=-1)

            predictions.extend(preds.cpu().tolist())
            true_labels.extend(batch['Memorable'].cpu().tolist())

    accuracy = accuracy_score(true_labels, predictions)
    # zero_division=0 keeps the default score of 0 for an undefined metric
    # (e.g. no positive predictions) without emitting a warning.
    precision = precision_score(true_labels, predictions, zero_division=0)
    recall = recall_score(true_labels, predictions, zero_division=0)
    f1 = f1_score(true_labels, predictions, zero_division=0)

    return accuracy, precision, recall, f1

# Score the untouched base model first ...
pretrained_metrics = evaluate_model(pretrained_model, val_dataloader)
print(
    'Pre-Trained Model - Accuracy: {:.4f}, Precision: {:.4f}, '
    'Recall: {:.4f}, F1 Score: {:.4f}'.format(*pretrained_metrics)
)

# ... then the fine-tuned model on the very same batches.
finetuned_metrics = evaluate_model(finetuned_model, val_dataloader)
print(
    'Fine-Tuned Model - Accuracy: {:.4f}, Precision: {:.4f}, '
    'Recall: {:.4f}, F1 Score: {:.4f}'.format(*finetuned_metrics)
)


### Step 7: Visualize Model Performance

In [None]:

graphics_folder = '../graphics/'

# Data for visualization
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
pretrained_scores = list(pretrained_metrics)
finetuned_scores = list(finetuned_metrics)

x = range(len(metrics))
bar_width = 0.4

plt.figure(figsize=(10, 6))
# Shift each series half a bar width to either side of the tick so the two
# bars sit side by side; previously they overlapped by half their width.
plt.bar([pos - bar_width / 2 for pos in x], pretrained_scores, width=bar_width, label='Pre-Trained', align='center')
plt.bar([pos + bar_width / 2 for pos in x], finetuned_scores, width=bar_width, label='Fine-Tuned', align='center')

plt.xticks(x, metrics)
plt.ylim(0, 1)
plt.ylabel('Score')
plt.title('Comparison of Pre-Trained and Fine-Tuned Model Performance')
plt.legend()


plot_save_path = os.path.join(graphics_folder, 'model_performance_comparison.png')
try:
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(graphics_folder, exist_ok=True)

    plt.savefig(plot_save_path, bbox_inches='tight')
    print(f'Plot saved successfully at {plot_save_path}')

except PermissionError:
    print(f'Permission denied: Unable to save the plot to {plot_save_path}.')
except FileNotFoundError:
    # Report the graphics folder; this message previously named model_dir.
    print(f'Directory {graphics_folder} not found or cannot be created.')
except Exception as e:
    # Saving is best-effort: report the problem and still show the plot below.
    print(f'An unexpected error occurred while saving the plot: {e}')

# Display the plot
plt.show()