### Model Inference on Remote Machine

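This notebook performs model inference on a remote machine to compare a fine-tuned LLaMA 3 sequence classifier against its 4-bit quantized pre-trained base model. The process includes loading both models and the tokenizer, preparing the validation dataset, and evaluating each model's performance (accuracy, precision, recall, and F1 score).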


In [6]:
import os
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd
from datasets import Dataset
import matplotlib.pyplot as plt

# Directory that holds the fine-tuned checkpoint, its tokenizer and the validation CSV
output_dir = '../finetuned_models/outputmodel_standard/llama3'


def clear_cuda_cache():
    """Ask PyTorch to release the unused memory cached by its CUDA allocator."""
    torch.cuda.empty_cache()


# Free any cached GPU memory before the models are loaded
clear_cuda_cache()

# Load the fine-tuned Model and Tokenizer
finetuned_model = AutoModelForSequenceClassification.from_pretrained(output_dir, low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

# Load the pre-trained model (before fine-tuning) to serve as the baseline.
# Its classification head ('score.weight') is not part of the checkpoint and is
# randomly initialised on load, so seed the RNG first: otherwise the baseline
# metrics change every time the notebook is re-run.
pretrained_model_name = "unsloth/llama-3-8b-bnb-4bit"
torch.manual_seed(0)
pretrained_model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name, low_cpu_mem_usage=True)

# The sequence-classification head reads its logits from the last non-padding
# token, which it can only locate when config.pad_token_id is set. Without it
# the model falls back to the final position of the (padded) sequence, which
# may be a padding token. Only fill the value in when it is missing so a pad id
# saved with the checkpoint is never overridden.
for _loaded_model in (finetuned_model, pretrained_model):
    if _loaded_model.config.pad_token_id is None and tokenizer.pad_token_id is not None:
        _loaded_model.config.pad_token_id = tokenizer.pad_token_id

# Load the validation dataset stored alongside the fine-tuned checkpoint
val_df = pd.read_csv(os.path.join(output_dir, 'val_dataset.csv'))
val_dataset = Dataset.from_pandas(val_df)

def tokenize_function(examples):
    """Tokenize the "Quote" entries of a batch with the module-level tokenizer.

    Args:
        examples: Mapping with a "Quote" key holding the text(s) to encode.

    Returns:
        Whatever the tokenizer returns for those texts, truncated and padded
        with the "max_length" strategy.

    NOTE(review): no explicit max_length is passed, so the padded length is
    presumably taken from the tokenizer's own configuration — confirm that
    this is the intended sequence length.
    """
    quotes = examples["Quote"]
    encoded = tokenizer(quotes, truncation=True, padding="max_length")
    return encoded

# Tokenize the validation split in batches, expose only the columns used during
# evaluation as torch tensors, and iterate over them one example at a time
val_dataset = val_dataset.map(tokenize_function, batched=True)
val_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "Memorable"],
)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=1)

# Function to evaluate a model
def evaluate_model(model, dataloader):
    """Run a classifier over a dataloader and score its predictions.

    Args:
        model: A torch module called as ``model(input_ids, attention_mask=...)``
            whose output exposes ``.logits`` with the class scores on the last
            dimension.
        dataloader: Iterable of batches; each batch is a mapping with the
            tensor entries "input_ids", "attention_mask" and "Memorable"
            (the ground-truth label).

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` computed with the
        scikit-learn metric functions at their default settings
        (assumes binary 0/1 labels — TODO confirm against the dataset).
    """
    model.eval()

    # Batches have to live on the same device as the model weights; a model
    # loaded onto the GPU (e.g. a bitsandbytes-quantized checkpoint) fails
    # with a device-mismatch error otherwise. Fall back to CPU for a
    # parameter-free module.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predictions, true_labels = [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating Model"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["Memorable"]  # only compared on the host, no transfer needed

            outputs = model(input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=-1)

            # Collect plain Python values on the host for the metric functions
            predictions.extend(preds.cpu().tolist())
            true_labels.extend(labels.cpu().tolist())

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)

    return accuracy, precision, recall, f1

# Evaluate pre-trained model
pretrained_metrics = evaluate_model(pretrained_model, val_dataloader)
print(f"Pre-Trained Model - Accuracy: {pretrained_metrics[0]:.4f}, Precision: {pretrained_metrics[1]:.4f}, Recall: {pretrained_metrics[2]:.4f}, F1 Score: {pretrained_metrics[3]:.4f}")

# Evaluate fine-tuned model
finetuned_metrics = evaluate_model(finetuned_model, val_dataloader)
print(f"Fine-Tuned Model - Accuracy: {finetuned_metrics[0]:.4f}, Precision: {finetuned_metrics[1]:.4f}, Recall: {finetuned_metrics[2]:.4f}, F1 Score: {finetuned_metrics[3]:.4f}")

# Data for visualization
metrics = ["Accuracy", "Precision", "Recall", "F1 Score"]
pretrained_scores = list(pretrained_metrics)
finetuned_scores = list(finetuned_metrics)

x = range(len(metrics))

# Plotting: place the two series side by side around each tick. Anchoring both
# at the same x (one centered, one edge-aligned) makes the second bar cover
# half of the first, so shift each series by half a bar width instead.
bar_width = 0.4
pretrained_positions = [pos - bar_width / 2 for pos in x]
finetuned_positions = [pos + bar_width / 2 for pos in x]

plt.figure(figsize=(10, 6))
plt.bar(pretrained_positions, pretrained_scores, width=bar_width, label='Pre-Trained', align='center')
plt.bar(finetuned_positions, finetuned_scores, width=bar_width, label='Fine-Tuned', align='center')

# Ticks stay on the integer positions, i.e. centered under each pair of bars
plt.xticks(list(x), metrics)
plt.ylim(0, 1)
plt.ylabel("Score")
plt.title("Comparison of Pre-Trained and Fine-Tuned Model Performance")
plt.legend()

plt.show()


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at unsloth/llama-3-8b-bnb-4bit and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at unsloth/llama-3-8b-bnb-4bit and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inf

NameError: name 'os' is not defined