<a href="https://colab.research.google.com/github/dietmarja/LLM-Elements/blob/main/model_evaluation/evaluation_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Fine-tuning and Evaluation of a pre-trained BERT Model on the Internet Movie Database (IMDB) Dataset of 50K Movie Reviews


In [9]:

#  Uninstallation/Installationof Dependencies:
import os
os.system("pip install -q transformers[torch] datasets evaluate")
os.system("pip uninstall -y pyarrow")
os.system("pip install pyarrow==14.0.1")
os.system("pip uninstall -y cudf-cu12 ibis-framework")
os.system("pip install cudf-cu12 ibis-framework")


# Install the necessary packages
!pip install -q transformers[torch] datasets evaluate matplotlib

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datasets import load_dataset
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from evaluate import load
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

In [None]:
# To prevent a timeout, use the following code
from google.colab import output
output.enable_custom_widget_manager()

# Load the pre-trained model and tokenizer
model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the IMDB dataset from Hugging Face and take 20% of it
dataset = load_dataset("imdb")
dataset = dataset.shuffle(seed=42)
dataset['train'] = dataset['train'].select(range(int(len(dataset['train']) * 0.01)))
dataset['test'] = dataset['test'].select(range(int(len(dataset['test']) * 0.01)))
print("10% of IMDB dataset loaded successfully")

# Preprocess the dataset
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, padding=True, max_length=128)

encoded_dataset = dataset.map(preprocess_function, batched=True)
print("Dataset preprocessed successfully")

# Define the evaluation metric
metric = load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.argmax(axis=1)
    return metric.compute(predictions=predictions, references=labels)

# Setup the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_dir='./logs',
    logging_steps=10,
)

# Split the dataset into train and validation sets
encoded_dataset = encoded_dataset["train"].train_test_split(test_size=0.2)
print("Dataset split into train and validation sets")

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train the model
print("Starting training ...")
train_result = trainer.train()
print("Training completed")

# Evaluate the model
eval_result = trainer.evaluate()
print(f"Evaluation result: {eval_result}")

# Plot training and validation loss
train_loss = train_result.history['train_loss']
val_loss = train_result.history['eval_loss']
epochs = range(1, len(train_loss) + 1)

plt.figure(figsize=(10, 6))
plt.plot(epochs, train_loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


10% of IMDB dataset loaded successfully


Map:   0%|          | 0/250 [00:00<?, ? examples/s]

Dataset preprocessed successfully
Dataset split into train and validation sets
Starting training ...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6917,0.623488,0.78
2,0.5729,0.580525,0.68
3,0.377,0.659198,0.7
4,0.1055,0.565405,0.8


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6917,0.623488,0.78
2,0.5729,0.580525,0.68
3,0.377,0.659198,0.7
4,0.1055,0.565405,0.8
5,0.0997,0.642275,0.8


Training completed
