# Transformer-based approaches

## Basic functions and imports

In [1]:
from datasets import load_dataset
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader, Dataset
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import pipeline
import datetime


# Settings shared by every model evaluated in this notebook.
BATCH_SIZE = 32  # number of reviews per DataLoader batch
NUM_WORKERS = 16  # DataLoader worker processes

# Dataset identified as "stanfordnlp/imdb"; only its "test" split is sampled below.
dataset = load_dataset("stanfordnlp/imdb")

# Fraction of the test split to evaluate (1 keeps every row).
test_percentage = 1
num_samples = int(test_percentage * len(dataset["test"]))


# Define a custom dataset class if needed, or use dataset['test'] directly
class CustomDataset(torch.utils.data.Dataset):
    """Expose one text column of a column-indexable dataset to a DataLoader.

    Args:
        dataset: Any object supporting ``dataset[column_name]`` and returning
            an indexable, sized sequence of texts.
        text_column: Name of the column holding the texts. Defaults to
            ``"text"``, so existing ``CustomDataset(dataset)`` calls are
            unaffected.
    """

    def __init__(self, dataset, text_column="text"):
        # Pull the column once so that __getitem__ is a plain sequence lookup.
        self.texts = dataset[text_column]

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the text stored at position ``idx``."""
        return self.texts[idx]


# Evaluation data: the first `num_samples` rows of the test split, served in
# their original order (shuffle=False) so predictions stay aligned with the
# rows of `test_sample`.
test_sample = dataset["test"].select(range(num_samples))
test_dataset = CustomDataset(test_sample)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [2]:
import pandas as pd

# Empty metrics table; later cells add one row per evaluated model.
result_df = pd.DataFrame(
    columns=[
        "model",
        "accuracy",
        "precision",
        "recall",
        "f1",
    ]
)


In [3]:
def calculate_metrics(predicted, true):
    """Score predicted labels against the ground truth.

    Args:
        predicted: Predicted class labels.
        true: Ground-truth class labels, aligned element-wise with
            ``predicted``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Precision, recall and f1
        are computed with ``average="weighted"``.
    """
    accuracy = accuracy_score(true, predicted)
    weighted_scores = precision_recall_fscore_support(
        true, predicted, average="weighted"
    )
    # The fourth element returned by sklearn is not reported, so drop it.
    precision, recall, f1 = weighted_scores[:3]
    return accuracy, precision, recall, f1

def add_result(df, model, predicted, true):
    """Return a new results table with one extra row of metrics for ``model``.

    Args:
        df: Existing results table (may be empty). It is not modified.
        model: Display name stored in the "model" column.
        predicted: Predicted labels, forwarded to ``calculate_metrics``.
        true: Ground-truth labels, forwarded to ``calculate_metrics``.

    Returns:
        A new DataFrame containing the rows of ``df`` followed by the new row,
        with the index renumbered from 0 so every row has a unique label.
    """
    accuracy, precision, recall, f1 = calculate_metrics(predicted, true)
    new_row = pd.DataFrame(
        {
            "model": [model],
            "accuracy": [accuracy],
            "precision": [precision],
            "recall": [recall],
            "f1": [f1],
        }
    )
    # Leave an empty `df` out of the concat: concatenating empty frames is
    # deprecated in recent pandas and could change the metric columns' dtypes.
    frames = [new_row] if df.empty else [df, new_row]
    # ignore_index avoids every appended row ending up with index label 0.
    return pd.concat(frames, ignore_index=True)

## Models

### LLAMA

In [None]:
# Text-classification pipeline for the Llama checkpoint, placed on the GPU.
pipe_llama = pipeline(
    task="text-classification",
    model="yash3056/Llama-3.2-1B-imdb",
    device="cuda",
)

# Smoke test on a single sentence.
print(pipe_llama("The movie was great!"))

In [None]:
# Classify the test reviews batch by batch with the Llama pipeline, showing a
# progress bar. Each DataLoader batch is passed to the pipeline as-is.
results = []
for batch in tqdm(test_dataloader, desc="Processing batches"):
    results += pipe_llama(batch)

In [None]:
# Ground truth must come from the rows that were actually classified: the
# DataLoader walks `test_sample` (taken from the test split) in order, so its
# labels line up one-to-one with `results`. The train split is not involved.
true_labels = list(test_sample["label"])

# Map the pipeline's string labels onto integers: "LABEL_1" -> 1, anything else -> 0.
predicted_labels = [1 if prediction["label"] == "LABEL_1" else 0 for prediction in results]

# Fail early if inference was interrupted or run on a different sample.
assert len(predicted_labels) == len(true_labels), "predictions and labels are misaligned"

predicted_labels[:5], true_labels[:5]

In [None]:
# Score the Llama predictions and add them to the results table.
result_df = add_result(
    df=result_df,
    model="Llama",
    predicted=predicted_labels,
    true=true_labels,
)
result_df

In [36]:
# Save the metrics collected so far to a timestamped CSV file.
results_csv_path = f"results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
result_df.to_csv(results_csv_path, index=False)

### MPNET_V2

In [None]:
# Text-classification pipeline for the MPNet checkpoint, placed on the GPU,
# with truncation enabled for the tokenizer.
pipe_mpnet_v2 = pipeline(
    task="text-classification",
    model="abhiramd22/finetuning-sentiment-model-mpnet-imdb",
    truncation=True,
    device="cuda",
)

# Smoke test on a single sentence.
print(pipe_mpnet_v2("I love you"))

In [None]:
# Classify the test reviews batch by batch with the MPNet pipeline, showing a
# progress bar. Each DataLoader batch is passed to the pipeline as-is.
results = []
for batch in tqdm(test_dataloader, desc="Processing batches"):
    results += pipe_mpnet_v2(batch)

In [None]:
# NOTE(review): this cell repeats, verbatim, the MPNet pipeline construction
# from the start of this section. Running it builds the pipeline a second time
# after inference has already finished; it looks like a leftover copy —
# confirm and remove.
pipe_mpnet_v2 = pipeline(
    "text-classification",
    model="abhiramd22/finetuning-sentiment-model-mpnet-imdb",
    device="cuda",
    truncation=True,
)
# Smoke test on a single sentence.
print(pipe_mpnet_v2("I love you"))

In [40]:
# Ground truth must come from the rows that were actually classified: the
# DataLoader walks `test_sample` (taken from the test split) in order, so its
# labels line up one-to-one with `results`. The train split is not involved.
true_labels = list(test_sample["label"])

# Map the pipeline's string labels onto integers: "NEGATIVE" -> 0, anything else -> 1.
predicted_labels = [0 if prediction["label"] == "NEGATIVE" else 1 for prediction in results]

# Fail early if inference was interrupted or run on a different sample.
assert len(predicted_labels) == len(true_labels), "predictions and labels are misaligned"

predicted_labels[:5], true_labels[:5]

([0, 0, 0, 0, 1], [0, 0, 0, 0, 0])

In [41]:
# Score the MPNet predictions and add them to the results table.
result_df = add_result(
    df=result_df,
    model="MPNet_v2",
    predicted=predicted_labels,
    true=true_labels,
)
result_df

Unnamed: 0,model,accuracy,precision,recall,f1
0,Llama,0.96356,0.963582,0.96356,0.96356
0,MPNet_v2,0.9814,0.981426,0.9814,0.9814


#### SIDENOTE
The MPNet_v2 result (98% accuracy) looks too good to be true. I suspect data leakage: the author of the fine-tuned model may have used part of the test set for training. According to that author, the model should reach ~96% accuracy on the test set, not 98%.

In [42]:
# Save the metrics collected so far to a timestamped CSV file.
results_csv_path = f"results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
result_df.to_csv(results_csv_path, index=False)

### GPT2

In [None]:
# High-level helper used to build the classifier.
from transformers import pipeline

# Text-classification pipeline for the GPT-2 checkpoint, placed on the GPU,
# with truncation enabled for the tokenizer.
pipe_gpt2 = pipeline(
    task="text-classification",
    model="mnoukhov/gpt2-imdb-sentiment-classifier",
    truncation=True,
    device="cuda",
)

# Smoke test on a single sentence.
print(pipe_gpt2("The movie was great!"))

In [None]:
# Classify the test reviews batch by batch with the GPT-2 pipeline, showing a
# progress bar.
import os

# TOKENIZERS_PARALLELISM accepts true | false; setting it explicitly avoids warnings.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

results = []
for batch in tqdm(test_dataloader, desc="Processing batches"):
    # Each DataLoader batch is passed to the pipeline as-is.
    results += pipe_gpt2(batch)

In [None]:
# Ground truth must come from the rows that were actually classified: the
# DataLoader walks `test_sample` (taken from the test split) in order, so its
# labels line up one-to-one with `results`. The train split is not involved.
true_labels = list(test_sample["label"])

# Map the pipeline's string labels onto integers: "LABEL_0" -> 0, anything else -> 1.
predicted_labels = [0 if prediction["label"] == "LABEL_0" else 1 for prediction in results]

# Fail early if inference was interrupted or run on a different sample.
assert len(predicted_labels) == len(true_labels), "predictions and labels are misaligned"

predicted_labels[:5], true_labels[:5]

In [None]:
# Score the GPT-2 predictions and add them to the results table.
result_df = add_result(
    df=result_df,
    model="GPT2",
    predicted=predicted_labels,
    true=true_labels,
)
result_df

In [11]:
# Save the metrics collected so far to a timestamped CSV file.
results_csv_path = f"results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
result_df.to_csv(results_csv_path, index=False)

### DEBERTA

In [None]:
from transformers import pipeline

# Text-classification pipeline for the DeBERTa checkpoint, placed on the GPU.
pipe_deberta = pipeline(
    task="text-classification",
    model="dfurman/deberta-v3-base-imdb",
    device="cuda",
)

# Smoke test on a single sentence.
print(pipe_deberta("The movie was great!"))


In [None]:
# Classify the test reviews batch by batch with the DeBERTa pipeline, showing
# a progress bar. Each DataLoader batch is passed to the pipeline as-is.
results = []
for batch in tqdm(test_dataloader, desc="Processing batches"):
    results += pipe_deberta(batch)

In [None]:
# Ground truth must come from the rows that were actually classified: the
# DataLoader walks `test_sample` (taken from the test split) in order, so its
# labels line up one-to-one with `results`. The train split is not involved.
true_labels = list(test_sample["label"])

# Map the pipeline's string labels onto integers: "NEGATIVE" -> 0, anything else -> 1.
predicted_labels = [0 if prediction["label"] == "NEGATIVE" else 1 for prediction in results]

# Fail early if inference was interrupted or run on a different sample.
assert len(predicted_labels) == len(true_labels), "predictions and labels are misaligned"

predicted_labels[:5], true_labels[:5]

In [17]:
# Score the DeBERTa predictions and add them to the results table.
result_df = add_result(
    df=result_df,
    model="DeBERTa",
    predicted=predicted_labels,
    true=true_labels,
)
result_df

Unnamed: 0,model,accuracy,precision,recall,f1
0,GPT2,0.9394,0.93953,0.9394,0.939396
0,DeBERTa,0.95464,0.954916,0.95464,0.954633


In [18]:
# Save the metrics collected so far to a timestamped CSV file.
results_csv_path = f"results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv"
result_df.to_csv(results_csv_path, index=False)

In [20]:
# Merge every per-run results CSV in the working directory into one table.

import os

# Keep only regular *.csv files whose name contains "results": the bare
# substring test also matched directories and non-CSV files, which read_csv
# cannot parse. The match is case-sensitive, so the merged output file
# "RESULTS_ALL_transformers.csv" is never read back in. Sorting makes the row
# order independent of the directory listing order.
files = sorted(
    f
    for f in os.listdir()
    if "results" in f and f.endswith(".csv") and os.path.isfile(f)
)
if not files:
    raise FileNotFoundError("No CSV files with 'results' in their name were found in the current directory")

# Each saved CSV holds the whole table at the time it was written, so rows
# repeat across files; drop the repeats and renumber the index from 0.
df = (
    pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

df.to_csv("RESULTS_ALL_transformers.csv", index=False)

## Result table

In [21]:
# Display the merged metrics table assembled in the merge cell.
df

Unnamed: 0,model,accuracy,precision,recall,f1
0,Llama,0.96356,0.963582,0.96356,0.96356
1,MPNet_v2,0.9814,0.981426,0.9814,0.9814
0,GPT2,0.9394,0.93953,0.9394,0.939396
1,DeBERTa,0.95464,0.954916,0.95464,0.954633


# DISCLAIMERS

## Results
The MPNet_v2 result (98% accuracy) looks too good to be true. I suspect data leakage: the author of the fine-tuned model may have used part of the test set for training. According to that author, the model should reach ~96% accuracy on the test set, not 98%.

## AI used for generating this notebook
 - ChatGPT
 - GitHub Copilot