In [None]:
!pip install evaluate transformers datasets torch scikit-learn pandas matplotlib seaborn gradio -q

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer
import pandas as pd

# Experiment settings: pretrained checkpoint to use and how many reviews to sample.
model_checkpoint = "bert-base-uncased"
subset_size = 500

# Training split of the Yelp full-review dataset.
dataset = load_dataset("yelp_review_full", split="train")

# Shuffle with a fixed seed, then keep the first `subset_size` rows.
random_subset = dataset.shuffle(seed=42)
random_subset = random_subset.select(range(subset_size))
print(random_subset)

In [None]:
def map_rating_to_sentiment(example):
  """Collapse the rating stored under "label" into a three-way sentiment class.

  Args:
    example: Mapping with a "label" entry holding the rating.

  Returns:
    A dict ``{"labels": s}`` where ``s`` is 0 when the rating is 0 or 1,
    1 when the rating is 2, and 2 for any other value.
  """
  stars = example["label"]
  if stars == 2:
    return {"labels": 1}
  return {"labels": 0 if stars in (0, 1) else 2}

In [None]:
# Add the derived "labels" column, then drop the original "label" column.
random_subset = random_subset.map(map_rating_to_sentiment).remove_columns(["label"])

In [None]:
# Class-balance check: number of examples per sentiment class, ordered by class id.
df = random_subset.to_pandas()
label_counts = df["labels"].value_counts().sort_index()
print(label_counts)

In [None]:
# Tokenizer that belongs to the checkpoint selected in `model_checkpoint`.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize(examples):
  """Run the module-level tokenizer over the "text" field of a batch.

  The tokenizer is called with ``padding="max_length"`` and
  ``truncation=True``; its output is returned unchanged.
  """
  return tokenizer(examples["text"], truncation=True, padding="max_length")


# Apply the tokenizer batch-wise over the sampled subset.
tokenized_dataset = random_subset.map(tokenize, batched=True)

In [None]:
# Drop the "text" column and switch the dataset's output format to torch.
tokenized_dataset = tokenized_dataset.remove_columns(["text"])
tokenized_dataset.set_format(type="torch")

In [None]:
import numpy as np
import evaluate

def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision / recall / F1 for one evaluation pass.

    All four metrics are computed with scikit-learn, so nothing is re-loaded
    (or fetched) each time this function is called.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds the
            per-class scores (the argmax over axis 1 is taken as the predicted
            class); ``labels`` holds the reference class ids.

    Returns:
        dict with the keys "accuracy", "f1", "precision" and "recall", each a
        plain Python float. Precision, recall and F1 use ``average="weighted"``
        with ``zero_division=0``.
    """
    # Function-scope import: brings the sklearn helpers into scope without
    # touching the notebook's import cells.
    from sklearn.metrics import accuracy_score, precision_recall_fscore_support

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)

    # One call yields weighted precision, recall and F1 together; the fourth
    # element (support) is None when an average is requested.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average="weighted", zero_division=0
    )

    return {
        "accuracy": float(accuracy_score(labels, predictions)),
        "f1": float(f1),
        "precision": float(precision),
        "recall": float(recall),
    }

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Sequence-classification model from the chosen checkpoint, configured for 3 labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=3,
)

In [None]:
# Hold out 20% of the tokenized subset (fixed seed) for evaluation.
split_dataset = tokenized_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset, eval_dataset = split_dataset["train"], split_dataset["test"]

# Training configuration: output locations first, then schedule and batch
# sizes, then logging and checkpoint cadence.
training_args = TrainingArguments(
    output_dir="./results",
    logging_dir="./logs",
    num_train_epochs=5,
    learning_rate=2e-5,  # 0.00002
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_steps=10,
    save_steps=500,
    do_eval=True,
)

In [None]:
# Train on the training split only. Passing the full `tokenized_dataset` here
# would include the rows of `eval_dataset` in training, so the evaluation
# metrics would be computed on examples the model had already seen.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the `trainer` object constructed in the preceding cell.
trainer.train()

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Aggregate metrics on the evaluation dataset attached to the trainer.
eval_results = trainer.evaluate()

percentage_metrics = ["accuracy", "f1", "precision", "recall"]
print("Evaluation Metrics:")
for key, value in eval_results.items():
    # Only numeric "eval_*" entries are reported; other entries are skipped.
    if key.startswith("eval_") and isinstance(value, (float, int)):
        metric_name = key[len("eval_"):]
        if metric_name in percentage_metrics:
            print(f"{metric_name.capitalize()}: {value * 100:.2f}%")
        elif metric_name == "loss":
            print(f"{metric_name.capitalize()}: {value:.4f}")

# Per-example predictions for the confusion matrix.
predictions_output = trainer.predict(eval_dataset)
preds = np.argmax(predictions_output.predictions, axis=1)
labels = predictions_output.label_ids

# Pin the class ids so the matrix is always 3x3 and lines up with the three
# display labels, even if a class is absent from both targets and predictions.
class_names = ["Negative", "Neutral", "Positive"]
cm = confusion_matrix(labels, preds, labels=list(range(len(class_names))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
print("Confusion Matrix:")
disp.plot(cmap=plt.cm.Blues)
plt.grid(False)
plt.show()