In [None]:
# `load_metric` (imported from `datasets` below) is only available in datasets
# releases before 3.0, so keep the install on the 2.x line. `%pip` installs into
# the environment of the running kernel.
%pip install transformers "datasets<3"

In [None]:
import os

import numpy as np
import torch
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt

In [None]:
# GLUE configuration to evaluate on. The tokenisation cells read the
# 'sentence' column, so only GLUE tasks that expose that column can be
# swapped in here without further changes.
task = "sst2"
dataset = load_dataset(path="nyu-mll/glue", name=task)

In [None]:
# Hub checkpoints to compare.
# NOTE(review): model_name_1 looks like a base checkpoint rather than a
# sentiment fine-tune - confirm its classification head is meaningful before
# reading anything into its scores.
model_name_1 = "distilbert/distilbert-base-uncased"
model_name_2 = "pablo-chocobar/distilbert-ft-sst5"

# Load the sequence-classification model for each checkpoint...
model_1, model_2 = (
    AutoModelForSequenceClassification.from_pretrained(checkpoint)
    for checkpoint in (model_name_1, model_name_2)
)

# ...and the tokenizer that belongs to each of them.
tokenizer_1, tokenizer_2 = (
    AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in (model_name_1, model_name_2)
)


In [None]:
def preprocess_function_1(examples):
    """Tokenize the 'sentence' field of a batch with ``tokenizer_1``.

    Truncation and padding are both enabled; the tokenizer output is returned
    unchanged.
    """
    sentences = examples['sentence']
    encoded = tokenizer_1(sentences, truncation=True, padding=True)
    return encoded


def preprocess_function_2(examples):
    """Tokenize the 'sentence' field of a batch with ``tokenizer_2``.

    Truncation and padding are both enabled; the tokenizer output is returned
    unchanged.
    """
    sentences = examples['sentence']
    encoded = tokenizer_2(sentences, truncation=True, padding=True)
    return encoded

# Tokenise every split once per model, processing examples in batches so each
# preprocess function receives a batch of sentences at a time.
encoded_dataset_1 = dataset.map(function=preprocess_function_1, batched=True)
encoded_dataset_2 = dataset.map(function=preprocess_function_2, batched=True)


In [18]:
# Module-level accumulators: every call to compute_metrics appends its binary
# predictions and the gold labels here, so a confusion matrix can be built once
# evaluation has finished. Reset them before evaluating another model.
all_predictions = []
all_labels = []


def compute_metrics(eval_pred):
    """Score one evaluation pass against binary sentiment labels.

    Args:
        eval_pred: a ``(logits, labels)`` pair. ``logits`` may itself be a
            tuple, in which case only its first element is used.

    Returns:
        dict with the keys ``precision``, ``recall``, ``f1`` and ``accuracy``
        (Python floats).

    Side effects:
        Extends the module-level ``all_predictions`` and ``all_labels`` lists
        with this call's binary predictions and labels.
    """
    logits, labels = eval_pred

    # Some models return a tuple of outputs; the logits come first.
    logits = logits[0] if isinstance(logits, tuple) else logits
    logits = np.asarray(logits)
    labels = np.asarray(labels)

    class_ids = logits.argmax(axis=-1)
    if logits.shape[-1] <= 2:
        # The model already predicts binary classes; remapping these ids with
        # the five-class rule below would force every prediction to 0.
        preds = class_ids
    else:
        # Collapse fine-grained sentiment ids to binary: ids above 2 become
        # positive (1); ids 0-2 (including the middle class) become negative (0).
        preds = (class_ids > 2).astype(int)

    # Micro-averaged precision, recall and F1 over single-label predictions
    # all reduce to (correct predictions / total predictions), i.e. accuracy.
    # The four keys are kept so existing consumers of the result dict still
    # work; use a per-class report for class-level precision/recall.
    accuracy = float(np.mean(preds == labels)) if labels.size else 0.0
    precision = recall = f1 = accuracy

    # Record this pass for the confusion matrix built after evaluation.
    all_predictions.extend(preds.tolist())
    all_labels.extend(labels.tolist())

    return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}


In [None]:
# Configuration shared by both trainers below.
training_args = TrainingArguments(
    # Relative to the working directory: the previous value started with '/',
    # i.e. a directory at the filesystem root, which a non-root user cannot
    # create.
    output_dir='distilbert-sst5-sentiment-analyzer',
    evaluation_strategy="epoch",
    per_device_eval_batch_size=16,
)

In [29]:

# Both trainers use the same arguments and metric hook; they differ only in
# the model and in the tokenised validation split that matches its tokenizer.
_shared_trainer_kwargs = dict(args=training_args, compute_metrics=compute_metrics)

trainer_1 = Trainer(
    model=model_1,
    eval_dataset=encoded_dataset_1['validation'],
    **_shared_trainer_kwargs,
)

trainer_2 = Trainer(
    model=model_2,
    eval_dataset=encoded_dataset_2['validation'],
    **_shared_trainer_kwargs,
)

In [None]:

# Start from empty accumulators so that re-running this cell, or running it
# after another evaluation, does not mix earlier predictions into the matrix.
all_predictions = []
all_labels = []

# compute_metrics fills the accumulators while the evaluation runs.
results_1 = trainer_1.evaluate()

# Confusion matrix of gold labels against model 1's binary predictions.
conf_matrix_1 = confusion_matrix(all_labels, all_predictions)
disp_1 = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_1)
disp_1.plot()
plt.title('Confusion Matrix for Model 1')
plt.show()

In [None]:
# Drop whatever the previous evaluation collected before scoring model 2.
all_predictions, all_labels = [], []

# compute_metrics fills the accumulators while the evaluation runs.
results_2 = trainer_2.evaluate()

# Confusion matrix of gold labels against model 2's binary predictions.
conf_matrix_2 = confusion_matrix(all_labels, all_predictions)
disp_2 = ConfusionMatrixDisplay(conf_matrix_2)
disp_2.plot()
plt.title('Confusion Matrix for Model 2')
plt.show()

In [None]:
# Show the metric dictionaries of both evaluations, one per line.
print(
    f"Results for model 1: {results_1}",
    f"Results for model 2: {results_2}",
    sep="\n",
)

In [None]:
# Per-class report for whatever the accumulators currently hold, i.e. the most
# recent evaluation that ran before this cell.
report_text = classification_report(all_labels, all_predictions)
print(report_text)

In [None]:
# High-level inference helper: build a text-classification pipeline straight
# from the first checkpoint name.
# NOTE(review): this uses model_name_1, not the checkpoint pushed to the Hub
# at the end of the notebook - confirm that is the intended model to demo.
from transformers import pipeline

pipe = pipeline(task="text-classification", model=model_name_1)

In [None]:
# Run the pipeline on a single example sentence and display its output.
demo_prediction = pipe("This movie is awesome")
demo_prediction

In [26]:
# Hugging Face access token. Never commit a token to the notebook: it is read
# from the HF_TOKEN environment variable instead (None when the variable is not
# set). The token that used to be hardcoded on this line has been exposed and
# must be revoked in the Hugging Face account settings.
jtk = os.environ.get("HF_TOKEN")

In [None]:
# Hub client used by the login and upload cells below.
%pip install huggingface_hub

In [None]:
# Authenticate this notebook session with the Hugging Face Hub; the upload
# cell that follows pushes to a Hub repository.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Publish the second model together with its tokenizer under one Hub repo id
# (tokenizer first, then the model weights).
hub_repo_id = "jigarcpatel/distilbert-sst5-sentiment-analyzer"
tokenizer_2.push_to_hub(hub_repo_id)
model_2.push_to_hub(hub_repo_id)