<a href="https://colab.research.google.com/github/aarifm-pfw/NLP_assignments/blob/main/EC2/NLP_EC2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch

In [31]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


In [32]:
# Both splits are headerless, tab-separated files; the two columns are
# named "text" and "label" on load so later cells can refer to them by name.
TSV_READ_OPTIONS = dict(sep="\t", names=['text', 'label'], header=None)

train_data = pd.read_csv("/content/rotten_tomatoes_train.tsv", **TSV_READ_OPTIONS)
test_data = pd.read_csv("/content/rotten_tomatoes_test.tsv", **TSV_READ_OPTIONS)

In [33]:
# Load the tokenizer published with the checkpoint named below.
TOKENIZER_CHECKPOINT = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

In [34]:
class SentimentDataset(Dataset):
    """Map-style dataset that tokenizes one (text, label) pair per lookup.

    Args:
        texts: Raw input strings. May be a pandas Series (read by position
            through ``.iloc``, so a non-default index is fine) or any object
            that supports ``len()`` and integer indexing, such as a list.
        labels: Integer class labels aligned one-to-one with ``texts``; the
            same container rules apply.
        tokenizer: Callable invoked as ``tokenizer(text, padding="max_length",
            truncation=True, max_length=..., return_tensors="pt")``. It must
            return a mapping with ``"input_ids"`` and ``"attention_mask"``
            tensors; their leading dimension is squeezed away.
        max_length: Length passed to the tokenizer for padding/truncation.

    Raises:
        ValueError: If ``texts`` and ``labels`` have different lengths.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512):
        # Fail at construction instead of with an IndexError (or silently
        # ignored extra labels) somewhere in the middle of training.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    @staticmethod
    def _item_at(values, idx):
        """Return the element at position ``idx`` for pandas objects and plain sequences."""
        positional = getattr(values, "iloc", None)
        if positional is not None:
            return positional[idx]
        return values[idx]

    def __len__(self):
        """Return the number of examples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the example at position ``idx``.

        Returns:
            dict with ``"input_ids"`` and ``"attention_mask"`` (tokenizer
            output with the leading dimension removed) and ``"labels"``
            (a scalar ``torch.long`` tensor).
        """
        text = self._item_at(self.texts, idx)
        label = self._item_at(self.labels, idx)
        # NOTE(review): padding="max_length" asks the tokenizer to pad every
        # example out to max_length, so the default of 512 is expensive for
        # short texts; pass a smaller max_length if the corpus allows it.
        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(label, dtype=torch.long),
        }

In [35]:
# Wrap each split's text and label columns so examples are tokenized on access.
train_dataset = SentimentDataset(
    texts=train_data["text"],
    labels=train_data["label"],
    tokenizer=tokenizer,
)
test_dataset = SentimentDataset(
    texts=test_data["text"],
    labels=test_data["label"],
    tokenizer=tokenizer,
)

In [36]:
# Seed torch BEFORE building the model: the classification head is randomly
# initialized at load time, and that happens before Trainer applies its own
# seed, so without this the starting weights differ from run to run.
SEED = 42
torch.manual_seed(SEED)

model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
model.to(device)


def compute_accuracy(eval_pred):
    """Return ``{"accuracy": ...}`` for one evaluation pass.

    ``eval_pred`` is the object Trainer hands to ``compute_metrics``; it is
    read through ``.predictions`` (argmax over the last axis gives the
    predicted class) and ``.label_ids`` (the reference labels).
    """
    predicted_classes = eval_pred.predictions.argmax(-1)
    return {"accuracy": accuracy_score(eval_pred.label_ids, predicted_classes)}


training_args = TrainingArguments(
    output_dir="./results",
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy` - confirm against the installed version.
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    seed=SEED,  # explicit so the seed used for training is visible here
)

# NOTE(review): the per-epoch evaluation below runs on the test split. Do not
# use these numbers to choose an epoch or hyperparameters; hold out part of the
# training data as a validation set for that.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # NOTE(review): the recorded cell output shows a warning pointing at this
    # call; recent transformers releases deprecate `tokenizer=` in favour of
    # `processing_class=` - confirm against the installed version.
    tokenizer=tokenizer,
    compute_metrics=compute_accuracy,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


In [37]:
# Run fine-tuning; the returned summary is left as the cell's last
# expression so the notebook still displays it.
train_output = trainer.train()
train_output

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4636,0.408644,0.830206
2,0.3346,0.378698,0.842402
3,0.2318,0.403376,0.836773
4,0.0775,0.518075,0.838649
5,0.0639,0.594638,0.836773


TrainOutput(global_step=1335, training_loss=0.27321168507976035, metrics={'train_runtime': 463.6363, 'train_samples_per_second': 91.99, 'train_steps_per_second': 2.879, 'total_flos': 5649734552678400.0, 'train_loss': 0.27321168507976035, 'epoch': 5.0})

In [38]:
# Run inference on the test split and pick the highest-scoring class per example.
predictions = trainer.predict(test_dataset)
pred_labels = predictions.predictions.argmax(-1)
true_labels = test_data["label"]

# Headline metrics: accuracy, then the three binary-averaged scores.
accuracy = accuracy_score(true_labels, pred_labels)
precision, recall, f1 = (
    scorer(true_labels, pred_labels, average="binary")
    for scorer in (precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(true_labels, pred_labels)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)

# Per-class precision/recall/F1 breakdown
print("\nClassification Report:")
report_text = classification_report(
    true_labels,
    pred_labels,
    target_names=["Negative", "Positive"],
)
print(report_text)

Accuracy: 0.8368
Precision: 0.8343
Recall: 0.8405
F1 Score: 0.8374
Confusion Matrix:
[[444  89]
 [ 85 448]]

Classification Report:
              precision    recall  f1-score   support

    Negative       0.84      0.83      0.84       533
    Positive       0.83      0.84      0.84       533

    accuracy                           0.84      1066
   macro avg       0.84      0.84      0.84      1066
weighted avg       0.84      0.84      0.84      1066

