#Assignment 3 : Transformer (BERT) for Text Classification

Name: Aditya Raj Sinha

Roll No.: 2301201189

#93% accuracy achieved of BERT fine tuned on twitter sentiment dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
dataset_path = '/content/drive/MyDrive/twitter_training.csv'
df = pd.read_csv(dataset_path, encoding='ISO-8859-1')
df.head()

Unnamed: 0,2401,Borderlands,Positive,"im getting on borderlands and i will murder you all ,"
0,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
1,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
2,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
3,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...
4,2401,Borderlands,Positive,im getting into borderlands and i can murder y...


In [5]:
df.columns = ['id', 'game', 'sentiment', 'text']
df.dropna(subset=['text'], inplace=True)
sentiment_mapping = {'Positive': 2, 'Neutral': 1, 'Negative': 0, 'Irrelevant': -1}
df['sentiment_numeric'] = df['sentiment'].map(sentiment_mapping)

from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

display(train_df.head())
display(test_df.head())

Unnamed: 0,id,game,sentiment,text,sentiment_numeric
61022,4860,GrandTheftAuto(GTA),Irrelevant,i had a dream my girlfriend and i got married ...,-1
33058,6472,Fortnite,Irrelevant,buy yo Alix or formula now am big big fan of i...,-1
19839,12596,WorldOfCraft,Neutral,I also just earned the [ Scrappy'S s Weekly Be...,1
74051,9088,Nvidia,Neutral,NVIDIA celebrates 40 years of Pac-Man with an ...,1
13578,8730,NBA2K,Irrelevant,Congratulations to the champions of Season 2 @...,-1


Unnamed: 0,id,game,sentiment,text,sentiment_numeric
61734,4984,GrandTheftAuto(GTA),Irrelevant,Do you think you can hurt me?,-1
11260,13136,Xbox(Xseries),Positive,About The time!!,2
55969,11207,TomClancysRainbowSix,Neutral,Calls from _ z1rv _ & @ Tweet98 got me this so...,1
4111,1909,CallOfDutyBlackopsColdWar,Negative,So CoD: Black Ops Cold War is gonna be ass? @ ...,0
2308,1604,CallOfDutyBlackopsColdWar,Negative,Y HAPPY ABOUT THIS.,0


In [7]:
from transformers import BertForSequenceClassification, BertTokenizer

model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
num_labels = train_df['sentiment_numeric'].nunique()
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

  * **h_n**: tensor of shape :math:`(D * \text{num\_layers}, H_{out})` or
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
import torch
from torch.utils.data import Dataset
from transformers import Trainer, TrainingArguments

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

class SentimentDataset(Dataset):
    def __init__(self, dataframe, tokenizer):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.tokenized_data = self.dataframe.apply(tokenize_function, axis=1)
        self.labels = self.dataframe['sentiment_numeric'].tolist()

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val) for key, val in self.tokenized_data.iloc[idx].items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

train_dataset = SentimentDataset(train_df, tokenizer)
test_dataset = SentimentDataset(test_df, tokenizer)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    eval_strategy="epoch",
    optim="adamw_torch", # Explicitly use standard AdamW for XLA compatibility
)

In [13]:
train_df_filtered = train_df[train_df['sentiment'] != 'Irrelevant'].copy()
test_df_filtered = test_df[test_df['sentiment'] != 'Irrelevant'].copy()
train_dataset = SentimentDataset(train_df_filtered, tokenizer)
test_dataset = SentimentDataset(test_df_filtered, tokenizer)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)
trainer.train()



Epoch,Training Loss,Validation Loss
1,0.416,0.400364
2,0.2084,0.321469
3,0.0892,0.295329




TrainOutput(global_step=18357, training_loss=0.3109754994718599, metrics={'train_runtime': 3943.1499, 'train_samples_per_second': 37.243, 'train_steps_per_second': 4.655, 'total_flos': 3.863460557039616e+16, 'train_loss': 0.3109754994718599, 'epoch': 3.0})

In [14]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

In [20]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

evaluation_results = trainer.evaluate()
print(evaluation_results)



{'eval_loss': 0.29532861709594727, 'eval_model_preparation_time': 0.0018, 'eval_accuracy': 0.9373305954825462, 'eval_precision': 0.9379785541944314, 'eval_recall': 0.9373305954825462, 'eval_f1': 0.9374267791555498, 'eval_runtime': 128.2421, 'eval_samples_per_second': 94.945, 'eval_steps_per_second': 11.868}


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

vectorizer = TfidfVectorizer()
classical_model = LogisticRegression(max_iter=1000)
pipeline = Pipeline([('tfidf', vectorizer), ('logreg', classical_model)])

In [22]:
pipeline.fit(train_df_filtered['text'], train_df_filtered['sentiment_numeric'])
predictions = pipeline.predict(test_df_filtered['text'])
accuracy = accuracy_score(test_df_filtered['sentiment_numeric'], predictions)
precision, recall, f1, _ = precision_recall_fscore_support(test_df_filtered['sentiment_numeric'], predictions, average='weighted')

print("Classical Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Classical Model Performance:
Accuracy: 0.8352
Precision: 0.8352
Recall: 0.8352
F1 Score: 0.8348


In [6]:
train_df_filtered = train_df[train_df['sentiment'] != 'Irrelevant'].copy()
test_df_filtered = test_df[test_df['sentiment'] != 'Irrelevant'].copy()

pipeline.fit(train_df_filtered['text'], train_df_filtered['sentiment_numeric'])

predictions = pipeline.predict(test_df_filtered['text'])

accuracy = accuracy_score(test_df_filtered['sentiment_numeric'], predictions)
precision, recall, f1, _ = precision_recall_fscore_support(test_df_filtered['sentiment_numeric'], predictions, average='weighted')

print("Classical Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Classical Model Performance:
Accuracy: 0.8352
Precision: 0.8352
Recall: 0.8352
F1 Score: 0.8348


In [26]:
print(evaluation_results['eval_accuracy'])

0.9373305954825462
