# Task - To train a sentiment analysis model for multiclass sentimennt covering total of 8 classes on a given dataset

# IMPORTING LIBRARIES

In [27]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
from datasets import load_metric
from torch.utils.data import DataLoader, TensorDataset

# Loading dataset

In [2]:
df = pd.read_csv('topical_chat_dataset.csv')

# Encoding labels (sentiments)

In [3]:
# Encode labels
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['sentiment'])

In [10]:
df['message'] = df['message'].astype(str)

# Splitting data

In [11]:
# Split dataset
train_texts, val_texts, train_labels, val_labels = train_test_split(df['message'].tolist(), df['label'].tolist(), test_size=0.2)

# Loading tokenizer and model

In [12]:
# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
model = BertForSequenceClassification.from_pretrained('huawei-noah/TinyBERT_General_4L_312D', num_labels=8)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Tokenizing data

In [13]:
# Tokenize data
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=128)

In [14]:
# Create a Dataset class
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [15]:
# Create dataset objects
train_dataset = SentimentDataset(train_encodings, train_labels)
val_dataset = SentimentDataset(val_encodings, val_labels)

# Creating training arguments

In [16]:
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy='epoch',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
)



# Initialising trainer object to train the model

In [17]:
# Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Training model

In [18]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss
1,1.1765,1.151577
2,1.0985,1.133184
3,1.0361,1.134087


TrainOutput(global_step=28257, training_loss=1.1352633810956274, metrics={'train_runtime': 57245.6917, 'train_samples_per_second': 7.898, 'train_steps_per_second': 0.494, 'total_flos': 1621338631004160.0, 'train_loss': 1.1352633810956274, 'epoch': 3.0})

# Evaluating the model

In [19]:
# Evaluate the model
trainer.evaluate()

# Save the model
model.save_pretrained('./sentiment_model')
tokenizer.save_pretrained('./sentiment_model')

print("Training and evaluation complete. Model saved.")

Training and evaluation complete. Model saved.


# Inference function - for predicting

In [25]:
# Load the model and tokenizer for prediction
model = BertForSequenceClassification.from_pretrained('./sentiment_model')
tokenizer = BertTokenizer.from_pretrained('./sentiment_model')

# Function to make predictions
def predict_sentiment(text):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=128)
    outputs = model(**inputs)
    predictions = torch.argmax(outputs.logits, dim=1)
    return label_encoder.inverse_transform(predictions.cpu().numpy())[0]

# Example predictions
new_messages = [
    "I am very happy with your service!",
    "This is the worst experience I have ever had.",
    "I feel so curious about the new features.",
    "Lol its funny",
    "This is so bad",
    "This is disgusting",
    "A google seqarch takes more energy than sending astronauts to moon!!",
    "Yeah I heard about his daughter passing away!"
]

for message in new_messages:
    sentiment = predict_sentiment(message)
    print(f"Message: {message}\nPredicted Sentiment: {sentiment}\n")

Message: I am very happy with your service!
Predicted Sentiment: Happy

Message: This is the worst experience I have ever had.
Predicted Sentiment: Neutral

Message: I feel so curious about the new features.
Predicted Sentiment: Curious to dive deeper

Message: Lol its funny
Predicted Sentiment: Happy

Message: This is so bad
Predicted Sentiment: Neutral

Message: This is disgusting
Predicted Sentiment: Sad

Message: A google seqarch takes more energy than sending astronauts to moon!!
Predicted Sentiment: Surprised

Message: Yeah I heard about his daughter passing away!
Predicted Sentiment: Curious to dive deeper



# Loading the accuracy metrics for Validation accuracy

In [29]:
# Load accuracy metric
accuracy_metric = load_metric("accuracy", trust_remote_code=True)


In [30]:
# Convert validation data to torch tensors
val_input_ids = torch.tensor(val_encodings['input_ids'])
val_attention_mask = torch.tensor(val_encodings['attention_mask'])
val_labels = torch.tensor(val_labels)

In [31]:
# Create DataLoader for validation data
val_dataset = TensorDataset(val_input_ids, val_attention_mask, val_labels)
val_loader = DataLoader(val_dataset, batch_size=16)

In [32]:
# Function to calculate accuracy
def calculate_accuracy(model, val_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in val_loader:
            input_ids, attention_mask, labels = batch
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    accuracy = accuracy_metric.compute(predictions=all_preds, references=all_labels)
    return accuracy['accuracy']

In [33]:
# Calculate and print accuracy
accuracy = calculate_accuracy(model, val_loader)
print(f"Validation Accuracy: {accuracy}")

Validation Accuracy: 0.5642053296528294


# Now calculating accuracy metrics on testing data

In [36]:
# Now testing on test data
train_val_texts, test_texts, train_val_labels, test_labels = train_test_split(df['message'].tolist(), df['label'].tolist(), test_size=0.2)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)


In [37]:
test_dataset = SentimentDataset(test_encodings, test_labels)


In [38]:
test_input_ids = torch.tensor(test_encodings['input_ids'])


In [39]:
test_attention_mask = torch.tensor(test_encodings['attention_mask'])
test_labels = torch.tensor(test_labels)


In [40]:
# Create DataLoader for test data
test_dataset = TensorDataset(test_input_ids, test_attention_mask, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16)

In [42]:
# Function to calculate accuracy on test data
def calculate_accuracy_test(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in test_loader:
            input_ids, attention_mask, labels = batch
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    accuracy = accuracy_metric.compute(predictions=all_preds, references=all_labels)
    return accuracy['accuracy']

# Testing accuracy

In [43]:
# Calculate and print test accuracy
test_accuracy = calculate_accuracy_test(model, test_loader)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.6079467034717061


In [44]:
precision_metric = load_metric("precision", trust_remote_code=True)
recall_metric = load_metric("recall", trust_remote_code=True)
f1_metric = load_metric("f1", trust_remote_code=True)

Downloading builder script:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

# Function for calculating different accuracy metrics on test data

In [45]:
def calculate_metrics(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in test_loader:
            input_ids, attention_mask, labels = batch
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs.logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_metric.compute(predictions=all_preds, references=all_labels)
    precision = precision_metric.compute(predictions=all_preds, references=all_labels, average='weighted')
    recall = recall_metric.compute(predictions=all_preds, references=all_labels, average='weighted')
    f1 = f1_metric.compute(predictions=all_preds, references=all_labels, average='weighted')
    
    return {
        'accuracy': accuracy['accuracy'],
        'precision': precision['precision'],
        'recall': recall['recall'],
        'f1': f1['f1']
    }

In [46]:
# Calculate and print test metrics
test_metrics = calculate_metrics(model, test_loader)
print(f"Test Accuracy: {test_metrics['accuracy']}")
print(f"Test Precision: {test_metrics['precision']}")
print(f"Test Recall: {test_metrics['recall']}")
print(f"Test F1 Score: {test_metrics['f1']}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Test Accuracy: 0.6079467034717061
Test Precision: 0.5932582433720968
Test Recall: 0.6079467034717061
Test F1 Score: 0.5964401990449176


# Saving label encoder for Inferencing API

In [50]:
import pickle

# Assuming `df` is your dataframe
label_encoder = LabelEncoder()
label_encoder.fit(df['sentiment'])

with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)

# Summary
# Model is trained for 16 hours straight  and has a val accuracy of 56%
# Accuracy on test data -> 60.79%
# Precision -> 59.32%
# Recall -> 0.60
# F1 score-> 0.59

# I have used TinyBERT model for this problem statement becuase it is lighweight while maintaining a significant accuracy level. I have limited computational hardware that's the main reason I went for TinyBERT