In [4]:
# Task 4: fine-tune one shared BERT encoder with two classification heads (multi-task learning)
# Import necessary libraries
import torch
from torch import nn
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader, TensorDataset, RandomSampler
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Define the MultiTaskBERT model
class MultiTaskBERT(nn.Module):
    """One BERT encoder shared by two independent linear classification heads.

    Args:
        model_name: Checkpoint identifier passed to ``BertModel.from_pretrained``.
        num_classes_task1: Number of output logits produced by the first head.
        num_classes_task2: Number of output logits produced by the second head.
    """

    def __init__(self, model_name, num_classes_task1, num_classes_task2):
        super().__init__()
        # Attribute names matter to callers: the optimizer setup groups
        # parameters by the "bert" / "classifier" substrings in their names.
        self.bert = BertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(p=0.3)

        # Both heads read the same encoder feature, so they share an input width.
        hidden_size = self.bert.config.hidden_size
        self.classifier_task1 = nn.Linear(hidden_size, num_classes_task1)
        self.classifier_task2 = nn.Linear(hidden_size, num_classes_task2)

    def forward(self, input_ids, attention_mask):
        """Encode a batch once and score it with both heads.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            A ``(task1_logits, task2_logits)`` tuple.
        """
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Element 1 of the encoder output (the pooled output) is the shared
        # sentence-level feature; dropout is applied once, before both heads.
        shared_features = self.dropout(encoder_out[1])
        return (
            self.classifier_task1(shared_features),
            self.classifier_task2(shared_features),
        )

# Toy training corpus: 24 short review sentences defined inline.
sentences = [
    "I love this movie!",
    "This is the worst film.",
    "It was an average experience.",
    "Absolutely fantastic!",
    "Terrible, I hated it.",
    "Just okay, not great.",
    "Brilliant work!",
    "Not good, very disappointing.",
    "Mediocre at best.",
    "A masterpiece!",
    "Awful movie.",
    "Decent watch.",
    "Amazing storyline!",
    "I would not recommend it.",
    "It was a good watch.",
    "Fantastic film!",
    "Horrible experience.",
    "Quite boring.",
    "Wonderful acting!",
    "Poorly executed.",
    "Nothing special.",
    "Stellar performance!",
    "Waste of time.",
    "Enjoyed it a lot.",
]

# Labels follow a fixed positive / negative / neutral cycle, one per sentence.
# NOTE(review): the cycle tags sentences such as "Enjoyed it a lot." and
# "Quite boring." as neutral -- confirm these labels are intended.
labels_task1 = ["positive", "negative", "neutral"] * 8
# Task 2 uses integer class ids in the same cyclic order as task 1.
labels_task2 = [2, 0, 1] * 8

# Hyperparameters -- declared first so everything below reads them from one place
model_name = "bert-base-uncased"  # Checkpoint shared by the tokenizer and the encoder
learning_rate = 2e-5  # Base learning rate, applied to the pre-trained encoder
# NOTE(review): this cell used to declare batch_size = 16 while the DataLoader
# hard-coded batch_size=2, so the declared value was never used. The effective
# value (2) is kept here; change this single line if 16 was the intent.
batch_size = 2
num_epochs = 20  # Number of passes over the training set

# Tokenize input sentences with the tokenizer that matches the encoder checkpoint
tokenizer = BertTokenizer.from_pretrained(model_name)
inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=128)

# Encode labels; CrossEntropyLoss expects integer (long) class indices
le_task1 = LabelEncoder()
labels_task1 = torch.tensor(le_task1.fit_transform(labels_task1), dtype=torch.long)
labels_task2 = torch.tensor(labels_task2, dtype=torch.long)

# Create dataset and dataloader (batches are drawn in random order)
dataset = TensorDataset(inputs.input_ids, inputs.attention_mask, labels_task1, labels_task2)
dataloader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)

# Initialize the model; head sizes are derived from the labels instead of being
# hard-coded. Task 2 labels are assumed to be contiguous ids starting at 0.
model = MultiTaskBERT(
    model_name,
    num_classes_task1=len(le_task1.classes_),
    num_classes_task2=int(labels_task2.max()) + 1,
)

# Split parameters into the pre-trained encoder and everything else, so that no
# trainable parameter is silently left out of the optimizer.
bert_parameters = [param for name, param in model.named_parameters() if name.startswith('bert.')]
task_parameters = [param for name, param in model.named_parameters() if not name.startswith('bert.')]

# Freshly initialized heads get a 10x larger learning rate than the encoder
optimizer = torch.optim.AdamW([
    {'params': bert_parameters, 'lr': learning_rate},
    {'params': task_parameters, 'lr': learning_rate * 10},
])

# Linear decay to zero over the whole run, with no warmup
total_steps = len(dataloader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Training loop
# CrossEntropyLoss holds no per-call state, so one instance serves both tasks
# and does not need to be rebuilt for every batch.
criterion = nn.CrossEntropyLoss()

# from_pretrained() hands back the encoder in eval mode (its dropout disabled);
# switch the whole model, encoder included, to training mode before fine-tuning.
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0  # Sum of per-batch losses for this epoch

    # Batch-local names keep the full-dataset label tensors from being overwritten
    for input_ids, attention_mask, batch_labels_task1, batch_labels_task2 in dataloader:
        optimizer.zero_grad()
        logits_task1, logits_task2 = model(input_ids, attention_mask)

        # Both tasks contribute equally to the joint objective
        loss_task1 = criterion(logits_task1, batch_labels_task1)
        loss_task2 = criterion(logits_task2, batch_labels_task2)
        loss = loss_task1 + loss_task2

        loss.backward()
        optimizer.step()
        scheduler.step()  # The schedule advances once per optimizer step

        epoch_loss += loss.item()

    # Report the mean batch loss for the epoch
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")

# Evaluation and inference
model.eval()  # Inference mode for the modules: dropout is switched off

test_sentences = [
    "The movie was incredible!",
    "Very worst direction",
    "Acting was just normal.",
    "I regret watching it.",
    "Awesome screenplay",
    "Not bad,but not good either",
]

with torch.no_grad():  # No gradients are needed for prediction
    for review in test_sentences:
        # Each review is encoded and scored on its own (batch of one)
        encoded = tokenizer(
            review,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128,
        )
        task1_scores, task2_scores = model(encoded.input_ids, encoded.attention_mask)

        # Highest-scoring class index per task
        task1_index = task1_scores.argmax(dim=1).item()
        task2_index = task2_scores.argmax(dim=1).item()

        # Task 1 indices map back to the original string labels; task 2 stays numeric
        task1_name = le_task1.inverse_transform([task1_index])[0]

        print(f"Review: {review}")
        print(f"Task A Prediction: {task1_name}")
        print(f"Task B Prediction: {task2_index}\n")


Epoch 1, Loss: 2.251194655895233
Epoch 2, Loss: 1.7250710725784302
Epoch 3, Loss: 1.1445838262637456
Epoch 4, Loss: 0.6295222267508507
Epoch 5, Loss: 0.40588313589493435
Epoch 6, Loss: 0.20736613931755224
Epoch 7, Loss: 0.1292317577948173
Epoch 8, Loss: 0.08285702702899773
Epoch 9, Loss: 0.06256348267197609
Epoch 10, Loss: 0.05375462335844835
Epoch 11, Loss: 0.05163325214137634
Epoch 12, Loss: 0.03851027771209677
Epoch 13, Loss: 0.04125263774767518
Epoch 14, Loss: 0.033931197909017406
Epoch 15, Loss: 0.030026532399157684
Epoch 16, Loss: 0.03016003609324495
Epoch 17, Loss: 0.032965324353426695
Epoch 18, Loss: 0.02989083342254162
Epoch 19, Loss: 0.028645328401277464
Epoch 20, Loss: 0.028364600303272407
Review: The movie was incredible!
Task A Prediction: positive
Task B Prediction: 2

Review: Very worst direction
Task A Prediction: negative
Task B Prediction: 0

Review: Acting was just normal.
Task A Prediction: neutral
Task B Prediction: 1

Review: I regret watching it.
Task A Predictio