In [None]:
pip install torch transformers

 TASK 1 : Sentence Transformer Implementation

In [18]:
from transformers import BertModel, BertTokenizer
import torch
import torch.nn as nn
import torch.nn.functional as F

class SentenceTransformer(nn.Module):
    def __init__(self, model_name='bert-base-uncased', output_size=256):
        super(SentenceTransformer, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.output_size = output_size
        self.linear = nn.Linear(self.bert.config.hidden_size, self.output_size)

    def forward(self, input_sentences):
        input_ids = self.tokenizer(input_sentences, padding=True, truncation=True, return_tensors='pt')['input_ids']
        attention_mask = self.tokenizer(input_sentences, padding=True, truncation=True, return_tensors='pt')['attention_mask']

        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = F.normalize(pooled_output, p=2, dim=1)
        output = self.linear(pooled_output)
        return output

# Create an instance of SentenceTransformer model
model = SentenceTransformer()

# Test the model with sample sentences
sample_sentences = [
    "This is a sample sentence.",
    "I am testing the sentence transformer model.",
    "The quick brown fox jumps over the lazy dog."
]

# Get embeddings for sample sentences
embeddings = model(sample_sentences)

# Display the obtained embeddings
for i, embedding in enumerate(embeddings):
    print(f"Sentence: {sample_sentences[i]}")
    print(f"Embedding Size: {len(embedding)}")
    print(f"Embedding: {embedding}")
    print()


Sentence: This is a sample sentence.
Embedding Size: 256
Embedding: tensor([ 3.8649e-02, -3.6039e-02,  1.5646e-02,  3.7760e-04,  7.8217e-02,
         2.3166e-02,  2.4918e-02, -4.3088e-02, -8.5546e-03, -3.5421e-02,
         4.2934e-03,  3.2761e-02, -1.8365e-02,  1.2329e-02, -6.4766e-03,
         9.7012e-03,  3.3586e-02,  2.8880e-02,  1.8970e-02,  4.5649e-02,
        -1.7275e-03, -4.5884e-02,  6.2178e-02,  3.9118e-02,  4.3947e-04,
         3.0890e-02, -1.7390e-02,  2.7262e-03,  4.5604e-02, -3.4229e-02,
         2.1899e-03,  1.5095e-02, -5.1058e-02,  2.6418e-02,  2.1126e-03,
        -2.7099e-02, -3.5318e-02, -1.8752e-02, -8.0237e-03, -4.9100e-02,
        -3.2324e-02,  1.4430e-02, -1.7467e-02,  2.4817e-02, -4.5229e-02,
        -2.5471e-02,  6.9711e-03,  2.4945e-02, -2.0507e-02,  4.9558e-03,
         2.5778e-02,  2.7935e-02, -7.6811e-03,  3.9355e-02, -6.8624e-02,
         6.3510e-03, -2.5917e-02,  2.4518e-03,  2.4351e-02, -3.3620e-02,
         7.5653e-03, -7.1090e-02,  4.4897e-02, -1.4652e-

TASK 2 :- Multi-Task Learning Expansion

In [13]:
#DATA PREPROCESSING

import random
from sklearn.model_selection import train_test_split

def read_sentences_from_file(file_path):
    with open(file_path, 'r', errors='ignore') as file:
        sentences = file.readlines()
    return sentences

def parse_sentence_and_score(line):
    sentence, score = line.split('\t')
    return sentence.strip(), int(score.strip())

def extract_labeled_sentences(file_path):
    sentences_from_file = read_sentences_from_file(file_path)
    labeled_list = [parse_sentence_and_score(line) for line in sentences_from_file]
    return labeled_list

file1 = "yelp_labelled.txt"
file2 = "amazon_cells_labelled.txt"
file3 = "imdb_labelled.txt"

# Assuming file1 and file2 have the same format: sentence \t score \n
labeled_list1 = extract_labeled_sentences(file1)
labeled_list2 = extract_labeled_sentences(file2)
labeled_list3 = extract_labeled_sentences(file3)

combined_list = labeled_list1 + labeled_list2 + labeled_list3
random.shuffle(combined_list)

# Extract labels and sentences
labels = [label for _, label in combined_list][:1000]
sentences = [sentence for sentence, _ in combined_list][:1000]

sentences_train, sentences_test, labels_train, labels_test = train_test_split(sentences, labels, test_size=0.2, random_state=42)

print('labels length', len(labels))
print('sentences length', len(sentences))

print("List of labels:")
print(labels)
print("\nList of corresponding sentences:")
print(sentences)


labels length 1000
sentences length 1000
List of labels:
[0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 

In [14]:
#DEFINING THE MULTITASK SENTENCE TRANSFORMER

from transformers import BertModel, BertTokenizer
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiTaskSentenceTransformer(nn.Module):
    def __init__(self, model_name='bert-base-uncased'):
        super(MultiTaskSentenceTransformer, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.shared_linear = nn.Linear(self.bert.config.hidden_size, 256)

        # Task A: Sentence Classification
        self.classification_head = nn.Linear(256, 2)  # Assuming 2 classes: Positive, Negative

        # Task B: Sentiment Analysis
        self.sentiment_head = nn.Linear(256, 1)  # Regression head for sentiment score

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs[1]  # CLS token output
        shared_embedding = self.shared_linear(cls_output)
        shared_embedding = F.normalize(shared_embedding, p=2, dim=1)

        # Task A: Classification
        logits = self.classification_head(shared_embedding)

        # Task B: Sentiment Analysis
        # sentiment_score = self.sentiment_head(shared_embedding)
        sentiment_score = torch.sigmoid(self.sentiment_head(shared_embedding))

        return logits, sentiment_score

    def encode(self, sentences):
        inputs = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        with torch.no_grad():
            shared_embedding = self.forward(input_ids, attention_mask)[0]  # Only take shared_embedding
        return shared_embedding

    def predict(self, sentences):
        inputs = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']

        logits, sentiment_scores = self.forward(input_ids, attention_mask)

        # For classification, apply softmax to get probabilities and then argmax for the predicted label
        probs = F.softmax(logits, dim=1)
        predicted_labels = torch.argmax(probs, dim=1)

        return predicted_labels, sentiment_scores.squeeze()

model = MultiTaskSentenceTransformer()


In [15]:
#SETTING UP THE TRAINING LOOP

from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
import torch.optim as optim

inputs = model.tokenizer(sentences_train, padding=True, truncation=True, return_tensors='pt')
train_dataset = TensorDataset(
    inputs['input_ids'], inputs['attention_mask'],
    torch.tensor(labels_train, dtype=torch.long)
)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Define optimizer
optimizer = optim.AdamW(model.parameters(), lr=1e-5)

# Define loss functions
classification_loss_fn = nn.CrossEntropyLoss()
# sentiment_loss_fn = nn.MSELoss()
sentiment_loss_fn = nn.BCELoss()

# Training loop
num_epochs = 3
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataloader:
        input_ids, attention_mask, labels_classification = batch
        optimizer.zero_grad()

        logits, sentiment_scores = model(input_ids, attention_mask)

        classification_loss = classification_loss_fn(logits, labels_classification)
        sentiment_loss = sentiment_loss_fn(sentiment_scores.squeeze(), labels_classification.float())

        loss = classification_loss + sentiment_loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_dataloader)}")


Epoch 1/3, Loss: 1.3074562561511993
Epoch 2/3, Loss: 1.058390587568283
Epoch 3/3, Loss: 0.9937888383865356


In [4]:
# Sample sentences for testing
from sklearn.metrics import precision_score, recall_score, f1_score

model.eval()
with torch.no_grad():
    predicted_labels, sentiment_scores = model.predict(sentences_test)

correct_predictions = (predicted_labels == torch.tensor(labels_test)).sum().item()
accuracy = correct_predictions / len(labels_test)

precision = precision_score(labels_test, predicted_labels, average='binary')
recall = recall_score(labels_test, predicted_labels, average='binary')
f1 = f1_score(labels_test, predicted_labels, average='binary')

label_names = ["Negative", "Positive"]
print("Predicted Labels and Sentiment Scores for 50 Sentences:")
for i in range(50):
    print(f"Sentence: {sentences_test[i]}")
    print(f"Predicted Label: {label_names[predicted_labels[i]]}")
    print(f"Sentiment Score: {sentiment_scores[i]:.4f}")
    print()

print(f"Classification Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Predicted Labels and Sentiment Scores for 50 Sentences:
Sentence: Best Buffet in town, for the price you cannot beat it.
Predicted Label: Positive
Sentiment Score: 0.5888

Sentence: I came out of it feeling angry.
Predicted Label: Negative
Sentiment Score: 0.4058

Sentence: Just don't know why they were so slow.
Predicted Label: Negative
Sentiment Score: 0.4059

Sentence: Cinematography: The film was shot in an interesting way.
Predicted Label: Positive
Sentiment Score: 0.6056

Sentence: It has kept up very well.
Predicted Label: Positive
Sentiment Score: 0.6056

Sentence: This place is overpriced, not consistent with their boba, and it really is OVERPRICED!
Predicted Label: Negative
Sentiment Score: 0.4064

Sentence: It's an empty, hollow shell of a movie.
Predicted Label: Negative
Sentiment Score: 0.4061

Sentence: The servers are not pleasant to deal with and they don't always honor Pizza Hut coupons.
Predicted Label: Negative
Sentiment Score: 0.4060

Sentence: Good value, works fin

Classification Accuracy: 91.00%
Precision: 0.9192
Recall: 0.9010
F1 Score: 0.9100

Epoch 1/3, Loss: 1.2992243707180022
Epoch 2/3, Loss: 1.058097721338272
Epoch 3/3, Loss: 0.9869545352458954

1000 instances

In [9]:
#STORING OUTPUTS IN A FILE

output_file = "predicted_labels_and_scores.txt"

with open(output_file, 'w') as f:
    f.write("Predicted Labels and Sentiment Scores:\n")
    for i, sentence in enumerate(sentences_test):
        f.write(f"Sentence: {sentence}\n")
        f.write(f"Predicted Label: {label_names[predicted_labels[i]]}\n")
        f.write(f"Sentiment Score: {sentiment_scores[i]:.4f}\n")
        f.write("\n")
    f.write(f"Classification Accuracy: {accuracy * 100:.2f}%\n")
    f.write(f"Precision: {precision:.4f}\n")
    f.write(f"Recall: {recall:.4f}\n")
    f.write(f"F1 Score: {f1:.4f}\n")

TRANSFER LEARNING

In [None]:
model = MultiTaskSentenceTransformer()

# Freeze the transformer backbone and Unfreeze the shared linear layer and task-specific heads
model = MultiTaskSentenceTransformer()

for param in model.bert.parameters():
    param.requires_grad = False
for param in model.shared_linear.parameters():
    param.requires_grad = True
for param in model.classification_head.parameters():
    param.requires_grad = True # False - to Freeze the classification head (Task A)
for param in model.sentiment_head.parameters():
    param.requires_grad = True # False - to Freeze the sentiment head (Task B)

# Define optimizer only for the unfrozen parameters
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-5
)

Layer-wise Learning Rate Implementation

In [5]:
#RUN THE DATA PREPROCESSING AND DEFINING THE MULTITASK SENTENCE TRANSFORMER BLOCK

#SETTING UP THE TRAINING LOOP with Layer-wise Learning Rate Implementation

from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
import torch.optim as optim

inputs = model.tokenizer(sentences_train, padding=True, truncation=True, return_tensors='pt')
train_dataset = TensorDataset(
    inputs['input_ids'], inputs['attention_mask'],
    torch.tensor(labels_train, dtype=torch.long)
)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Define optimizer with LAYER-WISE LEARNING RATE IMPLEMENTATION
# optimizer = optim.AdamW(model.parameters(), lr=1e-5)
optimizer = optim.AdamW([
    {'params': model.bert.parameters(), 'lr': 1e-5},  # Lower learning rate for BERT layers
    {'params': model.shared_linear.parameters(), 'lr': 2e-5},  # Higher learning rate for the shared linear layer
    {'params': model.classification_head.parameters(), 'lr': 2e-5},  # Higher learning rate for classification head
    {'params': model.sentiment_head.parameters(), 'lr': 2e-5}  # Higher learning rate for sentiment head
])

# Define loss functions
classification_loss_fn = nn.CrossEntropyLoss()
# sentiment_loss_fn = nn.MSELoss()
sentiment_loss_fn = nn.BCELoss()

# Training loop
num_epochs = 3
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for batch in train_dataloader:
        input_ids, attention_mask, labels_classification = batch
        optimizer.zero_grad()

        logits, sentiment_scores = model(input_ids, attention_mask)

        classification_loss = classification_loss_fn(logits, labels_classification)
        sentiment_loss = sentiment_loss_fn(sentiment_scores.squeeze(), labels_classification.float())

        loss = classification_loss + sentiment_loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_dataloader)}")


Epoch 1/3, Loss: 0.9483633637428284
Epoch 2/3, Loss: 0.9125769972801209
Epoch 3/3, Loss: 0.8977178597450256


In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score

model.eval()
with torch.no_grad():
    predicted_labels, sentiment_scores = model.predict(sentences_test)

correct_predictions = (predicted_labels == torch.tensor(labels_test)).sum().item()
accuracy = correct_predictions / len(labels_test)

precision = precision_score(labels_test, predicted_labels, average='binary')
recall = recall_score(labels_test, predicted_labels, average='binary')
f1 = f1_score(labels_test, predicted_labels, average='binary')

# Print the outputs
# label_names = ["Negative", "Positive"]
# print("Predicted Labels and Sentiment Scores:")
# for i, sentence in enumerate(sentences_test):
#     print(f"Sentence: {sentence}")
#     print(f"Predicted Label: {label_names[predicted_labels[i].item()]}")
#     print(f"Sentiment Score: {sentiment_scores[i].item()}")
#     print()

label_names = ["Negative", "Positive"]
print("Predicted Labels and Sentiment Scores for 50 Sentences:")
for i in range(50):
    print(f"Sentence: {sentences_test[i]}")
    print(f"Predicted Label: {label_names[predicted_labels[i]]}")
    print(f"Sentiment Score: {sentiment_scores[i]:.4f}")
    print()

print(f"Classification Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Predicted Labels and Sentiment Scores for 50 Sentences:
Sentence: Best Buffet in town, for the price you cannot beat it.
Predicted Label: Positive
Sentiment Score: 0.6276

Sentence: I came out of it feeling angry.
Predicted Label: Negative
Sentiment Score: 0.3830

Sentence: Just don't know why they were so slow.
Predicted Label: Negative
Sentiment Score: 0.3827

Sentence: Cinematography: The film was shot in an interesting way.
Predicted Label: Positive
Sentiment Score: 0.6277

Sentence: It has kept up very well.
Predicted Label: Positive
Sentiment Score: 0.6276

Sentence: This place is overpriced, not consistent with their boba, and it really is OVERPRICED!
Predicted Label: Negative
Sentiment Score: 0.3833

Sentence: It's an empty, hollow shell of a movie.
Predicted Label: Negative
Sentiment Score: 0.3832

Sentence: The servers are not pleasant to deal with and they don't always honor Pizza Hut coupons.
Predicted Label: Negative
Sentiment Score: 0.3833

Sentence: Good value, works fin

Classification Accuracy: 91.50%
Precision: 0.8962
Recall: 0.9406
F1 Score: 0.9179

Epoch 1/3, Loss: 0.9483633637428284
Epoch 2/3, Loss: 0.9125769972801209
Epoch 3/3, Loss: 0.8977178597450256