- Importing Libraries

In [1]:
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch.optim import Adam
from sklearn.metrics import f1_score

# Task 1
- Loading validation embeddings for task 1

In [2]:
# Load the embeddings from the validation set.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
val_embeddings_file = "val_embeddings.pkl"
with open(val_embeddings_file, "rb") as embeddings_fh:  # closes the handle once loaded
    val_embeddings_loaded = pickle.load(embeddings_fh)

# Load the Task 1 labels from the validation set
task1_val_labels = "task1_labels_dev.pkl"
with open(task1_val_labels, "rb") as labels_fh:
    task1_val_labels_loaded = pickle.load(labels_fh)

In [3]:
# Fixed label -> class-index table; the position in the tuple is the class index.
# The string "-1" is a real label value here and gets class 0.
mapping = {
    name: index
    for index, name in enumerate(
        ("-1", "sadness", "joy", "fear", "anger", "surprise", "disgust", "neutral")
    )
}

# One integer array per entry of the labels dict (labels are stringified before lookup)
val_labels = [
    np.array([mapping[str(label)] for label in entry_labels])
    for entry_labels in task1_val_labels_loaded.values()
]

# One embedding matrix per entry of the embeddings dict.
# NOTE(review): labels and embeddings are paired by position later on, which
# presumes both dicts list their keys in the same order — confirm.
val_embeddings = list(val_embeddings_loaded.values())

### Dataset Class and Dataloader for Validation Set

In [4]:
class EmotionDataset(Dataset):
    """Index-aligned pairs of an embedding matrix and its label sequence.

    Conversion to tensors happens on access, not at construction time.
    """

    def __init__(self, embeddings, labels):
        # Parallel sequences: embeddings[i] is served together with labels[i]
        self.embeddings = embeddings
        self.labels = labels

    def __len__(self):
        # The number of items is taken from the embeddings sequence
        return len(self.embeddings)

    def __getitem__(self, idx):
        features = torch.tensor(self.embeddings[idx])
        targets = torch.tensor(self.labels[idx], dtype=torch.long)
        return features, targets
    
def collate_fn(batch):
    """Pad a batch of variable-length (embeddings, labels) items to one length.

    Embeddings are padded with 0 and labels with -1, so padded positions can
    be identified later by their label value.

    Returns:
        (embeddings_pad, labels_pad), both with the batch dimension first.
    """
    pad = torch.nn.utils.rnn.pad_sequence
    embedding_seqs, label_seqs = zip(*batch)
    embeddings_pad = pad(embedding_seqs, batch_first=True, padding_value=0)
    labels_pad = pad(label_seqs, batch_first=True, padding_value=-1)
    return embeddings_pad, labels_pad

# Wrap the Task 1 validation data; batches are served in a fixed order (no shuffling)
val_dataset = EmotionDataset(embeddings=val_embeddings, labels=val_labels)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=32,
    collate_fn=collate_fn,
    shuffle=False,
)

## Model - M1 (Task 1)

In [5]:
import torch.nn as nn

class RNNModel(nn.Module):
    """Plain RNN whose per-step outputs are passed through a linear layer."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # The attribute names `rnn` and `fc` become the state-dict key prefixes,
        # so they have to match the saved checkpoint.
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Only the per-step outputs are used; the final hidden state is dropped
        step_outputs, _final_hidden = self.rnn(x)
        return self.fc(step_outputs)
    
# Initialize the model.
# The sizes must agree with the tensors stored in the checkpoint, otherwise
# load_state_dict() below raises on a shape mismatch.
INPUT_SIZE = 384 # Dimension of the input embeddings
HIDDEN_SIZE = 128 # Dimension of the hidden state
OUTPUT_SIZE = 8 # Number of classes (the label mapping has 8 entries)

MODEL_M1 = RNNModel(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# Load the model from the saved state.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the evaluation in this notebook never moves data off the CPU.
TASK_1_MODEL_PATH = "M1_Task1.pth"
MODEL_M1.load_state_dict(torch.load(TASK_1_MODEL_PATH, map_location="cpu"))

<All keys matched successfully>

In [6]:
# Compute F1 metrics
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# Predictions and labels should be flattened arrays
def compute_metrics(predictions, labels):
    """Score flattened predictions against flattened labels, skipping padding.

    Positions whose label equals -1 (the padding value) are removed from both
    arrays before any metric is computed.

    Returns:
        (weighted_f1, macro_f1, accuracy)
    """
    keep = labels != -1
    y_true, y_pred = labels[keep], predictions[keep]

    # F1 under both averaging modes, then accuracy, on the same filtered arrays
    scores = [f1_score(y_true, y_pred, average=mode) for mode in ('weighted', 'macro')]
    scores.append(accuracy_score(y_true, y_pred))
    return tuple(scores)

In [7]:
# Evaluate the model on the validation set
# Put M1 into evaluation mode before scoring it on the validation batches
MODEL_M1.eval()

# NOTE: `val_labels` is rebound below to the flattened batch labels, replacing
# the per-entry label list built earlier. Re-run the label-preparation cell
# before rebuilding `val_dataset`.
val_predictions, val_labels = [], []

with torch.no_grad():  # inference only, no gradient tracking
    for batch_embeddings, batch_labels in val_dataloader:
        logits = MODEL_M1(batch_embeddings)

        # Merge all leading axes so that each sequence position becomes one row
        flat_logits = logits.view(-1, logits.shape[-1])
        flat_labels = batch_labels.view(-1)

        # Highest-scoring class for every position (padded positions included)
        batch_predictions = flat_logits.argmax(dim=-1)

        val_predictions.append(batch_predictions.detach().cpu().numpy())
        val_labels.append(flat_labels.detach().cpu().numpy())

# Join the per-batch arrays into one flat array each
val_predictions = np.concatenate(val_predictions)
val_labels = np.concatenate(val_labels)

# compute_metrics discards the positions labelled -1 (padding)
weighted_f1_val, macro_f1_val, acc = compute_metrics(val_predictions, val_labels)

# Report the scores
print("Validation Weighted F1:", weighted_f1_val)
print("Validation Macro F1:", macro_f1_val)
print("Validation Accuracy:", acc)

Validation Weighted F1: 0.8887216999599405
Validation Macro F1: 0.8618793949235899
Validation Accuracy: 0.8896750902527076


## Model - M2
- This is a bidirectional Gated Recurrent Unit (GRU) model

In [8]:
class GRUModel(nn.Module):
    """Bidirectional GRU whose per-step outputs are passed through a linear layer."""

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # The attribute names `gru` and `fc` become the state-dict key prefixes,
        # so they have to match the saved checkpoint.
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward outputs are concatenated, hence twice the hidden size
        self.fc = nn.Linear(2 * hidden_size, output_size)

    def forward(self, x):
        # Only the per-step outputs are used; the final hidden state is dropped
        step_outputs, _final_hidden = self.gru(x)
        return self.fc(step_outputs)
    
# Initialize the model.
# The sizes must agree with the tensors stored in the checkpoint, otherwise
# load_state_dict() below raises on a shape mismatch.
INPUT_SIZE = 384 # Dimension of the input embeddings
HIDDEN_SIZE = 128 # Dimension of the hidden state
OUTPUT_SIZE = 8 # Number of classes (the label mapping has 8 entries)

MODEL_M2 = GRUModel(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# Load the model from the saved state.
# Despite the constant's name, this is the Task 1 checkpoint of model M2.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the evaluation in this notebook never moves data off the CPU.
TASK_2_MODEL_PATH = "M2_Task1.pth"
MODEL_M2.load_state_dict(torch.load(TASK_2_MODEL_PATH, map_location="cpu"))

<All keys matched successfully>

In [9]:
# Evaluate the model on the validation set
# Put M2 into evaluation mode before scoring it on the validation batches
MODEL_M2.eval()

# NOTE: `val_predictions` / `val_labels` are rebound here, discarding whatever
# they held before this cell ran.
val_predictions, val_labels = [], []

with torch.no_grad():  # inference only, no gradient tracking
    for batch_embeddings, batch_labels in val_dataloader:
        logits = MODEL_M2(batch_embeddings)

        # Merge all leading axes so that each sequence position becomes one row
        flat_logits = logits.view(-1, logits.shape[-1])
        flat_labels = batch_labels.view(-1)

        # Highest-scoring class for every position (padded positions included)
        batch_predictions = flat_logits.argmax(dim=-1)

        val_predictions.append(batch_predictions.detach().cpu().numpy())
        val_labels.append(flat_labels.detach().cpu().numpy())

# Join the per-batch arrays into one flat array each
val_predictions = np.concatenate(val_predictions)
val_labels = np.concatenate(val_labels)

# compute_metrics discards the positions labelled -1 (padding)
weighted_f1_val, macro_f1_val, acc = compute_metrics(val_predictions, val_labels)

# Report the scores
print("Validation Weighted F1:", weighted_f1_val)
print("Validation Macro F1:", macro_f1_val)
print("Validation Accuracy:", acc)

Validation Weighted F1: 0.9526625915951877
Validation Macro F1: 0.9331200122847985
Validation Accuracy: 0.9527797833935018


# Task 2
- Emotion Flip Reasoning
- Loading validation labels for task 2

In [10]:
# Load the embeddings.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
val_embeddings_file = "val_embeddings.pkl"
with open(val_embeddings_file, "rb") as embeddings_fh:  # closes the handle once loaded
    val_embeddings_loaded = pickle.load(embeddings_fh)

# Load the labels for Task 2
task2_val_labels = "task2_labels_dev.pkl"
with open(task2_val_labels, "rb") as labels_fh:
    task2_val_labels_loaded = pickle.load(labels_fh)

# Get all unique labels and handle null values
unique_labels = set()

for key in task2_val_labels_loaded.keys():
    entry_labels = task2_val_labels_loaded[key]
    # Missing (None) labels are replaced by 0; the loaded sequence is patched in place
    for i in range(len(entry_labels)):
        if entry_labels[i] is None:
            entry_labels[i] = 0
    unique_labels.update(entry_labels)

# Sanity check: shows which distinct label values remain after the replacement
print(unique_labels)

{0.0, 1.0}


In [11]:
# Embedding matrices, in the key order of the embeddings dict
val_embeddings = list(val_embeddings_loaded.values())

# Task 2 labels as plain Python ints, in the key order of the labels dict.
# NOTE(review): labels and embeddings are paired by position later on, which
# presumes both dicts list their keys in the same order — confirm.
val_labels = [
    [int(label) for label in entry_labels]
    for entry_labels in task2_val_labels_loaded.values()
]

- Dataset Class and Dataloader

In [12]:
import torch
from torch.utils.data import Dataset, DataLoader

class EmotionDataset(Dataset):
    """Serves (embedding matrix, label sequence) pairs as tensors.

    This re-declares the class from the Task 1 section with the same behavior.
    """

    def __init__(self, embeddings, labels):
        # Two parallel sequences indexed together
        self.embeddings, self.labels = embeddings, labels

    def __len__(self):
        # Length follows the embeddings sequence
        return len(self.embeddings)

    def __getitem__(self, idx):
        embedding_tensor = torch.tensor(self.embeddings[idx])
        # Labels are always returned as int64
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        return embedding_tensor, label_tensor
    
def collate_fn(batch):
    """Stack variable-length (embeddings, labels) items into padded batch tensors.

    Embeddings are padded with 0, labels with -1 (the value the metric
    functions treat as padding). Both outputs are batch-first.
    This re-declares the function from the Task 1 section with the same behavior.
    """
    pad_sequence = torch.nn.utils.rnn.pad_sequence
    per_item_embeddings, per_item_labels = zip(*batch)
    return (
        pad_sequence(per_item_embeddings, batch_first=True, padding_value=0),
        pad_sequence(per_item_labels, batch_first=True, padding_value=-1),
    )

In [13]:
# Wrap the Task 2 validation data; batches are served in a fixed order (no shuffling)
val_dataset = EmotionDataset(embeddings=val_embeddings, labels=val_labels)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=32,
    collate_fn=collate_fn,
    shuffle=False,
)

## Model - M3

In [14]:
import torch.nn as nn

class RNNModel(nn.Module):
    """RNN encoder with a linear layer applied to every time step.

    This re-declares the class from the Task 1 section with the same behavior.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # `rnn` / `fc` are the state-dict key prefixes expected by the checkpoint
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # The RNN's second return value (final hidden state) is not needed
        per_step, _ = self.rnn(x)
        logits = self.fc(per_step)
        return logits
    
# Initialize the model.
# The sizes must agree with the tensors stored in the checkpoint, otherwise
# load_state_dict() below raises on a shape mismatch.
INPUT_SIZE = 384 # Dimension of the input embeddings
HIDDEN_SIZE = 128 # Dimension of the hidden state
OUTPUT_SIZE = 2 # Number of classes

MODEL_M3 = RNNModel(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# Load the model from the saved state.
# Despite the constant's name, this is the Task 2 checkpoint of model M3.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the evaluation in this notebook never moves data off the CPU.
TASK_3_MODEL_PATH = "M3_Task2.pth"
MODEL_M3.load_state_dict(torch.load(TASK_3_MODEL_PATH, map_location="cpu"))

<All keys matched successfully>

In [15]:
# Compute F1 metrics for Task 2
from sklearn.metrics import f1_score

# Predictions and labels should be flattened arrays
def compute_metrics_task2(predictions, labels):
    """Score flattened Task 2 predictions, skipping padding and agreed zeros.

    A position is scored only when its label is not the padding value (-1)
    and the label and the prediction are not both 0.

    Note: every (label 0, prediction 0) pair is removed before scoring, so
    class 0 is never predicted correctly in the scored subset. Its per-class
    F1 is therefore 0, which pulls the macro average and the accuracy down
    compared with scoring every non-padded position.

    Args:
        predictions: flat array of predicted class indices.
        labels: flat array of true class indices, with -1 marking padding.

    Returns:
        (weighted_f1, macro_f1, f1, accuracy). All four are 0.0 when no
        position survives the mask.
    """
    # Ignore the padding value -1
    # Also ignore where both the prediction and label are 0
    mask = (labels != -1) & ((labels != 0) | (predictions != 0))
    masked_labels = labels[mask]
    masked_predictions = predictions[mask]

    # Both arrays are filtered by the same mask, so one length check suffices
    if len(masked_labels) == 0:
        # Same arity as the normal path, so callers can always unpack four values
        return 0.0, 0.0, 0.0, 0.0

    weighted_f1 = f1_score(masked_labels, masked_predictions, average='weighted')
    macro_f1 = f1_score(masked_labels, masked_predictions, average='macro')
    f1 = f1_score(masked_labels, masked_predictions)  # default averaging (binary, positive class 1)
    accuracy = accuracy_score(masked_labels, masked_predictions)
    return weighted_f1, macro_f1, f1, accuracy

In [16]:
# Evaluate the model on the validation set
# Put M3 into evaluation mode before scoring it on the Task 2 validation batches
MODEL_M3.eval()

# NOTE: `val_labels` is rebound below to the flattened batch labels, replacing
# the per-entry label list built earlier. Re-run the label-preparation cell
# before rebuilding `val_dataset`.
val_predictions, val_labels = [], []

with torch.no_grad():  # inference only, no gradient tracking
    for batch_embeddings, batch_labels in val_dataloader:
        logits = MODEL_M3(batch_embeddings)

        # Merge all leading axes so that each sequence position becomes one row
        flat_logits = logits.view(-1, logits.shape[-1])
        flat_labels = batch_labels.view(-1)

        # Highest-scoring class for every position (padded positions included)
        batch_predictions = flat_logits.argmax(dim=-1)

        val_predictions.append(batch_predictions.detach().cpu().numpy())
        val_labels.append(flat_labels.detach().cpu().numpy())

# Join the per-batch arrays into one flat array each
val_predictions = np.concatenate(val_predictions)
val_labels = np.concatenate(val_labels)

# compute_metrics_task2 drops padding and positions where label and prediction are both 0
weighted_f1_val, macro_f1_val, f1_val, acc = compute_metrics_task2(val_predictions, val_labels)

# Report the scores
print("Validation Weighted F1:", weighted_f1_val)
print("Validation Macro F1:", macro_f1_val)
print("Validation F1:", f1_val)
print("Validation Accuracy:", acc)

Validation Weighted F1: 0.21824660103141114
Validation Macro F1: 0.12222222222222222
Validation F1: 0.24444444444444444
Validation Accuracy: 0.13924050632911392


## Model - M4
- Bi-directional GRU model for this task

In [17]:
import torch.nn as nn

class GRUModel(nn.Module):
    """Bidirectional GRU with a linear layer applied to every time step.

    NOTE: this replaces the GRUModel declared in the Task 1 section, and the
    recurrent layer is stored as `rnn` here (there it is `gru`), so the
    state-dict keys of the two versions differ.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # `rnn` / `fc` are the state-dict key prefixes expected by the checkpoint
        self.rnn = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Forward and backward outputs are concatenated, hence twice the hidden size
        self.fc = nn.Linear(2 * hidden_size, output_size)

    def forward(self, x):
        # Only the per-step outputs are used; the final hidden state is dropped
        per_step, _ = self.rnn(x)
        return self.fc(per_step)
    
# Initialize the model.
# The sizes must agree with the tensors stored in the checkpoint, otherwise
# load_state_dict() below raises on a shape mismatch.
INPUT_SIZE = 384 # Dimension of the input embeddings
HIDDEN_SIZE = 128 # Dimension of the hidden state
OUTPUT_SIZE = 2 # Number of classes

MODEL_M4 = GRUModel(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

# Load the model from the saved state.
# Despite the constant's name, this is the Task 2 checkpoint of model M4.
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the evaluation in this notebook never moves data off the CPU.
TASK_4_MODEL_PATH = "M4_Task2.pth"
MODEL_M4.load_state_dict(torch.load(TASK_4_MODEL_PATH, map_location="cpu"))

<All keys matched successfully>

In [18]:
# Evaluate the model on the validation set
# Put M4 into evaluation mode before scoring it on the Task 2 validation batches
MODEL_M4.eval()

# NOTE: `val_predictions` / `val_labels` are rebound here, discarding whatever
# they held before this cell ran.
val_predictions, val_labels = [], []

with torch.no_grad():  # inference only, no gradient tracking
    for batch_embeddings, batch_labels in val_dataloader:
        logits = MODEL_M4(batch_embeddings)

        # Merge all leading axes so that each sequence position becomes one row
        flat_logits = logits.view(-1, logits.shape[-1])
        flat_labels = batch_labels.view(-1)

        # Highest-scoring class for every position (padded positions included)
        batch_predictions = flat_logits.argmax(dim=-1)

        val_predictions.append(batch_predictions.detach().cpu().numpy())
        val_labels.append(flat_labels.detach().cpu().numpy())

# Join the per-batch arrays into one flat array each
val_predictions = np.concatenate(val_predictions)
val_labels = np.concatenate(val_labels)

# compute_metrics_task2 drops padding and positions where label and prediction are both 0
weighted_f1_val, macro_f1_val, f1_val, acc = compute_metrics_task2(val_predictions, val_labels)

# Report the scores
print("Validation Weighted F1:", weighted_f1_val)
print("Validation Macro F1:", macro_f1_val)
print("Validation F1:", f1_val)
print("Validation Accuracy:", acc)

Validation Weighted F1: 0.6308483224132813
Validation Macro F1: 0.36282722513089005
Validation F1: 0.7256544502617801
Validation Accuracy: 0.5694330320460148


### Testing Models M1–M4 on a Test Set

- I am assuming the test set is in the same format as the train and validation JSON files
- I will load the JSON file here and compute the Sentence-BERT embeddings
- I will then follow the same methodology as above to compute the F1 scores on the test set

In [19]:
import json
from sentence_transformers import SentenceTransformer
import pickle

# Load the SentenceTransformer model for encoding the sentences
model = SentenceTransformer('all-MiniLM-L6-v2')

### Uncomment the code in the cells below to run the evaluation on test data

In [20]:
# """
# Import the test json file
# - Enter file path here
# """
# TEST_PATH = "val.json"  ### ENTER HERE
# with open(TEST_PATH) as f:
#     test_data = json.load(f)

# # Store the embeddings in a dictionary
# test_embeddings_dict = {}
# test_labels_task1_dict = {}
# test_labels_task2_dict = {}
# i = 0

# for test_dict in test_data:
#     # Get the relevant information from the dictionary
#     episode_key = test_dict["episode"]
#     utterances = test_dict["utterances"]

#     # Get the emotion labels
#     task1_labels = test_dict["emotions"]
#     task2_labels = test_dict["triggers"]

#     # Generate embeddings for the utterances
#     embeddings = model.encode(utterances)
#     i+=1
#     # Print the episode key
#     print(i)

#     # Store the embedding
#     test_embeddings_dict[episode_key] = embeddings
#     test_labels_task1_dict[episode_key] = task1_labels
#     test_labels_task2_dict[episode_key] = task2_labels

### Task 1 - Test Set

In [21]:
# # Encode the labels by a fixed mapping
# mapping = {
#     "-1": 0,
#     "sadness": 1,
#     "joy": 2,
#     "fear": 3,
#     "anger": 4,
#     "surprise": 5,
#     "disgust": 6,
#     "neutral": 7
# }

# # Convert the labels to integers
# test_labels_task1 = [np.array([mapping[str(label)] for label in test_labels_task1_dict[key]]) for key in test_labels_task1_dict.keys()]

# # Convert the embeddings to a list
# test_embeddings = [test_embeddings_dict[key] for key in test_embeddings_dict.keys()]

In [22]:
# # Make test dataset and dataloader
# test_dataset_task1 = EmotionDataset(test_embeddings, test_labels_task1)
# test_dataloader_task1 = DataLoader(test_dataset_task1, batch_size=32, shuffle=False, collate_fn=collate_fn)

In [23]:
# # Evaluate Model M1 on the test set
# MODEL_M1.eval()

# test_predictions = []
# test_labels = []

# with torch.no_grad():
#     for embeddings, labels in test_dataloader_task1:
#         output = MODEL_M1(embeddings)
        
#         # Flatten the output and labels
#         output = output.view(-1, output.shape[-1])
#         labels = labels.view(-1)

#         # Get the predictions
#         predictions = output.argmax(dim=-1)

#         test_predictions.append(predictions.detach().cpu().numpy())
#         test_labels.append(labels.detach().cpu().numpy())

# test_predictions = np.concatenate(test_predictions)
# test_labels = np.concatenate(test_labels)

# weighted_f1_test, macro_f1_test, acc_test = compute_metrics(test_predictions, test_labels)

# # Print the F1 scores
# print("Model - M1 on Test Set")
# print("Test Weighted F1:", weighted_f1_test)
# print("Test Macro F1:", macro_f1_test)

# print()
# # Evaluate Model M2 on the test set
# MODEL_M2.eval()

# test_predictions = []
# test_labels = []

# with torch.no_grad():
#     for embeddings, labels in test_dataloader_task1:
#         output = MODEL_M2(embeddings)
        
#         # Flatten the output and labels
#         output = output.view(-1, output.shape[-1])
#         labels = labels.view(-1)

#         # Get the predictions
#         predictions = output.argmax(dim=-1)

#         test_predictions.append(predictions.detach().cpu().numpy())
#         test_labels.append(labels.detach().cpu().numpy())

# test_predictions = np.concatenate(test_predictions)
# test_labels = np.concatenate(test_labels)

# weighted_f1_test, macro_f1_test, acc_test = compute_metrics(test_predictions, test_labels)

# # Print the F1 scores
# print("Model - M2 on Test Set")
# print("Test Weighted F1:", weighted_f1_test)
# print("Test Macro F1:", macro_f1_test)

### Task 2 - Test Set

In [24]:
# # Get all unique labels and handle null values
# unique_labels = set()

# for key in test_labels_task2_dict.keys():
#     # Check for None values and change them to 0
#     for i in range(len(test_labels_task2_dict[key])):
#         if test_labels_task2_dict[key][i] == None:
#             test_labels_task2_dict[key][i] = 0
#     unique_labels.update(test_labels_task2_dict[key])

# print(unique_labels)

In [25]:
# # Convert embeddings to lists
# test_embeddings = [test_embeddings_dict[key] for key in test_embeddings_dict.keys()]

# # convert labels to lists
# test_labels = [test_labels_task2_dict[key] for key in test_labels_task2_dict.keys()]

# # Convert labels to integers
# test_labels = [[int(label) for label in labels] for labels in test_labels]

# # Make test dataset and dataloader
# test_dataset_task2 = EmotionDataset(test_embeddings, test_labels)
# test_dataloader_task2 = DataLoader(test_dataset_task2, batch_size=32, shuffle=False, collate_fn=collate_fn)

In [26]:
# # Evaluate Model M3 on the test set
# MODEL_M3.eval()

# test_predictions = []
# test_labels = []

# with torch.no_grad():
#     for embeddings, labels in test_dataloader_task2:
#         output = MODEL_M3(embeddings)
        
#         # Flatten the output and labels
#         output = output.view(-1, output.shape[-1])
#         labels = labels.view(-1)

#         # Get the predictions
#         predictions = output.argmax(dim=-1)

#         test_predictions.append(predictions.detach().cpu().numpy())
#         test_labels.append(labels.detach().cpu().numpy())

# test_predictions = np.concatenate(test_predictions)
# test_labels = np.concatenate(test_labels)

# weighted_f1_test, macro_f1_test, f1_test, acc_test = compute_metrics_task2(test_predictions, test_labels)

# # Print the F1 scores
# print("Model - M3 on Test Set")
# print("Test Weighted F1:", weighted_f1_test)
# print("Test Macro F1:", macro_f1_test)
# print("Test F1:", f1_test)

# print()
# # Evaluate Model M4 on the test set
# MODEL_M4.eval()

# test_predictions = []
# test_labels = []

# with torch.no_grad():
#     for embeddings, labels in test_dataloader_task2:
#         output = MODEL_M4(embeddings)
        
#         # Flatten the output and labels
#         output = output.view(-1, output.shape[-1])
#         labels = labels.view(-1)

#         # Get the predictions
#         predictions = output.argmax(dim=-1)

#         test_predictions.append(predictions.detach().cpu().numpy())
#         test_labels.append(labels.detach().cpu().numpy())

# test_predictions = np.concatenate(test_predictions)
# test_labels = np.concatenate(test_labels)

# weighted_f1_test, macro_f1_test, f1_test, acc_test = compute_metrics_task2(test_predictions, test_labels)

# # Print the F1 scores
# print("Model - M4 on Test Set")
# print("Test Weighted F1:", weighted_f1_test)
# print("Test Macro F1:", macro_f1_test)
# print("Test F1:", f1_test)