In [11]:
import torch
import torch.nn as nn

# Location of the saved weights (.pth) that are loaded into the model below
model_path = "M2_Task1.pth"

# Sizes handed to the GRUModel constructor
INPUT_SIZE = 384   # width of each input embedding vector
HIDDEN_SIZE = 128  # width of the GRU hidden state
OUTPUT_SIZE = 8    # number of output classes

class GRUModel(nn.Module):
    """Bidirectional GRU followed by a linear layer applied to every GRU output step.

    Args:
        input_size: size of the last dimension of the input.
        hidden_size: hidden-state size of the GRU (per direction).
        output_size: number of values produced by the linear layer.
        num_layers: number of stacked GRU layers (default 1).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super().__init__()
        # Attribute names `gru` and `fc` determine the state_dict keys, so they
        # must stay as they are for saved checkpoints to load.
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # The GRU is bidirectional, so its output feature size is 2 * hidden_size.
        self.fc = nn.Linear(2 * hidden_size, output_size)

    def forward(self, x):
        """Run `x` through the GRU and project every output step with the linear layer."""
        gru_out, _hidden = self.gru(x)
        return self.fc(gru_out)
    
# Load the saved model
model = GRUModel(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on a
# machine without CUDA; the model itself is constructed on the CPU above.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True on PyTorch versions that support it).
model.load_state_dict(torch.load(model_path, map_location="cpu"))
# Switch to inference mode; kept as the last expression so the cell displays the model.
model.eval()

GRUModel(
  (gru): GRU(384, 128, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=256, out_features=8, bias=True)
)

In [12]:
import pickle
# Load the embeddings from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
val_embeddings_file = "val_embeddings.pkl"
# Use a context manager so the file handle is closed as soon as loading finishes.
with open(val_embeddings_file, "rb") as embeddings_fh:
    val_embeddings_loaded = pickle.load(embeddings_fh)

In [13]:
# Load the labels from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
val_labels_file = "task1_labels_dev.pkl"
# Use a context manager so the file handle is closed as soon as loading finishes.
with open(val_labels_file, "rb") as labels_fh:
    val_labels_loaded = pickle.load(labels_fh)

In [14]:
# Load the speaker ids from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
val_speaker_ids_file = "speakers_dev.pkl"
# Use a context manager so the file handle is closed as soon as loading finishes.
with open(val_speaker_ids_file, "rb") as speakers_fh:
    val_speaker_ids_loaded = pickle.load(speakers_fh)

In [15]:
# Define a function to predict emotions for a given set of embeddings
def predict_emotions(embeddings):
    """Predict one class index per item in `embeddings` using the global `model`.

    Each item is converted to a tensor, given an extra leading dimension and
    passed through the model on its own (items are not combined into one
    sequence). The class index is the argmax along the last (class) axis.

    Args:
        embeddings: iterable of items accepted by `torch.tensor`.
            NOTE(review): presumably one 384-dim vector per utterance - confirm
            against the code that produced the embeddings file.

    Returns:
        list[int]: the predicted class index for each item, in input order.

    Raises:
        ValueError: if the model yields more than one prediction for an item,
            i.e. the item does not describe a single step.
    """
    predictions = []
    with torch.no_grad():  # inference only, no gradients needed
        for embedding in embeddings:
            embedding_tensor = torch.tensor(embedding)
            output = model(embedding_tensor.unsqueeze(0))
            # Take the argmax over the class axis. A bare `argmax()` would
            # flatten the output first and only yields a class index when the
            # output has a single row.
            class_indices = output.argmax(dim=-1)
            if class_indices.numel() != 1:
                raise ValueError(
                    "expected exactly one prediction per embedding, "
                    f"got output of shape {tuple(output.shape)}"
                )
            predictions.append(class_indices.item())
    return predictions

In [16]:
# Collect every distinct speaker name that appears in any episode
unique_speakers = set()
for episode_id, episode_speakers in val_speaker_ids_loaded.items():
    unique_speakers.update(episode_speakers)

print(unique_speakers)
# One empty dict per speaker; later filled as {episode_id: [predicted emotions]}
speaker_predictions_dict = {name: {} for name in unique_speakers}

{'Carl', 'Both', 'Tom', 'Kim', 'Mr. Geller', 'Kate', 'Stage Manager', 'Earl', 'Mrs. Tedlock', 'Frank', 'Dr. Long', 'Megan', 'Young Ethan', 'Phoebe Sr', 'All', 'Joey', 'Elizabeth', 'Charlton Heston', 'Emily', 'Mike', 'Professore Clerk', 'Janine', 'Charlie', 'Wayne', 'The Director', 'Richard', 'an', 'Kyle', 'Joey and Chandler', 'Alice', 'A Student', 'Julio', 'Susan', 'Julie', 'Phoebe', 'Mrs. Geller', 'The Casting Director', 'Leslie', 'Dr. Drake Remoray', 'Dana', 'Stranger', 'Whitney', 'Stage Director', 'Phoebe Sr.', 'Tag', 'Jill', 'Duncan', 'Ben', 'Jester', 'Joey/Drake', 'Guy #1', 'The Woman', 'Mona', "Joey's Hand Twin", "Mona's Date", 'Kristen', 'Dr. Oberman', 'Boy in the Cape', 'Joshua', 'Ms. McKenna', 'Fireman No. 3', 'Jane', 'Rachel', 'Allesandro', 'Chloe', 'The Guys', 'Paul', 'Nurse', 'Mrs. Green', 'Mark', 'Stanley', 'Nancy', 'Mischa', 'Kristin', 'Pete', 'Mr. Heckles', 'Bobby', 'The Vendor', 'The Singing Man', 'Dina', 'Issac', 'The Museum Official', 'Roger', 'Krista', 'Message', 'Mo

In [17]:
# Predict emotions for each utterance and associate with each speaker
for episode_id, embeddings in val_embeddings_loaded.items():
    predictions = predict_emotions(embeddings)
    speakers = val_speaker_ids_loaded[episode_id]
    # `speakers` is zipped position-by-position with the predictions, so a name
    # can occur many times. Iterate over the distinct names (dict.fromkeys keeps
    # first-seen order) so each speaker's filter is computed once per episode
    # instead of once per occurrence.
    for speaker in dict.fromkeys(speakers):
        # Filter predictions for the current speaker
        speaker_predictions = [pred for pred, spk in zip(predictions, speakers) if spk == speaker]
        speaker_predictions_dict[speaker][episode_id] = speaker_predictions

# for speaker in speaker_predictions_dict:
#     print(f"Speaker: {speaker}")
#     for episode_id, predictions in speaker_predictions_dict[speaker].items():
#         print(f"Episode: {episode_id}, Predictions: {predictions}")
#     print()


In [18]:
from sklearn.metrics import f1_score

# Encode the labels by a fixed mapping
mapping = {
    "-1": 0,
    "sadness": 1,
    "joy": 2,
    "fear": 3,
    "anger": 4,
    "surprise": 5,
    "disgust": 6,
    "neutral": 7
}

# Detect emotion flips in a predicted sequence and classify them against the truth
def detect_emotion_flips(predicted_emotions, true_emotions):
    """Split the flips found in `predicted_emotions` into valid and invalid ones.

    A flip is a position where the predicted emotion differs from the previous
    predicted emotion. It is valid when both predictions involved (before and
    after the flip) equal the true emotions at the same positions; otherwise
    it is invalid.

    Args:
        predicted_emotions: indexable sequence of predicted emotions.
        true_emotions: indexable sequence of true emotions, read at the same
            positions as the predictions.

    Returns:
        tuple[list, list]: (valid_flips, invalid_flips), each a list of
        (previous_index, current_index) pairs.
    """
    valid_flips, invalid_flips = [], []
    for curr in range(1, len(predicted_emotions)):
        prev = curr - 1
        if predicted_emotions[curr] == predicted_emotions[prev]:
            continue  # no change in the prediction, so no flip here
        both_correct = (
            predicted_emotions[prev] == true_emotions[prev]
            and predicted_emotions[curr] == true_emotions[curr]
        )
        target = valid_flips if both_correct else invalid_flips
        target.append((prev, curr))  # store the indices of the flip
    return valid_flips, invalid_flips

In [19]:
def totalnum_flips(true_emotions):
    """Count the positions where an emotion differs from the one just before it.

    Args:
        true_emotions: indexable sequence of emotions.

    Returns:
        int: number of adjacent pairs whose two values differ (0 for sequences
        with fewer than two items).
    """
    return sum(
        1
        for idx in range(1, len(true_emotions))
        if true_emotions[idx] != true_emotions[idx - 1]
    )
    

In [23]:
import numpy as np

# Calculate emotion flips and metrics for each speaker.
#
# The "accuracy" computed here is the share of true emotion flips that the
# model reproduced: a predicted flip only counts as valid when the predictions
# on both sides of it equal the true labels (see detect_emotion_flips), and the
# denominator is the number of flips in the true labels (see totalnum_flips).
# Flips are counted per speaker within each episode, never across episodes.
speaker_metrics = {}
final = 0          # true flips, summed over all speakers
final_correct = 0  # valid (correctly predicted) flips, summed over all speakers

for speaker in unique_speakers:
    valid_flips = []
    invalid_flips = []
    true_labels = []
    predicted_labels = []
    total_flips = 0

    for episode_id, episode_emotions in val_labels_loaded.items():
        episode_speakers = val_speaker_ids_loaded[episode_id]
        if speaker not in episode_speakers:
            continue

        predicted_emotions = speaker_predictions_dict[speaker][episode_id]
        # Keep only this speaker's utterances and convert the labels to their encoded values
        true_emotions = [
            mapping[emotion]
            for emotion, spk in zip(episode_emotions, episode_speakers)
            if spk == speaker
        ]
        # Fail early with a clear message instead of an IndexError or a
        # length-mismatch error further down.
        if len(predicted_emotions) != len(true_emotions):
            raise ValueError(
                f"episode {episode_id!r}, speaker {speaker!r}: "
                f"{len(predicted_emotions)} predictions but {len(true_emotions)} labels"
            )

        valid, invalid = detect_emotion_flips(predicted_emotions, true_emotions)
        total_flips += totalnum_flips(true_emotions)
        valid_flips.extend(valid)
        invalid_flips.extend(invalid)
        true_labels.extend(true_emotions)
        predicted_labels.extend(predicted_emotions)

    # Calculate accuracy of valid flips
    correct_flips = len(valid_flips)
    final += total_flips
    final_correct += correct_flips
    accuracy = correct_flips / total_flips if total_flips > 0 else 0

    # Calculate macro F1-score over all of this speaker's utterances
    macro_f1 = f1_score(true_labels, predicted_labels, average='macro')

    speaker_metrics[speaker] = {'accuracy': accuracy, 'macro_f1': macro_f1}

# Calculate overall accuracy and macro F1-score.
# Guard the division the same way as the per-speaker accuracy so data without
# any true flip yields 0 instead of raising ZeroDivisionError.
overall_accuracy = final_correct / final if final > 0 else 0
# Unweighted mean of the per-speaker macro F1 scores (not an F1 over pooled labels).
overall_macro_f1 = np.mean([metrics['macro_f1'] for metrics in speaker_metrics.values()])

print(f"Overall Accuracy: {overall_accuracy}")


Overall Accuracy: 0.2909441233140655
