In [20]:
pip install cv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [21]:
import os
import cv
import imageio
import torch
import openai
import json
import numpy as np
import pandas as pd
from PIL import Image
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision.models.vision_transformer import vit_b_16, ViT_B_16_Weights

In [22]:
OPENAI_API_KEY = "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"
openai.api_key = OPENAI_API_KEY
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
# Emotion classes
emotion_labels = ["Anger", "Disgust", "Fear", "Happiness", "Neutral", "Sadness"]

# Initialize a pre-trained Vision Transformer model
class EmotionRecognitionViT(nn.Module):
    def __init__(self, num_classes=6):
        super(EmotionRecognitionViT, self).__init__()
        # Load the pre-trained Vision Transformer
        self.vit = vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)
        
        # Replace the classifier head with one suitable for our task
        in_features = self.vit.heads[0].in_features  # Access the input features of the last layer
        self.vit.heads = nn.Sequential(nn.Linear(in_features, num_classes))

    def forward(self, x):
        return self.vit(x)

# Initialize the ViT model
model = EmotionRecognitionViT(num_classes=6)

In [None]:
model_path = "/home/jecroisp/Thesis/FerLLMs-Emotional-Transition-Detection-in-GIFs/p_crema/emotion_recognition_vit.pth"
model = EmotionRecognitionViT(num_classes=6)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Transforms for frames
frame_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

  model.load_state_dict(torch.load(model_path, map_location=device))


In [25]:
def load_gif_as_tensor(gif_path):
    """
    Loads all frames from a GIF, applies frame_transforms, 
    and returns a stacked tensor of shape (num_frames, 3, 224, 224).
    """
    gif_frames = imageio.mimread(gif_path)
    processed_frames = []
    for frame in gif_frames:
        pil_img = Image.fromarray(frame).convert("RGB")
        processed_frames.append(frame_transforms(pil_img))
    if len(processed_frames) == 0:
        return None
    return torch.stack(processed_frames)


In [26]:
def predict_emotions_for_gif(model, gif_tensor):
    """
    Given a stacked tensor of GIF frames, compute the average 
    softmax probability across frames and return a dict with scores.
    """
    if gif_tensor is None:
        return None

    gif_tensor = gif_tensor.to(device)
    with torch.no_grad():
        outputs = model(gif_tensor)
        probs = F.softmax(outputs, dim=1)  # shape: (num_frames, 6)
        avg_probs = probs.mean(dim=0).cpu().numpy()  # shape: (6,)
    
    predicted_idx = np.argmax(avg_probs)
    predicted_label = emotion_labels[predicted_idx]
    predicted_confidence = avg_probs[predicted_idx]
    
    # Construct a dictionary of all scores
    scores_dict = {
        "Predicted_Emotion": predicted_label,
        "Anger_Score": float(avg_probs[0]),
        "Disgust_Score": float(avg_probs[1]),
        "Fear_Score": float(avg_probs[2]),
        "Happiness_Score": float(avg_probs[3]),
        "Neutral_Score": float(avg_probs[4]),
        "Sadness_Score": float(avg_probs[5]),
        "Max_Confidence": float(predicted_confidence)
    }
    return scores_dict


In [27]:
import pandas as pd

# List of prompt strings
prompts = [
    "Describe the emotional transition in the given scene with clarity, focusing solely on inferences drawn from the model's confidence scores. Interpret subtle differences between scores to determine when and how the subject's emotion shifts, using specific language (e.g., 'subject transitions from excitement to sadness') without mentioning direct numerical values or static labels. Keep the description concise (under 15 words) and ensure it captures the temporal progression of the scene.",
    "Using only the variations in the confidence scores provided, succinctly articulate the subject's emotional shift over time. Capture the change from one dominant emotion to another using precise, dynamic verbs (e.g., 'morphs from surprise to calm') without quoting numbers. Limit your response to under 15 words.",
    "Based solely on the model’s confidence score fluctuations, craft a brief narrative describing how the subject’s emotion evolves throughout the scene. Use specific phrases (e.g., 'switches from joy to apprehension') and avoid direct numerical references, keeping your description under 15 words.",
    "Analyze the temporal trends in the emotion confidence scores to derive a succinct, natural language depiction of the subject's emotional transition. Employ clear descriptors (e.g., 'changes from elation to concern') without referencing exact figures, and keep it under 15 words.",
    "Interpret the progression of emotion in the scene exclusively from the provided confidence scores. Deliver a concise (under 15 words) depiction of the shift using specific language (e.g., 'evolves from optimism to melancholy'), avoiding any direct numerical or static label mentions."
]

# Create a separate DataFrame for each prompt with a simple label
prompt_dfs = {}
for idx, prompt in enumerate(prompts, start=1):
    df = pd.DataFrame({
        "Prompt_Label": [f"Prompt {idx}"],
        "Prompt_Text": [prompt]
    })
    prompt_dfs[f"Prompt_{idx}"] = df

# Optionally, combine all individual DataFrames into one master DataFrame
master_df = pd.concat(prompt_dfs.values(), ignore_index=True)

# Display the master DataFrame
print(master_df)


  Prompt_Label                                        Prompt_Text
0     Prompt 1  Describe the emotional transition in the given...
1     Prompt 2  Using only the variations in the confidence sc...
2     Prompt 3  Based solely on the model’s confidence score f...
3     Prompt 4  Analyze the temporal trends in the emotion con...
4     Prompt 5  Interpret the progression of emotion in the sc...


In [36]:
from openai import OpenAI

In [37]:
def generate_caption_from_scores(emotion_scores, prompt_text):
    """
    Prompt GPT-3.5 to generate a concise caption describing the emotional transition.
    
    Parameters:
      emotion_scores (dict): Dictionary containing emotion scores.
      prompt_text (str): The variable prompt text (one of your 5 prompts).
      
    Returns:
      str: GPT-3.5 generated caption.
    """
    # Construct a textual summary of the scores to pass as context.
    scores_text = ", ".join([f"{emo}: {emotion_scores[f'{emo}_Score']:.2f}" 
                             for emo in emotion_labels])
    
    # Constant context remains unchanged.
    context = (
        "Key Excerpts from the Paper:\n"
        "1. Emotional flexibility is defined as the ability to adapt or regulate emotional responses based on context.\n"
        "2. It involves suppressing or expressing emotions according to changing situations.\n"
        "3. The concept includes core components such as context-sensitivity, behavioral repertoire, and responsiveness to feedback.\n"
        "4. The dynamic nature of emotional responses is crucial, indicating transitions over time rather than static states."
    )
    
    # Combine the constant context with the variable prompt.
    full_prompt = (
        f"Below are some key excerpts from a research paper on emotional flexibility:\n{context}\n\n"
        f"Using the following emotion scores: {scores_text}\n"
        f"{prompt_text}"
    )
    
    client = OpenAI(api_key = OPENAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant focusing on emotion transitions."},
            {"role": "user", "content": full_prompt},
        ],
        max_tokens=50,
        temperature=0.7
    )
    
    return response.choices[0].message.content.strip()



In [43]:
import os
import pandas as pd

# ---------------------------
# 1. Load CSV and Prepare df_subset
# ---------------------------
csv_file = "/home/jecroisp/Thesis/data/processed_data/p_crema/GIF_Annotations.csv"
gif_folder = "/home/jecroisp/Thesis/data/processed_data/p_crema/CremaGifs"
df = pd.read_csv(csv_file)

# Create a new DataFrame with only the 'fileName' column.
df_subset = df[['fileName']].copy()

# Create the 'GIF_Path' column for processing.
df_subset['GIF_Path'] = df_subset['fileName'].apply(lambda x: os.path.join(gif_folder, f"{x}.gif"))

# ---------------------------
# 2. Initialize New Columns for Emotion Scores and GPT Captions
# ---------------------------
new_columns = [
    'Predicted_Emotion', 'Anger_Score', 'Disgust_Score', 'Fear_Score',
    'Happiness_Score', 'Neutral_Score', 'Sadness_Score', 'Max_Confidence'
]
# Add these columns with numerical default 0.0
for col in new_columns:
    df_subset[col] = 0.0

# We'll have five separate GPT caption columns.
for i in range(1, 6):
    df_subset[f'GPT_Caption_{i}'] = ""

# ---------------------------
# 3. Define Prompt Strings and Limit Processing Subset
# ---------------------------
prompts = [
    "Describe the emotional transition in the given scene with clarity, focusing solely on inferences drawn from the model's confidence scores. Interpret subtle differences between scores to determine when and how the subject's emotion shifts, using specific language (e.g., 'subject transitions from excitement to sadness') without mentioning direct numerical values or static labels. Keep the description concise (under 15 words) and ensure it captures the temporal progression of the scene.",
    "Using only the variations in the confidence scores provided, succinctly articulate the subject's emotional shift over time. Capture the change from one dominant emotion to another using precise, dynamic verbs (e.g., 'morphs from surprise to calm') without quoting numbers. Limit your response to under 15 words.",
    "Based solely on the model’s confidence score fluctuations, craft a brief narrative describing how the subject’s emotion evolves throughout the scene. Use specific phrases (e.g., 'switches from joy to apprehension') and avoid direct numerical references, keeping your description under 15 words.",
    "Analyze the temporal trends in the emotion confidence scores to derive a succinct, natural language depiction of the subject's emotional transition. Employ clear descriptors (e.g., 'changes from elation to concern') without referencing exact figures, and keep it under 15 words.",
    "Interpret the progression of emotion in the scene exclusively from the provided confidence scores. Deliver a concise (under 15 words) depiction of the shift using specific language (e.g., 'evolves from optimism to melancholy'), avoiding any direct numerical or static label mentions."
]

# Process only a subset (e.g., first 1125 GIFs)
n_gifs = 1125
df_subset = df_subset.iloc[:n_gifs].copy()

# ---------------------------
# 4. Process Each GIF: Predict Scores and Generate Captions for All Prompts
# ---------------------------
for idx in range(len(df_subset)):
    gif_path = df_subset.loc[idx, 'GIF_Path']
    
    # Load GIF frames as a tensor
    gif_tensor = load_gif_as_tensor(gif_path)
    if gif_tensor is None:
        for i in range(1, 6):
            df_subset.loc[idx, f'GPT_Caption_{i}'] = "Error reading GIF."
        continue
    
    # Predict emotion scores using the FER-VIT model.
    scores_dict = predict_emotions_for_gif(model, gif_tensor)
    if scores_dict is None:
        for i in range(1, 6):
            df_subset.loc[idx, f'GPT_Caption_{i}'] = "Prediction failed."
        continue
    
    # Update the DataFrame with the predicted scores.
    df_subset.loc[idx, 'Predicted_Emotion'] = scores_dict['Predicted_Emotion']
    df_subset.loc[idx, 'Anger_Score']     = scores_dict['Anger_Score']
    df_subset.loc[idx, 'Disgust_Score']    = scores_dict['Disgust_Score']
    df_subset.loc[idx, 'Fear_Score']       = scores_dict['Fear_Score']
    df_subset.loc[idx, 'Happiness_Score']  = scores_dict['Happiness_Score']
    df_subset.loc[idx, 'Neutral_Score']    = scores_dict['Neutral_Score']
    df_subset.loc[idx, 'Sadness_Score']    = scores_dict['Sadness_Score']
    df_subset.loc[idx, 'Max_Confidence']   = scores_dict['Max_Confidence']
    
    # For each prompt, generate a caption and store it in a separate column.
    for i, prompt in enumerate(prompts, start=1):
        caption = generate_caption_from_scores(scores_dict, prompt)
        df_subset.loc[idx, f'GPT_Caption_{i}'] = caption

# ---------------------------
# 5. Create Final DataFrame with Only 'fileName' and New Columns
# ---------------------------
final_columns = ['fileName'] + new_columns + [f'GPT_Caption_{i}' for i in range(1, 6)]
df_final = df_subset[final_columns].copy()

# Save the final DataFrame to a CSV file.
output_csv = "/home/jecroisp/Thesis/data/processed_data/p_crema/final_results_subset.csv"
df_final.to_csv(output_csv, index=False)
print(f"Results saved to {output_csv}")

# ---------------------------
# 6. Create a Separate Long-Format DataFrame for Prompts and Captions
# ---------------------------
# This DataFrame will have one row per GIF per prompt, with columns: fileName, Prompt_Text, GPT_Caption.
rows = []
for idx, row in df_final.iterrows():
    file_name = row['fileName']
    for i, prompt in enumerate(prompts, start=1):
        rows.append({
            "fileName": file_name,
            "Prompt_Text": prompt,
            "GPT_Caption": row[f'GPT_Caption_{i}']
        })

df_long = pd.DataFrame(rows)

# Save the long-format DataFrame to a CSV file.
output_long_csv = "/home/jecroisp/Thesis/data/processed_data/p_crema/final_results_long.csv"
df_long.to_csv(output_long_csv, index=False)
print(f"Long-format results saved to {output_long_csv}")



  df_subset.loc[idx, 'Predicted_Emotion'] = scores_dict['Predicted_Emotion']


Results saved to /home/jecroisp/Thesis/data/processed_data/p_crema/final_results_subset.csv
Long-format results saved to /home/jecroisp/Thesis/data/processed_data/p_crema/final_results_long.csv


In [45]:
df_final.head(1)

Unnamed: 0,fileName,Predicted_Emotion,Anger_Score,Disgust_Score,Fear_Score,Happiness_Score,Neutral_Score,Sadness_Score,Max_Confidence,GPT_Caption_1,GPT_Caption_2,GPT_Caption_3,GPT_Caption_4,GPT_Caption_5
0,1001_IEO_NEU_XX,Neutral,0.006635,0.002801,0.012297,0.015548,0.96109,0.001629,0.96109,Subject transitions from neutral to a hint of ...,"Shifts from neutrality to slight happiness, in...",The subject experiences shifts in emotional re...,The emotional journey shifts subtly from neutr...,"Transitioning from calm neutrality, a subtle h..."


In [55]:
for _ in range(1,6):
    print('GIF: ' + df_final['fileName'].iloc[i] + '\n')
    print('Predicted_Emotion: ' + df_final['Predicted_Emotion'].iloc[i] + '\n')
    print('Max_Confidence: ' + df_final['Max_Confidence'].iloc[i].astype(str) + '\n')
    print('Anger_Score: ' + df_final['Anger_Score'].iloc[i].astype(str) + '\n')
    print('Disgust_Score: ' + df_final['Disgust_Score'].iloc[i].astype(str) + '\n')
    print('Fear_Score: ' + df_final['Fear_Score'].iloc[i].astype(str) + '\n')
    print('Happiness_Score: ' + df_final['Happiness_Score'].iloc[i].astype(str) + '\n')
    print('Neutral_Score: ' + df_final['Neutral_Score'].iloc[i].astype(str) + '\n')
    print('Sadness_Score: ' + df_final['Sadness_Score'].iloc[i].astype(str) + '\n')
    print('Generated Caption: ' + df_final['GPT_Caption_1'].iloc[i] + '\n')
# print(df_final['GPT_Caption_2'].iloc[0] + '\n')
# print(df_final['GPT_Caption_3'].iloc[0] + '\n')
# print(df_final['GPT_Caption_4'].iloc[0] + '\n')
# print(df_final['GPT_Caption_5'].iloc[0] + '\n')


GIF: 1001_IEO_SAD_MD

Predicted_Emotion: Neutral

Max_Confidence: 0.7600553035736084

Anger_Score: 0.10586223006248474

Disgust_Score: 0.018845805898308754

Fear_Score: 0.07468371838331223

Happiness_Score: 0.04034707695245743

Neutral_Score: 0.7600553035736084

Sadness_Score: 0.00020592493820004165

Generated Caption: Subject transitions from neutrality to a slight hint of fear with subtle fluctuations.

GIF: 1001_IEO_SAD_MD

Predicted_Emotion: Neutral

Max_Confidence: 0.7600553035736084

Anger_Score: 0.10586223006248474

Disgust_Score: 0.018845805898308754

Fear_Score: 0.07468371838331223

Happiness_Score: 0.04034707695245743

Neutral_Score: 0.7600553035736084

Sadness_Score: 0.00020592493820004165

Generated Caption: Subject transitions from neutrality to a slight hint of fear with subtle fluctuations.

GIF: 1001_IEO_SAD_MD

Predicted_Emotion: Neutral

Max_Confidence: 0.7600553035736084

Anger_Score: 0.10586223006248474

Disgust_Score: 0.018845805898308754

Fear_Score: 0.07468371838