In [30]:
# Important: before getting started, download the vector version of AffectVec v1.0.
# It is available here: http://emotion.nlproc.org/

#load packages
import re

import numpy as np
import pandas as pd
from gensim.models import KeyedVectors

In [23]:
# Load the AffectVec word vectors (plain-text word2vec format).
word_vectors = KeyedVectors.load_word2vec_format("AffectVec-v1.0-vectors/AffectVec-data.txt", binary=False)

# Collect the emotion labels from columns.txt. Each usable line is split on
# ' - ' and its second part is taken as the label; lines without that
# separator are skipped.
# The list must be created here: on a fresh kernel nothing else defines it,
# and re-running this cell must not append the labels a second time.
emotion_labels = []
with open('AffectVec-v1.0-vectors/columns.txt', 'r') as file:
    for line in file:
        parts = line.strip().split(' - ')
        # Append the emotion label (second part) to the list
        if len(parts) > 1:
            emotion_labels.append(parts[1])


In [29]:
def analyze_scene_emotions(recall, vectors=None, labels=None):
    """Average the emotion vectors of all known words in one recalled scene.

    The text is lower-cased and split into words with the regex ``\\b\\w+\\b``.
    Every word found in ``vectors`` contributes its vector to a running sum,
    which is then divided by the number of contributing words.

    Parameters
    ----------
    recall : str
        Free-recall text for a single scene. Any non-string value (for
        example the float NaN that pandas produces for an empty CSV cell,
        or None) is treated as "no text".
    vectors : mapping-like, optional
        Object supporting ``word in vectors`` and ``vectors[word]``.
        Defaults to the module-level ``word_vectors``.
    labels : sequence of str, optional
        One label per vector dimension. Defaults to the module-level
        ``emotion_labels``.

    Returns
    -------
    dict or None
        Mapping from each label to its average score, or None when the
        input is not a string or none of its words are present in
        ``vectors``.
    """
    # Resolve the defaults lazily so the globals are only needed when the
    # caller does not supply its own lookup table / label list.
    if vectors is None:
        vectors = word_vectors
    if labels is None:
        labels = emotion_labels

    # Missing CSV cells arrive as float NaN; calling .lower() on them would
    # raise AttributeError, so report them the same way as "no valid words".
    if not isinstance(recall, str):
        return None

    words = re.findall(r'\b\w+\b', recall.lower())  # Clean and split recall into words
    emotion_scores_sum = np.zeros(len(labels))  # Sum of scores for each emotion
    word_count = 0  # Number of words that were found in the vectors

    for word in words:
        if word in vectors:
            emotion_scores_sum += vectors[word]
            word_count += 1

    if word_count == 0:
        return None  # No valid words in the vectors

    average_emotion_scores = emotion_scores_sum / word_count
    return dict(zip(labels, average_emotion_scores))

# Load the CSV file with free recall (NOTE: the data must be structured such
# that each row refers to one recalled scene).
input_csv = ""  # Replace with your actual CSV file path
if not input_csv:
    # Fail with an actionable message instead of an opaque read error.
    raise ValueError("input_csv is empty: set it to the path of your free-recall CSV file.")
data = pd.read_csv(input_csv)

# Analyze each scene recall and store the results.
# Only the recall column is needed, so iterate over that column directly;
# the row index is kept as the scene identifier.
results = []
for index, recall in data['scene_recall'].items():  # Replace 'scene_recall' with the actual column name in your CSV
    emotion_scores = analyze_scene_emotions(recall)
    if emotion_scores is not None:  # Scenes without any scorable word are skipped
        results.append({**emotion_scores, 'scene_recall': recall, 'scene_id': index})

# Convert results to a DataFrame with scene_id and scene_recall first.
# Passing the column order to the constructor also works when `results` is
# empty, where selecting the columns afterwards would raise a KeyError.
columns_order = ['scene_id', 'scene_recall'] + emotion_labels
results_df = pd.DataFrame(results, columns=columns_order)

# Save the DataFrame to a new CSV file
output_csv = "scene_emotion_analysis.csv"  # Specify your output file path
results_df.to_csv(output_csv, index=False)

print("Analysis complete. Results saved to:", output_csv)

Analysis complete. Results saved to: scene_emotion_analysis.csv
