In [18]:
import pandas as pd
import pickle
import os
import glob

def open_final(model_name):
    """Load the pickled final scores saved for ``model_name``.

    Reads ``data/final_scores_<model_name>.pkl`` (relative to the current
    working directory) and returns the unpickled object unchanged.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising, so
    this must only be pointed at score files from a trusted source.
    """
    scores_path = f'data/final_scores_{model_name}.pkl'
    with open(scores_path, 'rb') as handle:
        return pickle.load(handle)

In [7]:
# Load the raw dataset. quoting=1 is csv.QUOTE_ALL; fields may contain
# backslash-escaped characters as well as doubled quote characters.
df = pd.read_csv(
    'data/dataset.csv',
    quoting=1,
    escapechar='\\',
    doublequote=True,
)

## Extending dataframe by adding emotions and sentiment based on mood labels

In [8]:
# Emotion -> mood labels that belong to it.
emomap = {
    'Sadness': ['Sad', 'Lonely'],
    'Anger': ['Angry', 'Annoyed', 'Frustrated', 'Furious'],
    'Fear': ['Anxious', 'Stressed', 'Afraid', 'Nervous', 'Worried'],
    'Affection': ['Loving', 'Caring', 'Supportive'],
    'Happiness': ['Happy', 'Excited'],
}

# Reverse lookup: mood label -> emotion.
emomap_r = {
    mood_label: emotion
    for emotion, mood_labels in emomap.items()
    for mood_label in mood_labels
}

# Emotion -> sentiment polarity.
sentimap = {
    'Sadness': 'negative',
    'Anger': 'negative',
    'Fear': 'negative',
    'Affection': 'positive',
    'Happiness': 'positive',
}

In [9]:
# Derive the emotion from the mood label, then the sentiment from the emotion.
# Mood labels missing from emomap_r end up as NaN in both new columns.
df = df.assign(emotion=df['mood'].map(emomap_r))
df = df.assign(sentiment=df['emotion'].map(sentimap))

In [11]:
# Save the extended dataset with the same quoting/escaping options used to
# read the raw file (quoting=1 is csv.QUOTE_ALL); the row index is not written.
df.to_csv(
    'data/dataset_extended.csv',
    index=False,
    quoting=1,
    escapechar='\\',
    doublequote=True,
)

## Computing errors
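Note that the total error is not equal to the valence error plus the salience error, because the two rates use different denominators.
The valence error is the share of wrong-polarity predictions among all non-neutral predictions; the salience error is the share of neutral predictions among all predictions that are not valence errors (i.e. correct or neutral).
For example, if the actual sentiment is negative but it is predicted to be positive in 33.3% of cases, neutral in 33.3% of cases, and correctly negative in the remaining 33.3%,
then the valence error is 50%, the salience error is 50%, and the total error is 66.7%.
The goal of this definition is to separate salience errors from valence errors.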

In [56]:
def compute_errors(df, scores):
    """Compute valence and salience error rates for the scored rows of ``df``.

    Args:
        df: DataFrame with an ``id`` column and a ``sentiment`` column holding
            the true label.
        scores: mapping from id to predicted label; the label ``'neutral'``
            marks a prediction that carries no polarity.

    Returns:
        dict with two entries:
          * ``"valence_error"``: among non-neutral predictions, the fraction
            whose label differs from ``sentiment``.
          * ``"salience_error"``: among predictions that are not valence
            errors (correct or neutral), the fraction that are neutral.
        A rate whose denominator is zero is reported as 0, and both rates are
        0 when no row of ``df`` has an id in ``scores``.
    """
    # Only rows that actually received a prediction take part.
    scored = df[df['id'].isin(scores.keys())]
    if scored.shape[0] == 0:
        return {"valence_error": 0, "salience_error": 0}

    predicted = scored['id'].map(scores)

    # Per-row flags: neutral prediction / non-neutral prediction with the wrong label.
    neutral_mask = predicted == 'neutral'
    valence_mask = ~neutral_mask & (scored['sentiment'] != predicted)

    n_neutral = int(neutral_mask.sum())
    n_non_neutral = int((~neutral_mask).sum())
    n_valence_errors = int(valence_mask.sum())
    n_without_valence_error = int((~valence_mask).sum())

    # Valence error: wrong polarity among the non-neutral predictions.
    if n_non_neutral > 0:
        valence_error = n_valence_errors / n_non_neutral
    else:
        valence_error = 0

    # Salience error: no polarity mistake was made, but no sentiment was detected either.
    if n_without_valence_error > 0:
        salience_error = n_neutral / n_without_valence_error
    else:
        salience_error = 0

    return {"valence_error": valence_error, "salience_error": salience_error}

In [57]:
def compute_group_errors(df, scores):
    """Compute valence and salience errors for every (sex, group) subset of ``df``.

    For each sex value (0 and 1) and each value of the three grouping levels
    ``sentiment``, ``emotion`` and ``mood``, the matching rows are passed to
    ``compute_errors`` and the two resulting rates are stored as separate rows.

    Args:
        df: DataFrame with the columns ``sex``, ``sentiment``, ``emotion`` and
            ``mood``, plus whatever ``compute_errors`` needs.
        scores: mapping from id to predicted label, forwarded to
            ``compute_errors``.

    Returns:
        Long-format DataFrame with the columns ``sex``, ``level``,
        ``level_value``, ``error_type`` (``'valence'`` or ``'salience'``) and
        ``error_value``.
    """
    # Values to iterate over within each grouping level. NaN is dropped from
    # the data-driven levels: unique() keeps it, but `column == NaN` never
    # matches a row, so it would only produce empty groups reported with an
    # error of 0.
    levels = {
        'sentiment': ['positive', 'negative'],
        'emotion': df['emotion'].dropna().unique().tolist(),
        'mood': df['mood'].dropna().unique().tolist(),
    }

    result_data = []

    for sex_value in [0, 1]:
        df_sex = df[df['sex'] == sex_value]

        for level, level_values in levels.items():
            for level_value in level_values:
                df_filtered = df_sex[df_sex[level] == level_value]
                errors = compute_errors(df_filtered, scores)

                # One output row per error type, valence first and then salience.
                for error_type in ('valence', 'salience'):
                    result_data.append({
                        'sex': sex_value,
                        'level': level,
                        'level_value': level_value,
                        'error_type': error_type,
                        'error_value': errors[f'{error_type}_error'],
                    })

    return pd.DataFrame(result_data)

Note that in the `sex` column, 0 denotes men and 1 denotes women.

In [58]:
# Reload the extended dataset written earlier (quoting=1 is csv.QUOTE_ALL).
df = pd.read_csv('data/dataset_extended.csv', quoting=1, escapechar='\\', doublequote=True)

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the combined table identical from run to run.
model_files = sorted(glob.glob('data/final_scores_*.pkl'))
if not model_files:
    # Fail with an explicit message rather than pandas' "No objects to concatenate".
    raise FileNotFoundError("No score files matching 'data/final_scores_*.pkl' were found")

model_errors = []

for model_file in model_files:
    print(model_file)

    # 'data/final_scores_<name>.pkl' -> '<name>'
    file_stem = os.path.splitext(os.path.basename(model_file))[0]
    model_name = file_stem[len('final_scores_'):]

    scores = open_final(model_name)

    errors = compute_group_errors(df, scores)
    errors['model'] = model_name

    model_errors.append(errors)

# One long table holding the group errors of every model.
result = pd.concat(model_errors, ignore_index=True)

data/final_scores_vader.pkl
data/final_scores_olmo.pkl
data/final_scores_falcon.pkl
data/final_scores_pysentimiento_senti.pkl
data/final_scores_hartmann.pkl
data/final_scores_leia.pkl
data/final_scores_cardif.pkl
data/final_scores_llama8.pkl
data/final_scores_siebert.pkl
data/final_scores_mistral.pkl
data/final_scores_granite.pkl
data/final_scores_pysentimiento_emo.pkl
data/final_scores_nrc.pkl
data/final_scores_qwen.pkl
data/final_scores_liwc.pkl
data/final_scores_llama70.pkl


In [62]:
# Model family -> names of the models that belong to it.
model_types = {
    'dictionary': ['liwc', 'nrc', 'vader'],
    'llm': ['mistral', 'falcon', 'llama8', 'llama70', 'olmo', 'qwen', 'granite'],
    'ml': ['cardif', 'hartmann', 'leia', 'pysentimiento_emo', 'pysentimiento_senti', 'siebert'],
}

# Reverse lookup: model name -> model family.
model_type_map = {
    name: family
    for family, names in model_types.items()
    for name in names
}

In [63]:
# Tag every row with the family of its model; models missing from
# model_type_map get NaN.
result = result.assign(model_type=result['model'].map(model_type_map))

In [64]:
# Persist the error table. With the default index=True the row index is also
# written, as an unnamed first column.
errors_csv_path = 'data/errors.csv'
result.to_csv(errors_csv_path)