In [None]:
import pandas as pd 
import models
import functools
import numpy as np
import warnings
import operator
warnings.filterwarnings("ignore")

# DepecheMood++ features
This notebook contains the functions needed to elicit the moral and emotional value of texts using a lexicon-based approach.

# Functions

In [None]:
def emo_transform(data, emotion_lex, n_emotions):
    """
    Build one emotion representation per row of ``data`` from an emotion lexicon.

    Each row's 'text' value is split on single spaces and the resulting tokens
    are handed to ``extract_emo_representation``.

    Args:
    data (pd.DataFrame): Frame with a 'text' column holding the strings to score.
    emotion_lex (dict): Maps a word to its emotion vector.
    n_emotions (int): Length of the emotion vectors stored in ``emotion_lex``.

    Returns:
    dict: Row index label -> emotion representation computed for that row.
    """
    # Build the vocabulary once so it is not recreated for every row.
    vocabulary = set(emotion_lex.keys())
    return {
        row_label: extract_emo_representation(
            row['text'].split(' '), vocabulary, emotion_lex, n_emotions
        )
        for row_label, row in data.iterrows()
    }


def extract_emo_representation(words, emo_vocab=None, emotion_lex=None, n_emotions=None):
    """
    Extracts an emotion representation from a list of words.

    The representation is the element-wise mean of the lexicon vectors of all
    *distinct* words that appear both in ``words`` and in the lexicon vocabulary
    (repeated words count once).

    Args:
    words (list of str): List of words from the text.
    emo_vocab (set, optional): Set of words in the emotion lexicon. When omitted,
        the keys of ``emotion_lex`` are used.
    emotion_lex (dict): Dictionary mapping words to their emotion vectors.
    n_emotions (int, optional): Number of emotions in the emotion vectors. When
        omitted, it is taken from the length of the first vector in ``emotion_lex``.

    Returns:
    np.ndarray: Mean emotion vector of shape (n_emotions,). If none of the words
        is in the vocabulary, every entry is NaN.

    Raises:
    TypeError: If ``emotion_lex`` is missing and ``emo_vocab`` or ``n_emotions``
        would have to be derived from it.
    """
    if emotion_lex is None and (emo_vocab is None or n_emotions is None):
        raise TypeError(
            "emotion_lex is required when emo_vocab or n_emotions is not given"
        )
    if emo_vocab is None:
        # A dict keys view supports `&` directly, so no copy of the vocabulary is made.
        emo_vocab = emotion_lex.keys()
    if n_emotions is None:
        first_vector = next(iter(emotion_lex.values()), None)
        n_emotions = 0 if first_vector is None else len(first_vector)

    intersection = emo_vocab & set(words)
    if not intersection:
        # Same result np.mean gives for an empty slice, but stated explicitly
        # and without triggering NumPy's "Mean of empty slice" RuntimeWarning.
        return np.full(n_emotions, np.nan)

    v = np.zeros((len(intersection), n_emotions))
    for i, word in enumerate(intersection):
        v[i, :] = emotion_lex[word]
    return np.mean(v, axis=0)
    
def dictionary_emotion(text):
    """
    Attach emotion labels to a sequence of emotion scores.

    Scores are paired positionally with a fixed list of eight labels; pairing
    stops at the shorter of the two sequences.

    Args:
    text (list): Emotion scores, in the same order as the labels below.

    Returns:
    dict: Emotion label -> score.
    """
    emotion_labels = (
        "fear",
        "amusement",
        "anger",
        "annoyance",
        "indifference",
        "happiness",
        "inspiration",
        "sadness",
    )
    return dict(zip(emotion_labels, text))

from itertools import islice

def prompt_function(text):
    """
    Build a prompt string from the four highest-scoring emotions.

    Args:
    text (dict): Emotion label -> score.

    Returns:
    str: Up to four emotion labels, highest score first, joined by ', '.
        Labels with equal scores keep their order from ``text``.
    """
    # sorted() is stable, so ties keep the dictionary's insertion order.
    ranked_labels = sorted(text, key=text.get, reverse=True)
    return ', '.join(ranked_labels[:4])
    
def max_emotion(text):
    """
    Return the label of the highest-scoring emotion.

    Args:
    text (dict): Emotion label -> score.

    Returns:
    str: Label whose score is largest; on ties, the first such label in ``text``.
    """
    top_label = max(text.keys(), key=lambda label: text[label])
    return top_label

# DepecheMood++ Lexicon

In [None]:
# Read the DepecheMood lexicon.
# One-off TSV -> CSV conversion, kept for reference:
#lexicon=pd.read_csv('DATASETS/DepecheMood_english_lemma_full.tsv',sep='\t',index_col=[0])
#lexicon.to_csv('DATASETS/DepecheMood_english_lemma_full.csv')

# Keep only rows with 'freq' >= 10, then drop the 'freq' column.
# (Original note: 134278 values were discarded (23%), 41314 lemmas.)
# reset_index() is expected to expose the lemmas as a column named 'index',
# which the dictionary conversion below relies on.
lexicon = (
    pd.read_csv('DATASETS/DepecheMood_english_lemma_full.csv', index_col=[0])
    .loc[lambda frame: frame['freq'] >= 10]
    .drop(columns='freq')
    .reset_index()
)

# Convert the lexicon to a dictionary: lemma -> list of emotion scores.
lexicon_dict = lexicon.set_index('index').T.to_dict('list')

# Preview a slice of the filtered lexicon.
lexicon.loc[200:250, :]

In [None]:
# Example: score a single sentence and label the resulting emotion vector.
text = '''My cat is not loyal.'''
emo_vocab = set(lexicon_dict)
emo_values = {}
data = dictionary_emotion(
    extract_emo_representation(text.split(' '), emo_vocab, lexicon_dict, 8)
)
data


# MFRC

In [None]:
#create a single dataset with MoralStrength and DepecheMood++ lexicons

# MoralStrength estimate: one row per distinct text, 'prompt' renamed to 'moralstrength'.
reddit1 = (
    pd.read_csv('DATASETS/REDDIT_moralstrength_estimate.csv')[['text', 'prompt']]
    .rename(columns={'prompt': 'moralstrength'})
    .drop_duplicates(subset=['text'])
)

# Low/medium/high estimate: one row per distinct text, keeps 'label',
# 'prompt' renamed to 'moralstrength_i'.
reddit2 = (
    pd.read_csv('DATASETS/REDDIT_moralstrength_estimate_low_medium_high.csv')[['text', 'label', 'prompt']]
    .rename(columns={'prompt': 'moralstrength_i'})
    .drop_duplicates(subset=['text'])
)

# Outer merge keeps texts that appear in only one of the two files.
reddit3 = reddit2.merge(reddit1, on='text', how='outer')

# Score the merged frame itself. The original passed the undefined name
# `reddit`, which raises NameError on a fresh kernel. emo_transform returns a
# dict keyed by row index, which pandas aligns on reddit3's index when assigned.
reddit3['depechemood'] = emo_transform(reddit3, lexicon_dict, 8)

reddit3['dictionary_emotions'] = reddit3['depechemood'].apply(dictionary_emotion)
reddit3['emotion_word'] = reddit3['dictionary_emotions'].apply(max_emotion)
reddit3['prompt'] = reddit3['dictionary_emotions'].apply(prompt_function)
reddit3.to_csv('DATASETS/REDDIT_dataset.csv', index=False)

# MFTC

In [None]:
#create a single dataset

datasets = ['ALM', 'BLM', 'BALTIMORE', 'DAVIDSON', 'ELECTION', 'SANDY']

# Function to process each dataset
def process_mftc(name, save=False):
    """
    Build the combined MoralStrength + DepecheMood++ frame for one MFTC dataset.

    Args:
    name (str): Dataset prefix used in the CSV file names under DATASETS/.
    save (bool): When True, also write the result to
        'DATASETS/{name}_dataset.csv'. Defaults to False (no file is written).

    Returns:
    pd.DataFrame: Merged frame with the added 'depechemood',
        'dictionary_emotions', 'emotion_word' and 'prompt' columns.
    """
    # Load the CSV (1 moral value)
    data1 = (
        pd.read_csv(f'DATASETS/{name}_moralstrength_estimate_polarity.csv')[['text', 'prompt']]
        .rename(columns={'prompt': 'moralstrength'})
        .drop_duplicates(subset=['text'])
    )

    # Load the CSV (several moral values and intensities)
    data2 = (
        pd.read_csv(f'DATASETS/{name}_moralstrength_estimate_low_medium_high_polarity.csv')[['text', 'label', 'prompt']]
        .rename(columns={'prompt': 'moralstrength_i'})
        .drop_duplicates(subset=['text'])
    )

    # Outer merge keeps texts that appear in only one of the two files.
    data3 = data2.merge(data1, on='text', how='outer')

    # Emotion features: raw vector, labelled dict, top emotion, top-4 prompt.
    data3['depechemood'] = emo_transform(data3, lexicon_dict, 8)
    data3['dictionary_emotions'] = data3['depechemood'].apply(dictionary_emotion)
    data3['emotion_word'] = data3['dictionary_emotions'].apply(max_emotion)
    data3['prompt'] = data3['dictionary_emotions'].apply(prompt_function)

    if save:
        data3.to_csv(f'DATASETS/{name}_dataset.csv', index=False)

    # Return the frame; previously it was built and then discarded.
    return data3

# Keep each processed frame so the work done by process_mftc is not thrown away.
mftc_datasets = {}
for dataset in datasets:
    mftc_datasets[dataset] = process_mftc(dataset)