# ISEAR and NRC Labels

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import spacy
import re

In [2]:
from py_isear.isear_loader import IsearLoader

# Columns requested from the ISEAR loader: `attributes` are returned by
# data.get_data(), `target` by data.get_target().
attributes = ["EMOT", "SIT"]
target = ["TROPHO", "TEMPER"]

# NOTE(review): the third positional argument presumably enables loading of the
# free-text field - confirm against the py_isear IsearLoader signature.
loader = IsearLoader(attributes, target, True)
data = loader.load_isear("py_isear/isear.csv")

# Accessors on the loaded data set:
#   data.get_data()              -> attributes
#   data.get_target()            -> target
#   data.get_freetext_content()  -> text content of the database

In [3]:
# Flatten the per-record attribute lists returned by the loader into one flat list.
# A nested comprehension is linear in the number of values, whereas
# sum(list_of_lists, []) copies the accumulated list again on every addition.
attributes = [value for record in data.get_data() for value in record]

In [4]:
# Materialise the free-text content of the loaded data set as a list; it becomes
# the 'isear_text' column of the DataFrame built in the next cell.
isear_text = list(data.get_freetext_content())

In [5]:
# Pair every free text with the label value returned by the loader.
df = pd.DataFrame(
    {
        'isear_text': isear_text,
        'isear_label': attributes,
    }
)

In [6]:
# Use the text itself as the row index; later cells reuse df.index and iterate over it.
# NOTE(review): re-running this cell fails because set_index has already moved
# 'isear_text' out of the columns, and identical texts would produce duplicate
# index labels - verify both are acceptable.
df = df.set_index('isear_text')

In [7]:
# Numeric label code -> emotion name; applied to the 'isear_label' column below.
d = {
    1: 'joy',
    2: 'fear',
    3: 'anger',
    4: 'sadness',
    5: 'disgust',
    6: 'shame',
    7: 'guilt',
}

In [8]:
# Replace the numeric codes in 'isear_label' with emotion names using the dict `d`;
# the nested-dict form restricts the replacement to that one column.
df = df.replace({"isear_label": d})

In [9]:
# Quick check of the text index and the mapped emotion labels.
df.head()

Unnamed: 0_level_0,isear_label
isear_text,Unnamed: 1_level_1
"During the period of falling in love, each time that we met and á especially when we had not met for a long time.",joy
When I was involved in a traffic accident.,fear
"When I was driving home after several days of hard work, there á was a motorist ahead of me who was driving at 50 km/hour and á refused, despite his low speeed to let me overtake.",anger
When I lost the person who meant the most to me.,sadness
"The time I knocked a deer down - the sight of the animal's á injuries and helplessness. The realization that the animal was á so badly hurt that it had to be put down, and when the animal á screamed at the moment of death.",disgust


In [10]:
import pandas as pd
from nltk import word_tokenize

# Word-level NRC Emotion Lexicon, read as a tab-separated file. Because `names`
# is supplied, the first line of the file is read as data, not as a header.
filepath = 'NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'

# Long format: one (word, emotion, association) row per line of the file.
emolex_df = pd.read_csv(
    filepath,
    sep='\t',
    names=["word", "emotion", "association"],
)

# Wide format: one row per word and one column per emotion, with the two
# sentiment columns ('positive', 'negative') removed.
emolex_words = (
    emolex_df
    .pivot(index='word', columns='emotion', values='association')
    .reset_index()
    .drop(columns=['positive', 'negative'])
)

# Remaining emotion column labels (everything except the 'word' column).
emotions = emolex_words.columns.drop('word')

# Zero-initialised score frame: one row per ISEAR text, one column per emotion.
emo_df = pd.DataFrame(0, index=df.index, columns=emotions)

In [11]:
# Inspect the zero-initialised score frame (rows: df.index, columns: emotions).
emo_df

emotion,anger,anticipation,disgust,fear,joy,sadness,surprise,trust
isear_text,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"During the period of falling in love, each time that we met and á especially when we had not met for a long time.",0,0,0,0,0,0,0,0
When I was involved in a traffic accident.,0,0,0,0,0,0,0,0
"When I was driving home after several days of hard work, there á was a motorist ahead of me who was driving at 50 km/hour and á refused, despite his low speeed to let me overtake.",0,0,0,0,0,0,0,0
When I lost the person who meant the most to me.,0,0,0,0,0,0,0,0
"The time I knocked a deer down - the sight of the animal's á injuries and helplessness. The realization that the animal was á so badly hurt that it had to be put down, and when the animal á screamed at the moment of death.",0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
Two years back someone invited me to be the tutor of her á grand-daughter. The grand-daughter asked me some questions in á mathematics so I taught her. However she did not listen to me á and this made me feel unhappy. The second year it was the same. á When I entered university the girl's parents suggested that I be á employed as their daghters tutor. They told me that at university á there was no homework and I would have a lot of time so they á made a time-table for me which required me to be the tutor five á days a week. They did not respect me and anyway I had another á child to teach.,0,0,0,0,0,0,0,0
I had taken the responsibility to do something and I had á prepared for it. However I failed because of my timidity. After á three attempts I still could not adapt to the atmosphere and á failed as before. I felt imcompetent and felt that the others á would think that I had not prepared for it.,0,0,0,0,0,0,0,0
I was at home and I heard a loud sound of spitting outside the á door. I thought that one of my family members would step on the spit á and bring the germs in the house.,0,0,0,0,0,0,0,0
I did not do the homework that the teacher had asked us to do. I á was scolded immediately.,0,0,0,0,0,0,0,0


In [12]:
# For every ISEAR text, check which of its tokens appear in the NRC lexicon
# and add up the lexicon scores per emotion.
#
# Preprocessing steps before the lookup (cf. NLTK; implemented with spaCy below):
#   1. tokenize the ISEAR text
#   2. remove punctuation
#   3. remove stopwords
import spacy


def isear_nrc_loop():
    """Label every ISEAR text with its highest-scoring NRC emotion.

    Reads the notebook globals ``df`` (indexed by the text), ``emolex_words``
    (a 'word' column plus one column per emotion) and ``emotions`` (the emotion
    column labels). Each text is tokenized with spaCy, stop words and
    punctuation are dropped, and the lexicon rows of the remaining tokens are
    summed per emotion.

    Returns:
        A copy of ``df`` with an extra ``nrc_label`` column holding, for each
        text, the emotion column with the largest summed score.

    NOTE(review): tokens are matched case-sensitively against the lexicon, and
    a text without any lexicon hit keeps an all-zero score row, for which
    ``idxmax`` returns the first emotion column. Both behaviours are kept as
    they were so existing results stay comparable - confirm they are intended.
    """
    new_df = df.copy()
    emotion_names = list(emotions)

    # Build the word -> score-vector lookup once. Filtering the whole lexicon
    # frame for every single token is a full scan per token.
    lexicon = emolex_words.set_index('word')[emotion_names]
    word_scores = dict(zip(lexicon.index, lexicon.to_numpy()))

    nlp = spacy.load("en_core_web_lg")

    # Accumulate by row position instead of writing through the text label, so
    # the sums do not depend on how label-based writes behave when the same
    # text occurs more than once in the index.
    totals = np.zeros((len(new_df), len(emotion_names)))
    for position, doc in enumerate(nlp.pipe(new_df.index)):
        for token in doc:
            if token.is_stop or token.is_punct:
                continue
            scores = word_scores.get(token.text)
            if scores is not None:
                totals[position] += scores

    emo_scores = pd.DataFrame(totals, index=new_df.index, columns=emotion_names)
    nrc_label_df = emo_scores.idxmax(axis=1).to_frame('nrc_label')
    final = pd.concat([new_df, nrc_label_df], axis=1)
    return final

In [13]:
# Run the NRC labelling over every ISEAR text and preview the result.
# NOTE(review): the call loads the spaCy model and parses each text, so this
# cell is presumably slow - consider caching `output` if it is re-run often.
output = isear_nrc_loop()
output.head()

Unnamed: 0_level_0,isear_label,nrc_label
isear_text,Unnamed: 1_level_1,Unnamed: 2_level_1
"During the period of falling in love, each time that we met and á especially when we had not met for a long time.",joy,anticipation
When I was involved in a traffic accident.,fear,fear
"When I was driving home after several days of hard work, there á was a motorist ahead of me who was driving at 50 km/hour and á refused, despite his low speeed to let me overtake.",anger,sadness
When I lost the person who meant the most to me.,sadness,sadness
"The time I knocked a deer down - the sight of the animal's á injuries and helplessness. The realization that the animal was á so badly hurt that it had to be put down, and when the animal á screamed at the moment of death.",disgust,sadness


In [14]:
# (rows, columns) of the labelled frame.
output.shape

(7666, 2)

# Compute F1 Scores for the NRC and DM Labels

## sadness

In [41]:
# Case: sadness (NRC labels first, then DM labels)

In [42]:
# Confusion counts for "sadness": 'isear_label' is the reference, 'nrc_label' the prediction.
# NOTE(review): `final_master2` is not created in the cells visible here - it must already exist in the kernel.
# true positives: both columns say sadness
tp = len(final_master2[final_master2['isear_label'].eq("sadness") & final_master2['nrc_label'].eq("sadness")])
# false positives: predicted sadness, reference is something else
fp = len(final_master2[final_master2['isear_label'].ne("sadness") & final_master2['nrc_label'].eq("sadness")])
# true negatives: neither column says sadness
tn = len(final_master2[final_master2['isear_label'].ne("sadness") & final_master2['nrc_label'].ne("sadness")])
# false negatives: reference is sadness, prediction is something else
fn = len(final_master2[final_master2['isear_label'].eq("sadness") & final_master2['nrc_label'].ne("sadness")])

In [43]:
# precision, recall and F1 (accuracy is not computed here)

def performance():
    """Return precision, recall and F1 for the current confusion counts.

    Reads the notebook globals ``tp``, ``fp`` and ``fn``, so the cell that
    counts them must have been run immediately before. This notebook defines
    ``performance`` in several cells with the same logic; the latest one run wins.

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError (e.g. when the class was never predicted, or tp == 0).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1_score'.
    """
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / actual if actual else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        # 0/0: no true positives at all, so F1 is defined as 0 here.
        f1_score = 0.0
    nrc_pf_sad = {'precision': precision, 'recall': recall, 'f1_score': f1_score}
    return nrc_pf_sad

# NRC metrics for "sadness"; performance() reads the tp/fp/fn globals counted in the previous cell.
nrc_pf_sad = performance()
nrc_pf_sad

{'precision': 0.21006944444444445,
 'recall': 0.1104014598540146,
 'f1_score': 0.14473684210526316}

In [44]:
# Confusion counts for "sadness": 'isear_label' is the reference, 'dm_label' the prediction.
# NOTE(review): `final_master2` and its 'dm_label' column are not created in the cells visible here.
# true positives: both columns say sadness
tp = len(final_master2[final_master2['isear_label'].eq("sadness") & final_master2['dm_label'].eq("sadness")])
# false positives: predicted sadness, reference is something else
fp = len(final_master2[final_master2['isear_label'].ne("sadness") & final_master2['dm_label'].eq("sadness")])
# true negatives: neither column says sadness
tn = len(final_master2[final_master2['isear_label'].ne("sadness") & final_master2['dm_label'].ne("sadness")])
# false negatives: reference is sadness, prediction is something else
fn = len(final_master2[final_master2['isear_label'].eq("sadness") & final_master2['dm_label'].ne("sadness")])

In [45]:
# precision, recall and F1 (accuracy is not computed here)
def performance():
    """Return precision, recall and F1 for the current confusion counts.

    Reads the notebook globals ``tp``, ``fp`` and ``fn``, so the cell that
    counts them must have been run immediately before. This notebook defines
    ``performance`` in several cells with the same logic; the latest one run wins.

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError (e.g. when the class was never predicted, or tp == 0).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1_score'.
    """
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / actual if actual else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        # 0/0: no true positives at all, so F1 is defined as 0 here.
        f1_score = 0.0
    dm_pf_sad = {'precision': precision, 'recall': recall, 'f1_score': f1_score}
    return dm_pf_sad

# DM metrics for "sadness"; performance() reads the tp/fp/fn globals counted in the previous cell.
dm_pf_sad = performance()
dm_pf_sad

{'precision': 0.5051813471502591,
 'recall': 0.17791970802919707,
 'f1_score': 0.2631578947368421}

## anger

In [46]:
# Confusion counts for "anger": 'isear_label' is the reference, 'nrc_label' the prediction.
# NOTE(review): `final_master2` is not created in the cells visible here - it must already exist in the kernel.
# true positives: both columns say anger
tp = len(final_master2[final_master2['isear_label'].eq("anger") & final_master2['nrc_label'].eq("anger")])
# false positives: predicted anger, reference is something else
fp = len(final_master2[final_master2['isear_label'].ne("anger") & final_master2['nrc_label'].eq("anger")])
# true negatives: neither column says anger
tn = len(final_master2[final_master2['isear_label'].ne("anger") & final_master2['nrc_label'].ne("anger")])
# false negatives: reference is anger, prediction is something else
fn = len(final_master2[final_master2['isear_label'].eq("anger") & final_master2['nrc_label'].ne("anger")])

In [47]:
def performance():
    """Return precision, recall and F1 for the current confusion counts.

    Reads the notebook globals ``tp``, ``fp`` and ``fn``, so the cell that
    counts them must have been run immediately before. This notebook defines
    ``performance`` in several cells with the same logic; the latest one run wins.

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError (e.g. when the class was never predicted, or tp == 0).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1_score'.
    """
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / actual if actual else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        # 0/0: no true positives at all, so F1 is defined as 0 here.
        f1_score = 0.0
    nrc_pf_anger = {'precision': precision, 'recall': recall, 'f1_score': f1_score}
    return nrc_pf_anger

# NRC metrics for "anger"; performance() reads the tp/fp/fn globals counted in the previous cell.
nrc_pf_anger = performance()
nrc_pf_anger

{'precision': 0.18701586130579123,
 'recall': 0.4625912408759124,
 'f1_score': 0.26635145784081954}

In [48]:
# Confusion counts for "anger": 'isear_label' is the reference, 'dm_label' the prediction.
# NOTE(review): `final_master2` and its 'dm_label' column are not created in the cells visible here.
# true positives: both columns say anger
tp = len(final_master2[final_master2['isear_label'].eq("anger") & final_master2['dm_label'].eq("anger")])
# false positives: predicted anger, reference is something else
fp = len(final_master2[final_master2['isear_label'].ne("anger") & final_master2['dm_label'].eq("anger")])
# true negatives: neither column says anger
tn = len(final_master2[final_master2['isear_label'].ne("anger") & final_master2['dm_label'].ne("anger")])
# false negatives: reference is anger, prediction is something else
fn = len(final_master2[final_master2['isear_label'].eq("anger") & final_master2['dm_label'].ne("anger")])

In [49]:
def performance():
    """Return precision, recall and F1 for the current confusion counts.

    Reads the notebook globals ``tp``, ``fp`` and ``fn``, so the cell that
    counts them must have been run immediately before. This notebook defines
    ``performance`` in several cells with the same logic; the latest one run wins.

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError (e.g. when the class was never predicted, or tp == 0).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1_score'.
    """
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / actual if actual else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        # 0/0: no true positives at all, so F1 is defined as 0 here.
        f1_score = 0.0
    dm_pf_anger = {'precision': precision, 'recall': recall, 'f1_score': f1_score}
    return dm_pf_anger
    
# DM metrics for "anger"; performance() reads the tp/fp/fn globals counted in the previous cell.
dm_pf_anger = performance()
dm_pf_anger

{'precision': 0.18952234206471494,
 'recall': 0.11222627737226278,
 'f1_score': 0.14097421203438393}

## joy

In [50]:
# Confusion counts for "joy": 'isear_label' is the reference, 'nrc_label' the prediction.
# NOTE(review): `final_master2` is not created in the cells visible here - it must already exist in the kernel.
# true positives: both columns say joy
tp = len(final_master2[final_master2['isear_label'].eq("joy") & final_master2['nrc_label'].eq("joy")])
# false positives: predicted joy, reference is something else
fp = len(final_master2[final_master2['isear_label'].ne("joy") & final_master2['nrc_label'].eq("joy")])
# true negatives: neither column says joy
tn = len(final_master2[final_master2['isear_label'].ne("joy") & final_master2['nrc_label'].ne("joy")])
# false negatives: reference is joy, prediction is something else
fn = len(final_master2[final_master2['isear_label'].eq("joy") & final_master2['nrc_label'].ne("joy")])

In [51]:
def performance():
    """Return precision, recall and F1 for the current confusion counts.

    Reads the notebook globals ``tp``, ``fp`` and ``fn``, so the cell that
    counts them must have been run immediately before. This notebook defines
    ``performance`` in several cells with the same logic; the latest one run wins.

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError (e.g. when the class was never predicted, or tp == 0).

    Returns:
        dict with the keys 'precision', 'recall' and 'f1_score'.
    """
    predicted = tp + fp
    actual = tp + fn
    precision = tp / predicted if predicted else 0.0
    recall = tp / actual if actual else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        # 0/0: no true positives at all, so F1 is defined as 0 here.
        f1_score = 0.0
    nrc_pf_joy = {'precision': precision, 'recall': recall, 'f1_score': f1_score}
    return nrc_pf_joy


# NRC metrics for "joy"; performance() reads the tp/fp/fn globals counted in the previous cell.
nrc_pf_joy = performance()
nrc_pf_joy

{'precision': 0.28,
 'recall': 0.20475319926873858,
 'f1_score': 0.2365364308342133}