In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None
from sklearn import preprocessing
import numpy as np
from statsmodels.stats import inter_rater as irr
pd.options.display.max_colwidth = 240

In [2]:
juanjo = pd.read_csv('../Data/Data_Annotated/latest_juanjo_annotations.csv', sep = '\t')
juanma = pd.read_csv('../Data/Data_Annotated/latest_juanma_annotations.csv', sep = '\t')
ger = pd.read_csv('../Data/Data_Annotated/latest_ger_annotations.csv', sep = '\t')

In [3]:
juanjo = juanjo.rename(columns = {'sentimento':'sentiment_juanjo', 'relevancia': 'relevancia_juanjo', 'emocion': 'emocion_juanjo', 'ironia': 'ironia_juanjo'})
juanma = juanma.rename(columns = {'sentimento':'sentiment_juanma', 'relevancia': 'relevancia_juanma', 'emocion': 'emocion_juanma', 'ironia': 'ironia_juanma'})
ger = ger.rename(columns = {'sentimento':'sentiment_ger', 'relevancia': 'relevancia_ger', 'emocion': 'emocion_ger', 'ironia': 'ironia_ger'})

In [15]:
merged_df = pd.merge(pd.merge(juanjo[['text', 'tweet_id', 'sentiment_juanjo']],
                     juanma[['tweet_id', 'sentiment_juanma']], on=['tweet_id']),
                    ger[['tweet_id', 'sentiment_ger']], on=['tweet_id'])
merged_relevancia = pd.merge(pd.merge(juanjo[['text', 'tweet_id', 'relevancia_juanjo']],
                             juanma[['tweet_id', 'relevancia_juanma']], on=['tweet_id']),
                            ger[['tweet_id', 'relevancia_ger']], on=['tweet_id'])
merged_emocion = pd.merge(pd.merge(juanjo[['text', 'tweet_id', 'emocion_juanjo']],
                          juanma[['tweet_id', 'emocion_juanma']], on=['tweet_id']),
                         ger[['tweet_id', 'emocion_ger']], on=['tweet_id'])
merged_ironia = pd.merge(pd.merge(juanjo[['text', 'tweet_id', 'ironia_juanjo']],
                         juanma[['tweet_id', 'ironia_juanma']], on=['tweet_id']),
                        ger[['tweet_id', 'ironia_ger']], on=['tweet_id'])

In [16]:
dict_emociones = {
    'alegria' : ['Alegria', 'Confianza', 'Admiración','Agrado'],
    'miedo' : ['Miedo', 'Incertidumbre'],
    'sorpresa' : ['Sorpresa', 'Asombro'],
    'tristeza' : ['Tristeza' , 'Decepción'],
    'asco' : ['Asco', 'Desagrado'],
    'ira' : ['Ira', 'Odio'],
    'otra' : ['Otra'] }

In [17]:
for i  in dict_emociones.keys():
    merged_df[i + '_juanma'] = merged_df['sentiment_juanma'].str.contains('|'.join(dict_emociones[i]))
    merged_df[i + '_juanjo'] = merged_df['sentiment_juanjo'].str.contains('|'.join(dict_emociones[i]))
    merged_df[i + '_ger'] = merged_df['sentiment_ger'].str.contains('|'.join(dict_emociones[i]))

In [5]:
merged_df = merged_df.fillna(False)
merged_df['labels'] = merged_df.apply(lambda x: ', '.join(x.index[x == True]), axis=1)

In [59]:
df_list = []
emotions_count = {}
for i in dict_emociones.keys():
    emotion_df = merged_df[(merged_df[i + '_juanma']== True) | 
                           (merged_df[i + '_juanjo']== True)| 
                           (merged_df[i + '_ger']== True)][['text', 'labels', 'tweet_id' , i + '_juanma', i +'_juanjo', i +'_ger', 'sentiment_juanma', 'sentiment_juanjo' , 'sentiment_ger']]
    emotion_df = emotion_df.rename(columns= {i + '_juanma': 'juanma', i +'_juanjo': 'juanjo',  i +'_ger': 'ger'})
    emotion_df['emocion'] = i
    emotions_count[i] = len(emotion_df)
    df_list.append(emotion_df)
full_df = pd.concat(df_list, ignore_index=True)
#full_df_disagreement = full_df[full_df['juanma'] != full_df['juanjo']]
#full_df_agreement = full_df[full_df['juanma'] == full_df['juanjo']]

In [56]:
agreements = {}
for i in dict_emociones.keys():
    arr = np.array(merged_df[[i + '_juanjo',i + '_juanma', i + '_ger' ]])
    agg = irr.aggregate_raters(arr)
    agreements[i] = irr.fleiss_kappa(agg[0], method='fleiss')

In [57]:
agreements

{'alegria': 0.7101458385717879,
 'miedo': 0.4488669905998816,
 'sorpresa': -0.012936610608019742,
 'tristeza': 0.3410939691444601,
 'asco': 0.6088536391755399,
 'ira': 0.1456628477905069,
 'otra': -0.006426735218517274}

In [60]:
total = sum(emotions_count.values())
avgd_agreement = 0
mean_agreement = 0
for i in agreements.keys():
    avgd_agreement+= agreements[i]*(emotions_count[i]/total)
    mean_agreement+= agreements[i]
mean_agreement = mean_agreement/len(agreements)

In [61]:
avgd_agreement

0.5051707025502078

In [62]:
mean_agreement

0.3193228484936627

In [23]:
merged_relevancia = merged_relevancia.fillna('No')
merged_relevancia['relevancia_juanjo_bool'] = merged_relevancia['relevancia_juanjo'].map({'Si': True, 'No': False})
merged_relevancia['relevancia_juanma_bool'] = merged_relevancia['relevancia_juanma'].map({'Si': True, 'No': False})
merged_relevancia['relevancia_ger_bool'] = merged_relevancia['relevancia_ger'].map({'Si': True, 'No': False})

In [30]:
arr = np.array(merged_relevancia[['relevancia_juanjo_bool', 'relevancia_juanma_bool', 'relevancia_ger_bool']])
agg = irr.aggregate_raters(arr)
irr.fleiss_kappa(agg[0], method='fleiss')

0.7906417112299454

In [31]:
merged_emocion = merged_emocion.fillna('No')
merged_emocion['emocion_juanjo_bool'] = merged_emocion['emocion_juanjo'].map({'Si': True, 'No': False})
merged_emocion['emocion_juanma_bool'] = merged_emocion['emocion_juanma'].map({'Si': True, 'No': False})
merged_emocion['emocion_ger_bool'] = merged_emocion['emocion_ger'].map({'Si': True, 'No': False})

In [32]:
arr_num = np.array(merged_emocion[['emocion_juanjo_bool','emocion_juanma_bool', 'emocion_ger_bool' ]])
agg_num = irr.aggregate_raters(arr_num)
irr.fleiss_kappa(agg_num[0], method='fleiss')

0.49757449757449823

In [33]:
merged_ironia = merged_ironia.fillna('No')
merged_ironia['ironia_juanjo_bool'] = merged_ironia['ironia_juanjo'].map({'Si': True, 'No': False})
merged_ironia['ironia_juanma_bool'] = merged_ironia['ironia_juanma'].map({'Si': True, 'No': False})
merged_ironia['ironia_ger_bool'] = merged_ironia['ironia_ger'].map({'Si': True, 'No': False})

In [34]:
arr_num = np.array(merged_ironia[['ironia_juanjo_bool', 'ironia_juanma_bool', 'ironia_ger_bool']])
agg_num = irr.aggregate_raters(arr_num)
irr.fleiss_kappa(agg_num[0], method='fleiss')

0.27140198511166136

In [47]:
dict_sentimientos = {
    'positivo' : ['Alegria', 'Confianza', 'Admiración','Agrado'],
    'negativo' : ['Miedo', 'Incertidumbre','Tristeza' , 'Decepción', 'Asco', 'Desagrado','Ira', 'Odio'],
    'otra_sorpresa' : ['Sorpresa', 'Asombro', 'Otra'],
     }

In [48]:
for i  in dict_sentimientos.keys():
    merged_df[i + '_juanma'] = merged_df['sentiment_juanma'].str.contains('|'.join(dict_sentimientos[i]))
    merged_df[i + '_juanjo'] = merged_df['sentiment_juanjo'].str.contains('|'.join(dict_sentimientos[i]))
    merged_df[i + '_ger'] = merged_df['sentiment_ger'].str.contains('|'.join(dict_sentimientos[i]))

In [65]:
agreements = {}
for i in dict_sentimientos.keys():
    arr = np.array(merged_df[[i + '_juanjo',i + '_juanma', i + '_ger' ]])
    agg = irr.aggregate_raters(arr)
    agreements[i] = irr.fleiss_kappa(agg[0], method='fleiss')

In [66]:
agreements

{'positivo': 0.5897863164085473,
 'negativo': 0.5606850095262633,
 'otra_sorpresa': 0.6146653543307087}

In [67]:
df_list = []
emotions_count = {}
for i in dict_sentimientos.keys():
    emotion_df = merged_df[(merged_df[i + '_juanma']== True) | 
                           (merged_df[i + '_juanjo']== True)| 
                           (merged_df[i + '_ger']== True)][['text', 'labels', 'tweet_id' , i + '_juanma', i +'_juanjo', i +'_ger', 'sentiment_juanma', 'sentiment_juanjo' , 'sentiment_ger']]
    emotion_df = emotion_df.rename(columns= {i + '_juanma': 'juanma', i +'_juanjo': 'juanjo',  i +'_ger': 'ger'})
    emotion_df['emocion'] = i
    emotions_count[i] = len(emotion_df)
    df_list.append(emotion_df)
full_df = pd.concat(df_list, ignore_index=True)
#full_df_disagreement = full_df[full_df['juanma'] != full_df['juanjo']]
#full_df_agreement = full_df[full_df['juanma'] == full_df['juanjo']]

In [68]:
total = sum(emotions_count.values())
avgd_agreement = 0
mean_agreement = 0
for i in agreements.keys():
    avgd_agreement+= agreements[i]*(emotions_count[i]/total)
    mean_agreement+= agreements[i]
mean_agreement = mean_agreement/len(agreements)

In [69]:
avgd_agreement

0.5755635380638484

In [70]:
mean_agreement

0.5883788934218398