# Annotation analysis
Utility functions that support the analysis of the annotations produced by the two raters.

In [33]:
from sklearn.metrics import cohen_kappa_score

def annotation_analysis(df1, df2):
    """Print inter-rater agreement for every annotation column.

    For each of the columns 'Positive', 'Negative', 'Neutral' and 'Irony'
    two lines are printed: Cohen's kappa and the observed agreement between
    the two raters, both formatted with two decimals.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Annotations of the first and of the second rater. Both must contain
        the four columns listed above; rows are compared position by
        position (assumes both frames list the same items in the same
        order -- TODO confirm against the CSV exports).

    Returns
    -------
    None
        Results are written to standard output only.
    """
    # Missing cells are treated as an explicit 'no' answer.
    df1 = df1.fillna('no')
    df2 = df2.fillna('no')

    # One pass per label instead of four copy-pasted stanzas.
    for label in ('Positive', 'Negative', 'Neutral', 'Irony'):
        rater1_labels = df1[label].to_list()
        rater2_labels = df2[label].to_list()
        print(label + " k: ", "{:.2f}".format(cohen_kappa_score(rater1_labels, rater2_labels)))
        print("Observed Agreement: ", "{:.2f}".format(observed_agreement(rater1_labels, rater2_labels)))

def observed_agreement(df1, df2):
    """Return the proportion of items on which the two raters agree.

    The previous implementation read fixed cells of a 2x2 crosstab
    (columns 'no'/'yes', rows 0/1) and therefore failed whenever one rater
    never used one of the two labels. Counting matching pairs directly gives
    the same value in the 2x2 case and works for any set of labels.

    Parameters
    ----------
    df1, df2 : list-like
        Labels assigned by rater 1 and rater 2, paired by position.

    Returns
    -------
    float
        Number of agreeing pairs divided by the number of pairs, or NaN
        when there is no pair to compare.

    Raises
    ------
    ValueError
        Raised by pandas when two plain sequences of different length are
        passed.
    """
    data = {'Rater#1': df1, 'Rater#2': df2}
    pairs = pd.DataFrame(data, columns=['Rater#1', 'Rater#2'])
    # Pairs with a missing label are left out, as the crosstab-based
    # version did not count them either.
    pairs = pairs.dropna()
    if pairs.empty:
        return float('nan')
    return float((pairs['Rater#1'] == pairs['Rater#2']).mean())


## Annotation - Round 1

In [1]:
import pandas as pd

# Round 1: load the annotation sheet of each rater (paths are relative to the notebook folder).
rater1_1 = pd.read_csv("../data/interim/Annotazione_Rater#1_1.csv")
rater2_1 = pd.read_csv("../data/interim/Annotazione_Rater#2_1.csv")

In [34]:
# Print Cohen's kappa and observed agreement per label for round 1.
annotation_analysis(rater1_1,rater2_1)

Positive k:  0.37
Observed Agreement:  0.88
Negative k:  0.79
Observed Agreement:  0.90
Neutral k:  0.79
Observed Agreement:  0.89
Irony k:  0.65
Observed Agreement:  0.88


## Annotation - Round 2

In [35]:
# Round 2: load the annotation sheet of each rater (paths are relative to the notebook folder).
rater1_2 = pd.read_csv("../data/interim/Annotazione_Rater#1_2.csv")
rater2_2 = pd.read_csv("../data/interim/Annotazione_Rater#2_2.csv")

In [36]:
# Print Cohen's kappa and observed agreement per label for round 2.
annotation_analysis(rater1_2,rater2_2)

Positive k:  0.73
Observed Agreement:  0.95
Negative k:  0.91
Observed Agreement:  0.96
Neutral k:  0.87
Observed Agreement:  0.94
Irony k:  0.83
Observed Agreement:  0.94


# Gold Standard
The **Final_Class** column holds the outcome of the raters' annotation and can take the following values:
- pos: positive tweet
- neg: negative tweet
- neut: neutral tweet
- pos_ir: positive tweet with the presence of irony
- neg_ir: negative tweet with the presence of irony
- mix: tweet with presence of both positive and negative sentiment

The columns **Class** and **Irony** are derived from Final_Class: they contain, respectively, the sentiment class (pos, neg, neut, mix) and whether irony is present (yes, no).

In [2]:
# Load the gold-standard dataset and preview its first rows.
g_s = pd.read_csv("../data/processed/Gold_Standard.csv")
g_s.head()

Unnamed: 0,tweet_id,tweet,Final_Class,Class,Irony
0,1462002288835403777,In arrivo un nuovo #bonus #inps! Scopri chi pu...,pos,pos,no
1,1354381987507744771,"Allora, riepiloghiamo;\nAi politici la pension...",neg_ir,neg,yes
2,1454050817821003783,Caro @INPS_it e cari @Europarl_IT fate bene i...,neg_ir,neg,yes
3,1393675898960982016,"FOTO - A #napoli, dopo l'apertura di una #vora...",neut,neut,no
4,1417876270705164289,Maxi esercitazione di #protezionecivile. Lo sc...,neut,neut,no
