In [1]:
import krippendorff
import numpy as np
import pandas as pd

## Problem 6: Calculate the agreement measures within our group

In [2]:
# Load our group's annotation file, then keep one frame per annotator.
our_data = pd.read_csv('group25_submit/group_25.csv')
hc_data = our_data.loc[our_data['annotator'].eq('hcwon')]
el_data = our_data.loc[our_data['annotator'].eq('ellali')]

In [3]:
# Reliability matrix for Krippendorff's alpha: one row of ratings per annotator.
# NOTE(review): the two rows are paired by position, which assumes both
# annotators' rows appear in the same item order -- confirm against the CSV.
hc_ratings = hc_data['rating']
el_ratings = el_data['rating']
rela_data = [hc_ratings.tolist(), el_ratings.tolist()]

alpha_nominal = krippendorff.alpha(reliability_data=rela_data,
                                   level_of_measurement="nominal")
print("Krippendorff's alpha for nominal metric: ", alpha_nominal)

alpha_ordinal = krippendorff.alpha(reliability_data=rela_data,
                                   level_of_measurement='ordinal')
print("Krippendorff's alpha for ordinal metric: ", alpha_ordinal)

# 2x2 Pearson correlation matrix between the two annotators' ratings.
corr = np.corrcoef(hc_ratings, el_ratings)
print('Correlation:\n', corr)

Krippendorff's alpha for nominal metric:  0.7285563972275997
Krippendorff's alpha for ordinal metric:  0.877237764754488
Correlation:
 [[1.         0.81942049]
 [0.81942049 1.        ]]


## Problem 7: Compute the agreement of all other annotators on our group's items

In [4]:
# Load the full dataset and the train/dev annotation splits.
data_total = pd.read_csv('data_train/si630w22-hw3-data.csv')
other_train = pd.read_csv('data_train/si630w22-hw3-train.csv')
other_dev = pd.read_csv('data_train/si630w22-hw3-dev.csv')

# Stack train on top of dev; the original row labels of each split are kept.
other = pd.concat([other_train, other_dev])

# Every annotator id (in order of first appearance) and the ids of the items
# that carry our group's tag.
user_list = other['annotator_id'].unique().tolist()
item_list = other.loc[other['group'] == 'group_25', 'id'].unique().tolist()

In [5]:
# Spot check: take the item id of the 26th row tagged group_25 and show every
# annotation of that item, from any group.
our_rows = other[other['group'] == 'group_25']
sample_id = our_rows['id'].iloc[25]
other[other['id'] == sample_id]

Unnamed: 0,id,annotator_id,rating,group
10305,t3_n3gydl,user_34,5.0,group_25
10306,t3_n3gydl,user_35,5.0,group_25
13102,t3_n3gydl,user_43,5.0,group_18
13103,t3_n3gydl,user_44,5.0,group_18
13104,t3_n3gydl,user_45,5.0,group_18


In [6]:
# Build the annotator x item rating matrix used for the agreement calculations.
# Rows follow user_list, columns follow item_list; NaN marks "no rating".
n, m = len(user_list), len(item_list)
ann_arr = np.full([n, m], np.nan)

# Position lookups so each rating is placed in O(1), instead of rescanning the
# whole frame per item and calling list.index for every single rating.
user_pos = {user: row for row, user in enumerate(user_list)}
item_pos = {item: col for col, item in enumerate(item_list)}

# Only annotations of our items matter. Rows are visited in frame order, so if
# an (annotator, item) pair occurs more than once the last occurrence wins.
relevant = other[other['id'].isin(item_list)]
for matched_user, matched_item, matched_rate in zip(relevant['annotator_id'],
                                                    relevant['id'],
                                                    relevant['rating']):
    ann_arr[user_pos[matched_user], item_pos[matched_item]] = matched_rate

In [7]:
# Locate our group's annotators among the rows of ann_arr from the data itself.
# Hard-coding rows 34:36 is only right if user_list happens to place our two
# annotators there; user_list is ordered by first appearance in `other`.
our_users = set(other.loc[other['group'] == 'group_25', 'annotator_id'])
is_ours = np.array([user in our_users for user in user_list], dtype=bool)

# Agreement between our group's annotators on our items.
print("Krippendorff's alpha for ordinal metric in our group: ", krippendorff.alpha(reliability_data=ann_arr[is_ours],
                                                                      level_of_measurement='ordinal'))


# Agreement among every other annotator on the same items.
print("Krippendorff's alpha for ordinal metric of other groups: ", krippendorff.alpha(reliability_data=ann_arr[~is_ours],
                                                                      level_of_measurement='ordinal'))

# The slight difference between our group's agreement here and the score
# calculated in Problem 6 is because Problem 6 used our raw data.

Krippendorff's alpha for ordinal metric in our group:  0.8758645131495496
Krippendorff's alpha for ordinal metric of other groups:  0.6157414535501715


## Problem 8: Examine disagreements

In [8]:
# Rows of ann_arr that belong to our group's annotators, derived from the data
# rather than hard-coded as 34:36 (user_list order is first-appearance order).
our_users = set(other.loc[other['group'] == 'group_25', 'annotator_id'])
is_ours = np.array([user in our_users for user in user_list], dtype=bool)

# Mean score of our group. Plain mean: an item that one of our annotators did
# not rate stays NaN here and ends up with a difference of 0 below.
our_label = ann_arr[is_ours, :]
our_mean = np.mean(our_label, axis=0)

# Mean score of the other groups, per item, over the ratings that exist.
# Missing ratings are tracked with a NaN mask instead of being zero-filled and
# counted with `> 0`, so a rating of 0 (or below) is no longer dropped.
# other_label therefore keeps NaN for missing ratings.
other_label = ann_arr[~is_ours, :]
n, m = other_label.shape

rated = ~np.isnan(other_label)
rating_count = rated.sum(axis=0)
rating_sum = np.where(rated, other_label, 0.0).sum(axis=0)

# Items nobody else rated stay NaN; the guarded divide avoids a 0/0 warning.
other_mean = np.full(m, np.nan)
np.divide(rating_sum, rating_count, out=other_mean, where=rating_count > 0)

# Positive values mean the other groups rated the item higher than we did.
# Items lacking a mean on either side are treated as "no difference".
differ = other_mean - our_mean
differ[np.isnan(differ)] = 0

  


In [9]:
# Column positions of the 10 items with the largest absolute gap between the
# other groups' mean rating and ours (largest gap first).
differ_index = np.argsort(-np.abs(differ))[:10]

# Translate positions back to item ids.
differ_list = [item_list[pos] for pos in differ_index]

# Collect every annotation of those items, one frame per item, in rank order.
result = [other[other['id'] == item_id] for item_id in differ_list]

bad_result = pd.concat(result)
bad_result.head(4)

Unnamed: 0,id,annotator_id,rating,group
4116,t3_n68hwz,user_13,1.0,group_01
4117,t3_n68hwz,user_14,2.0,group_01
10381,t3_n68hwz,user_34,4.0,group_25
10382,t3_n68hwz,user_35,4.0,group_25


In [10]:
# Attach the original question/reply text to each annotation row by matching
# the annotation's item id against question_id, then save the table.
df_out = bad_result.merge(data_total, how='left',
                          left_on='id', right_on='question_id')
df_out.to_csv('big10_differ.csv', index=False, encoding='utf8')

df_out

Unnamed: 0,id,annotator_id,rating,group,question_id,question_text,reply_id,reply_text,rlen
0,t3_n68hwz,user_13,1.0,group_01,t3_n68hwz,What are some unique and harmless pranks to pl...,gx5l1aw,Replace every m key with the n key and replace...,136
1,t3_n68hwz,user_14,2.0,group_01,t3_n68hwz,What are some unique and harmless pranks to pl...,gx5l1aw,Replace every m key with the n key and replace...,136
2,t3_n68hwz,user_34,4.0,group_25,t3_n68hwz,What are some unique and harmless pranks to pl...,gx5l1aw,Replace every m key with the n key and replace...,136
3,t3_n68hwz,user_35,4.0,group_25,t3_n68hwz,What are some unique and harmless pranks to pl...,gx5l1aw,Replace every m key with the n key and replace...,136
4,t3_n46meh,user_34,3.0,group_25,t3_n46meh,LPT Request: How to be more participative in c...,gwu3k44,If somebody else says something you can always...,185
5,t3_n46meh,user_35,2.0,group_25,t3_n46meh,LPT Request: How to be more participative in c...,gwu3k44,If somebody else says something you can always...,185
6,t3_n46meh,user_40,5.0,group_07,t3_n46meh,LPT Request: How to be more participative in c...,gwu3k44,If somebody else says something you can always...,185
7,t3_n46meh,user_41,5.0,group_07,t3_n46meh,LPT Request: How to be more participative in c...,gwu3k44,If somebody else says something you can always...,185
8,t3_n46meh,user_42,5.0,group_07,t3_n46meh,LPT Request: How to be more participative in c...,gwu3k44,If somebody else says something you can always...,185
9,t3_ni01ho,user_14,4.0,group_01,t3_ni01ho,People who lie to embellish stories about how ...,gyz3vbs,"Pfft, so anyway I’m reading Reddit this user u...",419
