In [1]:
#Import packages and data
import itertools
import pandas as pd
import numpy as np
import csv
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import confusion_matrix

# Read the three CSV parts of each corpus into a list of DataFrames
# (index i of each list holds part i).
nyt_stance = [
    pd.read_csv(f'original_csvs/nyt_stance_{part}.csv') for part in range(3)
]
twitter_stance = [
    pd.read_csv(f'original_csvs/twitter_stance_{part}.csv') for part in range(3)
]

In [2]:
# Replace NaN values with 'missing' in every column after the first one,
# for all CSVs in nyt_stance and twitter_stance.
#
# The filled columns are assigned back to the frame instead of calling
# `frame[col].fillna(..., inplace=True)`: that form is chained in-place
# assignment, which pandas warns about and which leaves the frame unchanged
# when Copy-on-Write is active.
for frame in nyt_stance + twitter_stance:
    annotator_cols = frame.columns[1:]
    frame[annotator_cols] = frame[annotator_cols].fillna('missing')


In [3]:
# Compute Cohen's Kappa for each pair of annotators.
# k = (p_0 - p_e)/(1 - p_e), where p_0 is the observed agreement and p_e is the agreement expected by chance.
# p_0 = (number of texts on which both annotators agree) / total
# For two classes, p_e = p_correct + p_incorrect, which is:
# p_e = (a_correct/total * b_correct/total) + (a_incorrect/total * b_incorrect/total)

In [4]:
# Cohen's Kappa for every pair of annotators in the nyt_stance data.
#   k   = (p_0 - p_e) / (1 - p_e)
#   p_0 = observed agreement: diagonal of the confusion matrix / number of texts
#   p_e = chance agreement: sum over classes of
#         (column total * row total) / (number of texts squared)

nyt_pair_dict = {}
for frame in nyt_stance:
    # Number of rows in the frame is the total count for every pair
    n_texts = len(frame.text)

    # Every unordered pair of annotator columns (all columns after the first)
    for first, second in itertools.combinations(frame.columns[1:], 2):
        cm = confusion_matrix(frame[first], frame[second])

        observed = cm.trace() / n_texts
        chance = (cm.sum(axis=0) * cm.sum(axis=1)).sum() / (n_texts * n_texts)

        # Key is the (annotator, annotator) tuple, value is that pair's kappa
        nyt_pair_dict[(first, second)] = (observed - chance) / (1 - chance)
        
    


Cohen's Kappa values for nyt_stance data

In [5]:
#Annotator-pair Cohen's Kappa values for the nyt_stance data
#(keys are (annotator, annotator) tuples, values are the pair's kappa)
print(nyt_pair_dict)

{('annotation_64', 'annotation_60'): 0.4766392841425847, ('annotation_64', 'annotation_61'): 0.37361510053344266, ('annotation_64', 'annotation_63'): 0.22998553963454713, ('annotation_60', 'annotation_61'): 0.4495505617977528, ('annotation_60', 'annotation_63'): 0.3249869406233677, ('annotation_61', 'annotation_63'): 0.3003073010490623, ('annotation_34', 'annotation_5'): 0.26157741130138196, ('annotation_34', 'annotation_20'): 0.3498383779018513, ('annotation_34', 'annotation_4'): 0.5355299722111949, ('annotation_5', 'annotation_20'): 0.30353430353430355, ('annotation_5', 'annotation_4'): 0.3226017918275184, ('annotation_20', 'annotation_4'): 0.37449258247841166, ('annotation_7', 'annotation_8'): -0.0043363965218954316, ('annotation_7', 'annotation_9'): 0.2823912756860603, ('annotation_7', 'annotation_78'): 0.5118937287611987, ('annotation_7', 'annotation_62'): 0.44581280788177335, ('annotation_8', 'annotation_9'): -0.0033332587637860444, ('annotation_8', 'annotation_78'): 0.0028331139

In [6]:
# Score every annotator found in the three nyt_stance CSVs: the score is the
# mean kappa over all annotator pairs that include that annotator.

nyt_annotator_dict = {}

for frame in nyt_stance:
    for name in frame.columns[1:]:
        # kappa values of the pairs this annotator takes part in
        own_kappas = [kappa for pair, kappa in nyt_pair_dict.items() if name in pair]
        nyt_annotator_dict[name] = sum(own_kappas) / len(own_kappas)

Annotator Scores (average k) for nyt_stance data

In [7]:
#Average kappa score of each nyt_stance annotator, keyed by annotator column name
print(nyt_annotator_dict)

{'annotation_64': 0.3600799747701915, 'annotation_60': 0.41705892885456836, 'annotation_61': 0.37449098779341927, 'annotation_63': 0.285093260435659, 'annotation_34': 0.38231525380480935, 'annotation_5': 0.29590450222106796, 'annotation_20': 0.34262175463818884, 'annotation_4': 0.41087478217237505, 'annotation_7': 0.3089403539517842, 'annotation_8': -0.0005676090538629544, 'annotation_9': 0.1879002150809514, 'annotation_78': 0.31627575307191846, 'annotation_62': 0.30450723707881616}


In [8]:
# twitter_stance work
# Cohen's Kappa for every pair of annotators in the twitter_stance data.
#   k   = (p_0 - p_e) / (1 - p_e)
#   p_0 = observed agreement: diagonal of the confusion matrix / number of texts
#   p_e = chance agreement: sum over classes of
#         (column total * row total) / (number of texts squared)

twitter_pair_dict = {}
for frame in twitter_stance:
    # Number of rows in the frame is the total count for every pair
    n_texts = len(frame.text)

    # Every unordered pair of annotator columns (all columns after the first)
    for first, second in itertools.combinations(frame.columns[1:], 2):
        cm = confusion_matrix(frame[first], frame[second])

        observed = cm.trace() / n_texts
        chance = (cm.sum(axis=0) * cm.sum(axis=1)).sum() / (n_texts * n_texts)

        # Key is the (annotator, annotator) tuple, value is that pair's kappa
        twitter_pair_dict[(first, second)] = (observed - chance) / (1 - chance)
        
    

Cohen's Kappa values for twitter_stance data

In [9]:
#Annotator-pair Cohen's Kappa values for the twitter_stance data
#(keys are (annotator, annotator) tuples, values are the pair's kappa)
print(twitter_pair_dict)

{('annotation_32', 'annotation_33'): 0.2534050781906845, ('annotation_32', 'annotation_35'): 0.4085410679676536, ('annotation_32', 'annotation_17'): 0.23917483660130712, ('annotation_33', 'annotation_35'): 0.3273542600896861, ('annotation_33', 'annotation_17'): 0.39112611526404634, ('annotation_35', 'annotation_17'): 0.4136460554371002, ('annotation_26', 'annotation_37'): 0.14795244385733158, ('annotation_26', 'annotation_38'): 0.2274502447260262, ('annotation_26', 'annotation_39'): 0.16167664670658682, ('annotation_37', 'annotation_38'): 0.31476016605812035, ('annotation_37', 'annotation_39'): 0.4467213114754098, ('annotation_38', 'annotation_39'): 0.28344746951728744, ('annotation_113', 'annotation_114'): 0.23612454070779346, ('annotation_113', 'annotation_115'): 0.23195876288659786, ('annotation_113', 'annotation_116'): 0.24873096446700513, ('annotation_114', 'annotation_115'): 0.16709511568123395, ('annotation_114', 'annotation_116'): 0.2042306723747167, ('annotation_115', 'annotat

In [10]:
# Score every annotator found in the three twitter_stance CSVs: the score is
# the mean kappa over all annotator pairs that include that annotator.

twitter_annotator_dict = {}

for frame in twitter_stance:
    for name in frame.columns[1:]:
        # kappa values of the pairs this annotator takes part in
        own_kappas = [kappa for pair, kappa in twitter_pair_dict.items() if name in pair]
        twitter_annotator_dict[name] = sum(own_kappas) / len(own_kappas)

Annotator Scores (average k) for twitter_stance data

In [11]:
#Average kappa score of each twitter_stance annotator, keyed by annotator column name
print(twitter_annotator_dict)

{'annotation_32': 0.3003736609198817, 'annotation_33': 0.323961817848139, 'annotation_35': 0.3831804611648133, 'annotation_17': 0.34798233576748455, 'annotation_26': 0.1790264450966482, 'annotation_37': 0.3031446404636206, 'annotation_38': 0.27521929343381135, 'annotation_39': 0.29728180923309466, 'annotation_113': 0.23893808935379882, 'annotation_114': 0.202483442921248, 'annotation_115': 0.22375870026335135, 'annotation_116': 0.2417279530213147}


In [12]:
#Assemble the Dataset
#    1. Assign a final label to each text, according to the following logic: 
#        - First, eliminate any labels for annotators whose average kappa score is less 
#        than 0.2 (unreliable annotators) 
#        - Second, assign the final label to each text as the most frequent label among
#        the remaining annotators
#        - If there are ties (the same number of annotators for different labels), 
#        use the label with higher-reliability annotators (higher kappa scores on average) 
#    2. Combine all of the text/label pairs for the PRIMARY dataset into a single CSV file, 
#    with the columns "text" and "label" 
#    3. Combine all of the text/label pairs for the SECONDARY dataset into a single CSV file, 
#    with the columns "text" and "label" 

In [13]:
# Remove the label columns of unreliable annotators, i.e. annotators whose
# average kappa score is below 0.2. The frames are modified in place; the
# first column of each frame is never considered for removal.
for corpus, scores in (
    (nyt_stance, nyt_annotator_dict),
    (twitter_stance, twitter_annotator_dict),
):
    unreliable = {name for name, kappa in scores.items() if kappa < 0.2}
    for frame in corpus:
        doomed = [col for col in frame.columns[1:] if col in unreliable]
        frame.drop(columns=doomed, inplace=True)

# To check that only annotators scoring 0.2 or above remain, list
# frame.columns[1:] for every frame in nyt_stance / twitter_stance.

In [14]:
def majority_vote(votes):
    """Return the most frequent label in ``votes``, breaking ties by position.

    Parameters
    ----------
    votes : sequence of hashable
        One label per annotator. The sequence is traversed twice, so it must
        be a list/tuple rather than a one-shot iterator. Callers in this
        notebook pass the labels ordered from the highest- to the
        lowest-scoring annotator, which makes the tie-break favour the most
        reliable annotator.

    Returns
    -------
    The label with the highest count. When several labels share the highest
    count, the tied label that occurs first in ``votes`` is returned.
    ``None`` is returned for an empty ``votes`` (previously an IndexError),
    e.g. when every annotator column has been removed.

    Notes
    -----
    Every element is counted as a vote, including placeholder values such as
    'missing'. NOTE(review): confirm that a placeholder winning the vote is
    the intended outcome.
    """
    counts = {}
    for vote in votes:
        counts[vote] = counts.get(vote, 0) + 1

    if not counts:
        return None

    top = max(counts.values())
    # The first vote (in caller-supplied order) whose label reaches the top
    # count wins; with a unique winner this is simply that label.
    for vote in votes:
        if counts[vote] == top:
            return vote
    

In [15]:
# Second, assign the final label to each text as the most frequent label among
# the remaining annotators (ties go to the highest-scoring annotator), and
# combine all text/label pairs of each corpus into a single CSV file with the
# columns "text" and "label".

def _write_final_labels(path, frames, annotator_scores):
    """Write the majority-vote label of every text in ``frames`` to ``path``.

    Parameters
    ----------
    path : str
        Output CSV file; it is overwritten and starts with a "text,label" header.
    frames : iterable of pandas.DataFrame
        Frames with a 'text' column first, followed by one label column per
        annotator.
    annotator_scores : dict
        Maps every annotator column name to its average kappa score.

    Raises
    ------
    KeyError
        If an annotator column has no entry in ``annotator_scores``.
    """
    # newline='' is what the csv module requires for files handed to
    # csv.writer (otherwise rows can gain extra blank lines on Windows);
    # the explicit encoding keeps non-ASCII text from depending on the locale.
    with open(path, 'w', newline='', encoding='utf-8') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(["text", "label"])

        for frame in frames:
            # Order annotator columns from highest to lowest score so that
            # majority_vote, which resolves ties in favour of the earliest
            # vote, picks the label of the highest-scoring annotator.
            # sorted() is stable, so equal scores keep their column order.
            ranked = sorted(frame.columns[1:], key=lambda name: -annotator_scores[name])

            for row in frame[['text'] + ranked].itertuples(index=False, name=None):
                writer.writerow([row[0], majority_vote(row[1:])])


# NOTE(review): the output file names are kept exactly as before
# (nyt_stance -> secondary_labels.csv, twitter_stance -> primary_labels.csv).
# Earlier comments in this cell labelled them the other way round; confirm
# which corpus is meant to be the primary dataset.
_write_final_labels("secondary_labels.csv", nyt_stance, nyt_annotator_dict)
_write_final_labels("primary_labels.csv", twitter_stance, twitter_annotator_dict)