In [25]:
import pandas as pd
import json
from sklearn.metrics import cohen_kappa_score
import ast

In [11]:
# Column names holding each annotator's labels before the discussion round.
annotator_columns_before_discussion = ["alex", "louisa"]

# The post-discussion columns reuse the annotator names with an "_after" suffix.
annotator_columns_after_discussion = [
    f"{annotator}_after" for annotator in annotator_columns_before_discussion
]

In [15]:
# Read the annotated data: one CSV per annotation round.
anno_1_annotated = pd.read_csv('../../data/annotation_results/anno_01_annotated.csv')
anno_2_annotated = pd.read_csv('../../data/annotation_results/anno_02_annotated.csv')
anno_3_annotated = pd.read_csv('../../data/annotation_results/anno_03_annotated.csv')

# Read the task definitions. The files are opened explicitly as UTF-8: the
# rumor texts contain non-ASCII characters, and without an explicit encoding
# open() falls back to the platform locale, which is not UTF-8 everywhere.
with open('../../data/tasks/task_v1.json', encoding='utf-8') as f:
    anno_1_task = json.load(f)

with open('../../data/tasks/task_v2.json', encoding='utf-8') as f:
    anno_2_task = json.load(f)

# Round 3 reuses the round-2 task definition (same object, not a copy).
anno_3_task = anno_2_task

In [64]:
"""
Get rumor text for a given index
"""
def rumor_index_to_rumor(index, task):
    """Return the rumor stored under ``index`` in ``task['rumors']``.

    Args:
        index: Key/position used to index ``task['rumors']``.
        task: Mapping with a ``'rumors'`` entry that supports indexing.

    Returns:
        The entry ``task['rumors'][index]``.

    Raises:
        KeyError: If ``task`` has no ``'rumors'`` entry.
        IndexError: If ``task['rumors']`` is a sequence and ``index`` is out
            of range.
    """
    return task['rumors'][index]

"""
Display general statistics about the annotations of a dataframe
"""
def display_annotation_info(df, annotator_columns):
    """Print general agreement statistics for two annotator columns.

    Every cell of the two columns must be a string that ``ast.literal_eval``
    turns into a collection of hashable labels (e.g. ``"[0, 3]"``).

    Printed lines:
        * number of samples,
        * number of samples where at least one annotator assigned a label,
        * number of samples where the two annotators' label *sets* differ
          (label order and formatting of the cell text are ignored),
        * unweighted mean and pandas sample standard deviation of the
          per-label Cohen's kappa scores (``nan`` if no label was used).

    Args:
        df: DataFrame containing the annotator columns.
        annotator_columns: Sequence whose first two entries name the columns
            of the two annotators to compare.

    Returns:
        None. The statistics are written to stdout.
    """
    # Parse each column once instead of once per label.
    first_labels = [ast.literal_eval(cell) for cell in df[annotator_columns[0]]]
    second_labels = [ast.literal_eval(cell) for cell in df[annotator_columns[1]]]
    label_pairs = list(zip(first_labels, second_labels))

    annotated_count = sum(
        1 for first, second in label_pairs if len(first) > 0 or len(second) > 0
    )
    # Compare as sets so "[1, 2]" and "[2, 1]" are not reported as a disagreement.
    differing_count = sum(
        1 for first, second in label_pairs if set(first) != set(second)
    )

    # Every label that either annotator used at least once.
    used_labels = {
        label
        for labels in first_labels + second_labels
        for label in labels
    }

    # One binary (label present / absent) kappa per label.
    kappa_scores = {}
    for label in used_labels:
        first_has_label = [1 if label in labels else 0 for labels in first_labels]
        second_has_label = [1 if label in labels else 0 for labels in second_labels]
        kappa_scores[label] = cohen_kappa_score(first_has_label, second_has_label)

    # Without any used label there is nothing to average; report nan instead
    # of failing with a ZeroDivisionError.
    if kappa_scores:
        mean_kappa = sum(kappa_scores.values()) / len(kappa_scores)
    else:
        mean_kappa = float("nan")
    kappa_std = pd.Series(kappa_scores, dtype=float).std()

    print(f"Samples: {len(df)}")
    print(f"Samples with any annotation: {annotated_count}")
    print(f"Samples with different annotations: {differing_count}")

    print(f"Mean Cohen Kappa: {mean_kappa:.2f} (standard deviation: {kappa_std:.2f})")

"""
Create a dataframe with information about the per-rumor agreement and annotation distribution in a dataframe
"""
def get_annotation_info_df(df, annotator_columns, task):
    """Build a per-label table of agreement counts and Cohen's kappa.

    Every cell of the two annotator columns must be a string that
    ``ast.literal_eval`` turns into a collection of hashable labels
    (e.g. ``"[0, 3]"``). Two annotations agree when they contain the same
    set of labels, regardless of label order or cell formatting.

    Args:
        df: DataFrame containing the annotator columns.
        annotator_columns: Sequence whose first two entries name the columns
            of the two annotators to compare.
        task: Task definition passed to ``rumor_index_to_rumor`` to look up
            the rumor belonging to each label.

    Returns:
        A DataFrame indexed by label (index name ``'label'``) with columns:
            * ``rumor``: the rumor looked up for the label,
            * ``agreed``: occurrences of the label in samples where both
              annotators chose the same label set,
            * ``disputed``: number of samples where exactly one annotator
              chose the label,
            * ``kappa``: Cohen's kappa on the binary "label present" vectors.
    """
    # Parse each column once instead of once per label.
    first_labels = [ast.literal_eval(cell) for cell in df[annotator_columns[0]]]
    second_labels = [ast.literal_eval(cell) for cell in df[annotator_columns[1]]]

    # Count, per label, how often the annotators agreed or disagreed on it.
    agreed_label_counts = {}
    disputed_label_counts = {}
    for first, second in zip(first_labels, second_labels):
        first_set, second_set = set(first), set(second)
        if first_set == second_set:
            # Both annotators chose the same labels; count each of them.
            for label in first:
                agreed_label_counts[label] = agreed_label_counts.get(label, 0) + 1
        else:
            # Only labels chosen by exactly one annotator are in dispute.
            for label in first_set ^ second_set:
                disputed_label_counts[label] = disputed_label_counts.get(label, 0) + 1

    # Every label that either annotator used at least once.
    used_labels = list({
        label
        for labels in first_labels + second_labels
        for label in labels
    })

    # One binary (label present / absent) kappa per label.
    kappa_scores = {}
    for label in used_labels:
        first_has_label = [1 if label in labels else 0 for labels in first_labels]
        second_has_label = [1 if label in labels else 0 for labels in second_labels]
        kappa_scores[label] = cohen_kappa_score(first_has_label, second_has_label)

    # Assemble the result table, one row per used label.
    data = {
        'rumor': [rumor_index_to_rumor(label, task) for label in used_labels],
        'agreed': [agreed_label_counts.get(label, 0) for label in used_labels],
        'disputed': [disputed_label_counts.get(label, 0) for label in used_labels],
        'kappa': [kappa_scores[label] for label in used_labels]
    }

    result_df = pd.DataFrame(data, index=used_labels)
    result_df.index.name = 'label'

    return result_df


In [66]:
# Each annotation round paired with the task definition it was annotated against.
annotation_rounds = [
    ("ANNO 1", anno_1_annotated, anno_1_task),
    ("ANNO 2", anno_2_annotated, anno_2_task),
    ("ANNO 3", anno_3_annotated, anno_3_task),
]

# Annotator columns to compare before and after the discussion.
discussion_phases = [
    ("BEFORE", annotator_columns_before_discussion),
    ("AFTER", annotator_columns_after_discussion),
]

# One report per (round, phase), in round order with BEFORE preceding AFTER.
annotation_reports = [
    (f"{round_name} {phase_name}:", round_frame, phase_columns, round_task)
    for round_name, round_frame, round_task in annotation_rounds
    for phase_name, phase_columns in discussion_phases
]

for report_position, (report_heading, report_frame, report_columns, report_task) in enumerate(annotation_reports):
    # A blank line separates consecutive reports; none follows the last one.
    if report_position > 0:
        print()

    print(report_heading)
    display_annotation_info(report_frame, report_columns)
    display(get_annotation_info_df(report_frame, report_columns, report_task))

ANNO 1 BEFORE:
Samples: 600
Samples with any annotation: 93
Samples with different annotations: 56
Mean Cohen Kappa: 0.47 (standard deviation: 0.37)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Gezielte Propaganda gegen die AfD ist schuld a...,15,14,0.696268
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,6,11,0.512411
2,Transsexuallität ist ein durch psychologische ...,1,0,1.0
3,Trans-Propaganda wird gezielt an Schulen verbr...,1,1,0.665924
4,Die Konfrontation mit dem Thema Transsexualitä...,0,1,0.0
5,Die finanzielle Unterstützung von Migrant:inne...,0,8,-0.002925
6,Messerangriffe durch Migrant:innen stellen ein...,6,10,0.538106
7,"Die Türkei ist bereit, bis zu 500 türkische St...",3,1,0.856322
8,Die Mocro-Mafia war für die Explosion in Köln ...,0,5,0.0
9,Ärzt:innen werden gezielt an der Aufklärung üb...,0,5,-0.002674



ANNO 1 AFTER:
Samples: 600
Samples with any annotation: 82
Samples with different annotations: 18
Mean Cohen Kappa: 0.93 (standard deviation: 0.13)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Gezielte Propaganda gegen die AfD ist schuld a...,26,3,0.946524
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,8,6,0.744898
2,Transsexuallität ist ein durch psychologische ...,1,0,1.0
3,Trans-Propaganda wird gezielt an Schulen verbr...,1,0,1.0
5,Die finanzielle Unterstützung von Migrant:inne...,5,0,1.0
6,Messerangriffe durch Migrant:innen stellen ein...,7,8,0.630485
7,"Die Türkei ist bereit, bis zu 500 türkische St...",3,0,1.0
8,Die Mocro-Mafia war für die Explosion in Köln ...,3,0,1.0
9,Ärzt:innen werden gezielt an der Aufklärung üb...,3,1,0.856322
11,Die Corona-Impfung löst Herzschäden bei Kinder...,6,0,1.0



ANNO 2 BEFORE:
Samples: 664
Samples with any annotation: 96
Samples with different annotations: 35
Mean Cohen Kappa: 0.77 (standard deviation: 0.22)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Der öffentlich rechtliche Rundfunk betreibt ge...,15,5,0.853279
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,6,4,0.746999
2,Messerangriffe durch Migrant:innen stellen ein...,15,0,1.0
3,"Die Türkei ist bereit, bis zu 500 türkische St...",5,0,1.0
4,Die Mocro-Mafia war für die Explosion in Köln ...,2,4,0.49697
5,"Das System versucht, Aufklärung über die Gefah...",5,16,0.37594
6,Die Corona-Impfung löst Long COVID aus.,4,3,0.797808
7,Die Corona-Impfung löst Herzschäden aus.,10,4,0.854082



ANNO 2 AFTER:
Samples: 664
Samples with any annotation: 82
Samples with different annotations: 6
Mean Cohen Kappa: 0.95 (standard deviation: 0.05)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Der öffentlich rechtliche Rundfunk betreibt ge...,16,2,0.939636
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,7,0,1.0
2,Messerangriffe durch Migrant:innen stellen ein...,15,0,1.0
3,"Die Türkei ist bereit, bis zu 500 türkische St...",5,0,1.0
4,Die Mocro-Mafia war für die Explosion in Köln ...,3,1,0.856401
5,"Das System versucht, Aufklärung über die Gefah...",14,2,0.931793
6,Die Corona-Impfung löst Long COVID aus.,6,1,0.922321
7,Die Corona-Impfung löst Herzschäden aus.,14,1,0.964748



ANNO 3 BEFORE:
Samples: 600
Samples with any annotation: 131
Samples with different annotations: 38
Mean Cohen Kappa: 0.86 (standard deviation: 0.13)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Der öffentlich rechtliche Rundfunk betreibt ge...,24,6,0.883653
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,11,3,0.877451
2,Messerangriffe durch Migrant:innen stellen ein...,29,8,0.875312
3,"Die Türkei ist bereit, bis zu 500 türkische St...",3,1,0.856322
4,Die Mocro-Mafia war für die Explosion in Köln ...,1,0,1.0
5,"Das System versucht, Aufklärung über die Gefah...",12,18,0.575372
6,Die Corona-Impfung löst Long COVID aus.,4,2,0.855491
7,Die Corona-Impfung löst Herzschäden aus.,14,1,0.968841



ANNO 3 AFTER:
Samples: 600
Samples with any annotation: 117
Samples with different annotations: 6
Mean Cohen Kappa: 0.98 (standard deviation: 0.04)


Unnamed: 0_level_0,rumor,agreed,disputed,kappa
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Der öffentlich rechtliche Rundfunk betreibt ge...,26,0,1.0
1,Bei den Wahlen in Brandenburg gab es Wahlbetrug.,12,1,0.95915
2,Messerangriffe durch Migrant:innen stellen ein...,34,0,1.0
3,"Die Türkei ist bereit, bis zu 500 türkische St...",4,0,1.0
4,Die Mocro-Mafia war für die Explosion in Köln ...,1,0,1.0
5,"Das System versucht, Aufklärung über die Gefah...",19,5,0.879402
6,Die Corona-Impfung löst Long COVID aus.,6,0,1.0
7,Die Corona-Impfung löst Herzschäden aus.,16,0,1.0
