### Imports

In [280]:
import pandas as pd
import os
import json
from tqdm import tqdm 
import numpy as np

In [281]:
# connect with google drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [282]:
# redirect the working directory of this script to the data folder
%cd /content/drive/MyDrive/Work/Frontline/data/

/content/drive/.shortcut-targets-by-id/1WfnZsqpG1r110J63sMbfS5TpsDOkveiV/data


### Import Annotations

Merge all annotation files

In [283]:
dfs=[]
for doc in os.listdir("annotated"):
  if doc.startswith("annotations"):
    #read json data
    json_data=json.load(open("annotated/"+doc, encoding="utf-8"))
    #convert to dataframe
    data=pd.DataFrame(json_data["documents"])
    #for now: filter out paragraphs that have not been annotated 
    data=data[data["annotations"].apply(len)>0]   
    data["file"]=doc 
    dfs.append(data)


In [284]:
# merge jsons
data=pd.concat(dfs)
data=data.reset_index(drop=True)

### Extract Labels by Annotator
Extract each label by annotator to compare the intraclass correlation coefficient of our own annotations

In [285]:
def extract_annotations(test):
  ann_dict=dict()
  for ann in test:
    ### FOR NOW: later change to id
    name=ann["annotator"]["name"][0]
    # Remane old label names
    label=update_label(ann["concept"]["preferred_label"]["name"])
    if name in ann_dict.keys():
      ann_dict[name].append(label)
    else:
      ann_dict[name]=[label]
  #remove duplicates
  for key in ann_dict.keys():
    ann_dict[key]=set(ann_dict[key])
  return ann_dict

In [286]:
def update_label(label):
  if label=="NA":
    return "Domestic Violence"
  elif label=="Victim blaming":
    return 'Statement of responsibility'
  else:
    return label

In [287]:
data["annotations"]=data["annotations"].apply(extract_annotations)

In [288]:
data.head()

Unnamed: 0,id,text,annotations,attributes_flat,file
0,4572dea4-6a08-4f1e-b312-5821112bb5f5,Ein Mann (25) ist jetzt vom Schöffengericht am...,{'J': {'Domestic Violence'}},{'artikel_id': 'F3A4578D33A8603DF0573D3DE3CB26...,annotations_05_18.json
1,0bcada32-8dc5-41cf-b83b-67d2e742bada,Als Zeugin trat die Ex-Lebensgefährtin des Syr...,{'J': {'Statement of responsibility'}},{'artikel_id': 'F3A4578D33A8603DF0573D3DE3CB26...,annotations_05_18.json
2,a30791b9-522e-45c1-8b33-79d4165282af,"Zunächst leugnete der Angeklagte, dass es über...",{'J': {'Statement of responsibility'}},{'artikel_id': 'F3A4578D33A8603DF0573D3DE3CB26...,annotations_05_18.json
3,043e3909-bcdd-4c6b-a54f-f947d46ad18e,Das Schöffengericht hatte es in diesem Fall of...,"{'J': {'Statement of responsibility', 'Sensati...",{'artikel_id': 'F3A4578D33A8603DF0573D3DE3CB26...,annotations_05_18.json
4,08cef91c-6d73-472c-8349-07a5b72009d1,"""Gewalt in der Familie ist weder Privatsache n...",{'J': {'Domestic Violence'}},"{'artikel_id': 'IRA-82182598', 'name': 'SÜDWES...",annotations_05_18.json


### Similarity of (own) annotations
Compare all co-assigned paragraphs using three different measures:
1. jaccard similarity: is the fraction of labels that were given by all annotators divided by all given annotations, ie. the intersection of given annotations divided by the union of the given annotations
2. dice: is the averaged, pairwise similarity of annotations, considering the number of labels given 

In [289]:
def jaccard_similarity(sets):
    # Calculate the intersection
    intersection = set.intersection(*sets)
    
    # Calculate the union
    union = set.union(*sets)
    
    # Compute the Jaccard similarity
    similarity = len(intersection) / len(union)
    
    return similarity

In [290]:
def calculate_similarity(annotations: dict, sim="jaccard"):
  #if no co-annotation
  if len(annotations)==1:
    return np.nan
  else:
    if sim=="jaccard":
      return jaccard_similarity(list(annotations.values()))
    elif sim=="dice":
      return dice_similarity_multiple(list(annotations.values()))

In [291]:
def dice_similarity_multiple(sets):
    num_sets = len(sets)
    similarity_sum = 0

    # Pairwise comparisons
    for i in range(num_sets - 1):
        for j in range(i + 1, num_sets):
            set1 = sets[i]
            set2 = sets[j]
            
            # Calculate the intersection
            intersection = set1.intersection(set2)
            
            # Calculate the sum of set sizes
            set_sum = len(set1) + len(set2)
            
            # Compute the Dice similarity coefficient
            similarity = 2 * len(intersection) / set_sum
            
            similarity_sum += similarity

    # Calculate the average similarity
    average_similarity = similarity_sum / (num_sets * (num_sets - 1) / 2)
    
    return average_similarity

In [292]:
data["jaccard"]=data["annotations"].apply(calculate_similarity)
data["dice"]=data["annotations"].apply(calculate_similarity,args=["dice"])

In [None]:
data[~data["jaccard"].isnull()]

In [294]:
print("Average jaccard similarity: ", round(data["jaccard"].mean(),2))
print("Average dice similarity: ", round(data["dice"].mean(),2))

Average jaccard similarity:  0.35
Average dice similarity:  0.51


In [295]:
data.groupby("file").mean()[["jaccard","dice"]]

Unnamed: 0_level_0,jaccard,dice
file,Unnamed: 1_level_1,Unnamed: 2_level_1
annotations_05_18.json,0.7,0.777778
annotations_06_09_part1.json,,
annotations_06_09_part2.json,,
annotations_06_09_part3.json,,
annotations_50sample_06_09.json,0.276596,0.449173


###ICC for whole dataframe
Note:
- ICC is not suited for categorical data -->  work-around: turn categories into dummies variables
- ICC expects the same number of raters and measures for each object, ie. only annotations with 3 annotators are considered

In [296]:
def calculate_icc(labels):
    """
    Berechnet den ICC für mehrere Messungen pro Objekt.
    
    :param labels: Eine numpy.ndarray oder eine ähnliche Datenstruktur, die die Bewertungen enthält.
                   Die Dimensionen sollten die Form (Objekte, Messungen, Beobachter) haben.
    :return: Der berechnete ICC-Wert.
    """
    
    # objects (n), labels (m) und annotator (k)
    n, m, k = labels.shape
    
    # calculate total variance
    mean_ratings = np.mean(labels, axis=2)
    
    # Berechnung der Gesamtvarianzanteile (Variance Total)
    variance_total = np.var(labels.flatten(), ddof=1)
    
    # calculate variance between classes
    variance_between = np.mean(np.var(mean_ratings, axis=1, ddof=1))
    
    # calculate variance within classes
    variance_within = np.mean(np.var(labels, axis=2, ddof=1))
    
    # calculating ICC
    icc = (variance_between - variance_within) / (variance_total + variance_within)
    
    return icc

In [297]:
# filtering paragraphs that were labelled by all annotators
data_sample=data[data.file=="annotations_50sample_06_09.json"]
mask=[len(i)==3 for i in data_sample.annotations]

In [298]:
# creating a np.array of shape (n,m,k) where objects (n), labels (m) und annotator (k)

dummies=pd.DataFrame(columns=["Sensationalist",	"Statement of responsibility",	"Graphic",	"Domestic Violence"])
ann_dummies_list=[]
for i in data_sample[mask].annotations:
  temp_df=pd.DataFrame.from_dict(i,orient='index')
  temp_df=pd.get_dummies(temp_df, prefix='', prefix_sep='').max(level=0, axis=1)
  temp_df=pd.concat([temp_df,dummies])
  temp_df=temp_df.fillna(0)
  ann_dummies_list.append(np.array(temp_df))


In [299]:
ann_dummies_list=np.array(ann_dummies_list)

In [300]:
calculate_icc(ann_dummies_list)

-0.5672298029734648

### Export Paragraphs that were differently annotated
-> based on dice similarity

In [301]:
sample=data[data.file=="annotations_50sample_06_09.json"]

In [302]:
sample[sample.dice<0.8][["id","text","annotations","dice","jaccard"]].to_csv("annotated/differently_annotated/amazon_sample_differently_annotated.csv")

### Evaluating Amazons Annotations

Extracting Ground Truth

In [303]:
def ground_truth_filter(entry, min_coannotation=1, min_similarity=0.5, similarity="jaccard"):
  """
      Extracts ground truth value of the annotated sample based on two filters:
      - a minimum number of people that annotated a text
      - a minimum of similarity between all annotations of a text

      Args:
      - annotations (dict): a dictionary containing all annotations of a text with the annotator initial as key
      - min_coannotation (int): minimum number of co-annotations of a text, by default 1, so all annotations are considered
      - min_similarity (int): if more than one annotator, the value minimum value of similarity so that a value is considered ground truth

      Returns:
      - either:
        - all_values (set): containing a set of values that are considered ground truth
        - NaN: if the annotation does not fulfill the conditions set for ground truth
  """
  if len(entry["annotations"])<min_coannotation or entry[similarity]<min_similarity:
    return np.nan
  else:
    all_values = [value for s in entry["annotations"].values() for value in s]
    return set(all_values)

In [304]:
ground_truth=data
ground_truth["annotations"]=ground_truth.apply(ground_truth_filter, 1, min_coannotation=2,min_similarity=0.6, similarity="dice")
ground_truth=ground_truth[~ground_truth["annotations"].isnull()][["id","annotations"]]


In [305]:
# this variable contains all values that are considered ground truth based on the previous filter
ground_truth=ground_truth.rename(columns={"annotations":"ground truth"})
ground_truth.head()

Unnamed: 0,id,ground truth
20,4dfd6faf-e631-4f0e-83cc-cee4b69a9632,"{Statement of responsibility, Sensationalist}"
23,0cb62255-3781-4466-b9ac-e29c29714d9f,"{Sensationalist, Statement of responsibility}"
44,0964c70c-687f-4d9b-9c9d-555b3853a456,{Domestic Violence}
50,0b701b26-4e70-4514-8c9b-355bf042acbc,{Domestic Violence}
82,1c2f201d-b408-4c6f-a455-f45a947b41f6,{Sensationalist}


Importing Annotations

In [306]:
# import amazon data
# pd.read_csv()
#for testing

amazon_data_all=pd.concat(dfs)
amazon_data_all["annotations"]=amazon_data_all["annotations"].apply(extract_annotations)
amazon_data=amazon_data_all[["id","annotations"]]

In [307]:
all_annotators=set([y for x in amazon_data["annotations"]for y in list(x.keys())])


In [308]:
for annotator in all_annotators:
  by_annotator=amazon_data.explode("annotations")[amazon_data.explode("annotations").annotations==annotator]
  by_annotator=by_annotator[["id"]].merge(amazon_data,left_on="id", right_on="id")
  by_annotator["annotations"]=[x[annotator]for x in by_annotator["annotations"]]
  common_ann=by_annotator.merge(ground_truth, left_on="id", right_on="id")
  common_ann["similarity"]=common_ann[["annotations","ground truth"]].apply(jaccard_similarity,axis=1)
  print(annotator,common_ann["similarity"].mean())

B 1.0
J 0.975
K 0.9705882352941176


### Amazons Annotations

Check annotations that do not have a ground truth value

In [309]:
mask=[ x not in list(ground_truth.id) for x in amazon_data_all.id]
amazon_no_gtruth=amazon_data_all[mask]
amazon_no_gtruth[["id","text","annotations"]].sample(5)

Unnamed: 0,id,text,annotations
237,13b74bf9-3155-4c40-b3e9-9a52ca033ae0,Bornheim. Zu einem Fall häuslicher Gewalt ist ...,{'K': {'Domestic Violence'}}
34,accfde2f-22ac-4d67-a19c-629047dcf8b6,Ein 34-Jähriger soll im hessischen Limburg sei...,"{'K': {'Graphic'}, 'J': {'Domestic Violence'},..."
1382,37da3a63-abc8-413e-95cd-a7607e08364c,".] so treffend: ,Vor ziemlich genau einem Jahr...",{'K': {'Domestic Violence'}}
405,23b73213-12ea-4f20-a063-f150e9856ebb,Anlässlich des Internationalen Frauentags biet...,{'K': {'Domestic Violence'}}
1951,becced3c-9109-4923-88ef-6a35b8177a0c,Mit der Lösung namens Werner Martinato sollen ...,{'K': {'Domestic Violence'}}
