# RoB annotations Inter Annotator Agreement

In [1]:
# Generic imports
from os import listdir, path
from os.path import isfile, join
import json
import string
import itertools
from collections import defaultdict
from collections import Counter

# Connect to tagtog API
from lxml import html
from bs4 import BeautifulSoup
from requests import get
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
import urllib

# Specific imports
import numpy
from nltk.tokenize import WhitespaceTokenizer
from sklearn.metrics import f1_score, cohen_kappa_score


# pytorch
from transformers import AutoTokenizer, AutoModelForMaskedLM

In [2]:
def find_whitespace(st):
    for index, character in enumerate(st):
        if character in string.whitespace:
            yield index

In [3]:
def get_combinations(l):
    yield from itertools.product(l, l)

In [4]:
project_name = 'RoB_preliminary_annotation'
owner = 'anjDhr'
users = ['rahel-caliesch', 'roger-annotation', 'martin-annotation', 'katia-giacomino', 'simone-annotation']
all_user_combinations = list(itertools.combinations(users, 2)) 
all_user_combinations = get_combinations(users)
all_user_combinations = list( all_user_combinations )

In [5]:
'''
exact_v1 = Stringent inter-annotator agreement
overlapping_v1 = Relaxed inter-annotator agreement
'''
metrics = ['exact_v1', 'overlapping_v1', 'documentlevel_v1'] 

In [6]:
# Get all the entities
all_annotations_data = 'https://www.tagtog.com/-api/metrics/v0/search_stats?project=RoB_annotations_IAA&owner=anjDhr&search=*'
all_annotations_data_response = get(all_annotations_data, auth=('anjDhr', '9J@NiScMhUy9LbR'))
print('The response for query is: ', all_annotations_data_response)
all_annotations_data_response = json.loads(all_annotations_data_response.text)

The response for query is:  <Response [200]>


In [7]:
# Get the IAA metrics
# IAA_hardcoded = 'https://www.tagtog.net/-api/metrics/v0/iaa?project=RoB_preliminary_annotation&owner=anjDhr&member1=rahel-caliesch&member2=roger-annotation&anntaskId=e_109&metric=exact_v1'
IAA = 'https://www.tagtog.com/-api/metrics/v0/iaa?'

In [8]:
entity2label = {}
doc2label = {}

for eachEntry in all_annotations_data_response:
    
    if 'e_' in eachEntry:

        entry_name =  all_annotations_data_response[eachEntry]['name']       
        entity2label[eachEntry] = entry_name
        
    if 'm_' in eachEntry:

        entry_name =  all_annotations_data_response[eachEntry]['name']       
        doc2label[eachEntry] = entry_name

### Parse annotation project from local machine

In [9]:
# Get plain texts
plain_text = '/mnt/nas2/results/Results/systematicReview/RoB_annotation/IAA_annot/RoB_annotations_IAA_1/plain.html/pool'
member_annotations = '/mnt/nas2/results/Results/systematicReview/RoB_annotation/IAA_annot/RoB_annotations_IAA_1/ann.json/members/'

# list down all the annotators
projet_admin = ['anjDhr']
member_dir = listdir( member_annotations )

In [10]:
print('This project has annotations from:', ', '.join( member_dir ))

This project has annotations from: roger-annotation, katia-giacomino, rahel-caliesch


In [11]:
# Initialize tokenizer
tk = WhitespaceTokenizer()

In [12]:
# Parse plain text files for the trial annotation project

plain_text_dict = dict()
plain_text_doclen = dict()

plain_texts = [f for f in listdir(plain_text) if isfile(join(plain_text, f))]


for plaintext_file in plain_texts:
   
    plaintext_path = path.join(plain_text, plaintext_file)

    #with open(plaintext_path, "r") as f:
    #    page = f.read()
    page = open(plaintext_path, encoding="utf8")  

    soup = BeautifulSoup(page)

    #print(soup.head.title.text)
    trial_doc_number = str(soup.head.title.text).split('-')[-1].replace('.pdf', '')

    text_list = soup.find_all("pre")

    document_parts = {}
    doc_tok_len = 0
    for l in text_list:

        document_parts[ l.get('id') ] = l.text
        token_text = tk.tokenize(l.text)
        doc_tok_len = doc_tok_len + len(token_text)

    plain_text_dict[trial_doc_number] = document_parts
    plain_text_doclen[trial_doc_number] = doc_tok_len

In [121]:
# List member annotation files

doc_annot_count = 0
document_annotations = dict()

member_annot_files = {}

for m, member in enumerate(member_dir):
    
    print('Fetching annotations from ', member)
    
    if member not in projet_admin:
        
        member_annot_dir = member_annotations + str(member) + '/'
        
        member_annot_dir = path.join(member_annotations, member, 'pool')
                
        annotation_files = [path.join(member_annot_dir, f) for f in listdir(member_annot_dir) if isfile(join(member_annot_dir, f))]
        
        annotation_dict = {}
        
        # generate a dictionary of document_number : annotations parsed
        for annotation_file in annotation_files:
            
            #print( annotation_file )
            
            trial_doc_number = str(annotation_file).split('/')[-1].replace('.pdf.ann.json', '')
            trial_doc_number = trial_doc_number.split('-')[-1]

            with open(annotation_file, 'r') as af:
                data = json.load(af)
                                
                if trial_doc_number not in document_annotations:
                    document_annotations[trial_doc_number] =  [ data['metas'] ]
                else:
                    document_annotations[trial_doc_number].append( data['metas'] )
                    
                doc_annot_count = doc_annot_count + len(list(data['metas'].keys()))
                
                annotation_dict[trial_doc_number] = data['entities']
        
        
        member_annot_files[member] = annotation_dict

Fetching annotations from  roger-annotation
Fetching annotations from  katia-giacomino
Fetching annotations from  rahel-caliesch


In [14]:
member2lastname = { 'roger-annotation': 'Hilfiker', 'katia-giacomino': 'Giacomino',  'rahel-caliesch': 'Caliesch'}

In [15]:
def parseAnnotations(member_annotations):

    annotation_docs = dict()
    annotation_docs_text = dict()

    for k,v in member_annotations.items():
        
        annotation_dict = dict()
        annotation_text = []

        for counter, a in enumerate(v):

            document_part = a['part']
            offset_start = a['offsets'][0]['start']
            offset_end = offset_start + len(a['offsets'][0]['text'])
            entity_text = a['offsets'][0]['text']
            document = plain_text_dict[k][document_part]

            document_entity_match = document[offset_start:offset_end]    
            document_entity_match_label = a['classId']
            assert len(entity_text) == len(document_entity_match)

            match_label_list = len( entity_text ) * [document_entity_match_label]

            document_char_labels = [0] * len(document)
            document_char_labels[offset_start:offset_end] = match_label_list

            annotation_text.append( ( entity_text, [entity2label[x] for x in list(set(document_char_labels)) if x in entity2label ] ) )

            if document_part not in annotation_dict:
                annotation_dict[document_part] = [ document_char_labels ]
            else:
                annotation_dict[document_part].append( document_char_labels )

        annotation_docs[k] = annotation_dict
        annotation_docs_text[k] = annotation_text
        
    return annotation_docs, annotation_docs_text

In [16]:
def char2tokAnnot(char_annotations):
    
    token_annotations = {}
    
    for k_a, v_a in char_annotations.items():
        #print(k_a) # document number
        
        token_annotations[k_a] = {}

        for k_a_, v_a_ in v_a.items(): # convert char annot to tok annot
            #print(k_a_) # document part

            text =  plain_text_dict[k_a][k_a_]
            white_space_spans = list(WhitespaceTokenizer().span_tokenize(plain_text_dict[k_a][k_a_]))
            tokens = [ text[ ws[0] : ws[1] ] for ws in  white_space_spans ]
            
            for v_a_i in v_a_:
            
                labels = [ list( set(v_a_i[ ws[0] : ws[1] ]) ) for ws in  white_space_spans ]
                #labels_clean = [ list(filter(lambda num: num != 0, l))[0] if len( l ) > 1 else l[0] for l in labels ]
                labels_clean = [ list(filter(lambda num: num != 0, l)) if len( l ) > 1 else l for l in labels ]
                #print('Clean labels: ' , labels_clean)
            
                assert len(v_a_i) == len(plain_text_dict[k_a][k_a_])                
                assert len( tokens ) == len( labels )
            
                if k_a_ not in token_annotations[k_a]:
                    token_annotations[k_a][k_a_] = {}
                    token_annotations[k_a][k_a_]['tokens'] = tokens
                    token_annotations[k_a][k_a_]['labels'] = labels_clean
                                        
                elif k_a_ in token_annotations[k_a]:
                    old_labels = token_annotations[k_a][k_a_]['labels']
                    new_labels = [ list(set(old_labels[n] + l)) for n, l in enumerate(labels_clean)]
                    
                    #new_labels = [ old_labels[n].append( l ) for n, l in enumerate(labels_clean) ]
                    #print( new_labels )
                    
                    assert len( old_labels ) == len( new_labels )
                    
                    token_annotations[k_a][k_a_]['labels'] = new_labels
            #break
                
    return token_annotations

In [17]:
annotations = dict()
annotation_text = dict()

for k,v in member_annot_files.items():

    #if len( list(v.keys()) ) >= 8: # restricts it to those who completed annotations
    #    print(k)
    char_annotations, annot_text = parseAnnotations(v)
    tok_annotations = char2tokAnnot(char_annotations)
    annotations[k] = tok_annotations
    annotation_text[k] = annot_text

In [18]:
annotations.keys()

dict_keys(['roger-annotation', 'katia-giacomino', 'rahel-caliesch'])

In [103]:
for k,v in annotations.items():
    
    print(k)
    
    for k_, v_ in v.items():
        
        print(v_.keys())
        
        break

roger-annotation
dict_keys(['s2v1', 's3v1'])
katia-giacomino
dict_keys(['s2v1', 's3v1'])
rahel-caliesch
dict_keys(['s5v1', 's13v1'])


## Calculate Inter Annotator Agreement (Exact) <br>
Inter Annotator Agreement or IAA for Named Entity Recognition is usually calculated as F1-score rather than Cohen's kappa or Fleiss' kappa.

In [29]:
def flatenAnnotations(annotator1, annotator2, annotations_all):
      
    annot1 = annotations_all[annotator1]
    annot2 = annotations_all[annotator2]
    annot1_flattened = []
    annot2_flattened = []
    
    if len( list(annot1.keys()) ) > len( list(annot2.keys()) ):
        docs = list(annot2.keys())
    else:
        docs = list(annot1.keys())
    
    #print( annot1.keys() )
    
    for doc in docs:
        #print('Document number: ', doc)
        
        if member2lastname[annotator1] in doc and member2lastname[annotator2] in doc:
            
            doc_parts = list(annot1[doc].keys())

            for doc_part in doc_parts:
                if doc_part in annot1[doc] and doc_part in annot2[doc]:
                    annot1_flattened.extend( annot1[doc][doc_part]['labels'] )
                    annot2_flattened.extend( annot2[doc][doc_part]['labels'] )

                elif doc_part in annot1[doc] and doc_part not in annot2[doc]:
                    annot1_flattened.extend( annot1[doc][doc_part]['labels'] )

                    temp2 = [[0]] * len( annot1[doc][doc_part]['labels'] )
                    annot2_flattened.extend( temp2 )

                elif doc_part not in annot1[doc] and doc_part in annot2[doc]:
                    temp1 = [[0]] * len( annot2[doc][doc_part]['labels'] )
                    annot1_flattened.extend( temp1 )

                    annot2_flattened.extend( annot2[doc][doc_part]['labels'] )

    
    assert len( annot1_flattened ) == len( annot2_flattened )
    
    return annot1_flattened, annot2_flattened

In [56]:
# Calculate exact IAA

# agreement_type = {'exact', 'judgeless'}

def calculate_exact_IAA(annotator1, annotator2, annotations_all):
    
    # Calculate different levels of inter-annotator agreement between the two annotators
    
    print( 'Calculating IAA between ', annotator1, ' and ', annotator2 )
  
    annot1_flat, annot2_flat = flatenAnnotations(annotator1, annotator2, annotations_all)
    
    assert len(annot1_flat) == len(annot2_flat)
    
    a1 = []
    a2 = []
    
    for i, j in zip(annot1_flat, annot2_flat):
        
        out_of_span_condition = (len(i) == 1 and len(j) == 1 and i[0] == 0 and j[0] == 0)
        
        if out_of_span_condition:
            continue
        else:
            a1.append( i )
            a2.append( j )
            
     
    for k, v in entity2label.items():
        
        p1 = []
        p2 = []
        
        for i, j in zip(a1, a2):
            
            if k in i or k in j:

                if k in i:
                    p1.append( 1 )
                else:
                    p1.append( 0 )

                if k in j:
                    p2.append( 1 )
                else:
                    p2.append( 0 )
        
        k_f1 = f1_score(p1, p2, average=None)
        
        if len(k_f1) > 1:
            print( 'The agreement for entity ', entity2label[k], ' is ', k_f1[1] )
        else:
            print( 'NO AGREEMENT for entity ', entity2label[k], ' is ', k_f1 )

In [57]:
calculate_exact_IAA('roger-annotation', 'katia-giacomino', annotations)

Calculating IAA between  roger-annotation  and  katia-giacomino
The agreement for entity  1_3_No_Information  is  0.0
NO AGREEMENT for entity  2_1_No_Good  is  []
The agreement for entity  1_1_No_Information  is  0.0
The agreement for entity  1_1_No_Bad  is  0.0
The agreement for entity  1_3_No_Good  is  0.43270476464661006
The agreement for entity  1_2_No_Bad  is  0.0
NO AGREEMENT for entity  1_2_No_Information  is  []
The agreement for entity  1_3_Yes_Bad  is  0.0
The agreement for entity  1_2_Yes_Good  is  0.41681901279707495
The agreement for entity  1_1_Yes_Good  is  0.5675213675213674


In [58]:
calculate_exact_IAA('roger-annotation', 'rahel-caliesch', annotations)

Calculating IAA between  roger-annotation  and  rahel-caliesch
NO AGREEMENT for entity  1_3_No_Information  is  []
NO AGREEMENT for entity  2_1_No_Good  is  []
NO AGREEMENT for entity  1_1_No_Information  is  []
NO AGREEMENT for entity  1_1_No_Bad  is  []
NO AGREEMENT for entity  1_3_No_Good  is  [1.]
The agreement for entity  1_2_No_Bad  is  0.0
NO AGREEMENT for entity  1_2_No_Information  is  []
NO AGREEMENT for entity  1_3_Yes_Bad  is  []
The agreement for entity  1_2_Yes_Good  is  0.0
The agreement for entity  1_1_Yes_Good  is  0.34375000000000006


## Calculate Inter Annotator Agreement (Judgementless) <br>

In [45]:
# Define RoB signalling questions

RoB_signalling_list = ['1_1', '1_2', '1_3']

In [77]:
# Calculate exact IAA

# agreement_type = {'exact', 'judgeless'}

def calculate_judgeless_IAA(annotator1, annotator2, annotations_all, rob:str):
    
    # Calculate different levels of inter-annotator agreement between the two annotators
    
    print( 'Calculating IAA between ', annotator1, ' and ', annotator2 )
  
    annot1_flat, annot2_flat = flatenAnnotations(annotator1, annotator2, annotations_all)
    
    assert len(annot1_flat) == len(annot2_flat)
    
    a1 = []
    a2 = []
    
    for i, j in zip(annot1_flat, annot2_flat):
        
        out_of_span_condition = (len(i) == 1 and len(j) == 1 and i[0] == 0 and j[0] == 0)
        
        if out_of_span_condition:
            continue
        else:
            a1.append( i )
            a2.append( j )
            
     
    for k in [i for i in RoB_signalling_list if i.startswith(rob)]:
        
        print('Calculating IAA for risk domain: ', k )
        
        p1 = []
        p2 = []
        
        for i, j in zip(a1, a2):
                      
            i_mod = [ str(entity2label[ele])[0:3] if ele in entity2label else 0 for ele in i ]
            j_mod = [ str(entity2label[ele])[0:3] if ele in entity2label else 0 for ele in j ] 
            
            if k in i_mod or k in j_mod:

                if k in i_mod:
                    p1.append( 1 )
                else:
                    p1.append( 0 )

                if k in j_mod:
                    p2.append( 1 )
                else:
                    p2.append( 0 )
        
        k_f1 = f1_score(p1, p2, average=None)
        
        if len(k_f1) > 1:
            print( 'The agreement for signalling question ', k, ' is ', k_f1[1] )
        else:
            print( 'NO AGREEMENT for signalling question ', k, ' is ', k_f1 )

In [79]:
calculate_judgeless_IAA('roger-annotation', 'katia-giacomino', annotations, '1')

Calculating IAA between  roger-annotation  and  katia-giacomino
Calculating IAA for risk domain:  1_1
The agreement for signalling question  1_1  is  0.6698412698412698
Calculating IAA for risk domain:  1_2
The agreement for signalling question  1_2  is  0.6620498614958449
Calculating IAA for risk domain:  1_3
The agreement for signalling question  1_3  is  0.9364071505323827


In [78]:
calculate_judgeless_IAA('roger-annotation', 'rahel-caliesch', annotations, '1')

Calculating IAA between  roger-annotation  and  rahel-caliesch
Calculating IAA for risk domain:  1_1
The agreement for signalling question  1_1  is  0.34375000000000006
Calculating IAA for risk domain:  1_2
The agreement for signalling question  1_2  is  0.0
Calculating IAA for risk domain:  1_3
NO AGREEMENT for signalling question  1_3  is  [1.]


## Calculate Inter Annotator Agreement (document level) <br>

In [131]:
doc2label

{'m_13': 'RoB4',
 'm_10': 'RoB1',
 'm_14': 'RoB5',
 'm_9': 'overall_RoB',
 'm_11': 'RoB2',
 'm_12': 'RoB3'}

In [153]:
docannot2label = {'Some-concerns': 0, 'Low-risk': 1, 'High-risk': 2}

In [171]:
def calculate_doc_IAA(annotator1, annotator2, annotations_all, b_d:str):
    
    # Calculate different levels of inter-annotator agreement between the two annotators
    
    print( 'Calculating document IAA between ', annotator1, ' and ', annotator2 )
    
    doc_annot_1 = []
    doc_annot_2 = []
                

    for k, v in annotations_all.items():

        #print( k )

        if member2lastname[annotator1] in k and member2lastname[annotator2] in k and len(v) == 2 and b_d in v[0] and b_d in v[1]:
            
            a1 = v[0][b_d]['value']
            a2 = v[1][b_d]['value']
                        
            doc_annot_1.append(docannot2label[a1])
            doc_annot_2.append(docannot2label[a2])
            
            
    kappa_b_d = cohen_kappa_score(doc_annot_1, doc_annot_2, labels=None, weights=None)
    
    print( doc_annot_1 )
    print( doc_annot_2 )
    
    print( kappa_b_d )

In [172]:
calculate_doc_IAA('roger-annotation', 'katia-giacomino', document_annotations, 'm_10')

Calculating document IAA between  roger-annotation  and  katia-giacomino
[0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
[2, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1]
0.07284768211920534


In [169]:
calculate_doc_IAA('roger-annotation', 'rahel-caliesch', document_annotations, 'm_10')

Calculating document IAA between  roger-annotation  and  rahel-caliesch
[{'m_10': {'value': 'Low-risk', 'confidence': {'state': 'pre-added', 'who': ['user:roger-annotation'], 'prob': 1}}}, {'m_10': {'value': 'Low-risk', 'confidence': {'state': 'pre-added', 'who': ['user:rahel-caliesch'], 'prob': 1}}}]
[1]
[1]
nan


  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
