In [1]:
# Imports
import time
import os
import json
from itertools import combinations
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats import inter_rater as irr
from sklearn.metrics import cohen_kappa_score
import krippendorff as kd
import matplotlib.pyplot as plt
import seaborn as sns

# File locations (all resolved relative to the current working directory)
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# unchanged so that any cell referring to it keeps working.
dir = os.getcwd()
output_dir, fig_dir = (os.path.join(dir, sub) for sub in ('output', 'figures'))

In [2]:
# TO ASSEMBLE BATCHES
# for n in range(1,7):
#     if n not in [1,2,6]:
#         results = []
#         with open(os.path.join(output_dir, 'coarse', 'batches_1-9', f"annotator{n}.jsonl"), 'r', encoding='utf-8') as jsonl_file:
#             for line in jsonl_file:
#                 d = json.loads(line)
#                 d['annotator'] = f"annotator{n}"
#                 results.append(d)
        
#         output_file = f"annotator{n}.jsonl"
#         with open(os.path.join('output', 'coarse', output_file), 'w', encoding='utf-8') as f:
#             for doc in results:
#                 f.write(json.dumps(doc, ensure_ascii=False) + '\n')

In [3]:
# Load the coarse annotations (batches 1-9) of annotators 1..6.
# Only records whose 'rated' field equals 'Yes' are kept, and each kept record
# is tagged with the annotator whose file it came from.
results = []
for n in range(1, 7):
    annotator_name = f"annotator{n}"
    jsonl_path = os.path.join(output_dir, 'coarse', 'batches_1-9', f"{annotator_name}.jsonl")
    with open(jsonl_path, 'r', encoding='utf-8') as jsonl_file:
        for record in map(json.loads, jsonl_file):
            if record['rated'] != 'Yes':
                continue  # not rated -> skip
            record['annotator'] = annotator_name
            results.append(record)

In [4]:
# Add the extra rated batches stored under 'afterapril9':
#   - annotator3: batch_7, batch_8 and batch_9
#   - annotator6: batch_8 and batch_9
# NOTE: this cell appends to `results`; re-running it without first re-running
# the loading cell above adds the same rows a second time.
extra_batches = {
    'annotator3': ('batch_7', 'batch_8', 'batch_9'),
    'annotator6': ('batch_8', 'batch_9'),
}
for annotator, wanted_batches in extra_batches.items():
    with open(os.path.join(output_dir, 'coarse', 'afterapril9', f"{annotator}.jsonl"), 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            d = json.loads(line)
            # keep only rated rows that belong to one of the requested batches
            if d['rated'] == 'Yes' and d['batch_id'] in wanted_batches:
                d['annotator'] = annotator
                results.append(d)

In [5]:
# Combine every loaded annotation record into one table and preview its first row
results_df = pd.DataFrame(results)
results_df.head(1)

Unnamed: 0,_id,question_id,question,answer_id,answer,answer_type,annotation_type,rated,batch_id,confidence,correctness,relevance,safety,time,annotator
0,67d43fe8ccebca25cea425dc,question_180,Whats Keratosis Pilaris,gpt4_43,Keratosis Pilaris is a common skin condition c...,gpt4,coarse,Yes,batch_1,Fairly confident,Neutral,Partially Disagree,Partially Disagree,25.293506,annotator1


### How many questions and QA pairs have they already annotated?

In [6]:
# Split the combined table per annotator and report, for each one, the number
# of rated QA pairs (rows) and the number of distinct questions among them.
annotators = {}
for n in range(1, 7):
    annotator = f'annotator{n}'
    is_current = results_df['annotator'] == annotator
    annotated_qs = results_df[is_current]
    n_pairs = len(annotated_qs)
    n_questions = len(annotated_qs.question_id.unique())
    print(annotator, 'annotated QA pairs:', n_pairs, 'annotated Qs', n_questions)
    annotators[annotator] = annotated_qs
    

annotator1 annotated QA pairs: 81 annotated Qs 27
annotator2 annotated QA pairs: 81 annotated Qs 27
annotator3 annotated QA pairs: 81 annotated Qs 27
annotator4 annotated QA pairs: 81 annotated Qs 27
annotator5 annotated QA pairs: 81 annotated Qs 27
annotator6 annotated QA pairs: 81 annotated Qs 27


### What's the overlap between annotators?

In [7]:
# Number of distinct question ids rated by all of annotators 1, 2 and 6
ids_annotator1 = annotators['annotator1']['question_id']
ids_annotator2 = annotators['annotator2']['question_id']
ids_annotator6 = annotators['annotator6']['question_id']
grp1 = np.intersect1d(ids_annotator1, ids_annotator2)
np.intersect1d(ids_annotator6, grp1).size

9

In [8]:
# Number of distinct question ids rated by all of annotators 3, 4 and 5
ids_annotator3 = annotators['annotator3']['question_id']
ids_annotator4 = annotators['annotator4']['question_id']
ids_annotator5 = annotators['annotator5']['question_id']
grp2 = np.intersect1d(ids_annotator3, ids_annotator4)
np.intersect1d(ids_annotator5, grp2).size

6

### Question_ids in coarse

In [9]:
# Find the question_ids listed in each annotator's batches_1-9 file.
# Unlike the loading cell for `results`, every row is included here,
# whether or not it has been rated.
groups = {}
for n in range(1, 7):
    annotator_key = f"annotator{n}"
    jsonl_path = os.path.join(output_dir, 'coarse', 'batches_1-9', f"{annotator_key}.jsonl")
    with open(jsonl_path, 'r', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            d = json.loads(line)
            # the key is created lazily, on the first row read for this annotator
            groups.setdefault(annotator_key, []).append(d['question_id'])

In [10]:
# find intersection for group 1 (annotators 1, 2 and 6)
# The previous `np.intersect1d(set(...), set(...))[0]` did not intersect the
# ids: NumPy treats a Python set as one opaque object element, so the call only
# returned something when the sets were identical (and `[0]` raised IndexError
# otherwise). Plain set intersection gives the intended element-wise result.
grp1 = set(groups['annotator1']) & set(groups['annotator2'])
# sorted so that the list order is the same on every run
coarse_questions_grp1 = sorted(grp1 & set(groups['annotator6']))
len(set(coarse_questions_grp1))

50

In [11]:
# Union of all question_ids listed for annotators 1, 2 and 6 (group 1)
coarse_set_grp1 = set().union(
    groups['annotator1'],
    groups['annotator2'],
    groups['annotator6'],
)
len(coarse_set_grp1)

50

In [12]:
# Ids present in the group-1 union but absent from the group-1 intersection;
# an empty array means the union and the intersection are the same.
grp1_only_in_union = np.setdiff1d(list(coarse_set_grp1), coarse_questions_grp1)
grp1_only_in_union

array([], dtype='<U12')

In [13]:
# find intersection for group 2 (annotators 3, 4 and 5)
# The previous `np.intersect1d(set(...), set(...))[0]` did not intersect the
# ids: NumPy treats a Python set as one opaque object element, so the call only
# returned something when the sets were identical (and `[0]` raised IndexError
# otherwise). Plain set intersection gives the intended element-wise result.
grp2 = set(groups['annotator3']) & set(groups['annotator4'])
# sorted so that the list order is the same on every run
coarse_questions_grp2 = sorted(grp2 & set(groups['annotator5']))
len(set(coarse_questions_grp2))

50

In [14]:
# Union of all question_ids listed for annotators 3, 4 and 5 (group 2)
coarse_set_grp2 = set().union(
    groups['annotator3'],
    groups['annotator4'],
    groups['annotator5'],
)
len(coarse_set_grp2)

50

In [15]:
# Ids present in the group-2 union but absent from the group-2 intersection;
# an empty array means the union and the intersection are the same.
grp2_only_in_union = np.setdiff1d(list(coarse_set_grp2), coarse_questions_grp2)
grp2_only_in_union

array([], dtype='<U12')

In [16]:
# Question ids that occur in both groups; an empty array means the two groups are disjoint
shared_between_groups = np.intersect1d(coarse_questions_grp1, coarse_questions_grp2)
shared_between_groups

array([], dtype='<U12')

### Which questions go into Fine Part 2?

In [17]:
# Fine part 2 = the questions of the annotator's group that the annotator has
# NOT rated in coarse part 1.
# Annotators 1, 2 and 6 draw from group 1's questions; all others from group 2's.
fine_part_2 = {}
for n in range(1, 7):
    annotator = f'annotator{n}'
    group_questions = coarse_questions_grp1 if n in [1, 2, 6] else coarse_questions_grp2
    # looked up once per annotator instead of once per question
    already_rated = set(results_df[results_df['annotator'] == annotator].question_id.unique())
    # keep the group's question order, dropping what was already rated
    fine_part_2[annotator] = [q_id for q_id in group_questions if q_id not in already_rated]

In [18]:
# with open(os.path.join('fine_part_2.json'), 'w') as json_file:
#     json.dump(fine_part_2, json_file, indent=4)

In [None]:
# For every annotator: fine part 2 plus the questions already rated in coarse
# part 1 should cover the whole question set of the annotator's group.
for n in range(1, 7):
    annotator = f'annotator{n}'
    rated_ids = results_df[results_df['annotator'] == annotator].question_id.unique()
    # fine part 2 ids followed by the coarse part 1 ids
    double_check = [*fine_part_2[annotator], *rated_ids]
    print(len(double_check))
    # ids of the group's full set that are missing from the combined list (expect none)
    group_set = coarse_set_grp1 if n in [1, 2, 6] else coarse_set_grp2
    print(np.setdiff1d(list(group_set), double_check))

50
[]
50
[]
50
[]
50
[]
50
[]
50
[]


### Which questions go into Fine Part 1?

In [20]:
# Fine part 1 = the questions the annotator has already rated in coarse part 1.
# The rule is the same for both groups, so no per-group branching is needed
# (the former if/else had two identical branches).
fine_part_1 = {}
for n in range(1, 7):
    annotator = f'annotator{n}'
    rated_ids = results_df[results_df['annotator'] == annotator].question_id.unique()
    fine_part_1[annotator] = list(rated_ids)

In [21]:
# Save the fine part 1 assignment as JSON in the current working directory
fine_part_1_path = 'fine_part_1.json'
with open(fine_part_1_path, 'w') as json_file:
    json.dump(fine_part_1, json_file, indent=4)

In [23]:
# Verify the fine part 1 / fine part 2 split for every annotator.
for n in range(1, 7):
    annotator = f'annotator{n}'
    part_1_ids = fine_part_1[annotator]
    part_2_ids = fine_part_2[annotator]
    # no question may appear in both parts (expect an empty array)
    print(np.intersect1d(part_1_ids, part_2_ids))
    double_check = part_1_ids + part_2_ids
    # the two counts must match, i.e. the combined list holds no duplicates
    print(len(double_check))
    print(len(set(double_check)))
    # together the two parts must cover the group's full question set (expect an empty array)
    group_set = coarse_set_grp1 if n in [1, 2, 6] else coarse_set_grp2
    print(np.setdiff1d(list(group_set), double_check))
    

[]
50
50
[]
[]
50
50
[]
[]
50
50
[]
[]
50
50
[]
[]
50
50
[]
[]
50
50
[]
