In [6]:
import pandas as pd
import json

with open("kinyarwanda-afrihate-pilot-g2_annotations.json", "r") as file:
    data = json.load(file)
data_list = []

for example in data["examples"]:
    row_data = {"text": example["content"], "text_id": example["example_id"]}
    annotator_data = {}
    for classification in example["classifications"]:
        for classified_by in classification["classified_by"]:
            annotator_id = classified_by["annotator_id"]
            if annotator_id not in annotator_data:
                annotator_data[annotator_id] = classification["classname"]
    for i, (annotator_id, label) in enumerate(annotator_data.items()):
        annotator_num = f"annotator_{i+1}"
        label_num = f"label_{i+1}"
        row_data[annotator_num] = annotator_id
        row_data[label_num] = label
    data_list.append(row_data)

# Create the updated DataFrame
updated_df = pd.DataFrame(data_list)

# Ensure that there are four annotators for each text item
for i in range(1, 5):
    annotator_col = f"annotator_{i}"
    label_col = f"label_{i}"
    if annotator_col not in updated_df.columns:
        updated_df[annotator_col] = None
    if label_col not in updated_df.columns:
        updated_df[label_col] = None

updated_df = updated_df[['text', 'text_id', 'annotator_1', 'label_1', 'annotator_2', 'label_2', 'annotator_3', 'label_3', 'annotator_4', 'label_4']]
#updated_df.head()

In [7]:
from collections import Counter

# 'final
def get_final_label(row):
    labels = [row[f'label_{i}'] for i in range(1, 5) if row[f'label_{i}'] is not None]
    label_counts = Counter(labels)
    final_label = [label for label, count in label_counts.items() if count >= 3]
    return final_label[0] if final_label else None



def get_updated_re_adjudicate_label(row):
    labels = [row[f'label_{i}'] for i in range(1, 5) if row[f'label_{i}'] is not None]
    label_counts = Counter(labels)
    max_label_count = max(label_counts.values())
    
    # Check the conditions for re-adjudication
    if max_label_count >= 3:
        return None
    unique_labels = set(labels)
    if len(unique_labels) >= 2:
        return '_'.join(sorted(map(str, unique_labels)))
    return None


updated_df['final'] = updated_df.apply(get_final_label, axis=1)
updated_df['re-adjudicate'] = updated_df.apply(get_re_adjudicate_label, axis=1)
#updated_df.head()

NameError: name 'get_re_adjudicate_label' is not defined

In [20]:
#Filter the DataFrame for final labels where the 'final' column is not None
final_labels_df = updated_df[updated_df['final'].notnull()]
final_labels_df.head(5)

Unnamed: 0,text,text_id,annotator_1,label_1,annotator_2,label_2,annotator_3,label_3,annotator_4,label_4,re-adjudicate,final
1,"@RwandaLocalGov Mwiriwe, muri iki gihe tugifit...",102776ba-5fb5-43e1-9660-216bc4cbf5b6,93,Normal,96,Normal,92,Normal,99.0,Normal,,Normal
4,@ClaudeKarangwa Ngo imbwa ndagaswi,332520d4-6252-417e-bccc-835742c2f073,96,Offensive,92,Offensive,93,Offensive,99.0,Offensive,,Offensive
5,Umunsi umwe igituba kizavuza induru ????????,291d29ce-bc5d-4547-b3ce-715a1878d5cc,96,Offensive,93,Offensive,99,Offensive,92.0,Offensive,,Offensive
6,@rayon_sports yasinyanye amasezerano y�ubufat...,8d04d3eb-aab0-42b8-8d2d-fa17c33000e4,99,Normal,93,Normal,96,Normal,92.0,Normal,,Normal
7,@lavishleey @teemcey1 Igituba cyawe ubanza kir...,b325717e-8fce-429a-a314-e04d3187280a,96,Offensive,93,Offensive,99,Offensive,92.0,Offensive,,Offensive


In [21]:
# Filter the DataFrame for re-adjudication where the 're-adjudicate' column is not None
re_adjudicate_df = updated_df[updated_df['re-adjudicate'].notnull()]
re_adjudicate_df.head(5)

Unnamed: 0,text,text_id,annotator_1,label_1,annotator_2,label_2,annotator_3,label_3,annotator_4,label_4,re-adjudicate,final
0,@akayezuja @ziggyfikiri but this guy ko mbona ...,b1bacb87-3bdd-47df-9608-4b1978bbc535,92,Offensive,93,Hate,96,Hate,99.0,Normal,Hate_Normal_Offensive,
2,@AY_kennedy Gaswere man harukuntu ubeshyera im...,303eb44e-16c1-4ad9-9457-f3204cfe6f81,93,Offensive,92,Offensive,96,Hate,99.0,Normal,Hate_Normal_Offensive,
3,@mukizaclement @patos00 cg iyo nzoka bayisanze...,5809f576-8efb-4268-b3a8-156c55a401d9,93,Offensive,92,Offensive,99,Normal,96.0,Normal,Normal_Offensive,
9,urukundo ni impumyi kweli ??,09563467-6731-4085-b2f5-5db0db7deb5b,96,Inderminate,92,Offensive,99,Normal,93.0,Normal,Inderminate_Normal_Offensive,
10,Ndi ikigarasha ariko ntibinkuraho ko ndi umuny...,14fed357-fc22-462b-8a75-73cfb183eee5,99,Offensive,93,Hate,96,Hate,92.0,Hate,Hate_Offensive,Hate


In [25]:
re_adjudicate_df['re-adjudicate'].value_counts()

re-adjudicate
Hate_Offensive                  26
Hate_Normal_Offensive            9
Hate_Normal                      7
Normal_Offensive                 4
Inderminate_Normal_Offensive     1
Hate_Normal_Offensive_nan        1
Hate_Inderminate_Offensive       1
Hate_Normal_nan                  1
Inderminate_Offensive            1
Inderminate_Normal_nan           1
Hate_Offensive_nan               1
Name: count, dtype: int64

In [None]:
# how many re-adjudicate

import pandas as pd
import numpy as np

# read in the data

df = pd.read_csv('data/afrihate_annotation.csv')

# how many re-adjudicate

