# Load data from annotators and reviewer

In [2]:
import numbers

import gradio as gr
import pandas as pd


def get_BFCs(results_df):
    """Project an annotation table onto its 'hash' and 'bfc' columns.

    Args:
        results_df: DataFrame that contains at least the columns
            'hash' and 'bfc'.

    Returns:
        A new DataFrame holding only 'hash' and 'bfc', in that order.
    """
    wanted = ['hash', 'bfc']
    return results_df.loc[:, wanted]

# Raw annotation tables, one CSV per annotator.
results_A = pd.read_csv('annotations_Michel.csv')
results_B = pd.read_csv('annotations_Abhishek.csv')
results_C = pd.read_csv('annotations_David.csv')

# Reduce each table to its (hash, bfc) pair.
BFCs_A, BFCs_B, BFCs_C = (
    get_BFCs(results) for results in (results_A, results_B, results_C)
)

# Give every annotator's 'bfc' column its own name first, then inner-join on
# the commit hash so only commits labelled by all three annotators survive.
BFCs = (
    BFCs_A.rename(columns={'bfc': 'bfcA'})
    .merge(BFCs_B.rename(columns={'bfc': 'bfcB'}), on='hash', how='inner')
    .merge(BFCs_C.rename(columns={'bfc': 'bfcC'}), on='hash', how='inner')
)
BFCs = BFCs[['hash', 'bfcA', 'bfcB', 'bfcC']]

In [4]:
# Reviewer labels: inner-join them onto the annotators' table by commit hash,
# expose the reviewer's 'bfc' column as 'bfcR' and keep only the label columns.
BFCs_R = pd.read_csv('review_jesus.csv')
BFCs = (
    BFCs.merge(BFCs_R, on='hash', how='inner')
    .assign(bfcR=lambda merged: merged['bfc'])
    [['hash', 'bfcA', 'bfcB', 'bfcC', 'bfcR']]
)

In [5]:
# Show the merged table: commit hash plus the bfcA, bfcB, bfcC and bfcR columns.
BFCs

Unnamed: 0,hash,bfcA,bfcB,bfcC,bfcR
0,0704a8586f,0,0,0,0
1,c5e97ed154,0,0,0,0
2,4f9f531e15,0,0,0,0
3,258030acc9,0,0,0,0
4,5b2c5540b8,4,4,3,4
...,...,...,...,...,...
120,cc1049ccee,3,4,4,4
121,9f22f95951,3,4,0,0
122,aa303964bc,1,0,0,0
123,cff35798fa,4,0,0,4


# Comparison

In [6]:
def compute_disagreement(BFCs):
    """Select the rows on which annotators A, B and C are not unanimous.

    A row is kept when bfcA differs from bfcB or from bfcC; because equality
    is transitive this is the same as "the three labels are not all equal".
    The number of selected rows is printed as a side effect.

    Args:
        BFCs: DataFrame with the columns 'bfcA', 'bfcB' and 'bfcC'.

    Returns:
        DataFrame containing only the rows with a disagreement.
    """
    a_vs_b = BFCs['bfcA'] != BFCs['bfcB']
    a_vs_c = BFCs['bfcA'] != BFCs['bfcC']
    BFCs_disagree = BFCs[a_vs_b | a_vs_c]
    print(f"Total commits with disagreement: {len(BFCs_disagree)}")
    return BFCs_disagree

# Keep only the commits on which annotators A, B and C are not unanimous,
# then display that subset.
BFCs_disagree = compute_disagreement(BFCs)
BFCs_disagree

Total commits with disagreement: 102


Unnamed: 0,hash,bfcA,bfcB,bfcC,bfcR
4,5b2c5540b8,4,4,3,4
10,9a10064f56,3,0,3,0
12,016017a195,3,4,3,4
13,11c2bf4a20,2,4,2,2
14,088c840599,0,4,0,0
...,...,...,...,...,...
120,cc1049ccee,3,4,4,4
121,9f22f95951,3,4,0,0
122,aa303964bc,1,0,0,0
123,cff35798fa,4,0,0,4


In [7]:
# Commits on which at least one pair of annotators differs by MORE than one
# level. Unlike plain inequality, "within one level" is not transitive
# (e.g. A=2, B=1, C=3: A is within 1 of both, yet B and C differ by 2), so the
# B-C pair has to be tested explicitly alongside A-B and A-C.
BFCs_disagree_1 = BFCs.query(
    "(abs(bfcA - bfcB) > 1) or (abs(bfcA - bfcC) > 1) or (abs(bfcB - bfcC) > 1)"
)
total_disagree_1 = len(BFCs_disagree_1)
print(f"Total commits with disagreement: {total_disagree_1}")
#BFCs_disagree_1

Total commits with disagreement: 65


In [8]:
def reduce3(value):
    """Collapse an integer label onto a three-level scale (0, 1 or 2).

    Integers <= 1 become 0, integers >= 3 become 2, and the remaining
    integer (2) becomes 1. Anything that is not an integer (for example a
    hash string or a float) is returned unchanged, so the function can be
    mapped over a whole table.

    Args:
        value: A single cell value.

    Returns:
        0, 1 or 2 for integer input; the input itself otherwise.
    """
    # numbers.Integral also matches numpy integer scalars (e.g. np.int64),
    # which an exact `type(value) is int` test silently lets through
    # unreduced. bool is an Integral subclass but is not a label, so it is
    # still passed through untouched.
    if isinstance(value, bool) or not isinstance(value, numbers.Integral):
        return value
    if value <= 1:
        return 0
    elif value >= 3:
        return 2
    else:
        return 1

# Apply reduce3 element-wise to every cell of BFCs. Values reduce3 does not
# treat as integers (e.g. the hash strings) come back unchanged.
# NOTE: DataFrame.map is the pandas >= 2.1 name of the former applymap.
BFCs3 = BFCs.map(reduce3)
#BFCs3

In [9]:
# Same A/B/C disagreement filter, this time on the table reduced to the
# three-level scale (0, 1, 2).
BFCs3_disagree = compute_disagreement(BFCs3)
#BFCs3_disagree

Total commits with disagreement: 69


In [10]:
def disagreement_matrix(BFCs_disagree):
    """Print an upper-triangular table of pairwise disagreement counts.

    For every pair of raters among A, B, C and R, the cell shows how many
    rows of the given table carry different labels for the two raters. The
    diagonal and the lower triangle are left blank.

    Args:
        BFCs_disagree: DataFrame with the columns 'bfcA', 'bfcB', 'bfcC'
            and 'bfcR'.

    Returns:
        None; the table is written to standard output.
    """
    persons = ['A', 'B', 'C', 'R']
    blank = "    "
    print("    ", "    ".join(persons))
    for row_pos, row in enumerate(persons):
        cells = []
        for col_pos, column in enumerate(persons):
            # persons is listed in alphabetical order, so "later position"
            # is the same test as "greater label".
            if col_pos <= row_pos:
                cells.append(blank)
                continue
            differs = BFCs_disagree[f'bfc{column}'] != BFCs_disagree[f'bfc{row}']
            cells.append(f"{len(BFCs_disagree[differs]):>4}")
        # Every field, including the last one, is followed by a single space.
        print(row, *cells, end=" \n")
# Pairwise disagreement counts, restricted to the commits in BFCs_disagree.
disagreement_matrix(BFCs_disagree)

     A    B    C    R
A        91   65   70 
B             65   63 
C                  56 
R                     


In [11]:
# Pairwise disagreement counts on the reduced-scale subset BFCs3_disagree.
disagreement_matrix(BFCs3_disagree)

     A    B    C    R
A        57   39   32 
B             46   44 
C                  36 
R                     


In [12]:
# Agreement as a distance between values
def compute_dist(df, persons):
    """Build a table of absolute label differences for every pair of raters.

    Args:
        df: DataFrame with one 'bfc<label>' column for each label in
            persons.
        persons: List of rater labels, e.g. ['A', 'B', 'C', 'R'].

    Returns:
        DataFrame with one column per unordered pair of raters, named
        '<label1>-<label2>' in the order the labels appear in persons, whose
        values are |bfc<label1> - bfc<label2>| for each row of df.
    """
    agree_dist = pd.DataFrame()
    for index1, label1 in enumerate(persons):
        for label2 in persons[index1 + 1:]:
            # Read the columns from the df argument (not from a notebook-level
            # variable) so the function works on whatever table is passed in.
            agree_dist[f'{label1}-{label2}'] = (
                df[f'bfc{label1}'] - df[f'bfc{label2}']
            ).abs()
    return agree_dist
    
# Absolute label differences for every pair among A, B, C and R.
agree_dist = compute_dist(df=BFCs, persons=['A', 'B', 'C', 'R'])

In [13]:
# Show the pairwise distance table (one column per pair of raters).
agree_dist

Unnamed: 0,A-B,A-C,A-R,B-C,B-R,C-R
0,0,0,0,0,0,0
1,0,0,0,0,0,0
2,0,0,0,0,0,0
3,0,0,0,0,0,0
4,0,1,0,1,0,1
...,...,...,...,...,...,...
120,1,1,1,0,0,0
121,1,3,3,4,4,0
122,1,1,1,0,0,0
123,4,4,0,0,4,4


In [14]:
def compute_dist_matrix(df, persons):
    """Print an upper-triangular table of mean absolute label distances.

    For each pair of raters, the cell shows the mean over all rows of df of
    |bfc<column> - bfc<row>|, formatted with two decimals. The diagonal and
    the lower triangle are left blank.

    Args:
        df: DataFrame with one 'bfc<label>' column for each label in
            persons.
        persons: List of rater labels, e.g. ['A', 'B', 'C', 'R'].

    Returns:
        None; the table is written to standard output.
    """
    print("    ", "    ".join(persons))
    for row_pos, row in enumerate(persons):
        print(f"{row}", end=" ")
        for col_pos, column in enumerate(persons):
            # Select the upper triangle by position, not by comparing the
            # label strings, so the layout is right for any ordering of
            # persons.
            if col_pos > row_pos:
                # Use the df argument rather than a notebook-level variable.
                dist = (df[f'bfc{column}'] - df[f'bfc{row}']).abs().mean()
                print(f"{dist:.2f}", end=" ")
            else:
                print("    ", end=" ")
        print()
# Mean absolute label distance for every pair among A, B, C and R.
compute_dist_matrix(df=BFCs, persons=['A', 'B', 'C', 'R'])


     A    B    C    R
A      1.66 1.14 1.41 
B           1.28 1.61 
C                1.40 
R                     
