In [8]:
import geojson
import os
import pandas as pd
import cv2
import numpy as np
from sklearn.metrics import cohen_kappa_score
from glob import glob

In [9]:
def json_load(json_dir,geofile1):
    with open(os.path.join(json_dir,geofile1)) as f:
        gj = geojson.load(f)
    features = gj['features']
    return features


def polygon_rater_data_load(json_dir,geofile1):
    features1 = json_load(json_dir,geofile1)
    feature1_points = [f for f in features1 if (f["geometry"]["type"]=="Polygon") & ("isLocked" not in f["properties"].keys())]
    feature1_coords = [cords["geometry"]["coordinates"] for cords in feature1_points]
    feature1_name = []
    feature1_class = []
    for cords in feature1_points:
        if "name" not in cords["properties"].keys():
            feature1_name.append(None)
        else:
            feature1_name.append(cords["properties"]["name"])
        if "classification" not in cords["properties"].keys():
            feature1_class.append(None)
        else:
            feature1_class.append(cords["properties"]["classification"]["name"])
    feature1_df = pd.DataFrame({"coordinates":feature1_coords,"class":feature1_class, "name":feature1_name})
    return feature1_coords, feature1_class, feature1_name, feature1_df


def point_rater_data_load(json_dir,geofile2):
    features2 = json_load(json_dir,geofile2)
    features2_points = [f for f in features2 if f["geometry"]["type"]=="Point"]
    feature2_coords = [cords["geometry"]["coordinates"] for cords in features2_points]
    feature2_class = [cords["properties"]["classification"]["name"] if "classification" in (cords["properties"].keys()) else "None" for cords in features2_points ]
    feature2_name = [cords["properties"]["name"] if "name" in (cords["properties"].keys()) else "None" for cords in features2_points]
    feature2_df = pd.DataFrame({"coordinates":feature2_coords,"class":feature2_class, "name":feature2_name})
    return feature2_coords, feature2_class, feature2_name, feature2_df

def match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius):
    match = []
    i=0
    for c, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for cnt, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            
            contours = [[int(c[0])+radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])+0.5*radius], [int(c[0]),int(c[1])+radius], [int(c[0])-0.5*radius,int(c[1])+0.5*radius], [int(c[0])-radius,int(c[1])], 
                        [int(c[0])-0.5*radius,int(c[1])-0.5*radius], [int(c[0])-radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])-0.5*radius]]
            #print(len(cnt[0]))
            #if (len(cnt[0])>2):
            #    cnt = np.mean(cnt[0], axis=0)
            #print(len(cnt))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(cnt[0]),int(cnt[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, cnt])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df



def match_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name):
    match = []
    i=0
    for contours, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for c, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(c[0]),int(c[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, c])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df




def compute_kappa_score(df, rater1_column, rater2_column):
    #df1= df[~((df["main_rater_class"]=='None') & (df["main_rater_class"]=='None'))]
    df1=df
    print(len(df1))
    labeler1 = df1[rater1_column]
    labeler2 = df1[rater2_column]
    return cohen_kappa_score(labeler1, labeler2)

def find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name):
    missing_main_rater_list = []
    for i in range(len(feature1_coords)):
        if i not in df["main_rater_index"].values:
            print(i,feature1_name[i], feature1_coords[i])
            missing_main_rater_list.append([i,feature1_class[i],feature1_name[i],None, None,None, feature1_coords[i],None])
    missing_main_rater = pd.DataFrame(missing_main_rater_list, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_main_rater

def find_missing_rater1(df, feature2_coords,feature2_class,feature2_name):
    missing_rater1_list = []
    for i in range(len(feature2_class)):
        if i not in df["rater1_index"].values:
            print(i,feature2_class[i],feature2_coords[i] )
            missing_rater1_list.append([None, None,None,i,feature2_class[i],feature2_name[i], None, feature2_coords[i]])
    missing_rater1 = pd.DataFrame(missing_rater1_list,columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_rater1

def map_class(l):
    if l=="Cored":
        return "(C)"
    if l=="Diffuse":
        return "(D)"
    if l=="Mature":
        return "(M)"
    if l=="Pre":
        return "(P)"
    if l=="Ghost":
        return "(G)"
    if l=="Coarse-Grained":
        return "(CG)"
    return None


def find_match(json_dir1,json_dir2, geojsons_names,radius):
    all_geojson_df = pd.DataFrame()
    for geofile1 in geojsons_names:
        print("------------------",geofile1,"---------------------")
        feature1_coords, feature1_class, feature1_name, feature1_df = point_rater_data_load(json_dir1,geofile1)
        print(len(feature1_coords))
        feature2_coords, feature2_class, feature2_name, feature2_df = point_rater_data_load(json_dir2,geofile1)
        #feature2_coords, feature2_class, feature2_name, feature2_df = polygon_rater_data_load(json_dir2,geofile1)
        print(len(feature2_coords))
        df = match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius)
        missing_main_rater = find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name)
        missing_rater1 = find_missing_rater1(df, feature2_coords,feature2_class,feature2_name)
        df_final = pd.concat([df,missing_rater1,missing_main_rater], axis=0, ignore_index=True) 
        df_final["main_rater_class"] = np.where(df_final["main_rater_class"].isna(),"None",df_final["main_rater_class"])
        df_final["rater1_class"] = np.where(df_final["rater1_class"].isna(),"None",df_final["rater1_class"])
        df_final["main_rater_annotation"] = df_final["main_rater_class"].apply(lambda l: "Polygon"+ map_class(l) if l!='None' else "")
        df_final["rater1_annotation"] = df_final["rater1_class"].apply(lambda l: "Point"+ map_class(l) if l!='None' else "")
        df_final["geojson_file"] = geofile1
        if len(all_geojson_df)==0:
            all_geojson_df = df_final
        else:
            all_geojson_df =  pd.concat([all_geojson_df,df_final], ignore_index=True)
    return all_geojson_df
        

In [10]:
json_dir1= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/monika"
json_dir2= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/vivek"
json_dir3 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Max_calibrated"
json_dir4 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Ceren_calibrated"
#geofile1 = "94213_7_Sil_1.mrxs (1).geojson"
#geofile2 = "94213_7_Sil_1.mrxs.geojson"

In [11]:
geojsons_main_rater =  glob(os.path.join(json_dir1,"*.geojson"))
geojsons_names =  [x.split("/")[-1] for x in geojsons_main_rater]

In [12]:
geojsons_names

['XE11-039_1_AmyB_1.mrxs.geojson',
 'XE07-049_1_AmyB_1.mrxs.geojson',
 'XE19-037_1_AmyB_1.mrxs.geojson',
 'XE18-003_1_AmyB_1.mrxs.geojson',
 'XE18-066_1_AmyB_1.mrxs.geojson']

In [6]:
monika_output = find_match(json_dir3,json_dir1, geojsons_names,250)


------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
30
0
0 4:Coarse-Grained [57763.58, 147461.95]
1 13:Diffuse [58905.22, 147372.88]
2 5:Coarse-Grained [58606.06, 147366.17]
3 11:Diffuse [59016.81, 147193.41]
4 3:Coarse-Grained [57726.39, 147042.58]
5 0:Cored [55483.64, 146959.36]
6 10:Diffuse [58946.14, 146854.94]
7 1:Coarse-Grained [55686.36, 146791.05]
8 12:Coarse-Grained [59020.53, 146573.17]
9 2:Coarse-Grained [55099.6, 146207.08]
10 7:Coarse-Grained [57599.12, 145253.8]
11 8:Diffuse [57010.5, 144954.38]
12 6:Coarse-Grained [57905.98, 144415.97]
13 9:Coarse-Grained [55744, 143828.28]
14 22:Diffuse [19392.6, 130713.2]
15 21:Coarse-Grained [19241.04, 130525.27]
16 19:Cored [18524.48, 130279.14]
17 23:Cored [21000.68, 130212.56]
18 20:Coarse-Grained [19014.32, 129950.56]
19 18:Cored [17429.63, 129647.45]
20 17:Coarse-Grained [19407.15, 129436.48]
21 24:Coarse-Grained [20755.64, 129173.91]
22 16:Coarse-Grained [18034.65, 128764.78]
23 15:Coarse-Grained [17184.72, 1

In [56]:
monika_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file'],
      dtype='object')

In [62]:
compute_kappa_score(monika_output[~monika_output["main_rater_index"].isna()], "main_rater_class", "rater1_class")

316


0.35434527507420455

In [11]:
vivek_output = find_match(json_dir3,json_dir2, geojsons_names,250)

------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
30
16
1 13:Diffuse [58905.22, 147372.88]
3 11:Diffuse [59016.81, 147193.41]
8 12:Coarse-Grained [59020.53, 146573.17]
9 2:Coarse-Grained [55099.6, 146207.08]
10 7:Coarse-Grained [57599.12, 145253.8]
11 8:Diffuse [57010.5, 144954.38]
14 22:Diffuse [19392.6, 130713.2]
15 21:Coarse-Grained [19241.04, 130525.27]
17 23:Cored [21000.68, 130212.56]
18 20:Coarse-Grained [19014.32, 129950.56]
21 24:Coarse-Grained [20755.64, 129173.91]
26 26:Coarse-Grained [20197.58, 128127.36]
27 29:Coarse-Grained [20512.11, 127614.35]
29 28:Diffuse [19217.87, 127081.68]
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
50
15
0 21:Diffuse [9240.76, 71619.85]
1 34:Diffuse [11276.82, 71582.64]
2 25:Diffuse [11170.99, 71568.21]
3 20:Diffuse [9583.9, 71541.28]
4 28:Diffuse [12312.66, 71428.71]
5 29:Diffuse [12101, 71423.9]
6 18:Diffuse [8629.84, 71418.21]
8 15:Diffuse [9482.88, 71084.69]
9 31:Diffuse [12551.58, 71029.45]
1

In [63]:
compute_kappa_score(vivek_output[~vivek_output["main_rater_index"].isna()], "main_rater_class", "rater1_class")

303


0.2242969688172185

In [12]:
ceren_output = find_match(json_dir3,json_dir4, geojsons_names,250)

------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
30
34
1 13:Diffuse [58905.22, 147372.88]
3 11:Diffuse [59016.81, 147193.41]
20 17:Coarse-Grained [19407.15, 129436.48]
27 29:Coarse-Grained [20512.11, 127614.35]
7 Diffuse [55938.46, 146358.66]
9 Coarse-Grained [59017.78, 145800.11]
10 Coarse-Grained [59006.15, 145600.52]
14 None [58899.41, 144132.34]
20 None [18286.48, 130060.66]
24 Diffuse [16912.63, 129659.55]
25 Coarse-Grained [19359.78, 129367.2]
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
50
50
15 12:Diffuse [10743.21, 70614.88]
21 11:Diffuse [10547.59, 70334.27]
31 0:Cored [11017.89, 69121.3]
42 37:Cored [69130.13, 120459.13]
21 Diffuse [12157.59, 71110.27]
31 Cored [12627.89, 69897.3]
42 Cored [70646.13, 121914.13]
------------------ XE19-037_1_AmyB_1.mrxs.geojson ---------------------
73
73
------------------ XE18-003_1_AmyB_1.mrxs.geojson ---------------------
57
57
------------------ XE18-066_1_AmyB_1.mrxs.geojson ------------

In [65]:
ceren_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file'],
      dtype='object')

In [66]:
compute_kappa_score(ceren_output[~ceren_output["main_rater_index"].isna()], "main_rater_class", "rater1_class")

346


0.7558413046887251

In [41]:
ceren_output["main_rater_class"].isna().sum()

0

In [43]:
vivek_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file'],
      dtype='object')

In [13]:
vivek_output_short = vivek_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
vivek_output_short.columns = ['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater2_index', 'rater2_class', 'rater2_object_name','geojson_file']

ceren_output_short = ceren_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
ceren_output_short.columns = ['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater3_index', 'rater3_class', 'rater3_object_name','geojson_file']

In [14]:
monika_output_short = monika_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]

In [15]:
output_all = pd.merge(monika_output_short,ceren_output_short, how='left', on=['main_rater_index', 'main_rater_class', 'main_rater_object_name','geojson_file'])

In [16]:
output_all = pd.merge(output_all,vivek_output_short, how='left', on=['main_rater_index', 'main_rater_class', 'main_rater_object_name','geojson_file'])

In [17]:
output_all

Unnamed: 0,main_rater_index,main_rater_class,main_rater_object_name,rater1_index,rater1_class,rater1_object_name,geojson_file,rater3_index,rater3_class,rater3_object_name,rater2_index,rater2_class,rater2_object_name
0,0,Coarse-Grained,4:Coarse-Grained,2,Coarse-Grained,15:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
1,1,Diffuse,13:Diffuse,12,Coarse-Grained,24:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,,,,,,
2,2,Coarse-Grained,5:Coarse-Grained,7,Coarse-Grained,22:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,1,Coarse-Grained,17:Coarse-Grained,1,Coarse-Grained,14:Coarse-Grained
3,4,Coarse-Grained,3:Coarse-Grained,3,Coarse-Grained,14:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,5,Cored,0:Cored,4,Coarse-Grained,17:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
387,76,Diffuse,86:Diffuse,,,,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
388,79,Coarse-Grained,39:Coarse-Grained,,,,XE18-066_1_AmyB_1.mrxs.geojson,59,Coarse-Grained,65:Coarse-Grained,,,
389,83,Diffuse,34:Diffuse,,,,XE18-066_1_AmyB_1.mrxs.geojson,61,Diffuse,33:Diffuse,,,
390,87,Coarse-Grained,83:Coarse-Grained,,,,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,


In [18]:
table = output_all[["main_rater_class","rater1_class","rater3_class","rater2_class"]]

In [19]:
dict1 = {"Coarse-Grained":1, "Diffuse":2, "Cored":3, "None":4}

In [20]:
table= table.fillna("None")

In [21]:
table["main_rater_class"]=table["main_rater_class"].apply(lambda l: dict1[l])
table["rater1_class"]=table["rater1_class"].apply(lambda l:  dict1[l])
table["rater3_class"]=table["rater3_class"].apply(lambda l:  dict1[l])
table["rater2_class"]=table["rater2_class"].apply(lambda l: dict1[l])

In [22]:
table

Unnamed: 0,main_rater_class,rater1_class,rater3_class,rater2_class
0,1,1,1,1
1,2,1,4,4
2,1,1,1,1
3,1,1,1,1
4,3,1,3,3
...,...,...,...,...
387,2,4,4,4
388,1,4,1,4
389,2,4,2,4
390,1,4,4,4


In [36]:
table =  table[~((table["main_rater_class"]==4) & (table["rater1_class"]==4) & (table["rater2_class"]==4) & (table["rater3_class"]==4))]

In [37]:
def fleiss_kappa1(lists, classes):
    n = len(lists)
    N = len(lists[0])
    k = len(classes)
    
    nij = []
    for i in range(N):
        nij.append([0]*k)
        
    
    for i in range(len(lists)):
        for j in range(len(lists[i])):
            nij[j][classes.index(lists[i][j])] += 1 
    
    P = []
    for i in nij:
        P.append(1/(n*(n-1))*(sum([j*j for j in i])-n))
    return (((sum(P)/N)-(sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]])))/(1-sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]]))+1)/2

In [38]:
rater1 = table["main_rater_class"].values
rater2 = table["rater1_class"].values
rater3 = table["rater2_class"].values
rater4 = table["rater3_class"].values

In [39]:
lists = [rater1, rater2, rater3,rater4]

In [40]:
kappa = fleiss_kappa1(lists,[1,2,3,4])

In [41]:
kappa

0.6710926206343779

In [50]:
cohen_kappa_score(rater1,rater2)

0.292923629023214

In [51]:
cohen_kappa_score(rater1,rater3)

0.21665052607389645

In [52]:
cohen_kappa_score(rater1,rater4)

0.7179734795385397