In [1]:
import geojson
import os
import pandas as pd
import cv2
import numpy as np
from sklearn.metrics import cohen_kappa_score
from glob import glob
from functools import reduce

In [7]:
def json_load(json_dir,geofile1):
    with open(os.path.join(json_dir,geofile1)) as f:
        gj = geojson.load(f)
    features = gj['features']
    return features


def polygon_rater_data_load(json_dir,geofile1):
    features1 = json_load(json_dir,geofile1)
    feature1_points = [f for f in features1 if (f["geometry"]["type"]=="Polygon") & ("isLocked" not in f["properties"].keys())]
    feature1_coords = [cords["geometry"]["coordinates"] for cords in feature1_points]
    feature1_name = []
    feature1_class = []
    for cords in feature1_points:
        if "name" not in cords["properties"].keys():
            feature1_name.append(None)
        else:
            feature1_name.append(cords["properties"]["name"])
        if "classification" not in cords["properties"].keys():
            feature1_class.append(None)
        elif "name" not in cords["properties"]["classification"].keys():
            feature1_class.append(None)
        else:
            feature1_class.append(cords["properties"]["classification"]["name"])
    feature1_df = pd.DataFrame({"coordinates":feature1_coords,"class":feature1_class, "name":feature1_name})
    return feature1_coords, feature1_class, feature1_name, feature1_df


def point_rater_data_load(json_dir,geofile2):
    features2 = json_load(json_dir,geofile2)
    features2_points = [f for f in features2 if f["geometry"]["type"]=="Point"]
    feature2_coords = [cords["geometry"]["coordinates"] for cords in features2_points]
    feature2_class = [cords["properties"]["classification"]["name"] if "classification" in (cords["properties"].keys()) and "name" in cords["properties"]["classification"].keys() else "None" for cords in features2_points ]
    feature2_name = [cords["properties"]["name"] if "name" in (cords["properties"].keys()) else "None" for cords in features2_points]
    feature2_df = pd.DataFrame({"coordinates":feature2_coords,"class":feature2_class, "name":feature2_name})
    return feature2_coords, feature2_class, feature2_name, feature2_df

def match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius, rater2_type):
    match = []
    i=0
    for c, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for cnt, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            
            contours = [[int(c[0])+radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])+0.5*radius], [int(c[0]),int(c[1])+radius], [int(c[0])-0.5*radius,int(c[1])+0.5*radius], [int(c[0])-radius,int(c[1])], 
                        [int(c[0])-0.5*radius,int(c[1])-0.5*radius], [int(c[0])-radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])-0.5*radius]]
            
            if (rater2_type=="Polygon"):
                #print("cnt[0])>2")
                cnt = np.mean(cnt[0], axis=0)
            #print(len(cnt))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(cnt[0]),int(cnt[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, cnt])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df



def match_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name):
    match = []
    i=0
    for contours, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for c, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(c[0]),int(c[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, c])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df




def compute_kappa_score(df, rater1_column, rater2_column):
    #df1= df[~((df["main_rater_class"]=='None') & (df["main_rater_class"]=='None'))]
    df1=df
    print(len(df1))
    labeler1 = df1[rater1_column]
    labeler2 = df1[rater2_column]
    return cohen_kappa_score(labeler1, labeler2)

def find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name):
    missing_main_rater_list = []
    for i in range(len(feature1_coords)):
        if i not in df["main_rater_index"].values:
            print(i,feature1_name[i], feature1_coords[i])
            missing_main_rater_list.append([i,feature1_class[i],feature1_name[i],None, None,None, feature1_coords[i],None])
    missing_main_rater = pd.DataFrame(missing_main_rater_list, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_main_rater

def find_missing_rater1(df, feature2_coords,feature2_class,feature2_name):
    missing_rater1_list = []
    for i in range(len(feature2_class)):
        if i not in df["rater1_index"].values:
            #print(i,feature2_class[i],feature2_coords[i] )
            missing_rater1_list.append([None, None,None,i,feature2_class[i],feature2_name[i], None, feature2_coords[i]])
    missing_rater1 = pd.DataFrame(missing_rater1_list,columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_rater1

def map_class(l):
    if l=="Cored":
        return "(C)"
    if l=="Diffuse":
        return "(D)"
    if l=="Mature":
        return "(M)"
    if l=="Pre":
        return "(P)"
    if l=="Ghost":
        return "(G)"
    if l=="Coarse-Grained":
        return "(CG)"
    return "None"


def find_match(json_dir1,json_dir2, geojsons_names,radius,main_rater_type, rater2_type):
    all_geojson_df = pd.DataFrame()
    for geofile1 in geojsons_names:
        print("------------------",geofile1,"---------------------")
        
        if rater2_type=="Polygon":
            feature2_coords, feature2_class, feature2_name, feature2_df = polygon_rater_data_load(json_dir2,geofile1)
        if rater2_type=="Point":
            feature2_coords, feature2_class, feature2_name, feature2_df = point_rater_data_load(json_dir2,geofile1)
            
            
        if main_rater_type=="Point":
            feature1_coords, feature1_class, feature1_name, feature1_df = point_rater_data_load(json_dir1,geofile1)
            df = match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius,rater2_type)
            
        if main_rater_type=="Polygon": 
            feature1_coords, feature1_class, feature1_name, feature1_df = polygon_rater_data_load(json_dir1,geofile1)
            df = match_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name)
        
        missing_main_rater = find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name)
        #missing_rater1 = find_missing_rater1(df, feature2_coords,feature2_class,feature2_name)
        #df_final = pd.concat([df,missing_rater1,missing_main_rater], axis=0, ignore_index=True) 
        df_final = pd.concat([df,missing_main_rater], axis=0, ignore_index=True) 
        df_final["main_rater_class"] = np.where(df_final["main_rater_class"].isna(),"None",df_final["main_rater_class"])
        df_final["rater1_class"] = np.where(df_final["rater1_class"].isna(),"None",df_final["rater1_class"])
        df_final["main_rater_annotation"] = df_final["main_rater_class"].apply(lambda l: "Polygon"+ map_class(l) if l!='None' else "")
        df_final["rater1_annotation"] = df_final["rater1_class"].apply(lambda l: "Point"+ map_class(l) if l!='None' else "")
        df_final["geojson_file"] = geofile1
        if len(all_geojson_df)==0:
            all_geojson_df = df_final
        else:
            all_geojson_df =  pd.concat([all_geojson_df,df_final], ignore_index=True)
    return all_geojson_df
        

In [7]:
json_dir1= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/monika"
json_dir2= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/vivek"
json_dir3 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Max_calibrated"
json_dir4 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Ceren_calibrated"
#geofile1 = "94213_7_Sil_1.mrxs (1).geojson"
#geofile2 = "94213_7_Sil_1.mrxs.geojson"

In [4]:
geojsons_main_rater =  glob(os.path.join(json_dir3,"*.geojson"))
geojsons_names =  [x.split("/")[-1] for x in geojsons_main_rater]

In [5]:
geojsons_names

['XE11-039_1_AmyB_1.mrxs.geojson',
 'XE07-049_1_AmyB_1.mrxs.geojson',
 'XE19-037_1_AmyB_1.mrxs.geojson',
 'XE18-003_1_AmyB_1.mrxs.geojson',
 'XE18-066_1_AmyB_1.mrxs.geojson']

## Main Rater : Monika

In [6]:
## main rater monika
max_output = find_match(json_dir1,json_dir3, geojsons_names,200,"Polygon","Point")
vivek_output = find_match(json_dir1,json_dir2, geojsons_names,200,"Polygon","Point")
ceren_output = find_match(json_dir1,json_dir4, geojsons_names,200,"Polygon","Point")

vivek_output_short = vivek_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
vivek_output_short.columns = ['monika_index', 'monika_class', 'monika_object_name',
       'vivek_index', 'vivek_class', 'vivek_object_name','geojson_file']

ceren_output_short = ceren_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
ceren_output_short.columns = ['monika_index', 'monika_class', 'monika_object_name',
       'ceren_index', 'ceren_class', 'ceren_object_name','geojson_file']

max_output_short = max_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
max_output_short.columns = ['monika_index', 'monika_class', 'monika_object_name',
       'max_index', 'max_class', 'max_object_name','geojson_file']

Monika_main_all = pd.merge(max_output_short,ceren_output_short, how='left', on=['monika_index', 'monika_class', 'monika_object_name','geojson_file'])

Monika_main_all = pd.merge(Monika_main_all,vivek_output_short, how='left', on=['monika_index', 'monika_class', 'monika_object_name','geojson_file'])


------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
27
30
10 20:Cored [[[58889, 144057], [58886, 144058], [58885.02, 144058.66], [58885, 144058.67], [58884, 144059], [58881, 144061], [58878, 144063], [58875, 144065], [58874, 144066], [58871, 144068], [58868, 144070], [58867, 144071], [58865.86, 144072.7], [58865, 144073], [58862, 144075], [58861, 144075.67], [58860, 144076], [58857, 144078], [58856.6, 144078.59], [58856, 144079], [58855.71, 144079.44], [58854, 144080], [58848, 144084], [58845, 144086], [58844.6, 144086.59], [58844, 144087], [58842, 144090], [58841.73, 144090.42], [58840, 144091], [58834, 144095], [58833, 144096], [58832, 144097], [58830, 144100], [58828, 144103], [58827.33, 144105], [58826, 144107], [58825.66, 144108.02], [58825, 144109], [58824.67, 144109.98], [58824, 144111], [58824, 144112], [58824, 144114], [58824, 144115], [58824, 144117], [58824, 144118], [58824, 144119], [58824, 144120], [58824, 144121], [58824, 144122], [58824, 144123], [588

In [7]:
Monika_main_all

Unnamed: 0,monika_index,monika_class,monika_object_name,max_index,max_class,max_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,vivek_index,vivek_class,vivek_object_name
0,0,Coarse-Grained,13:Coarse-Grained,12,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13,Coarse-Grained,12:Coarse-Grained,6,Coarse-Grained,9:Coarse-Grained
1,1,Coarse-Grained,12:Coarse-Grained,13,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15,Coarse-Grained,14:Coarse-Grained,7,Coarse-Grained,8:Coarse-Grained
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
3,3,Coarse-Grained,14:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,4,Coarse-Grained,17:Coarse-Grained,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,54,Coarse-Grained,42:Coarse-Grained,81,Coarse-Grained,79:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,38,Coarse-Grained,26:Coarse-Grained
210,55,Diffuse,47:Diffuse,82,Diffuse,82:Diffuse,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
211,56,Coarse-Grained,43:Coarse-Grained,77,Coarse-Grained,84:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
212,57,Coarse-Grained,37:Coarse-Grained,86,Coarse-Grained,80:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,


## Main Rater : Max

In [8]:
print("-----match monika + Max ----------")
monika_output = find_match(json_dir3,json_dir1, geojsons_names,200, "Point","Polygon")
print("-----match Vivek + Max ----------")
vivek_output = find_match(json_dir3,json_dir2, geojsons_names,200, "Point","Point")
print("-----match Ceren + Max ----------")
ceren_output = find_match(json_dir3,json_dir4, geojsons_names,200, "Point","Point")

vivek_output_short = vivek_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
vivek_output_short.columns = ['max_index', 'max_class', 'max_object_name',
      'vivek_index', 'vivek_class', 'vivek_object_name','geojson_file']

ceren_output_short = ceren_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
ceren_output_short.columns = ['max_index', 'max_class', 'max_object_name',
       'ceren_index', 'ceren_class', 'ceren_object_name','geojson_file']

monika_output_short = monika_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
monika_output_short.columns = ['max_index', 'max_class', 'max_object_name',
       'monika_index', 'monika_class', 'monika_object_name','geojson_file']

Max_main_all = pd.merge(monika_output_short,ceren_output_short, how='left', on=['max_index', 'max_class', 'max_object_name','geojson_file'])

Max_main_all = pd.merge(Max_main_all,vivek_output_short, how='left', on=['max_index', 'max_class', 'max_object_name','geojson_file'])

-----match monika + Max ----------
------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
30
27
3 11:Diffuse [59016.81, 147193.41]
8 12:Coarse-Grained [59020.53, 146573.17]
14 22:Diffuse [19392.6, 130713.2]
17 23:Cored [21000.68, 130212.56]
26 26:Coarse-Grained [20197.58, 128127.36]
27 29:Coarse-Grained [20512.11, 127614.35]
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
50
31
0 21:Diffuse [9240.76, 71619.85]
1 34:Diffuse [11276.82, 71582.64]
2 25:Diffuse [11170.99, 71568.21]
5 29:Diffuse [12101, 71423.9]
6 18:Diffuse [8629.84, 71418.21]
8 15:Diffuse [9482.88, 71084.69]
9 31:Diffuse [12551.58, 71029.45]
10 14:Diffuse [9370.64, 70938.77]
11 30:Diffuse [12582.04, 70880.32]
14 13:Diffuse [8928.08, 70711.08]
16 32:Diffuse [11589.5, 70564.44]
18 23:Diffuse [12079.79, 70446.54]
19 26:Diffuse [12266.54, 70428.63]
23 35:Diffuse [12157.13, 70203.2]
24 17:Diffuse [9710.58, 69976.69]
25 16:Diffuse [9191.05, 69891.7]
28 5:Diffuse [9219.92, 69285.59]
36 48:

In [9]:
Max_main_all

Unnamed: 0,max_index,max_class,max_object_name,monika_index,monika_class,monika_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,vivek_index,vivek_class,vivek_object_name
0,0,Coarse-Grained,4:Coarse-Grained,2,Coarse-Grained,15:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
1,1,Diffuse,13:Diffuse,12,Coarse-Grained,24:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,,,,,,
2,2,Coarse-Grained,5:Coarse-Grained,7,Coarse-Grained,22:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,1,Coarse-Grained,17:Coarse-Grained,1,Coarse-Grained,14:Coarse-Grained
3,4,Coarse-Grained,3:Coarse-Grained,3,Coarse-Grained,14:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,5,Cored,0:Cored,4,Coarse-Grained,17:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
327,76,Diffuse,86:Diffuse,,,,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
328,79,Coarse-Grained,39:Coarse-Grained,,,,XE18-066_1_AmyB_1.mrxs.geojson,59,Coarse-Grained,65:Coarse-Grained,,,
329,83,Diffuse,34:Diffuse,,,,XE18-066_1_AmyB_1.mrxs.geojson,61,Diffuse,33:Diffuse,,,
330,87,Coarse-Grained,83:Coarse-Grained,,,,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,


## Main Rater : Vivek

In [10]:
print("-----match monika + Max ----------")
monika_output = find_match(json_dir2,json_dir1, geojsons_names,200, "Point","Polygon")
print("-----match Vivek + Max ----------")
max_output = find_match(json_dir2, json_dir3, geojsons_names,200, "Point","Point")
print("-----match Ceren + Max ----------")
ceren_output = find_match(json_dir2, json_dir4, geojsons_names,200, "Point","Point")

max_output_short = max_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
max_output_short.columns = ['vivek_index', 'vivek_class', 'vivek_object_name',
       'max_index', 'max_class', 'max_object_name','geojson_file']

ceren_output_short = ceren_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
ceren_output_short.columns = ['vivek_index', 'vivek_class', 'vivek_object_name',
       'ceren_index', 'ceren_class', 'ceren_object_name','geojson_file']

monika_output_short = monika_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
monika_output_short.columns = ['vivek_index', 'vivek_class', 'vivek_object_name',
       'monika_index', 'monika_class', 'monika_object_name','geojson_file']

Vivek_main_all = pd.merge(max_output_short,ceren_output_short, how='left', on=['vivek_index', 'vivek_class', 'vivek_object_name','geojson_file'])

Vivek_main_all = pd.merge(Vivek_main_all,monika_output_short, how='left', on=['vivek_index', 'vivek_class', 'vivek_object_name','geojson_file'])

-----match monika + Max ----------
------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
16
27
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
15
31
------------------ XE19-037_1_AmyB_1.mrxs.geojson ---------------------
33
51
3 37:Diffuse [52833.46, 167626.38]
15 23:Diffuse [55010.91, 96910.71]
16 None [55187.02, 96125.41]
20 15:Diffuse [57661.78, 95090.56]
------------------ XE18-003_1_AmyB_1.mrxs.geojson ---------------------
15
26
5 2:Cored [47900.96, 46474.46]
------------------ XE18-066_1_AmyB_1.mrxs.geojson ---------------------
45
59
20 1:Diffuse [64981.16, 126763.24]
44 None [43656.71, 189110.06]
-----match Vivek + Max ----------
------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
16
30
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
15
50
------------------ XE19-037_1_AmyB_1.mrxs.geojson ---------------------
33
73
16 None [55187.02, 96125.41]
22 11:Coarse-Grained [58871.58, 94306.93]
------

In [11]:
Vivek_main_all

Unnamed: 0,vivek_index,vivek_class,vivek_object_name,max_index,max_class,max_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,monika_index,monika_class,monika_object_name
0,0,Coarse-Grained,11:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,2,Coarse-Grained,15:Coarse-Grained
1,1,Coarse-Grained,14:Coarse-Grained,2,Coarse-Grained,5:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,1,Coarse-Grained,17:Coarse-Grained,7,Coarse-Grained,22:Coarse-Grained
2,2,Coarse-Grained,10:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,3,Coarse-Grained,14:Coarse-Grained
3,3,Cored,12:Cored,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,4,Coarse-Grained,17:Coarse-Grained
4,4,Coarse-Grained,15:Coarse-Grained,6,Diffuse,10:Diffuse,XE11-039_1_AmyB_1.mrxs.geojson,4,Coarse-Grained,18:Coarse-Grained,8,Coarse-Grained,23:Coarse-Grained
...,...,...,...,...,...,...,...,...,...,...,...,...,...
132,40,Cored,23:Cored,85,Cored,37:Cored,XE18-066_1_AmyB_1.mrxs.geojson,64,Cored,37:Cored,25,Cored,25:Cored
133,41,Coarse-Grained,25:Coarse-Grained,88,Cored,77:Cored,XE18-066_1_AmyB_1.mrxs.geojson,65,Coarse-Grained,38:Coarse-Grained,43,Coarse-Grained,35:Coarse-Grained
134,42,Diffuse,21:Diffuse,89,Diffuse,35:Diffuse,XE18-066_1_AmyB_1.mrxs.geojson,66,Diffuse,32:Diffuse,35,Diffuse,30:Diffuse
135,43,Cored,22:Cored,90,Cored,36:Cored,XE18-066_1_AmyB_1.mrxs.geojson,,,,32,Diffuse,24:Diffuse


## Main Rater : Ceren

In [12]:
print("-----match monika + Max ----------")
monika_output = find_match(json_dir4,json_dir1, geojsons_names,200, "Point","Polygon")
print("-----match Vivek + Max ----------")
max_output = find_match(json_dir4, json_dir3, geojsons_names,200, "Point","Point")
print("-----match Ceren + Max ----------")
vivek_output = find_match(json_dir4, json_dir2, geojsons_names,200, "Point","Point")

max_output_short = max_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
max_output_short.columns = ['ceren_index', 'ceren_class', 'ceren_object_name',
       'max_index', 'max_class', 'max_object_name','geojson_file']

vivek_output_short = vivek_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
vivek_output_short.columns = ['ceren_index', 'ceren_class', 'ceren_object_name',
       'vivek_index', 'vivek_class', 'vivek_object_name','geojson_file']

monika_output_short = monika_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
monika_output_short.columns= ['ceren_index', 'ceren_class', 'ceren_object_name',
       'monika_index', 'monika_class', 'monika_object_name','geojson_file']

Ceren_main_all = pd.merge(max_output_short,vivek_output_short, how='left', on=['ceren_index', 'ceren_class', 'ceren_object_name','geojson_file'])

Ceren_main_all = pd.merge(Ceren_main_all, monika_output_short, how='left', on=['ceren_index', 'ceren_class', 'ceren_object_name','geojson_file'])

-----match monika + Max ----------
------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
34
27
6 24:Coarse-Grained [59025.53, 146517.06]
7 22:Diffuse [55938.46, 146358.66]
9 25:Coarse-Grained [59017.78, 145800.11]
10 26:Coarse-Grained [59006.15, 145600.52]
16 29:Diffuse [19370.96, 130710.52]
19 10:Cored [21003.37, 130217.77]
20 None [18286.48, 130060.66]
24 30:Diffuse [16912.63, 129659.55]
31 11:Coarse-Grained [20171.98, 128113.73]
------------------ XE07-049_1_AmyB_1.mrxs.geojson ---------------------
50
31
0 21:Diffuse [9240.76, 71619.85]
1 34:Diffuse [11276.82, 71582.64]
2 25:Diffuse [11170.99, 71568.21]
5 29:Diffuse [12101, 71423.9]
6 18:Diffuse [8629.84, 71418.21]
8 15:Diffuse [9482.88, 71084.69]
9 31:Diffuse [12551.58, 71029.45]
10 14:Diffuse [9370.64, 70938.77]
11 30:Diffuse [12582.04, 70880.32]
14 13:Diffuse [8928.08, 70711.08]
16 32:Diffuse [11589.5, 70564.44]
18 23:Diffuse [12079.79, 70446.54]
19 26:Diffuse [12266.54, 70428.63]
21 11:Diffuse [12157.59, 71110

In [13]:
common = Monika_main_all[ ( Monika_main_all["ceren_class"]!="None" ) & (Monika_main_all["vivek_class"]!="None") & (Monika_main_all["max_class"]!="None")]

In [14]:
common

Unnamed: 0,monika_index,monika_class,monika_object_name,max_index,max_class,max_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,vivek_index,vivek_class,vivek_object_name
0,0,Coarse-Grained,13:Coarse-Grained,12,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13,Coarse-Grained,12:Coarse-Grained,6,Coarse-Grained,9:Coarse-Grained
1,1,Coarse-Grained,12:Coarse-Grained,13,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15,Coarse-Grained,14:Coarse-Grained,7,Coarse-Grained,8:Coarse-Grained
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
3,3,Coarse-Grained,14:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,4,Coarse-Grained,17:Coarse-Grained,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,44,Coarse-Grained,56:Coarse-Grained,45,Coarse-Grained,54:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,39,Coarse-Grained,58:Coarse-Grained,27,Coarse-Grained,43:Coarse-Grained
201,46,Coarse-Grained,54:Coarse-Grained,41,Coarse-Grained,51:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,36,Coarse-Grained,61:Coarse-Grained,24,Coarse-Grained,42:Coarse-Grained
204,49,Coarse-Grained,50:Coarse-Grained,56,Cored,66:Cored,XE18-066_1_AmyB_1.mrxs.geojson,47,Coarse-Grained,46:Coarse-Grained,30,Coarse-Grained,39:Coarse-Grained
207,52,Coarse-Grained,36:Coarse-Grained,84,Coarse-Grained,78:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,62,Coarse-Grained,39:Coarse-Grained,39,Cored,24:Cored


In [15]:
common.groupby(["geojson_file"])["main_rater_index"].nunique().sum()

KeyError: 'Column not found: main_rater_index'

In [16]:
Noncommon_max = Max_main_all[ ( Max_main_all["monika_class"]=="None" ) | (Max_main_all["vivek_class"]=="None") | (Max_main_all["ceren_class"]=="None")]
Noncommon_vivek = Vivek_main_all[ ( Vivek_main_all["monika_class"]=="None" ) | (Vivek_main_all["max_class"]=="None") | (Vivek_main_all["ceren_class"]=="None")]
Noncommon_ceren = Ceren_main_all[ ( Ceren_main_all["monika_class"]=="None" ) | (Ceren_main_all["vivek_class"]=="None") | (Ceren_main_all["max_class"]=="None")]
Noncommon_monika = Monika_main_all[ ( Monika_main_all["ceren_class"]=="None" ) | (Monika_main_all["vivek_class"]=="None") | (Monika_main_all["max_class"]=="None")]

In [17]:
final_output = pd.concat([common,Noncommon_max,Noncommon_vivek,Noncommon_ceren,Noncommon_monika]).drop_duplicates()

In [18]:
final_output

Unnamed: 0,monika_index,monika_class,monika_object_name,max_index,max_class,max_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,vivek_index,vivek_class,vivek_object_name
0,0,Coarse-Grained,13:Coarse-Grained,12,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13,Coarse-Grained,12:Coarse-Grained,6,Coarse-Grained,9:Coarse-Grained
1,1,Coarse-Grained,12:Coarse-Grained,13,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15,Coarse-Grained,14:Coarse-Grained,7,Coarse-Grained,8:Coarse-Grained
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
3,3,Coarse-Grained,14:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,4,Coarse-Grained,17:Coarse-Grained,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,0,Diffuse,20:Diffuse,49,Diffuse,5:Diffuse,XE18-003_1_AmyB_1.mrxs.geojson,48,Diffuse,4:Diffuse,,,
175,21,Coarse-Grained,15:Coarse-Grained,8,Coarse-Grained,9:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,8,Coarse-Grained,15:Coarse-Grained,,,
177,23,Diffuse,45:Diffuse,79,Coarse-Grained,39:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,59,Coarse-Grained,65:Coarse-Grained,,,
178,23,Diffuse,45:Diffuse,79,Coarse-Grained,39:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,63,Coarse-Grained,56:Coarse-Grained,,,


In [19]:
final_output.to_csv("/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/all_objects_full.csv")

In [173]:
all_objects = final_output[["monika_class","max_class","ceren_class","vivek_class"]]

In [178]:
for col in ["monika_class","max_class","ceren_class","vivek_class"]:
    all_objects[col] = np.where(all_objects[col]=="None","",all_objects[col])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  all_objects[col] = np.where(all_objects[col]=="None","",all_objects[col])


In [183]:
all_objects= all_objects.reset_index()

In [184]:
all_objects["object"]=all_objects.index

In [186]:
all_objects.drop(["index"],axis=1,inplace=True)

In [188]:
all_objects

Unnamed: 0,monika_class,max_class,ceren_class,vivek_class,object
0,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained,0
1,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained,1
2,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained,2
3,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained,3
4,Coarse-Grained,Cored,Cored,Cored,4
...,...,...,...,...,...
378,Diffuse,Diffuse,Diffuse,,378
379,Coarse-Grained,Coarse-Grained,Coarse-Grained,,379
380,Diffuse,Coarse-Grained,Coarse-Grained,,380
381,Diffuse,Coarse-Grained,Coarse-Grained,,381


In [190]:
all_objects["max_class"].unique()

array(['Coarse-Grained', 'Cored', 'Diffuse', ''], dtype=object)

In [191]:
def map_class(l):
    if l=='Coarse-Grained':
        return "CG"
    if l=="Cored":
        return "C"
    if l=="Diffuse":
        return "D"
    return ""

for col in ["monika_class","max_class","ceren_class","vivek_class"]:
    all_objects[col] = all_objects[col].apply(lambda l: map_class(l) )

In [193]:
all_objects.to_csv("/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/all_objects.csv")

In [155]:
compute_kappa_score(final_output, "monika_class", "max_class")

409


0.33751294995971126

In [156]:
compute_kappa_score(final_output, "vivek_class", "max_class")

409


0.19671988578303112

In [157]:
compute_kappa_score(final_output, "ceren_class", "max_class")

409


0.7210450692954058

In [158]:
compute_kappa_score(final_output, "ceren_class", "vivek_class")

409


0.20049580958515256

In [159]:
table = final_output[["monika_class","vivek_class","ceren_class","max_class"]]

In [160]:
dict1 = {"Coarse-Grained":1, "Diffuse":2, "Cored":3, "None":4}

In [161]:
table= table.fillna("None")

In [162]:
table["monika_class"]=table["monika_class"].apply(lambda l: dict1[l])
table["vivek_class"]=table["vivek_class"].apply(lambda l:  dict1[l])
table["ceren_class"]=table["ceren_class"].apply(lambda l:  dict1[l])
table["max_class"]=table["max_class"].apply(lambda l: dict1[l])

In [163]:
table

Unnamed: 0,monika_class,vivek_class,ceren_class,max_class
0,1,1,1,1
1,1,1,1,1
2,1,1,1,1
3,1,1,1,1
4,1,3,3,3
...,...,...,...,...
175,1,4,1,1
176,2,4,1,1
177,2,4,1,1
178,2,4,1,1


In [164]:
rater1 = table["monika_class"].values
rater2 = table["vivek_class"].values
rater3 = table["ceren_class"].values
rater4 = table["max_class"].values
lists = [rater1, rater2, rater3,rater4]

In [165]:

kappa = fleiss_kappa1(lists,[1,2,3,4])
kappa

0.6679495310527408

In [79]:
table =  table[~((table["main_rater_class"]==4) & (table["rater1_class"]==4) & (table["rater2_class"]==4) & (table["rater3_class"]==4))]

In [30]:
def fleiss_kappa1(lists, classes):
    n = len(lists)
    N = len(lists[0])
    k = len(classes)
    
    nij = []
    for i in range(N):
        nij.append([0]*k)
        
    
    for i in range(len(lists)):
        for j in range(len(lists[i])):
            nij[j][classes.index(lists[i][j])] += 1 
    
    P = []
    for i in nij:
        P.append(1/(n*(n-1))*(sum([j*j for j in i])-n))
    return (((sum(P)/N)-(sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]])))/(1-sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]]))+1)/2

In [81]:
rater1 = table["main_rater_class"].values
rater2 = table["rater1_class"].values
rater3 = table["rater2_class"].values
rater4 = table["rater3_class"].values

In [82]:
lists = [rater1, rater2, rater3,rater4]

In [83]:
kappa = fleiss_kappa1(lists,[1,2,3,4])

In [84]:
kappa

0.7472464319544686

In [81]:
cohen_kappa_score(rater1,rater2)

0.33514090699719445

In [82]:
cohen_kappa_score(rater1,rater3)

0.1870969473091062

In [83]:
cohen_kappa_score(rater1,rater4)

0.748604364532734

## Class-wise Kappa

In [3]:
final_output = pd.read_csv("/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/all_objects_full.csv")

In [4]:
final_output.head(2)

Unnamed: 0.1,Unnamed: 0,monika_index,monika_class,monika_object_name,max_index,max_class,max_object_name,geojson_file,ceren_index,ceren_class,ceren_object_name,vivek_index,vivek_class,vivek_object_name
0,0,0.0,Coarse-Grained,13:Coarse-Grained,12.0,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13.0,Coarse-Grained,12:Coarse-Grained,6.0,Coarse-Grained,9:Coarse-Grained
1,1,1.0,Coarse-Grained,12:Coarse-Grained,13.0,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15.0,Coarse-Grained,14:Coarse-Grained,7.0,Coarse-Grained,8:Coarse-Grained


In [9]:
final_output["monika_class"].unique()

array(['Coarse-Grained', 'Cored', 'Diffuse', 'None'], dtype=object)

In [16]:
rater_class_list = [ "monika_class", "max_class","ceren_class","vivek_class"]

In [17]:
label_list = ['Coarse-Grained', 'Cored', 'Diffuse']

In [25]:
class_val_list1 = []
class_val_list2 = []
data_count = []
labels_list=[]
kappa_list = []
for class_val1 in rater_class_list:
    for class_val2 in rater_class_list:
        for label in label_list:
            tmp = final_output[final_output[class_val1]==label]
            print(tmp[class_val1].unique())
            print(tmp[class_val2].unique())
            #print(len(tmp))
            k = cohen_kappa_score(tmp[class_val1], tmp[class_val2])
            #k = compute_kappa_score(tmp, class_val, "max_class")
            #print(class_val1,class_val2,label,k)
            class_val_list1.append(class_val1)
            class_val_list2.append(class_val2)
            data_count.append(len(tmp))
            labels_list.append(label)
            kappa_list.append(k)

['Coarse-Grained']
['Coarse-Grained']
['Cored']
['Cored']
['Diffuse']
['Diffuse']
['Coarse-Grained']
['Coarse-Grained' 'Cored' 'Diffuse' 'None']
['Cored']
['Cored' 'Coarse-Grained' 'Diffuse' 'None']
['Diffuse']
['Diffuse' 'Coarse-Grained' 'Cored' 'None']
['Coarse-Grained']
['Coarse-Grained' 'Cored' 'Diffuse' 'None']
['Cored']
['Cored' 'Coarse-Grained' 'None' 'Diffuse']
['Diffuse']
['Diffuse' 'Coarse-Grained' 'None']
['Coarse-Grained']
['Coarse-Grained' 'Cored' 'Diffuse' 'None']
['Cored']
['Cored' 'Coarse-Grained' 'None']
['Diffuse']
['Coarse-Grained' 'Diffuse' 'Cored' 'None']
['Coarse-Grained']
['Coarse-Grained' 'Cored' 'Diffuse' 'None']
['Cored']
['Coarse-Grained' 'Cored' 'Diffuse' 'None']
['Diffuse']
['Coarse-Grained' 'Diffuse' 'Cored' 'None']
['Coarse-Grained']
['Coarse-Grained']
['Cored']
['Cored']
['Diffuse']
['Diffuse']
['Coarse-Grained']
['Coarse-Grained' 'Cored' 'None' 'Diffuse']
['Cored']
['Cored' 'Coarse-Grained' 'Diffuse' 'None']
['Diffuse']
['Coarse-Grained' 'Diffuse' 'None

  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
  k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)


In [21]:
class_wise_kappa = pd.DataFrame({"Rater_1":class_val_list1, "Rater_2":class_val_list2, "plaque_count":data_count, "label":labels_list, "kappa":kappa_list })

In [22]:
class_wise_kappa

Unnamed: 0,Rater_1,Rater_2,plaque_count,label,kappa
0,monika_class,monika_class,129,Coarse-Grained,
1,monika_class,monika_class,35,Cored,
2,monika_class,monika_class,79,Diffuse,
3,monika_class,max_class,129,Coarse-Grained,0.0
4,monika_class,max_class,35,Cored,0.0
5,monika_class,max_class,79,Diffuse,0.0
6,monika_class,ceren_class,129,Coarse-Grained,0.0
7,monika_class,ceren_class,35,Cored,0.0
8,monika_class,ceren_class,79,Diffuse,0.0
9,monika_class,vivek_class,129,Coarse-Grained,0.0


In [26]:
table = final_output[["monika_class","vivek_class","ceren_class","max_class"]]

In [27]:
table

Unnamed: 0,monika_class,vivek_class,ceren_class,max_class
0,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained
1,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained
2,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained
3,Coarse-Grained,Coarse-Grained,Coarse-Grained,Coarse-Grained
4,Coarse-Grained,Cored,Cored,Cored
...,...,...,...,...
378,Diffuse,,Diffuse,Diffuse
379,Coarse-Grained,,Coarse-Grained,Coarse-Grained
380,Diffuse,,Coarse-Grained,Coarse-Grained
381,Diffuse,,Coarse-Grained,Coarse-Grained


In [28]:
dict1 = {"Coarse-Grained":1, "Diffuse":2, "Cored":3, "None":4}

table= table.fillna("None")

table["monika_class"]=table["monika_class"].apply(lambda l: dict1[l])
table["vivek_class"]=table["vivek_class"].apply(lambda l:  dict1[l])
table["ceren_class"]=table["ceren_class"].apply(lambda l:  dict1[l])
table["max_class"]=table["max_class"].apply(lambda l: dict1[l])




In [29]:
table

Unnamed: 0,monika_class,vivek_class,ceren_class,max_class
0,1,1,1,1
1,1,1,1,1
2,1,1,1,1
3,1,1,1,1
4,1,3,3,3
...,...,...,...,...
378,2,4,2,2
379,1,4,1,1
380,2,4,1,1
381,2,4,1,1


In [35]:
l = [1,2,3,4]
for i in l:
    tmp1 = table[table["max_class"]==i]
    rater1 = tmp1["monika_class"].values
    rater2 = tmp1["vivek_class"].values
    rater3 = tmp1["ceren_class"].values
    rater4 = tmp1["max_class"].values
    lists = [rater1, rater2, rater3]
    
    kappa = fleiss_kappa1(lists,[1,2,3,4])
    print(i, kappa)

1 0.5790491033477704
2 0.4983165180666362
3 0.6684567552289427
4 0.5059900542495482


In [33]:
l = [1,2,3,4]
for i in l:
    tmp1 = table[table["max_class"]==i]
    rater1 = tmp1["monika_class"].values
    rater2 = tmp1["vivek_class"].values
    rater3 = tmp1["ceren_class"].values
    rater4 = tmp1["max_class"].values
    lists = [rater1, rater2, rater3,rater4]
    
    kappa = fleiss_kappa1(lists,[1,2,3,4])
    print(i, kappa)

1 0.5232887276423821
2 0.4729328663857071
3 0.5882074456638691
4 0.4919634415379767


## Max, Harry, Brittany

In [45]:
json_dir2= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Brittany"
json_dir1= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Max"
json_dir3= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Harry"

In [46]:
geojsons_main_rater =  glob(os.path.join(json_dir1,"*.geojson"))
geojsons_names =  [x.split("/")[-1] for x in geojsons_main_rater]

In [47]:
geojsons_names

['22650_7_AmyB_1_ceren.mrxs.geojson',
 '25144_1_AmyB_1_ceren.mrxs.geojson',
 '22640_1_AmyB_1_ceren.mrxs.geojson',
 '30414_6_AmyB_1_ceren.mrxs.geojson',
 '420418_6_AmyB_1_ceren.mrxs.geojson',
 '354049_6_AmyB_1_ceren.mrxs.geojson']

In [48]:
## main rater monika
harry_brittany_output = find_match(json_dir3,json_dir2, geojsons_names,200,"Point","Point")
max_harry_output = find_match(json_dir1,json_dir3, geojsons_names,200,"Polygon","Point")
max_brittany_output = find_match(json_dir1,json_dir2, geojsons_names,200,"Polygon","Point")



------------------ 22650_7_AmyB_1_ceren.mrxs.geojson ---------------------
0 12:Coarse-Grained [77838.03, 99400.31]
1 10:Coarse-Grained [77330.66, 99216.44]
2 09:Diffuse [76220.6, 98933.81]
3 05:Coarse-Grained [77058.26, 98927]
4 08:Coarse-Grained [78692.71, 98293.65]
8 07:Coarse-Grained [80248.84, 97827.15]
9 06:Diffuse [78491.81, 97711.38]
11 11:Diffuse [76397.66, 96219.94]
13 24:Coarse-Grained [71166.34, 142018.05]
14 19:Coarse-Grained [69681.71, 141653.69]
20 13:Diffuse [72889.32, 140043.06]
22 15:Coarse-Grained [72742.9, 139719.58]
23 16:Cored [72255.97, 139562.95]
24 25:Coarse-Grained [70897.33, 139222.44]
------------------ 25144_1_AmyB_1_ceren.mrxs.geojson ---------------------
1 18:Diffuse [32637.44, 88388.35]
2 17:Diffuse [31732.75, 88284.73]
3 07:Diffuse [34291.37, 88236.91]
4 None [33410.6, 88097.42]
5 08:Diffuse [34064.2, 87838.37]
6 16:Diffuse [31417.91, 87822.43]
7 03:Cored [32127.31, 87403.96]
8 04:Cored [31103.07, 87256.5]
9 15:Diffuse [31083.14, 87128.97]
11 09:Diffus

In [18]:
harry_brittany_output

Unnamed: 0,main_rater_index,main_rater_class,main_rater_object_name,rater1_index,rater1_class,rater1_object_name,polygon_coords,point_coords,main_rater_annotation,rater1_annotation,geojson_file
0,5,Cored,01:Cored,0,Cored,01:Cored,"[[76941, 98269], [76841.0, 98369.0], [76741, 9...","[76746.86, 98282.79]",Polygon(C),Point(C),22650_7_AmyB_1_ceren.mrxs.geojson
1,6,Neuritic,03:Neuritic,1,Diffuse,02:Diffuse,"[[77602, 98133], [77502.0, 98233.0], [77402, 9...","[77376.74, 98142.24]",PolygonNone,Point(D),22650_7_AmyB_1_ceren.mrxs.geojson
2,7,Neuritic,04:Neuritic,2,Diffuse,04:Diffuse,"[[79645, 97878], [79545.0, 97978.0], [79445, 9...","[79360.06, 97840.32]",PolygonNone,Point(D),22650_7_AmyB_1_ceren.mrxs.geojson
3,10,Coarse-Grained,02:Coarse-Grained,3,Diffuse,03:Diffuse,"[[76607, 97316], [76507.0, 97416.0], [76407, 9...","[76408.5, 97283.32]",Polygon(CG),Point(D),22650_7_AmyB_1_ceren.mrxs.geojson
4,12,Coarse-Grained,22:Coarse-Grained,4,Coarse-Grained,11:Coarse-Grained,"[[72609, 142201], [72509.0, 142301.0], [72409,...","[72377.02, 142198.59]",Polygon(CG),Point(CG),22650_7_AmyB_1_ceren.mrxs.geojson
...,...,...,...,...,...,...,...,...,...,...,...
301,68,Diffuse,66:Diffuse,,,,"[63324.9, 101458.12]",,Polygon(D),,354049_6_AmyB_1_ceren.mrxs.geojson
302,69,Diffuse,49:Diffuse,,,,"[60719.2, 101383.8]",,Polygon(D),,354049_6_AmyB_1_ceren.mrxs.geojson
303,71,Diffuse,63:Diffuse,,,,"[62494.22, 101335.71]",,Polygon(D),,354049_6_AmyB_1_ceren.mrxs.geojson
304,72,,,,,,"[30296.67, 40340.28]",,,,354049_6_AmyB_1_ceren.mrxs.geojson


In [37]:
common_harry_brittany  = harry_brittany_output[(harry_brittany_output["main_rater_class"]!="None") & (harry_brittany_output["rater1_class"]!="None")]

In [38]:
cohen_kappa_score(common_harry_brittany["main_rater_class"], common_harry_brittany["rater1_class"])

0.5051752921535893

In [32]:
non_na_harry_brittany_output = harry_brittany_output[(harry_brittany_output["main_rater_class"]!="None")]

In [34]:
cohen_kappa_score(harry_brittany_output["main_rater_class"], harry_brittany_output["rater1_class"])

0.15067097858470624

## Max, Harry, Brittany, Osama

In [3]:
json_dir2= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Brittany"
json_dir1= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Max"
json_dir3= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Harry"
json_dir4= "/gladstone/finkbeiner/steve/work/data/npsad_data/vivek/interrater-study/interrater_geojson_Max/Osama"

In [4]:
geojsons_main_rater =  glob(os.path.join(json_dir1,"*.geojson"))
geojsons_names =  [x.split("/")[-1] for x in geojsons_main_rater]

In [5]:
geojsons_names

['22650_7_AmyB_1_ceren.mrxs.geojson',
 '25144_1_AmyB_1_ceren.mrxs.geojson',
 '22640_1_AmyB_1_ceren.mrxs.geojson',
 '30414_6_AmyB_1_ceren.mrxs.geojson',
 '420418_6_AmyB_1_ceren.mrxs.geojson',
 '354049_6_AmyB_1_ceren.mrxs.geojson']

In [8]:
## main rater Max
harry_brittany_output = find_match(json_dir3,json_dir2, geojsons_names,200,"Point","Point")
max_harry_output = find_match(json_dir1,json_dir3, geojsons_names,200,"Polygon","Point")
max_brittany_output = find_match(json_dir1,json_dir2, geojsons_names,200,"Polygon","Point")
max_osama_output = find_match(json_dir1,json_dir4, geojsons_names,200,"Polygon","Point")
harry_osama_output = find_match(json_dir3,json_dir4, geojsons_names,200,"Point","Point")
osama_brittany_output = find_match(json_dir4,json_dir2, geojsons_names,200,"Point","Point")


------------------ 22650_7_AmyB_1_ceren.mrxs.geojson ---------------------
0 12:Coarse-Grained [77838.03, 99400.31]
1 10:Coarse-Grained [77330.66, 99216.44]
2 09:Diffuse [76220.6, 98933.81]
3 05:Coarse-Grained [77058.26, 98927]
4 08:Coarse-Grained [78692.71, 98293.65]
8 07:Coarse-Grained [80248.84, 97827.15]
9 06:Diffuse [78491.81, 97711.38]
11 11:Diffuse [76397.66, 96219.94]
13 24:Coarse-Grained [71166.34, 142018.05]
14 19:Coarse-Grained [69681.71, 141653.69]
20 13:Diffuse [72889.32, 140043.06]
22 15:Coarse-Grained [72742.9, 139719.58]
23 16:Cored [72255.97, 139562.95]
24 25:Coarse-Grained [70897.33, 139222.44]
------------------ 25144_1_AmyB_1_ceren.mrxs.geojson ---------------------
1 18:Diffuse [32637.44, 88388.35]
2 17:Diffuse [31732.75, 88284.73]
3 07:Diffuse [34291.37, 88236.91]
4 None [33410.6, 88097.42]
5 08:Diffuse [34064.2, 87838.37]
6 16:Diffuse [31417.91, 87822.43]
7 03:Cored [32127.31, 87403.96]
8 04:Cored [31103.07, 87256.5]
9 15:Diffuse [31083.14, 87128.97]
11 09:Diffus

In [None]:
common_harry_brittany  = harry_brittany_output[(harry_brittany_output["main_rater_class"]!="None") & (harry_brittany_output["rater1_class"]!="None")]