In [1]:
import geojson
import os
import pandas as pd
import cv2
import numpy as np
from sklearn.metrics import cohen_kappa_score
from glob import glob

In [2]:
def json_load(json_dir,geofile1):
    with open(os.path.join(json_dir,geofile1)) as f:
        gj = geojson.load(f)
    features = gj['features']
    return features


def polygon_rater_data_load(json_dir,geofile1):
    features1 = json_load(json_dir,geofile1)
    feature1_points = [f for f in features1 if (f["geometry"]["type"]=="Polygon") & ("isLocked" not in f["properties"].keys())]
    feature1_coords = [cords["geometry"]["coordinates"] for cords in feature1_points]
    feature1_name = []
    feature1_class = []
    for cords in feature1_points:
        if "name" not in cords["properties"].keys():
            feature1_name.append(None)
        else:
            feature1_name.append(cords["properties"]["name"])
        if "classification" not in cords["properties"].keys():
            feature1_class.append(None)
        else:
            feature1_class.append(cords["properties"]["classification"]["name"])
    feature1_df = pd.DataFrame({"coordinates":feature1_coords,"class":feature1_class, "name":feature1_name})
    return feature1_coords, feature1_class, feature1_name, feature1_df


def point_rater_data_load(json_dir,geofile2):
    features2 = json_load(json_dir,geofile2)
    features2_points = [f for f in features2 if f["geometry"]["type"]=="Point"]
    feature2_coords = [cords["geometry"]["coordinates"] for cords in features2_points]
    feature2_class = [cords["properties"]["classification"]["name"] if "classification" in (cords["properties"].keys()) else "None" for cords in features2_points ]
    feature2_name = [cords["properties"]["name"] if "name" in (cords["properties"].keys()) else "None" for cords in features2_points]
    feature2_df = pd.DataFrame({"coordinates":feature2_coords,"class":feature2_class, "name":feature2_name})
    return feature2_coords, feature2_class, feature2_name, feature2_df

def match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius):
    match = []
    i=0
    for c, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for cnt, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            
            contours = [[int(c[0])+radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])+0.5*radius], [int(c[0]),int(c[1])+radius], [int(c[0])-0.5*radius,int(c[1])+0.5*radius], [int(c[0])-radius,int(c[1])], 
                        [int(c[0])-0.5*radius,int(c[1])-0.5*radius], [int(c[0])-radius,int(c[1])],[int(c[0])+0.5*radius,int(c[1])-0.5*radius]]
            #print(len(cnt[0]))
            if (len(cnt[0])>2):
                cnt = np.mean(cnt[0], axis=0)
            #print(len(cnt))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(cnt[0]),int(cnt[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, cnt])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df



def match_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name):
    match = []
    i=0
    for contours, class_var, name_var in zip(feature1_coords,feature1_class,feature1_name):
        #print(contours,class_var,name_var)
        j=0
        for c, class_var1, name1 in zip(feature2_coords,feature2_class,feature2_name):
            #print(c, class_var1, name1)
            #print((int(c[0]),int(c[1])))
            dist = cv2.pointPolygonTest(np.int32(np.array(contours).round()),(int(c[0]),int(c[1])),False)
            if dist>=1:
                match.append([i,class_var,name_var,j,class_var1,name1, contours, c])
            j=j+1
        i=i+1 
    df = pd.DataFrame(match, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return df




def compute_kappa_score(df, rater1_column, rater2_column):
    #df1= df[~((df["main_rater_class"]=='None') & (df["main_rater_class"]=='None'))]
    df1=df
    print(len(df1))
    labeler1 = df1[rater1_column]
    labeler2 = df1[rater2_column]
    return cohen_kappa_score(labeler1, labeler2)

def find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name):
    missing_main_rater_list = []
    for i in range(len(feature1_coords)):
        if i not in df["main_rater_index"].values:
            print(i,feature1_name[i], feature1_coords[i])
            missing_main_rater_list.append([i,feature1_class[i],feature1_name[i],None, None,None, feature1_coords[i],None])
    missing_main_rater = pd.DataFrame(missing_main_rater_list, columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_main_rater

def find_missing_rater1(df, feature2_coords,feature2_class,feature2_name):
    missing_rater1_list = []
    for i in range(len(feature2_class)):
        if i not in df["rater1_index"].values:
            print(i,feature2_class[i],feature2_coords[i] )
            missing_rater1_list.append([None, None,None,i,feature2_class[i],feature2_name[i], None, feature2_coords[i]])
    missing_rater1 = pd.DataFrame(missing_rater1_list,columns=["main_rater_index","main_rater_class","main_rater_object_name","rater1_index","rater1_class","rater1_object_name","polygon_coords","point_coords"])
    return missing_rater1

def map_class(l):
    if l=="Cored":
        return "(C)"
    if l=="Diffuse":
        return "(D)"
    if l=="Mature":
        return "(M)"
    if l=="Pre":
        return "(P)"
    if l=="Ghost":
        return "(G)"
    if l=="Coarse-Grained":
        return "(CG)"
    return None


def find_match(json_dir1,json_dir2, geojsons_names,radius):
    all_geojson_df = pd.DataFrame()
    for geofile1 in geojsons_names:
        print("------------------",geofile1,"---------------------")
        #feature1_coords, feature1_class, feature1_name, feature1_df = point_rater_data_load(json_dir1,geofile1)
        feature1_coords, feature1_class, feature1_name, feature1_df = polygon_rater_data_load(json_dir1,geofile1)
        print(len(feature1_coords))
        feature2_coords, feature2_class, feature2_name, feature2_df = point_rater_data_load(json_dir2,geofile1)
        #feature2_coords, feature2_class, feature2_name, feature2_df = polygon_rater_data_load(json_dir2,geofile1)
        print(len(feature2_coords))
        #df = match_main_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name, radius)
        df = match_point_rater(feature1_coords,feature1_class,feature1_name,feature2_coords,feature2_class,feature2_name)
        missing_main_rater = find_missing_main_rater(df, feature1_coords,feature1_class,feature1_name)
        #missing_rater1 = find_missing_rater1(df, feature2_coords,feature2_class,feature2_name)
        #df_final = pd.concat([df,missing_rater1,missing_main_rater], axis=0, ignore_index=True) 
        df_final = pd.concat([df,missing_main_rater], axis=0, ignore_index=True) 
        df_final["main_rater_class"] = np.where(df_final["main_rater_class"].isna(),"None",df_final["main_rater_class"])
        df_final["rater1_class"] = np.where(df_final["rater1_class"].isna(),"None",df_final["rater1_class"])
        df_final["main_rater_annotation"] = df_final["main_rater_class"].apply(lambda l: "Polygon"+ map_class(l) if l!='None' else "")
        df_final["rater1_annotation"] = df_final["rater1_class"].apply(lambda l: "Point"+ map_class(l) if l!='None' else "")
        df_final["geojson_file"] = geofile1
        if len(all_geojson_df)==0:
            all_geojson_df = df_final
        else:
            all_geojson_df =  pd.concat([all_geojson_df,df_final], ignore_index=True)
    return all_geojson_df
        

In [7]:
json_dir1= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/monika"
json_dir2= "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/vivek"
json_dir3 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Max_calibrated"
json_dir4 = "/gladstone/finkbeiner/steve/work/data/npsad_data/monika/Amy_plaque_Results/interrater-tool/monika_vivek_interrater_analysis/Ceren_calibrated"
#geofile1 = "94213_7_Sil_1.mrxs (1).geojson"
#geofile2 = "94213_7_Sil_1.mrxs.geojson"

In [8]:
geojsons_main_rater =  glob(os.path.join(json_dir1,"*.geojson"))
geojsons_names =  [x.split("/")[-1] for x in geojsons_main_rater]

In [9]:
geojsons_names

['XE11-039_1_AmyB_1.mrxs.geojson',
 'XE07-049_1_AmyB_1.mrxs.geojson',
 'XE19-037_1_AmyB_1.mrxs.geojson',
 'XE18-003_1_AmyB_1.mrxs.geojson',
 'XE18-066_1_AmyB_1.mrxs.geojson']

In [10]:
max_output = find_match(json_dir1,json_dir3, geojsons_names,250)

------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
27
30
10 20:Cored [[[58889, 144057], [58886, 144058], [58885.02, 144058.66], [58885, 144058.67], [58884, 144059], [58881, 144061], [58878, 144063], [58875, 144065], [58874, 144066], [58871, 144068], [58868, 144070], [58867, 144071], [58865.86, 144072.7], [58865, 144073], [58862, 144075], [58861, 144075.67], [58860, 144076], [58857, 144078], [58856.6, 144078.59], [58856, 144079], [58855.71, 144079.44], [58854, 144080], [58848, 144084], [58845, 144086], [58844.6, 144086.59], [58844, 144087], [58842, 144090], [58841.73, 144090.42], [58840, 144091], [58834, 144095], [58833, 144096], [58832, 144097], [58830, 144100], [58828, 144103], [58827.33, 144105], [58826, 144107], [58825.66, 144108.02], [58825, 144109], [58824.67, 144109.98], [58824, 144111], [58824, 144112], [58824, 144114], [58824, 144115], [58824, 144117], [58824, 144118], [58824, 144119], [58824, 144120], [58824, 144121], [58824, 144122], [58824, 144123], [588

In [None]:
def find_equiv_diameter(cnt):
    area = cv2.contourArea(cnt)
    equi_diameter = np.sqrt(4*area/np.pi)
    return equi_diameter

In [17]:
max_output["area"] = max_output["polygon_coords"].apply(lambda l: cv2.contourArea(np.int32(np.array(l).round())))

In [19]:
max_output["diameter"] = max_output["area"].apply(lambda l: np.sqrt(4*l/np.pi))

In [29]:
max_output["size_bracket"] = np.where(max_output["diameter"]<=10,"a. <=10",np.where(max_output["diameter"]<=50,"b. 10-50", 
                                                                                   np.where(max_output["diameter"]<=100,"c. 50-100",
                                                                                            np.where(max_output["diameter"]<=250,"d. 100-250",
                                                                                                     np.where(max_output["diameter"]<=500,"e. 250-500",
                                                                                                     "f. >500")))))

In [31]:
max_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file', 'area', 'diameter', 'size_bracket'],
      dtype='object')

In [33]:
max_output.groupby(["main_rater_class","size_bracket"])["main_rater_index"].count().reset_index()

Unnamed: 0,main_rater_class,size_bracket,main_rater_index
0,Coarse-Grained,c. 50-100,4
1,Coarse-Grained,d. 100-250,84
2,Coarse-Grained,e. 250-500,20
3,Cored,c. 50-100,1
4,Cored,d. 100-250,18
5,Cored,e. 250-500,12
6,Diffuse,c. 50-100,3
7,Diffuse,d. 100-250,48
8,Diffuse,e. 250-500,8
9,Diffuse,f. >500,1


In [14]:
max_output["main_rater_class"].isna().sum()

0

In [15]:
max_output["rater1_class"].isna().sum()

0

In [13]:
compute_kappa_score(max_output, "main_rater_class", "rater1_class")

200


0.589986201458703

In [29]:
compute_kappa_score(max_output[max_output["rater1_class"]!="None"], "main_rater_class", "rater1_class")

188


0.6506874767744333

In [66]:
vivek_output = find_match(json_dir1,json_dir2, geojsons_names,200)

------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
27
16
5 16:Coarse-Grained [[[57601, 145116], [57600, 145117], [57598, 145117], [57597, 145118], [57595, 145119], [57594, 145120], [57592, 145121], [57591.33, 145122.33], [57590, 145123], [57589.33, 145124.33], [57588, 145125], [57587, 145126], [57586.68, 145126.66], [57586.67, 145126.67], [57586, 145127], [57585, 145129], [57584, 145130], [57584, 145131], [57583, 145133], [57582, 145134], [57582, 145135], [57581, 145136], [57581, 145137], [57581, 145138], [57580, 145140], [57579, 145141], [57579, 145143], [57578, 145144], [57578, 145145], [57578, 145146], [57577, 145147], [57577, 145148], [57576, 145149], [57576, 145150], [57575, 145151], [57575, 145153], [57574, 145154], [57574, 145155], [57573, 145157], [57572, 145158], [57572, 145159], [57571, 145160], [57571, 145162], [57571, 145163], [57570, 145165], [57569, 145166], [57569, 145168], [57569, 145170], [57568, 145171], [57568, 145172], [57568, 145173], [57568, 1

In [54]:
vivek_output

Unnamed: 0,main_rater_index,main_rater_class,main_rater_object_name,rater1_index,rater1_class,rater1_object_name,polygon_coords,point_coords,main_rater_annotation,rater1_annotation,geojson_file
0,0,Coarse-Grained,13:Coarse-Grained,6,Coarse-Grained,9:Coarse-Grained,"[[[57865, 144201], [57857, 144202], [57850, 14...","[57910.02, 144392.48]",Polygon(CG),Point(CG),XE11-039_1_AmyB_1.mrxs.geojson
1,1,Coarse-Grained,12:Coarse-Grained,7,Coarse-Grained,8:Coarse-Grained,"[[[55739, 143692], [55725, 143698], [55724.66,...","[55774.76, 143826.91]",Polygon(CG),Point(CG),XE11-039_1_AmyB_1.mrxs.geojson
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained,"[[[57705, 147315], [57702, 147316], [57701, 14...","[57765.24, 147446.95]",Polygon(CG),Point(CG),XE11-039_1_AmyB_1.mrxs.geojson
3,3,Coarse-Grained,14:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained,"[[[57711, 146899], [57708, 146900], [57704, 14...","[57685.84, 147021.94]",Polygon(CG),Point(CG),XE11-039_1_AmyB_1.mrxs.geojson
4,4,Coarse-Grained,17:Coarse-Grained,3,Cored,12:Cored,"[[[55464, 146813], [55459, 146814], [55457.71,...","[55471.7, 146980.17]",Polygon(CG),Point(C),XE11-039_1_AmyB_1.mrxs.geojson
...,...,...,...,...,...,...,...,...,...,...,...
195,51,Coarse-Grained,52:Coarse-Grained,,,,"[[[38966, 187732], [38964, 187733], [38962, 18...",,Polygon(CG),,XE18-066_1_AmyB_1.mrxs.geojson
196,53,Coarse-Grained,51:Coarse-Grained,,,,"[[[38839, 187970], [38833, 187972], [38832, 18...",,Polygon(CG),,XE18-066_1_AmyB_1.mrxs.geojson
197,55,Diffuse,47:Diffuse,,,,"[[[41228, 186739], [41226, 186740], [41223, 18...",,Polygon(D),,XE18-066_1_AmyB_1.mrxs.geojson
198,56,Coarse-Grained,43:Coarse-Grained,,,,"[[[41470, 186952], [41467, 186953], [41465, 18...",,Polygon(CG),,XE18-066_1_AmyB_1.mrxs.geojson


In [30]:
compute_kappa_score(vivek_output, "main_rater_class", "rater1_class")

194


0.37983945442921074

In [27]:
compute_kappa_score(vivek_output[vivek_output["rater1_class"]!="None"], "main_rater_class", "rater1_class")

118


0.7725812668636772

In [21]:
vivek_output["rater1_class"].isna().sum()

0

In [67]:
ceren_output = find_match(json_dir1,json_dir4, geojsons_names,250)

------------------ XE11-039_1_AmyB_1.mrxs.geojson ---------------------
27
34
12 24:Coarse-Grained [[[58902, 147311], [58901, 147312], [58899, 147312], [58898.5, 147313], [58897, 147313], [58896, 147314], [58894, 147314], [58893.5, 147315], [58892, 147315], [58891, 147317], [58889, 147317], [58888.5, 147318], [58888, 147318], [58887.49, 147319], [58887, 147319], [58886.67, 147319.66], [58886, 147320], [58885.67, 147320.67], [58885, 147321], [58884.67, 147321.67], [58884, 147322], [58883.67, 147322.67], [58883.66, 147322.69], [58883, 147323], [58883, 147323.5], [58882, 147324], [58882, 147324.5], [58881, 147325], [58881, 147326], [58880.67, 147326.67], [58880, 147327], [58880, 147327.5], [58879, 147328], [58878.67, 147328.67], [58878.66, 147328.69], [58878, 147329], [58878, 147330], [58876, 147331], [58876, 147332.5], [58875, 147333], [58875, 147333.5], [58874, 147334], [58874, 147336], [58873, 147337], [58873, 147338.5], [58872, 147339], [58872, 147340], [58872, 147341], [58871, 147342

In [35]:
ceren_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file'],
      dtype='object')

In [32]:
compute_kappa_score(ceren_output, "main_rater_class", "rater1_class")

201


0.5840741876149602

In [33]:
compute_kappa_score(ceren_output[ceren_output["rater1_class"]!="None"], "main_rater_class", "rater1_class")

179


0.7020236175414867

In [41]:
ceren_output["main_rater_class"].isna().sum()

0

In [43]:
vivek_output.columns

Index(['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name', 'polygon_coords',
       'point_coords', 'main_rater_annotation', 'rater1_annotation',
       'geojson_file'],
      dtype='object')

In [68]:
vivek_output_short = vivek_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
vivek_output_short.columns = ['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater2_index', 'rater2_class', 'rater2_object_name','geojson_file']

ceren_output_short = ceren_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]
ceren_output_short.columns = ['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater3_index', 'rater3_class', 'rater3_object_name','geojson_file']

In [69]:
max_output_short = max_output[['main_rater_index', 'main_rater_class', 'main_rater_object_name',
       'rater1_index', 'rater1_class', 'rater1_object_name','geojson_file']]

In [70]:
output_all = pd.merge(max_output_short,ceren_output_short, how='left', on=['main_rater_index', 'main_rater_class', 'main_rater_object_name','geojson_file'])

In [71]:
output_all

Unnamed: 0,main_rater_index,main_rater_class,main_rater_object_name,rater1_index,rater1_class,rater1_object_name,geojson_file,rater3_index,rater3_class,rater3_object_name
0,0,Coarse-Grained,13:Coarse-Grained,12,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13,Coarse-Grained,12:Coarse-Grained
1,1,Coarse-Grained,12:Coarse-Grained,13,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15,Coarse-Grained,14:Coarse-Grained
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained
3,3,Coarse-Grained,14:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained
4,4,Coarse-Grained,17:Coarse-Grained,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored
...,...,...,...,...,...,...,...,...,...,...
209,54,Coarse-Grained,42:Coarse-Grained,81,Coarse-Grained,79:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,
210,55,Diffuse,47:Diffuse,82,Diffuse,82:Diffuse,XE18-066_1_AmyB_1.mrxs.geojson,,,
211,56,Coarse-Grained,43:Coarse-Grained,77,Coarse-Grained,84:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,
212,57,Coarse-Grained,37:Coarse-Grained,86,Coarse-Grained,80:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,


In [72]:
output_all = pd.merge(output_all,vivek_output_short, how='left', on=['main_rater_index', 'main_rater_class', 'main_rater_object_name','geojson_file'])

In [73]:
output_all

Unnamed: 0,main_rater_index,main_rater_class,main_rater_object_name,rater1_index,rater1_class,rater1_object_name,geojson_file,rater3_index,rater3_class,rater3_object_name,rater2_index,rater2_class,rater2_object_name
0,0,Coarse-Grained,13:Coarse-Grained,12,Coarse-Grained,6:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,13,Coarse-Grained,12:Coarse-Grained,6,Coarse-Grained,9:Coarse-Grained
1,1,Coarse-Grained,12:Coarse-Grained,13,Coarse-Grained,9:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,15,Coarse-Grained,14:Coarse-Grained,7,Coarse-Grained,8:Coarse-Grained
2,2,Coarse-Grained,15:Coarse-Grained,0,Coarse-Grained,4:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,0,Coarse-Grained,16:Coarse-Grained,0,Coarse-Grained,11:Coarse-Grained
3,3,Coarse-Grained,14:Coarse-Grained,4,Coarse-Grained,3:Coarse-Grained,XE11-039_1_AmyB_1.mrxs.geojson,2,Coarse-Grained,23:Coarse-Grained,2,Coarse-Grained,10:Coarse-Grained
4,4,Coarse-Grained,17:Coarse-Grained,5,Cored,0:Cored,XE11-039_1_AmyB_1.mrxs.geojson,3,Cored,13:Cored,3,Cored,12:Cored
...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,54,Coarse-Grained,42:Coarse-Grained,81,Coarse-Grained,79:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,38,Coarse-Grained,26:Coarse-Grained
210,55,Diffuse,47:Diffuse,82,Diffuse,82:Diffuse,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
211,56,Coarse-Grained,43:Coarse-Grained,77,Coarse-Grained,84:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,
212,57,Coarse-Grained,37:Coarse-Grained,86,Coarse-Grained,80:Coarse-Grained,XE18-066_1_AmyB_1.mrxs.geojson,,,,,,


In [74]:
table = output_all[["main_rater_class","rater1_class","rater3_class","rater2_class"]]

In [75]:
dict1 = {"Coarse-Grained":1, "Diffuse":2, "Cored":3, "None":4}

In [76]:
table= table.fillna("None")

In [77]:
table["main_rater_class"]=table["main_rater_class"].apply(lambda l: dict1[l])
table["rater1_class"]=table["rater1_class"].apply(lambda l:  dict1[l])
table["rater3_class"]=table["rater3_class"].apply(lambda l:  dict1[l])
table["rater2_class"]=table["rater2_class"].apply(lambda l: dict1[l])

In [78]:
table

Unnamed: 0,main_rater_class,rater1_class,rater3_class,rater2_class
0,1,1,1,1
1,1,1,1,1
2,1,1,1,1
3,1,1,1,1
4,1,3,3,3
...,...,...,...,...
209,1,1,4,1
210,2,2,4,4
211,1,1,4,4
212,1,1,4,4


In [79]:
table =  table[~((table["main_rater_class"]==4) & (table["rater1_class"]==4) & (table["rater2_class"]==4) & (table["rater3_class"]==4))]

In [80]:
def fleiss_kappa1(lists, classes):
    n = len(lists)
    N = len(lists[0])
    k = len(classes)
    
    nij = []
    for i in range(N):
        nij.append([0]*k)
        
    
    for i in range(len(lists)):
        for j in range(len(lists[i])):
            nij[j][classes.index(lists[i][j])] += 1 
    
    P = []
    for i in nij:
        P.append(1/(n*(n-1))*(sum([j*j for j in i])-n))
    return (((sum(P)/N)-(sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]])))/(1-sum([y*y for y in [x/(N*n) for x in[sum(i) for i in zip(*nij)]]]))+1)/2

In [81]:
rater1 = table["main_rater_class"].values
rater2 = table["rater1_class"].values
rater3 = table["rater2_class"].values
rater4 = table["rater3_class"].values

In [82]:
lists = [rater1, rater2, rater3,rater4]

In [83]:
kappa = fleiss_kappa1(lists,[1,2,3,4])

In [84]:
kappa

0.7472464319544686

In [81]:
cohen_kappa_score(rater1,rater2)

0.33514090699719445

In [82]:
cohen_kappa_score(rater1,rater3)

0.1870969473091062

In [83]:
cohen_kappa_score(rater1,rater4)

0.748604364532734