In [1]:
import os, sys

# Resolve the parent directory to an absolute path *before* the working
# directory is changed below, so the value stays valid afterwards.
module_path = os.path.abspath(os.path.join('..'))
# Guarded append: avoids adding the same entry to sys.path more than once.
if module_path not in sys.path:
    sys.path.append(module_path)

# Move the working directory up one level; the relative "input/..." paths read
# later in the notebook resolve against it.
# NOTE(review): the path is relative, so re-running this cell in the same kernel
# moves up one more level each time -- run it once per kernel session.
os.chdir("..")

import numpy as np
import pandas as pd
import sklearn.metrics as skm

In [2]:
# Load the per-case predictions table (path is relative to the working directory).
preds = pd.read_csv("input/preds_by_case.csv")
# Display (rows, columns) as a quick sanity check on what was loaded.
preds.shape

(485, 7)

In [3]:
# Load the reader annotations, order the rows by case name (with a fresh
# 0..n-1 index), then discard the "Category" column.
targets = (
    pd.read_csv("input/reads_ICHonly.csv")
    .sort_values(by="name", ignore_index=True)
    .drop(columns="Category")
)
print(targets.shape)
targets.head()

(491, 19)


Unnamed: 0,name,R1:ICH,R1:IPH,R1:IVH,R1:SDH,R1:EDH,R1:SAH,R2:ICH,R2:IPH,R2:IVH,R2:SDH,R2:EDH,R2:SAH,R3:ICH,R3:IPH,R3:IVH,R3:SDH,R3:EDH,R3:SAH
0,CQ500-CT-0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
1,CQ500-CT-1,1,1,0,0,0,1,1,1,1,0,0,1,1,1,0,0,0,1
2,CQ500-CT-10,1,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0
3,CQ500-CT-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,CQ500-CT-101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [4]:
# True for every prediction row whose case ID also appears in the annotations.
filt = preds["PatientID"].isin(targets["name"])
# Show the prediction IDs that have no matching annotation row.
preds.loc[~filt, "PatientID"]

Series([], Name: PatientID, dtype: object)

In [5]:
# Shape of the predictions restricted to cases that have annotations.
preds.loc[filt].shape

(485, 7)

In [6]:
# Keep only the annotation rows whose case name has a prediction.
# The original row index is retained (it is not reset here).
filt = targets["name"].isin(preds["PatientID"])
targets = targets.loc[filt]
targets.shape

(485, 19)

In [7]:
# Maps the abbreviated finding codes used in the reader columns ("R<n>:<code>")
# to the label names written by consensus_score.
label_map = {
    "ICH": "any",
    "EDH": "epidural",
    "SDH": "subdural",
    "SAH": "subarachnoid",
    "IVH": "intraventricular",
    "IPH": "intraparenchymal"
}

def consensus_score(row, min_votes=2):
    """Return a copy of `row` extended with one consensus label per finding.

    For every code in `label_map`, the three reader entries `R1:<code>`,
    `R2:<code>` and `R3:<code>` are summed; the consensus label is 1 when the
    sum reaches `min_votes` and 0 otherwise.

    Parameters
    ----------
    row : pandas.Series
        One case, indexed by column name. Must contain the `R1:`/`R2:`/`R3:`
        entries for every code in `label_map`. Reader entries are presumably
        0/1 votes -- the function only relies on them being summable.
    min_votes : int, default 2
        Minimum summed votes for a positive consensus (2 of 3 = majority).

    Returns
    -------
    pandas.Series
        A new Series holding every entry of `row` plus the consensus labels.
        The input `row` is left untouched.

    Raises
    ------
    KeyError
        If any expected reader entry is missing from `row`.
    """
    # Work on a copy: mutating the object handed to a DataFrame.apply UDF is
    # documented by pandas as unsupported, and a direct caller should not see
    # its Series silently grow new entries.
    scored = row.copy()
    for code, label in label_map.items():
        votes = row[[f'R1:{code}', f'R2:{code}', f'R3:{code}']].sum()
        scored[label] = 1 if votes >= min_votes else 0
    return scored


In [8]:
# Build the consensus table: score each case, drop the per-reader columns
# (positions 1..18 of `targets`, i.e. everything after "name"), and rename
# the case-ID column to match the predictions table.
reader_columns = targets.columns[1:19]
targs = (
    targets
    .apply(consensus_score, axis=1)
    .drop(columns=reader_columns)
    .rename(columns={"name": "PatientID"})
)
targs.head()

Unnamed: 0,PatientID,any,epidural,subdural,subarachnoid,intraventricular,intraparenchymal
0,CQ500-CT-0,0,0,0,0,0,0
1,CQ500-CT-1,1,0,0,1,0,1
2,CQ500-CT-10,1,0,0,0,0,1
3,CQ500-CT-100,0,0,0,0,0,0
4,CQ500-CT-101,0,0,0,0,0,0


In [9]:
# Preview the predictions so they can be compared by eye with the consensus labels.
preds.head()

Unnamed: 0,PatientID,any,epidural,subdural,subarachnoid,intraventricular,intraparenchymal
0,CQ500-CT-0,0,0,0,0,0,0
1,CQ500-CT-1,1,0,0,1,1,1
2,CQ500-CT-10,1,0,0,0,0,1
3,CQ500-CT-100,0,0,0,0,0,0
4,CQ500-CT-101,0,0,0,0,0,0


In [12]:
# The metric calls in the following cells compare rows by position, so both
# tables must list the same cases in the same order.
assert preds["PatientID"].tolist() == targs["PatientID"].tolist()

In [13]:
# Accuracy for "any": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["any"], y_pred=preds["any"])

0.8907216494845361

In [14]:
# Accuracy for "epidural": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["epidural"], y_pred=preds["epidural"])

0.9835051546391752

In [15]:
# Accuracy for "subdural": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["subdural"], y_pred=preds["subdural"])

0.8989690721649485

In [16]:
# Accuracy for "subarachnoid": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["subarachnoid"], y_pred=preds["subarachnoid"])

0.9298969072164949

In [17]:
# Accuracy for "intraventricular": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["intraventricular"], y_pred=preds["intraventricular"])

0.9443298969072165

In [18]:
# Accuracy for "intraparenchymal": consensus labels are the ground truth, model output the prediction.
skm.accuracy_score(y_true=targs["intraparenchymal"], y_pred=preds["intraparenchymal"])

0.9257731958762887

In [23]:
# Join predictions and consensus labels per case; the overlapping label
# columns are disambiguated with "_pred" / "_targ" suffixes.
df = preds.merge(targs, on="PatientID", suffixes=("_pred", "_targ"))
df.head()

Unnamed: 0,PatientID,any_pred,epidural_pred,subdural_pred,subarachnoid_pred,intraventricular_pred,intraparenchymal_pred,any_targ,epidural_targ,subdural_targ,subarachnoid_targ,intraventricular_targ,intraparenchymal_targ
0,CQ500-CT-0,0,0,0,0,0,0,0,0,0,0,0,0
1,CQ500-CT-1,1,0,0,1,1,1,1,0,0,1,0,1
2,CQ500-CT-10,1,0,0,0,0,1,1,0,0,0,0,1
3,CQ500-CT-100,0,0,0,0,0,0,0,0,0,0,0,0
4,CQ500-CT-101,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
# Cases where the predicted "any" label disagrees with the consensus label.
filt = df["any_pred"].ne(df["any_targ"])
discord = df.loc[filt]
print(discord.shape)
discord.head()

(53, 13)


Unnamed: 0,PatientID,any_pred,epidural_pred,subdural_pred,subarachnoid_pred,intraventricular_pred,intraparenchymal_pred,any_targ,epidural_targ,subdural_targ,subarachnoid_targ,intraventricular_targ,intraparenchymal_targ
5,CQ500-CT-102,0,0,0,0,0,0,1,0,0,0,0,1
48,CQ500-CT-143,1,0,0,0,0,1,0,0,0,0,0,0
49,CQ500-CT-144,0,0,0,0,0,0,1,0,0,0,0,1
59,CQ500-CT-153,1,0,1,0,0,1,0,0,0,0,0,0
66,CQ500-CT-16,1,0,0,0,0,0,0,0,0,0,0,0
