# Create a folder with the images of the testing dataset

In [61]:
import pandas as pd
df_sjd = pd.read_csv("csv_with_infos/df_patient_names_to_sjid.csv", index_col=0)

In [68]:
df_sjd.loc["patient9"]

patient_sjid    182-608-594
Name: patient9, dtype: object

In [8]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score

## Create the df (old, should be updated for the correct paths)

In [1]:
test_df = pd.read_csv("beton_files/test_data.csv")
test_indices = np.arange(0, len(test_df))

test_data_loader = Loader("beton_files/test_data.beton",
                batch_size=1,
                num_workers=os.cpu_count(),
                order=OrderOption.SEQUENTIAL,
                pipelines={
                  'images': [NDArrayDecoder(), ToTensor(), ToDevice("cuda", non_blocking=True)],
                  'patient_data': [NDArrayDecoder(), ToTensor(), ToDevice("cuda", non_blocking=True)],
                  'label': [BytesDecoder(), ToTensor(), ToDevice("cuda", non_blocking=True)]
                },
                batches_ahead=10)

NameError: name 'pd' is not defined

In [None]:
patient_idx_l = []
patient_mi_l = []

for idx, (images, _, label) in enumerate(test_data_loader):
    patient_name = "patient_"+str(idx)
    os.mkdir("test_dataset_img/"+patient_name)
    for i_artery, artery in enumerate(["lad", "lcx", "rca"]):
        for i_view, view in enumerate(["1", "2"]):
            img = images[0, i_artery, i_view, 0, :, :].cpu().numpy()
            plt.imsave("test_dataset_img/"+patient_name+"/"+artery+"_"+view+".png", img, cmap="gray")
    
    patient_idx_l.append(idx)
    patient_mi_l.append(label[0][-1].cpu().item())
    

In [21]:
df_mi = pd.DataFrame({'idx': patient_idx_l, 'mi': patient_mi_l})

In [22]:
df_mi.to_csv("test_dataset_img/df_mi.csv")

In [23]:
df_mi[df_mi["mi"]>0]

Unnamed: 0,idx,mi
21,21,1.0
35,35,1.0
43,43,1.0
44,44,1.0
58,58,1.0


In [24]:
test_df.iloc[21].patient_name

'patient30'

In [25]:
df_sjid = pd.read_csv("../cnn_and_patient_approach/df_patient_names_to_sjid.csv", index_col=0)
df_sjid.loc['patient30']

patient_sjid    546-157-601
Name: patient30, dtype: object

## Look at the physician prediction

In [23]:
medic_pred_df = pd.read_csv("../cnn_and_patient_approach/physician_prediction.csv", index_col=0) #

In [24]:
medic_pred_df.sample(3)

Unnamed: 0_level_0,MI (0/1)
Patient folder name,Unnamed: 1_level_1
patient_18,1
patient_7,1
patient_58,0


In [25]:
true_labels_df = pd.read_csv("../cnn_and_patient_approach/test_dataset_img/df_mi.csv", index_col=0)
true_labels_df["idx"] = "patient_"+ true_labels_df["idx"].astype(str)
true_labels_df = true_labels_df.set_index("idx")

In [26]:
true_labels_df.sample(3)

Unnamed: 0_level_0,mi
idx,Unnamed: 1_level_1
patient_38,0.0
patient_74,0.0
patient_40,0.0


In [27]:
merged = pd.merge(medic_pred_df, true_labels_df, left_index=True, right_index=True)

In [28]:
merged = merged.rename(columns={"MI (0/1)": "Medic Pred MI", "mi": "True MI"})

In [29]:
merged.sample(3)

Unnamed: 0,Medic Pred MI,True MI
patient_4,1,0.0
patient_74,1,0.0
patient_82,1,0.0


In [30]:
merged[merged["True MI"]==1]

Unnamed: 0,Medic Pred MI,True MI
patient_21,1,1.0
patient_35,1,1.0
patient_43,0,1.0
patient_44,0,1.0
patient_58,0,1.0


In [31]:
# remove some patients indicated by the doctor
merged = merged.drop(index=["patient_34", "patient_78", "patient_79"])

In [32]:
# percentage of true from doctor
medic_pred_df["MI (0/1)"].sum()/len(medic_pred_df["MI (0/1)"])

0.449438202247191

In [33]:
# true percentage of true 
merged["True MI"].sum()/len(merged["True MI"])

0.05813953488372093

In [34]:
f1_score(merged["True MI"], merged["Medic Pred MI"])

0.09523809523809523

In [35]:
recall_score(merged["True MI"], merged["Medic Pred MI"])

0.4

In [36]:
precision_score(merged["True MI"], merged["Medic Pred MI"])

0.05405405405405406

In [37]:
roc_auc_score(merged["True MI"], merged["Medic Pred MI"])

0.48395061728395067

In [38]:
# specificty
recall_score(merged["True MI"], merged["Medic Pred MI"], pos_label=0)

0.5679012345679012

## Random MI predictor

In [39]:
nb = len(merged)
nb_mi = merged["True MI"].sum()
nb_non_mi = nb - nb_mi
perc_mi = merged["True MI"].sum()/len(merged)

In [54]:
true_pos = nb_mi*perc_mi
false_pos = nb_non_mi*perc_mi
true_neg = nb_non_mi*(1-perc_mi)
false_neg = nb_mi*(1-perc_mi)

In [55]:
# f1 score
true_pos/(true_pos + 0.5*(false_pos+false_neg))

0.058139534883720936

In [56]:
# recall
true_pos/(true_pos+false_neg)

0.058139534883720936

In [57]:
# precision
true_pos/(true_pos+false_pos)

0.058139534883720936

In [58]:
# auc_roc
true_pos/(2*(true_pos+false_neg)) + true_neg/(2*(false_pos+true_neg))

0.5

In [59]:
# specificty
true_neg/(true_neg+false_pos)

0.9418604651162791

## Check the perf of the CNN

In [57]:
import os
import pandas as pd

import torch

from ffcv.loader import OrderOption
from ffcv.loader import Loader
from ffcv.fields.decoders import BytesDecoder, NDArrayDecoder
from ffcv.transforms import ToTensor, ToDevice

from datasets import NormalisePatientDate
from network import MiPredArteryLevel_Or_with_patient

In [58]:
def load_state_dict_pretrained_to_net(self, state_dict):
    # from https://discuss.pytorch.org/t/how-to-load-part-of-pre-trained-model/1113
 
    own_state = self.state_dict()
    for name, param in state_dict.items():
        if name not in own_state:
             continue
        if isinstance(param, torch.nn.parameter.Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        try:
            own_state[name].copy_(param)
        except RuntimeError as e:
            print("State dict {} from pretrained cannot be loaded into new network (network.py).\nError: {}".format(name, e))

In [66]:
train_configuration = {}
train_configuration["dropout"] = 0
train_configuration["nb_neur_per_hidden_layer_patient"] = [50,10]
train_configuration["dropout_patient_net"] = 0
train_configuration["batch_norm_patient"] = False
train_configuration["batch_size"] = 1

net = MiPredArteryLevel_Or_with_patient(train_configuration)
net = net.cuda()
load_state_dict_pretrained_to_net(net, torch.load("saved_networks/cnn_and_pat_test/6_0.26666666666666666.pt"))

# 6_0.26666666666666666.pt 29_0.16000000000000003.pt

Using cache found in /home/jovyan/.cache/torch/hub/pytorch_vision_v0.10.0
Using cache found in /home/jovyan/.cache/torch/hub/pytorch_vision_v0.10.0
Using cache found in /home/jovyan/.cache/torch/hub/pytorch_vision_v0.10.0


In [67]:
data_loader = Loader("beton_files/test_data.beton",
    batch_size=1,
    num_workers=os.cpu_count(),
    order=OrderOption.SEQUENTIAL,
    pipelines={
      'images': [NDArrayDecoder(), ToTensor(), ToDevice("cuda", non_blocking=True)],
      'patient_data': [NDArrayDecoder(), ToTensor(), NormalisePatientDate(True), ToDevice("cuda", non_blocking=True)],
      'label': [BytesDecoder(), ToTensor(), ToDevice("cuda", non_blocking=True)]
    },    
    batches_ahead=10,
    recompile=True
)

# df_ffcv = pd.read_csv("beton_files/test_data.csv")
# df_to_sjid = pd.read_csv("df_patient_names_to_sjid.csv")
# df_to_sjid["patient_name"] = df_to_sjid["patient_name"].str[7:].astype(int)
# df_to_sjid = df_to_sjid.set_index(["patient_name"])

label_l = []
pred_l = []

for idx, (data, patient_data, target) in enumerate(data_loader):
    print(idx)
    if idx != 34 and idx != 78 and idx != 79: # remove patient not considered by doctor
        # patient_true_idx = df_ffcv.iloc[idx]
        # patient_true_idx = int(patient_true_idx["patient_name"][7:])
        # sjid = df_to_sjid.loc[patient_true_idx].values
    
        data, patient_data = data.cuda(), patient_data.cuda()
        pred = net(data, patient_data)
    
        label_l.append(target[0][-1])
        pred_l.append((pred[0][0]>0.5).cpu().numpy()[0])
        
        del data, patient_data
        

Exception ignored in: <finalize object at 0x7f36045049a0; dead>
Traceback (most recent call last):
  File "/opt/conda/envs/ffcv/lib/python3.9/weakref.py", line 591, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/opt/conda/envs/ffcv/lib/python3.9/site-packages/numba/core/dispatcher.py", line 312, in finalizer
    for cres in overloads.values():
KeyError: (array(uint8, 1d, C), array(uint8, 1d, C))
Exception ignored in: <finalize object at 0x7f3604504620; dead>
Traceback (most recent call last):
  File "/opt/conda/envs/ffcv/lib/python3.9/weakref.py", line 591, in __call__
    return info.func(*info.args, **(info.kwargs or {}))
  File "/opt/conda/envs/ffcv/lib/python3.9/site-packages/numba/core/dispatcher.py", line 312, in finalizer
    for cres in overloads.values():
KeyError: (array(uint8, 1d, C), array(uint8, 1d, C))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88


### For best epoch

In [68]:
f1_score(merged["True MI"], pred_l)

0.1818181818181818

In [69]:
recall_score(merged["True MI"], pred_l)

0.2

In [70]:
precision_score(merged["True MI"], pred_l)

0.16666666666666666

In [71]:
roc_auc_score(merged["True MI"], pred_l)

0.5691358024691358

In [72]:
# specificity
recall_score(merged["True MI"], pred_l, pos_label=0)

0.9382716049382716

### For last epoch

In [61]:
f1_score(merged["True MI"], pred_l)

0.16666666666666666

In [62]:
recall_score(merged["True MI"], pred_l)

0.6

In [63]:
precision_score(merged["True MI"], pred_l)

0.0967741935483871

In [64]:
roc_auc_score(merged["True MI"], pred_l)

0.6271604938271605

In [65]:
# specificity
recall_score(merged["True MI"], pred_l, pos_label=0)

0.654320987654321

## Check the perfo of the transformer

In [1]:
from networks_implementation.transfo_and_pat_softmax_net import PatientLevelDNN_PatientDataSoftmax
import torch
from datasets_implementation.transfo_datasets import get_data_loaders
from init_net import init_net

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_config = {
# General
    # General
    "seed": 42, # seed to use to enforce the training-testing set separation
    "nb_cv": None, # if not using cross validation -> None, else specify the number of cross validation to use
    "save_best_net": True, # if true will save at each new F1_valid best score and at the end
    "load_network": None, # path to the network
    
    # Network structure
    "network_class": PatientLevelDNN_PatientDataSoftmax, # pytorch class to construct the network
    "dropout": 0.0 , # dropout for the main network
    "weights_init": "Xavier Uniform", # "Xavier Uniform", "Xavier Normal", "Kaiming Uniform" and "Kaiming Normal"
    
    # Transformer specific parameters
    "patch_size_l" : [64, 64, 64, 64], # list of the size of the patch extract for each kind of box (! for the network, size has to be same for all)
    "nb_patch_l": [32, 64, 128, 128], # list of the number of patches to extract for each kind of box (values can be different)
    
    # Patient network specific parameters (! also if patient network used inside of another network !)
    "dropout_patient_net": 0.0, # dropout for the patient network
    "nb_neur_per_hidden_layer_patient": [50, 10], # list with the number of neurons for each hidden layer
    "batch_norm_patient": False, # True or False
    "weights_init_patient": "Kaiming Uniform", # "Std", "Xavier Uniform", "Xavier Normal", "Kaiming Uniform" and "Kaiming Normal"
    
    # Dataset information
    "dataset_type": "transformer_patient", # "CNN", "transformer", "CNN_patient" and "transformer_patient"
    "balance_method": "oversample", # "no", "oversample" and "undersample"
    "train_test_ratio": 0.2, # ratio to use in testing vs in training (also in validation vs in training)
    "test": True, # False: remove test data and then separate train and valid. True: separate test and train (impossible to use CV)
    "normalise": False, # True or False (on images)
    "gaussian_blur": None, # probability 
    "random_rotation": 0.1, # probability
    "random_crop": 0.1, # probability
    # "random_color_modifs": 0.2, # for CNN, probability
    "patch_randomness": 0.1, # for transformers, probability to NOT take a sample on centerline
    "normalise_patient": True, # for patient network, normalise or not the data, True or False
    
    # Define training
    "n_epochs": 50, 
    "batch_size": 4, 
    "change_strategy_epoch_l": [-1], # list indicating at which epoch change the optimizer, loss and lr, start with -1 for the first one
    
    # Define the optimiser
    "optimizer_l": ["SGD"], # list of optimizer to use, "SGD", "Adam", "PESG", "PDSCA"
    "weight_decay": 0.00634788540665404,
    "lr_l": [0.0004493489445028419], # list of the lr
    "SGD_momentum": 0.6566222749612993 , # HP of the SGD
    # "PESG_gamma": None, # HP of the PSEG
    # "PESG_margin": None, # HP of the PSEG
    # "PESG_imratio": None, # HP of the PSEG
    # "Compo_gamma": None, # HP of the PDSCA
    # "Compo_margin": None, # HP of the PDSCA
    # "Compo_imratio": None, # HP of the PDSCA
    # "Compo_beta1": None, # HP of the PDSCA
    # "Compo_beta2": None, # HP of the PDSCA
    
    # Define the scheduler
    "scheduler_patience": 5, # nb of epochs without improvement before reducing lr
    "scheduler_factor": 0.1, # how much to reduce the lr when plateau (lr*=scheduler_factor)
    
    # Define the loss
    "criterion_l": ["BCE"], # list of optimizer to use, "BCE", "AUC", "Focal"
    "siamese_pred_loss_ratio": 7.399590987258929e-05, # ratio btw artery MI prediction and global MI prediction losses
    "arteries_pred_loss_ratio": 0.2071422613609661, # ratio btw siamese loss and global MI predicition losses
    "patient_data_loss_ratio": 0.09457471894183037, # ratio btw patient data prediction loss and global MI predicition losses
    # "focal_alpha": None, # HP of the focal loss
    # "focal_gamma": None, # HP of the focal loss
    # "focal_reduction": None, # HP of the focal loss
}

In [14]:
net, criterion, scheduler, optimizer = init_net(train_config)
net = net.cuda()
net.load_state_dict(torch.load("saved_networks/transfo_and_pat_test/49_0.3157894736842105.pt")) # 16_0.41379310344827586.pt

Nb of parameters is 25272638


<All keys matched successfully>

In [15]:
valid_data_loader, test_data_loader = get_data_loaders(train_config, None)

Nb of elements in train_data_loader 372
Nb of MI in train data 29
Nb of elements in valid_data_loader 96
Nb of MI in valid data 11


In [16]:
truth_l = []
pred_l = []

net = net.eval()

for batch_idx, batch in enumerate(test_data_loader):
    (data, patient_data, target, available_arteries) = batch 
    patient_data = patient_data.cuda()
    data[0][0], data[0][1], data[1][0], data[1][1], data[2][0], data[2][1] = data[0][0].cuda(), data[0][1].cuda(), data[1][0].cuda(), data[1][1].cuda(), data[2][0].cuda(), data[2][1].cuda()
    
    mi_target = target[:, -1].cpu().tolist()
    print(mi_target)
    
    pred = net(data, patient_data)
    pred = (pred[0]>0.5).cpu().tolist()
    try:
        pred = [pred[0][0], pred[1][0], pred[2][0], pred[3][0]]
    except:
        pred = [pred[0][0], pred[1][0]]
    print(pred)
    
    truth_l += mi_target
    pred_l += pred
    
    del patient_data, data, target, available_arteries, batch, pred
    torch.cuda.empty_cache()

[0.0, 0.0, 0.0, 0.0]
[False, False, False, True]
[0.0, 0.0, 1.0, 0.0]
[False, False, True, False]
[0.0, 0.0, 0.0, 0.0]
[False, False, True, False]
[1.0, 0.0, 0.0, 0.0]
[True, False, False, False]
[0.0, 0.0, 0.0, 0.0]
[True, False, False, False]
[0.0, 0.0, 0.0, 0.0]
[False, False, False, False]
[0.0, 0.0, 0.0, 0.0]
[False, False, True, True]
[0.0, 0.0, 0.0, 0.0]
[False, True, False, False]
[0.0, 0.0, 0.0, 0.0]
[False, True, False, False]
[0.0, 0.0, 0.0, 0.0]
[False, False, False, False]
[0.0, 0.0, 1.0, 0.0]
[False, False, True, False]
[0.0, 0.0, 0.0, 0.0]
[False, True, False, False]
[0.0, 0.0, 0.0, 0.0]
[False, True, False, False]
[0.0, 0.0, 0.0, 0.0]
[True, False, False, False]
[0.0, 1.0, 0.0, 0.0]
[False, False, True, False]
[1.0, 0.0, 0.0, 0.0]
[False, False, False, False]
[0.0, 1.0, 1.0, 0.0]
[False, False, True, False]
[0.0, 0.0, 0.0, 0.0]
[False, False, True, False]
[0.0, 1.0, 0.0, 0.0]
[False, False, False, True]
[0.0, 0.0, 0.0, 1.0]
[True, False, False, False]
[0.0, 0.0, 1.0, 0.

###  on best epoch

In [17]:
f1_score(truth_l, pred_l)

0.2352941176470588

In [18]:
recall_score(truth_l, pred_l)

0.36363636363636365

In [19]:
precision_score(truth_l, pred_l)

0.17391304347826086

In [20]:
roc_auc_score(truth_l, pred_l)

0.5673603504928807

In [21]:
# specificity
recall_score(truth_l, pred_l, pos_label=0)

0.7710843373493976

### on last epoch

In [9]:
f1_score(truth_l, pred_l)

0.411764705882353

In [10]:
recall_score(truth_l, pred_l)

0.6363636363636364

In [11]:
precision_score(truth_l, pred_l)

0.30434782608695654

In [12]:
roc_auc_score(truth_l, pred_l)

0.7217962760131434

In [13]:
# specificity
recall_score(truth_l, pred_l, pos_label=0)

0.8072289156626506

--------