In [1]:
import numpy as np
import random
import pandas as pd
from tqdm import tqdm 
import time

from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

import sktime

import matplotlib.pyplot as plt

In [2]:
def tnr_score(y_test, y_pred):
    y_t = np.array(y_test)
    y_p = np.array(y_pred)
    tn = np.sum((1-y_t)*(1-y_p))
    fp = np.sum(y_p*(1-y_t))
    if (tn + fp) == 0:
        return 0
    else:
        return tn / (tn + fp)

In [3]:
def prepareData(dataGroup, id_list, window_time):
    if dataGroup == "dataMimic":
        dataPath = "../Mimic/dataMimic/"
    elif dataGroup == "dataECMO":
        dataPath = "../dataECMO/"
    else:
        dataPath = "../dataRea/"

    finalDataPath = dataPath + "finalData/"

    data = []

    for encounterId in tqdm(id_list, total=len(id_list)):
        
        df_mask = pd.read_parquet(finalDataPath + encounterId + "/mask.parquet")
        df_dynamic = pd.read_parquet(finalDataPath + encounterId + "/dynamic.parquet")
        df_static = pd.read_parquet(finalDataPath + encounterId + "/static.parquet")

        # 0 --> HR
        # 1 --> SpO2
        # 2 --> PAD
        # 3 --> PAM
        # 4 --> PAS
        # 5 --> RR
        # 6 --> Temperature
        # 7 --> Diuresis
        # 8 --> SpO2/FiO2
        # 9 --> FiO2
        # 10 --> ECMO flow
        
        # idx_variables_kept = [0,1,3,4,6,7]
        if dataGroup == "dataECMO":
            # idx_variables_kept = [0,1,2,3,4,5,6,7,8,9]
            idx_variables_kept = [0,1,2,3,4,5,6,7,8]
        else:
            idx_variables_kept = [0,1,2,3,4,5,6,7,8]

        data_patient = df_dynamic.iloc[:(window_time*24), idx_variables_kept].to_numpy()
        
        for value in df_static.to_numpy()[0]:
            new_column = np.ones(shape=(window_time*24,1)) * value
            data_patient = np.append(data_patient, new_column, axis=1)
        
        # df_dynamic_masked = df_dynamic.iloc[:(window_time*24)].mask(df_mask.iloc[:(window_time*24)] == 0)

        # # idx_variables_kept = [0,1,3,4,6,7]
        # idx_variables_kept = list(range(0,10))
        # df_dynamic_masked = df_dynamic_masked.iloc[:,idx_variables_kept]
        # df_dynamic = df_dynamic.iloc[:,idx_variables_kept]


        data.append(data_patient)
    
    return np.array(data)


def prepareDeathList(dataGroup, window_time):
    target = []
    id_list = []
    
    if dataGroup == "dataMimic":
        dataPath = "../Mimic/dataMimic/"
        patients_df = pd.read_csv(dataPath + "ventiles.csv")

        nb_patients = len(patients_df)
    
        for index, row in tqdm(patients_df.iterrows(), total=nb_patients):
            stay_id = str(row["stay_id"])

            df_mask = pd.read_parquet(dataPath + "finalData/" + stay_id + "/mask.parquet")
            total_true_values = df_mask.values.sum()
            total_values = df_mask.values.size
            percentageMissingValues = (total_values-total_true_values)/total_values * 100

            if percentageMissingValues < 40:
                id_list.append(stay_id)
                
                label_death = int(row["label"])
                target.append(label_death)
            
        
        return target, id_list
    
    if dataGroup == "dataECMO":
        dataPath = "../dataECMO/"
    elif dataGroup == "dataRangueil":
        dataPath = "../dataRea/"

    
    patients_df = pd.read_parquet(dataPath + "patients.parquet")

    df_death = pd.read_csv(dataPath + "delais_deces.csv")
    
    nb_patients = len(patients_df)

    for _, row in tqdm(patients_df.iterrows(), total=nb_patients):
        encounterId = str(row["encounterId"])
        
        df_mask = pd.read_parquet(dataPath + "finalData/" + encounterId + "/mask.parquet")
        total_true_values = df_mask.values.sum()
        total_values = df_mask.values.size
        percentageMissingValues = (total_values-total_true_values)/total_values * 100
        
        withdrawal_date = pd.Timestamp(row["withdrawal_date"])
        installation_date = pd.Timestamp(row["installation_date"])
        total_time_hour = (withdrawal_date - installation_date).total_seconds() / 3600 + 4

        if total_time_hour >= window_time * 24 and percentageMissingValues < 40:
            id_list.append(encounterId)
            
            delai_sortie_deces = df_death.loc[df_death["encounterId"] == int(encounterId), "delai_sortie_deces"].to_numpy()[0]
            if delai_sortie_deces <= 1:
                target.append(1)
            else:
                target.append(0)
    
    return target, id_list

In [4]:
target_ECMO, id_list_ECMO = prepareDeathList("dataECMO", window_time=5)
data_ECMO = prepareData("dataECMO", id_list_ECMO, window_time=5)

print(f"ECMO dataset size: {len(target_ECMO)} , num_deceased: {np.sum(target_ECMO)}")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 189/189 [00:00<00:00, 400.93it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 153/153 [00:00<00:00, 164.16it/s]

ECMO dataset size: 153 , num_deceased: 55





In [5]:
target_Rangueil, id_list_Rangueil =  prepareDeathList("dataRangueil", window_time=5)
data_Rangueil = prepareData("dataRangueil", id_list_Rangueil, window_time=5)

print(f"Rea dataset size: {len(target_Rangueil)} , num_deceased: {np.sum(target_Rangueil)}")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2052/2052 [00:05<00:00, 399.25it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1697/1697 [00:10<00:00, 166.21it/s]


Rea dataset size: 1697 , num_deceased: 445


In [6]:
target_Mimic, id_list_Mimic = prepareDeathList("dataMimic", window_time=5)
data_Mimic = prepareData("dataMimic", id_list_Mimic, window_time=5)

print(f"Mimic dataset size: {len(target_Mimic)} , num_deceased: {np.sum(target_Mimic)}")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4265/4265 [00:09<00:00, 459.16it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4140/4140 [00:25<00:00, 164.77it/s]

Mimic dataset size: 4140 , num_deceased: 1049





In [7]:
target_MR = target_Mimic + target_Rangueil
id_list = id_list_Mimic + id_list_Rangueil

data_MR = np.concatenate((data_Mimic,data_Rangueil),axis=0)

print(f"dataset size mimic+rea: {len(target_MR)} , num_deceased: {np.sum(target_MR)}")

dataset size mimic+rea: 5837 , num_deceased: 1494


In [19]:
from sktime.classification.hybrid import HIVECOTEV2

data = data_ECMO
target = target_ECMO

test_activated = True
validation_activated = False

num_samples = len(target)
K = 10
sub_samples_size = num_samples//K

test_fold = 3
if not(test_activated):
    test_fold = -1
    
if validation_activated:
    val_fold = random.randint(0,9)
    while val_fold == test_fold:
        val_fold = random.randint(0,9)
else:
    val_fold = None

x_train = np.empty((0,np.shape(data)[1], np.shape(data)[2]))
y_train = []
x_test = []
y_test = []
for i in range(K):
    if i != (K-1): 
        current_target_fold = target[i*sub_samples_size:(i+1)*sub_samples_size]
        current_data_fold = data[i*sub_samples_size:(i+1)*sub_samples_size]
    else:
        current_target_fold = target[i*sub_samples_size:]
        current_data_fold = data[i*sub_samples_size:]

    if i == test_fold:
        x_test = current_data_fold
        y_test = current_target_fold
    elif i == val_fold:
        x_val = current_data_fold
        y_val = current_target_fold
    else:
        x_train = np.concatenate((x_train, current_data_fold))
        y_train = y_train + current_target_fold

y_train = np.array(y_train)

clf = HIVECOTEV2(verbose=2)

clf.fit(x_train, y_train)

STC  00:13:22 20/06/2024
STC train estimate  00:13:34 20/06/2024
STC weight = 0.4822530864197532


  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


DrCIF  01:03:47 20/06/2024
DrCIF train estimate  01:07:39 20/06/2024
DrCIF weight = 0.18090630036342067
Arsenal  01:07:47 20/06/2024
Arsenal train estimate  01:07:51 20/06/2024
Arsenal weight = 0.19753086419753083
TDE  03:09:29 20/06/2024
TDE train estimate  03:09:29 20/06/2024
TDE weight = 0.8627604300740265


In [20]:
aurocs = []
precisions = []
recalls = []
tnrs = []
f1s = []
accuracies = []

y_pred_proba = clf.predict_proba(x_test)[:, 1]
y_pred = clf.predict(x_test)

print(y_pred_proba)
print(y_test)

auroc = roc_auc_score(y_test, y_pred_proba)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
tnr = tnr_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, zero_division=0)
accuracy = accuracy_score(y_test, y_pred)

aurocs.append(auroc)
precisions.append(precision)
recalls.append(recall)
tnrs.append(tnr)
f1s.append(f1)
accuracies.append(accuracy)


print(f"AUROC: {np.mean(aurocs):.4f}")
print(f"Precision: {np.mean(precisions):.4f}")
print(f"Recall: {np.mean(recalls):.4f}")
print(f"Specificity: {np.mean(tnrs):.4f}")
print(f"Accuracy: {np.mean(accuracies):.4f}")
print(f"F1 Score: {np.mean(f1s):.4f}")

  warn(
  warn(


[0.50634943 0.55237927 0.17266451 0.33259745 0.19452021 0.14387742
 0.77276349 0.75183692 0.45452639 0.35226405 0.19096118 0.51585911
 0.37446066 0.32758509 0.33062681]
[1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]
AUROC: 0.4800
Precision: 0.3333
Recall: 0.2000
Specificity: 0.8000
Accuracy: 0.6000
F1 Score: 0.2500


In [14]:
aurocs = []
precisions = []
recalls = []
tnrs = []
f1s = []
accuracies = []

y_pred_proba = clf.predict_proba(data_Rangueil)[:, 1]
y_pred = clf.predict(data_Rangueil)

# print(y_pred_proba)
# print(target)

auroc = roc_auc_score(target_Rangueil, y_pred_proba)
precision = precision_score(target_Rangueil, y_pred, zero_division=0)
recall = recall_score(target_Rangueil, y_pred, zero_division=0)
tnr = tnr_score(target_Rangueil, y_pred)
f1 = f1_score(target_Rangueil, y_pred, zero_division=0)
accuracy = accuracy_score(target_Rangueil, y_pred)

aurocs.append(auroc)
precisions.append(precision)
recalls.append(recall)
tnrs.append(tnr)
f1s.append(f1)
accuracies.append(accuracy)


print(f"AUROC: {np.mean(aurocs):.4f}")
print(f"Precision: {np.mean(precisions):.4f}")
print(f"Recall: {np.mean(recalls):.4f}")
print(f"Specificity: {np.mean(tnrs):.4f}")
print(f"Accuracy: {np.mean(accuracies):.4f}")
print(f"F1 Score: {np.mean(f1s):.4f}")

  warn(
  warn(


AUROC: 0.5955
Precision: 0.3618
Recall: 0.2472
Specificity: 0.8450
Accuracy: 0.6883
F1 Score: 0.2937


In [None]:
num_train = 1
K = 10

save_path = "saved_models/test.pth"
load_path = "saved_models/cnn_1d_76_76.pth"


best_val_auroc_all_models = 0
best_model_state_dict = 0

aurocs_mean = []
precisions_mean = []
recalls_mean = []
tnrs_mean = []
accuracies_mean = []
f1s_mean = []   

for i in tqdm(range(num_train), total=num_train):
    
    aurocs = []
    precisions = []
    recalls = []
    tnrs = []
    accuracies = []
    f1s = []    
    
    for j in tqdm(range(1)):
        auroc, precision, recall, tnr, f1, accuracy, HL_score, best_val_auroc, state_dict  =  train_model(data=data_MR,
                                                                                    target=target_MR,
                                                                                    test_fold = j,
                                                                                    validation_activated=True,
                                                                                    test_activated = True,
                                                                                    num_epochs=50, 
                                                                                    model_name="InceptionTime",
                                                                                    verbose=True, 
                                                                                    save_path=save_path, 
                                                                                    save_model=True, 
                                                                                    plot_train_curves = True,
                                                                                    load_model_path = None)
        
        aurocs.append(auroc)
        precisions.append(precision)
        recalls.append(recall)
        tnrs.append(tnr)
        f1s.append(f1)
        accuracies.append(accuracy)
        
        print(f"test AUROC: {auroc:.4f}")
        print(f"test Precision: {precision:.4f}")
        print(f"test Recall: {recall:.4f}")
        print(f"test Specificity: {tnr:.4f}")
        print(f"test Accuracy: {accuracy:.4f}")
        print(f"test F1 Score: {f1:.4f}")
        print(f"test HL Score: {HL_score:.4f}")
        # if best_val_auroc >= best_val_auroc_all_models:
        #     aurocs = [auroc]
        #     precisions = [precision]
        #     recalls = [recall]
        #     tnrs = [tnr]
        #     accuracies = [accuracy]
        #     f1s = [f1]
        #     best_val_auroc_all_models = best_val_auroc
        #     best_model_state_dict = state_dict
            
            # print(f"New best val_auroc: {best_val_auroc_all_models}")
    
        # print(f"Test AUROC with best model: {np.mean(aurocs):.4f}")
    
    print(f"Mean AUROC: {np.mean(aurocs):.4f}")
    print(f"Mean Precision: {np.mean(precisions):.4f}")
    print(f"Mean Recall: {np.mean(recalls):.4f}")
    print(f"Mean Specificity: {np.mean(tnrs):.4f}")
    print(f"Mean Accuracy: {np.mean(accuracies):.4f}")
    print(f"Mean F1 Score: {np.mean(f1s):.4f}")
    print(f"SD Auroc: {np.std(aurocs):.4f}")

    aurocs_mean.append(np.mean(aurocs))
    precisions_mean.append(np.mean(precisions))
    recalls_mean.append(np.mean(recalls))
    tnrs_mean.append(np.mean(tnrs))
    accuracies_mean.append(np.mean(accuracies))
    f1s_mean.append(np.mean(f1s))
# print(f"best val_auroc: {best_val_auroc_all_models:.4f}")
# print(f"num_algos: {np.size(aurocs)}")

print(f"Total Mean AUROC: {np.mean(aurocs_mean):.4f}")
print(f"Total Mean Precision: {np.mean(precisions_mean):.4f}")
print(f"Total Mean Recall: {np.mean(recalls_mean):.4f}")
print(f"Total Mean Specificity: {np.mean(tnrs_mean):.4f}")
print(f"Total Mean Accuracy: {np.mean(accuracies_mean):.4f}")
print(f"Total Mean F1 Score: {np.mean(f1s_mean):.4f}")

In [None]:
num_features = np.size(data_ECMO,2)
num_features_dynamic = 9
num_features_static = 3

input_size = 1
hidden_size = 16
num_layers = 2
output_size = 1

# model = LSTMModel2(input_size, hidden_size, num_layers, output_size, num_features_dynamic, num_features_static)
# model = CNN_1D_1(num_features)
model = InceptionModel(num_blocks=2, in_channels=12, out_channels=16,
                           bottleneck_channels=2, kernel_sizes=41, 
                           use_residuals=True, num_pred_classes=1)

save_path = "./saved_models/test.pth"
model_state_dict = torch.load(save_path)
model.load_state_dict(model_state_dict)
model.eval()

# Convert data to PyTorch tensors
x_test_tensor = torch.tensor(data_ECMO, dtype=torch.float32)
y_test_tensor = torch.tensor(target_ECMO, dtype=torch.float32)

# Create DataLoader for training and testing sets
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

true_labels = []
predictions = []
predictions_binary = []

treshold = 0.5

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.permute(0, 2, 1)
        true_labels.extend(labels.numpy())

        outputs = nn.functional.sigmoid(model(inputs))
        predictions.extend(outputs.numpy())
        predictions_binary.extend((outputs.numpy() > treshold).astype(int))
        
        # print(np.round(np.array([p[0] for p in outputs.numpy()]), 1))
        # print(np.round(np.array(labels), 1))
auroc = roc_auc_score(true_labels, predictions)
precision = precision_score(true_labels, predictions_binary, zero_division=0)
recall = recall_score(true_labels, predictions_binary, zero_division=0)
tnr = tnr_score(true_labels, predictions_binary)
f1 = f1_score(true_labels, predictions_binary, zero_division=0)
accuracy = accuracy_score(true_labels, predictions_binary)

print(f"AUROC: {auroc:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Specificity: {tnr:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")

print(np.array(predictions_binary).squeeze())
print(np.array(true_labels).astype(int))

In [None]:
1/2*(0.6182+0.5565)