## Load data

In [1]:
from dataAnalysis.DataAnalysis import DataAnalysis
import pandas as pd

# Read the raw table; the path is relative to the notebook's working directory
# and the first CSV row is used as the column header.
data = pd.read_csv(r"extdata/sbcdata.csv", header=0)
# DataAnalysis is project code. NOTE(review): judging by the getters used in
# later cells it prepares the training, test and "gw" sets — confirm in
# dataAnalysis/DataAnalysis.py.
data_analysis = DataAnalysis(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Training: 
Assessable data are 528101 cases and 1015074 CBCs
Control data are 527038 cases and 1013548 CBCs
Sepsis data are 1488 cases and 1526 CBCs
$$$$$$$$$$$$$$$$$$$$
Testing: 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Controls: 365794, Sepsis: 490
Assessable data are 180494 cases and 366284 CBCs
Control data are 180157 cases and 365794 CBCs
Sepsis data are 472 cases and 490 CBCs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Label'] = self.data['Diagnosis']


Controls: 437629, Sepsis: 448
Assessable data are 157922 cases and 438077 CBCs
Control data are 180157 cases and 437629 CBCs
Sepsis data are 438 cases and 448 CBCs


In [2]:
import torch

# Convert every split to tensors: labels become int64 (torch.long), features
# become float32 (torch.float).
y_train = torch.tensor(data_analysis.get_y_train(), dtype=torch.long)
X_train = torch.tensor(data_analysis.get_X_train(), dtype=torch.float)

y_test = torch.tensor(data_analysis.get_y_test(), dtype=torch.long)
X_test = torch.tensor(data_analysis.get_X_test(), dtype=torch.float)

# NOTE(review): "gw" presumably denotes the Greifswald external test set
# reported by the evaluation cells — confirm against DataAnalysis.
y_gw_test = torch.tensor(data_analysis.get_y_gw(), dtype=torch.long)
X_gw_test = torch.tensor(data_analysis.get_X_gw(), dtype=torch.float)

## Normalize data

In [3]:
def normalize(tensor, mean=None, std=None):
    """Standardise ``tensor`` column-wise to zero mean and unit variance.

    Args:
        tensor: Tensor whose first dimension indexes the samples.
        mean: Optional per-column mean to subtract. When omitted it is computed
            from ``tensor`` along dimension 0.
        std: Optional per-column standard deviation to divide by. When omitted
            it is computed from ``tensor`` along dimension 0.

    Returns:
        Tensor of the same shape as ``tensor`` holding ``(tensor - mean) / std``.
        Columns whose standard deviation is exactly zero are only centred
        instead of being divided by zero.
    """
    if mean is None:
        mean = torch.mean(tensor, dim=0)
    if std is None:
        std = torch.std(tensor, dim=0)
    # A constant column has std == 0; dividing by it would turn the whole
    # column into NaN, so such columns are divided by 1 instead.
    safe_std = torch.where(std == 0, torch.ones_like(std), std)
    return (tensor - mean) / safe_std

# Standardise the three feature matrices, each one with a separate
# normalize() call on that matrix alone.
X_train, X_test, X_gw_test = (
    normalize(features) for features in (X_train, X_test, X_gw_test)
)

## Train/Validation split

In [4]:
def true_indices_like(tensor):
    """Return an all-``True`` boolean mask sized to the first dimension of ``tensor``."""
    length = tensor.shape[0]
    return torch.ones(length, dtype=torch.bool)

def false_indices_like(tensor):
    """Return an all-``False`` boolean mask sized to the first dimension of ``tensor``."""
    length = tensor.shape[0]
    return torch.zeros(length, dtype=torch.bool)

def split(train_features, train_fraction=0.8):
    """Build complementary boolean masks for a sequential train/validation split.

    The first ``round(n * train_fraction)`` samples are assigned to training and
    the remaining ones to validation. Samples are not shuffled, so the split
    follows the existing order of ``train_features``.

    Args:
        train_features: Tensor whose first dimension indexes the samples.
        train_fraction: Share of samples assigned to training, within [0, 1].
            Defaults to 0.8.

    Returns:
        dict with two 1-D boolean tensors under the keys ``"train"`` and
        ``"val"``; every sample is ``True`` in exactly one of them.

    Raises:
        ValueError: If ``train_fraction`` lies outside [0, 1].
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be within [0, 1], got {train_fraction}")

    n_samples = train_features.shape[0]
    max_index = round(n_samples * train_fraction)

    # Build the boolean mask directly instead of filling a float tensor and
    # casting it afterwards; the validation mask is simply its complement.
    train = torch.zeros(n_samples, dtype=torch.bool)
    train[:max_index] = True
    return {
        "train": train,
        "val": ~train,
    }
# Work on copies of the full training tensors, then carve the training and
# validation parts out of those copies with the boolean masks from split().
X_train_clone, y_train_clone = X_train.clone(), y_train.clone()
train_data = split(X_train_clone)

train_mask, val_mask = train_data["train"], train_data["val"]

X_train, X_val = X_train_clone[train_mask], X_train_clone[val_mask]
y_train, y_val = y_train_clone[train_mask], y_train_clone[val_mask]

## Define model

In [5]:
import torch.nn.functional as F
from torch.nn import Linear
import torch
from dataAnalysis.Constants import FEATURES

class NeuralNetwork(torch.nn.Module):
    """Feed-forward classifier with one hidden layer that outputs raw logits.

    The input is projected by two parallel linear layers whose outputs are
    summed, L2-normalised per sample, passed through ReLU and batch
    normalisation, and finally mapped to ``out_channels`` logits by two parallel
    output layers whose results are summed.
    """

    def __init__(self, hidden_dim = 128, out_channels = 1, input_dim = None):
        """Create the layers.

        Args:
            hidden_dim: Width of the hidden representation.
            out_channels: Number of logits produced per sample.
            input_dim: Number of input features. Defaults to ``len(FEATURES)``
                when omitted.
        """
        super(NeuralNetwork, self).__init__()

        if input_dim is None:
            input_dim = len(FEATURES)
        self.lin_0 = Linear(input_dim, hidden_dim)
        self.lin_1 = Linear(input_dim, hidden_dim)
        self.lin_end_0 = Linear(hidden_dim, out_channels)
        self.lin_end_1 = Linear(hidden_dim, out_channels)
        self.batchnorm_1 = torch.nn.BatchNorm1d(hidden_dim)

    def forward(self, x):
        """Return the raw logits for the batch ``x``.

        Args:
            x: Float tensor whose last dimension has ``input_dim`` entries.

        Returns:
            Tensor of logits whose last dimension has ``out_channels`` entries.
        """
        x = self.lin_0(x) + self.lin_1(x)
        # Scale every sample's hidden vector to unit L2 norm.
        x = F.normalize(x, p=2., dim=-1)
        x = torch.relu(x)
        x = self.batchnorm_1(x)
        x = self.lin_end_0(x) + self.lin_end_1(x)
        return x

    def predict_proba(self, x):
        """Return class probabilities for a binary (``out_channels == 1``) model.

        Switches the module to evaluation mode and runs without gradient
        tracking.

        Args:
            x: Float tensor of shape ``(n_samples, input_dim)``.

        Returns:
            Tensor of shape ``(n_samples, 2)``: column 0 holds the probability
            of the negative class, column 1 that of the positive class.
        """
        with torch.inference_mode():
            self.eval()
            logits = self.forward(x)
            # Drop only the trailing channel axis. A bare squeeze() would also
            # remove a batch axis of size 1 and break single-sample inputs.
            scores = torch.sigmoid(logits.squeeze(-1))
            return torch.stack((1 - scores, scores), dim=1)

    def predict(self, x):
        """Return hard labels (0. or 1.) by rounding the positive-class probability."""
        return torch.round(self.predict_proba(x)[:, 1])

## Shift data to device

In [6]:
# Prefer the first CUDA device and fall back to the CPU when none is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move every feature/label pair onto the selected device.
X_train, y_train = X_train.to(device), y_train.to(device)
X_val, y_val = X_val.to(device), y_val.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)
X_gw_test, y_gw_test = X_gw_test.to(device), y_gw_test.to(device)

# Positive-class weight handed to the BCE loss as ``pos_weight``.
# NOTE(review): 664 presumably approximates the control:sepsis ratio of the
# training data (1013548 / 1526 in the load output) — confirm.
WEIGHT = torch.tensor([664]).to(device)

print("Data shifted to the device " + str(device))

Data shifted to the device cuda:0


## Model-Wrapper class

In [7]:
import torch 

class ModelWrapper():
    """Owns a NeuralNetwork plus its Adam optimizer and trains it with early stopping.

    Training is full-batch on the module-level ``X_train``/``y_train`` tensors;
    after every epoch the loss on ``X_val``/``y_val`` is computed. Training
    stops once the validation loss has risen for ``BREAKING_THRESHOLD``
    consecutive epochs, or after ``MAX_EPOCHS`` epochs.
    """

    def __init__(self):
        """Create the model on ``device``, its optimizer and the early-stopping state."""
        self.LEARNING_RATE = 3e-4
        self.MAX_EPOCHS = 10000

        self.model = NeuralNetwork(hidden_dim = 128, out_channels=1)
        self.model = self.model.to(device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.LEARNING_RATE,betas=(0.9, 0.999), eps=1e-08)

        # Start from +inf so the very first validation loss is never counted as
        # an increase; an initial value of 0 would use up one unit of patience
        # before any comparison with a real loss has taken place.
        self.last_loss = float("inf")
        # Number of consecutive epochs in which the validation loss went up.
        self.increased_loss = 0
        self.BREAKING_THRESHOLD = 5

    def validate(self):
        """Compute the validation loss and update the early-stopping counter.

        The counter is incremented when the loss is higher than in the previous
        epoch and reset to zero otherwise.
        """
        with torch.inference_mode():
            self.model.eval()
            out = self.model(X_val)
            # squeeze(-1) drops only the channel axis, so a validation set with
            # a single sample keeps its batch axis and still matches y_val.
            loss = F.binary_cross_entropy_with_logits(out.squeeze(-1), y_val.type(torch.float32),
                                                      pos_weight=WEIGHT)
            current_loss = loss.item()
            if current_loss > self.last_loss:
                self.increased_loss += 1
            else:
                self.increased_loss = 0
            self.last_loss = current_loss

    def train(self):
        """Run full-batch training until early stopping triggers or MAX_EPOCHS is reached."""
        for epoch in range(self.MAX_EPOCHS):
            # validate() switches to eval mode, so training mode is restored
            # at the start of every epoch.
            self.model.train()
            self.optimizer.zero_grad()
            out = self.model(X_train)
            loss = F.binary_cross_entropy_with_logits(out.squeeze(-1), y_train.type(torch.float32),
                                                      pos_weight=WEIGHT)
            loss.backward()
            self.optimizer.step()
            self.validate()

            if self.increased_loss >= self.BREAKING_THRESHOLD:
                print(f"Breaked at {str(epoch)}")
                break

    def get_model(self):
        """Return the wrapped NeuralNetwork instance."""
        return self.model

In [8]:
# Build a fresh wrapper (model + optimizer) and train it once; training stops
# via the wrapper's early-stopping rule or after its MAX_EPOCHS.
modelWrapper = ModelWrapper()
modelWrapper.train()

0
tensor(1.3719, device='cuda:0')
1
tensor(1.3706, device='cuda:0')
2
tensor(1.3693, device='cuda:0')
3
tensor(1.3679, device='cuda:0')
4
tensor(1.3664, device='cuda:0')
5
tensor(1.3649, device='cuda:0')
6
tensor(1.3633, device='cuda:0')
7
tensor(1.3616, device='cuda:0')
8
tensor(1.3598, device='cuda:0')
9
tensor(1.3579, device='cuda:0')
10
tensor(1.3558, device='cuda:0')
11
tensor(1.3537, device='cuda:0')
12
tensor(1.3515, device='cuda:0')
13
tensor(1.3491, device='cuda:0')
14
tensor(1.3466, device='cuda:0')
15
tensor(1.3440, device='cuda:0')
16
tensor(1.3412, device='cuda:0')
17
tensor(1.3382, device='cuda:0')
18
tensor(1.3351, device='cuda:0')
19
tensor(1.3318, device='cuda:0')
20
tensor(1.3284, device='cuda:0')
21
tensor(1.3247, device='cuda:0')
22
tensor(1.3209, device='cuda:0')
23
tensor(1.3168, device='cuda:0')
24
tensor(1.3126, device='cuda:0')
25
tensor(1.3081, device='cuda:0')
26
tensor(1.3034, device='cuda:0')
27
tensor(1.2985, device='cuda:0')
28
tensor(1.2934, device='cuda

tensor(0.8926, device='cuda:0')
235
tensor(0.8923, device='cuda:0')
236
tensor(0.8919, device='cuda:0')
237
tensor(0.8916, device='cuda:0')
238
tensor(0.8913, device='cuda:0')
239
tensor(0.8909, device='cuda:0')
240
tensor(0.8906, device='cuda:0')
241
tensor(0.8903, device='cuda:0')
242
tensor(0.8900, device='cuda:0')
243
tensor(0.8897, device='cuda:0')
244
tensor(0.8894, device='cuda:0')
245
tensor(0.8891, device='cuda:0')
246
tensor(0.8887, device='cuda:0')
247
tensor(0.8884, device='cuda:0')
248
tensor(0.8881, device='cuda:0')
249
tensor(0.8878, device='cuda:0')
250
tensor(0.8876, device='cuda:0')
251
tensor(0.8873, device='cuda:0')
252
tensor(0.8870, device='cuda:0')
253
tensor(0.8867, device='cuda:0')
254
tensor(0.8864, device='cuda:0')
255
tensor(0.8862, device='cuda:0')
256
tensor(0.8859, device='cuda:0')
257
tensor(0.8857, device='cuda:0')
258
tensor(0.8854, device='cuda:0')
259
tensor(0.8852, device='cuda:0')
260
tensor(0.8849, device='cuda:0')
261
tensor(0.8847, device='cuda:

tensor(0.8529, device='cuda:0')
463
tensor(0.8528, device='cuda:0')
464
tensor(0.8526, device='cuda:0')
465
tensor(0.8525, device='cuda:0')
466
tensor(0.8524, device='cuda:0')
467
tensor(0.8523, device='cuda:0')
468
tensor(0.8522, device='cuda:0')
469
tensor(0.8521, device='cuda:0')
470
tensor(0.8520, device='cuda:0')
471
tensor(0.8519, device='cuda:0')
472
tensor(0.8518, device='cuda:0')
473
tensor(0.8517, device='cuda:0')
474
tensor(0.8516, device='cuda:0')
475
tensor(0.8515, device='cuda:0')
476
tensor(0.8515, device='cuda:0')
477
tensor(0.8514, device='cuda:0')
478
tensor(0.8513, device='cuda:0')
479
tensor(0.8512, device='cuda:0')
480
tensor(0.8511, device='cuda:0')
481
tensor(0.8510, device='cuda:0')
482
tensor(0.8510, device='cuda:0')
483
tensor(0.8509, device='cuda:0')
484
tensor(0.8508, device='cuda:0')
485
tensor(0.8507, device='cuda:0')
486
tensor(0.8506, device='cuda:0')
487
tensor(0.8505, device='cuda:0')
488
tensor(0.8504, device='cuda:0')
489
tensor(0.8503, device='cuda:

In [9]:
# Keep a direct handle on the trained network held by the wrapper.
model = modelWrapper.get_model()

## Shift data and model back to CPU for evaluation

In [9]:
# Bring both test sets back to host memory for evaluation.
y_test = y_test.cpu()
X_test = X_test.cpu()

y_gw_test = y_gw_test.cpu()
X_gw_test = X_gw_test.cpu()
# The model itself is NOT moved here (the line below is disabled); the
# repeated-run cell further down calls .cpu() on each freshly trained model.
# model = model.cpu()

## Evaluation

In [10]:
from dataAnalysis.Metrics import Evaluation

# Evaluation is project code (dataAnalysis.Metrics); it is handed the CPU
# copies of both test sets and is reused by the repeated-run cell below.
evaluation = Evaluation(y_test, y_gw_test, X_test, X_gw_test)
# Single-model evaluation is currently disabled. NOTE(review): `model` still
# lives on `device` at this point, so re-enabling these calls presumably
# requires moving it to the CPU first — confirm.
# evaluation.plot_confusion_matrix(model)
# evaluation.get_df_metrics(model)

  warn(f"Failed to load image Python extension: {e}")


## Error evaluation

In [11]:
import time

# Retrain the model from scratch many times to see how much the metrics and
# the training time vary between runs.
dfs = []    # one metrics frame per run
times = []  # wall-clock training time per run, in seconds
for i in range(100):
    print(20*"%")
    start = time.perf_counter()
    modelWrapper = ModelWrapper()
    modelWrapper.train()
    # Measure once so the printed and the stored duration are the same value
    # and the stored one does not include the time spent printing.
    elapsed = time.perf_counter() - start
    print(elapsed)
    times.append(elapsed)

    # The evaluation tensors live on the CPU, so the model has to follow.
    model = modelWrapper.get_model().cpu()
    metrics = evaluation.get_df_metrics(model)
    print(metrics)
    # Keep the metrics so the later cell that iterates over `dfs` has data.
    dfs.append(metrics)

%%%%%%%%%%%%%%%%%%%%
0
tensor(1.3476, device='cuda:0')
1
tensor(1.3476, device='cuda:0')
2
tensor(1.3475, device='cuda:0')
3
tensor(1.3473, device='cuda:0')
4
tensor(1.3472, device='cuda:0')
5
tensor(1.3469, device='cuda:0')
6
tensor(1.3466, device='cuda:0')
7
tensor(1.3462, device='cuda:0')
8
tensor(1.3457, device='cuda:0')
9
tensor(1.3451, device='cuda:0')
10
tensor(1.3443, device='cuda:0')
11
tensor(1.3435, device='cuda:0')
12
tensor(1.3425, device='cuda:0')
13
tensor(1.3414, device='cuda:0')
14
tensor(1.3401, device='cuda:0')
15
tensor(1.3387, device='cuda:0')
16
tensor(1.3371, device='cuda:0')
17
tensor(1.3353, device='cuda:0')
18
tensor(1.3333, device='cuda:0')
19
tensor(1.3312, device='cuda:0')
20
tensor(1.3289, device='cuda:0')
21
tensor(1.3263, device='cuda:0')
22
tensor(1.3236, device='cuda:0')
23
tensor(1.3207, device='cuda:0')
24
tensor(1.3175, device='cuda:0')
25
tensor(1.3142, device='cuda:0')
26
tensor(1.3106, device='cuda:0')
27
tensor(1.3068, device='cuda:0')
28
tensor

tensor(0.8710, device='cuda:0')
235
tensor(0.8705, device='cuda:0')
236
tensor(0.8700, device='cuda:0')
237
tensor(0.8695, device='cuda:0')
238
tensor(0.8690, device='cuda:0')
239
tensor(0.8685, device='cuda:0')
240
tensor(0.8680, device='cuda:0')
241
tensor(0.8676, device='cuda:0')
242
tensor(0.8671, device='cuda:0')
243
tensor(0.8667, device='cuda:0')
244
tensor(0.8662, device='cuda:0')
245
tensor(0.8658, device='cuda:0')
246
tensor(0.8653, device='cuda:0')
247
tensor(0.8649, device='cuda:0')
248
tensor(0.8645, device='cuda:0')
249
tensor(0.8640, device='cuda:0')
250
tensor(0.8636, device='cuda:0')
251
tensor(0.8632, device='cuda:0')
252
tensor(0.8627, device='cuda:0')
253
tensor(0.8623, device='cuda:0')
254
tensor(0.8618, device='cuda:0')
255
tensor(0.8614, device='cuda:0')
256
tensor(0.8609, device='cuda:0')
257
tensor(0.8605, device='cuda:0')
258
tensor(0.8600, device='cuda:0')
259
tensor(0.8596, device='cuda:0')
260
tensor(0.8592, device='cuda:0')
261
tensor(0.8588, device='cuda:

KeyboardInterrupt: 

In [9]:
# Print the training duration of every completed run (seconds), one per line.
for t in times:
    print(t)

23.67472767829895
21.573588609695435
20.657383680343628
21.805586338043213
14.168943166732788
17.08205246925354
19.81443452835083
14.167516469955444
19.297861576080322
16.865254402160645
24.719831943511963
20.95818042755127
20.21818208694458
23.429773330688477
21.512087106704712
18.857051134109497
17.23066544532776
23.98099374771118
17.562803268432617
17.375792741775513
19.518230438232422
15.718163013458252
23.65390658378601
18.5971577167511
21.43760585784912
16.752075672149658
26.530201196670532
16.751776218414307
17.858837842941284
18.299582719802856
19.183961629867554
16.234987497329712
20.699447870254517
21.585558891296387
27.785765886306763
22.212035179138184
16.67612075805664
23.539618492126465
26.52658176422119
21.21462321281433
19.78019118309021
11.623957395553589
23.57751226425171
14.094248533248901
21.586645364761353
16.606101274490356
16.677392721176147
21.768888473510742
20.774094820022583
19.627937078475952
17.6001558303833
17.158462047576904
17.930248975753784
18.00478982

In [103]:
# Print the metrics collected in `dfs`, one entry per completed run.
for df in dfs:
    print(df)

         NAME       MCC  F1-Macro     AUROC     AUPRC
0     Leipzig  0.053656  0.447750  0.885764  0.015580
1  Greifswald  0.039041  0.449935  0.817885  0.006255
         NAME       MCC  F1-Macro     AUROC    AUPRC
0     Leipzig  0.052468  0.444576  0.882996  0.01651
1  Greifswald  0.039676  0.447266  0.818936  0.00609
         NAME       MCC  F1-Macro     AUROC     AUPRC
0     Leipzig  0.051957  0.447225  0.880128  0.017834
1  Greifswald  0.038441  0.451215  0.814695  0.006421
         NAME       MCC  F1-Macro     AUROC    AUPRC
0     Leipzig  0.051493  0.446364  0.882117  0.01865
1  Greifswald  0.037578  0.448316  0.817028  0.00664
         NAME       MCC  F1-Macro     AUROC     AUPRC
0     Leipzig  0.050606  0.445684  0.878325  0.016797
1  Greifswald  0.038485  0.448404  0.810973  0.005983
         NAME       MCC  F1-Macro     AUROC     AUPRC
0     Leipzig  0.051770  0.448814  0.878872  0.018400
1  Greifswald  0.037534  0.451517  0.813145  0.007105
         NAME       MCC  F1-Macro 