In [1]:
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression


from collections import Counter
import pickle
import joblib

import data_splitter as ds
import data_loader as dl
import feature_extraction as fex
import run_model as rm

2023-04-15 00:17:49.373172: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
%load_ext autoreload
%autoreload 2

# Hyperparameter search
Using 25 % of the training set and the PCA model trained on fold 0 as feature extractor

## Image resolution 256 x 256

In [51]:
X = np.load('sample_flatten_X.npy')
y = np.load('sample_flatten_y.npy')

In [7]:
Counter(y)

Counter({0.0: 8570, 1.0: 181})

In [8]:
nmf = pickle.load(open('../feature_extractors/NMF_26_42_256_0.pkl', 'rb'))

logit = LogisticRegression(
    warm_start = True,
    solver = 'saga',
    max_iter = 10000,
    l1_ratio = 0.5,
    random_state = 42
    )

pipeline = Pipeline([
    ('feature_extraction', nmf),
    ('classify', logit)
])

           
param_grid = {
    'classify__C': np.logspace(0, -4, num=5, base=10, endpoint=True),
    'classify__penalty': ['l1', 'l2', 'elasticnet'],
    'classify__tol': np.logspace(0, -4, num=5, base=10, endpoint=True)
}

In [10]:
grid = rm.hp_search(pipeline, param_grid, random_state=42, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate42_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l2', 'classify__C': 0.01}
Best average precision score:  0.03382606654759592


['models/nmf_logit_hp_search_256_randomstate42_minres800.pkl']

In [11]:
grid = rm.hp_search(pipeline, param_grid, random_state=42, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate42_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 0.0001, 'classify__penalty': 'l1', 'classify__C': 1.0}
Best average precision score:  0.02755836290144546


['models/nmf_logit_hp_search_256_randomstate42_minres1600.pkl']

In [12]:
grid = rm.hp_search(pipeline, param_grid, random_state=0, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate0_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.03218810258899793


['models/nmf_logit_hp_search_256_randomstate0_minres800.pkl']

In [13]:
grid = rm.hp_search(pipeline, param_grid, random_state=0, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate0_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.028430576364813755


['models/nmf_logit_hp_search_256_randomstate0_minres1600.pkl']

In [14]:
grid = rm.hp_search(pipeline, param_grid, random_state=101, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate101_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 0.0001, 'classify__penalty': 'l2', 'classify__C': 0.0001}
Best average precision score:  0.03956783609070953


['models/nmf_logit_hp_search_256_randomstate101_minres800.pkl']

In [15]:
grid = rm.hp_search(pipeline, param_grid, random_state=101, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate101_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 0.0001, 'classify__penalty': 'l1', 'classify__C': 1.0}
Best average precision score:  0.026296751079033898


['models/nmf_logit_hp_search_256_randomstate101_minres1600.pkl']

In [16]:
grid = rm.hp_search(pipeline, param_grid, random_state=300, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate300_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l2', 'classify__C': 0.01}
Best average precision score:  0.03270777931233879


['models/nmf_logit_hp_search_256_randomstate300_minres800.pkl']

In [17]:
grid = rm.hp_search(pipeline, param_grid, random_state=300, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate300_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l2', 'classify__C': 0.01}
Best average precision score:  0.027100621556824828


['models/nmf_logit_hp_search_256_randomstate300_minres1600.pkl']

In [18]:
grid = rm.hp_search(pipeline, param_grid, random_state=88, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate88_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.026999222229248897


['models/nmf_logit_hp_search_256_randomstate88_minres800.pkl']

In [9]:
grid = rm.hp_search(pipeline, param_grid, random_state=88, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate88_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l1', 'classify__C': 1.0}
Best average precision score:  0.026610921702653213


['models/nmf_logit_hp_search_256_randomstate88_minres1600.pkl']

In [10]:
grid = rm.hp_search(pipeline, param_grid, random_state=555, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate555_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.028779218087969967


['models/nmf_logit_hp_search_256_randomstate555_minres800.pkl']

In [11]:
grid = rm.hp_search(pipeline, param_grid, random_state=555, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate555_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.026549142177463597


['models/nmf_logit_hp_search_256_randomstate555_minres1600.pkl']

In [12]:
grid = rm.hp_search(pipeline, param_grid, random_state=699, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate699_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.03321318102604728


['models/nmf_logit_hp_search_256_randomstate699_minres800.pkl']

In [8]:
grid = rm.hp_search(pipeline, param_grid, random_state=699, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate699_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l1', 'classify__C': 1.0}
Best average precision score:  0.02715812662718258


['models/nmf_logit_hp_search_256_randomstate699_minres1600.pkl']

In [9]:
grid = rm.hp_search(pipeline, param_grid, random_state=123, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate123_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.027654625587905952


['models/nmf_logit_hp_search_256_randomstate123_minres800.pkl']

In [10]:
grid = rm.hp_search(pipeline, param_grid, random_state=123, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate123_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l2', 'classify__C': 0.01}
Best average precision score:  0.0221005536057585


['models/nmf_logit_hp_search_256_randomstate123_minres1600.pkl']

In [11]:
grid = rm.hp_search(pipeline, param_grid, random_state=321, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate321_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l2', 'classify__C': 0.01}
Best average precision score:  0.03845321922425294


['models/nmf_logit_hp_search_256_randomstate321_minres800.pkl']

In [12]:
grid = rm.hp_search(pipeline, param_grid, random_state=321, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate321_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.027115246289744065


['models/nmf_logit_hp_search_256_randomstate321_minres1600.pkl']

In [13]:
grid = rm.hp_search(pipeline, param_grid, random_state=18, min_resources=800, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate18_minres800.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 800
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 10
n_resources: 800
Fitting 5 folds for each of 10 candidates, totalling 50 fits




----------
iter: 1
n_candidates: 2
n_resources: 4000
Fitting 5 folds for each of 2 candidates, totalling 10 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'elasticnet', 'classify__C': 1.0}
Best average precision score:  0.023245152277711425


['models/nmf_logit_hp_search_256_randomstate18_minres800.pkl']

In [14]:
grid = rm.hp_search(pipeline, param_grid, random_state=18, min_resources=1600, verbose=1)

#save your model or results
joblib.dump(grid, '../hyperparameter_search/nmf_logit_hp_search_256_randomstate18_minres1600.pkl')

n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 1600
max_resources_: 8751
aggressive_elimination: True
factor: 5
----------
iter: 0
n_candidates: 5
n_resources: 1600
Fitting 5 folds for each of 5 candidates, totalling 25 fits




----------
iter: 1
n_candidates: 1
n_resources: 8000
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Best hyperparameters:  {'classify__tol': 1.0, 'classify__penalty': 'l1', 'classify__C': 1.0}
Best average precision score:  0.02883886406403981


['models/nmf_logit_hp_search_256_randomstate18_minres1600.pkl']

# Training Logistic regression
Using 5-fold cross validation

Evaluation metrics:
- **Average precision** the main metric (summarizes a precision-recall curve (PR AUC) as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight)
- F1-score
- Confustion matrix (true positives/false positives/true negatives/false negatives)
- ROC AUC
- Accuracy
- Precision
- Recall
- Brier score loss


In [3]:
metadata = pd.read_csv('train.csv')
len(metadata.image_id.unique())

54706

In [4]:
splitter = ds.DataSplitter(verbose=True)

Total patient_id in training set:  9530
Total patient_id in test set:  2383
Total image_id in training set:  43767
Total image_id in test set:  10939
Total patient_id in training set:  7624
Total patient_id in calibration set:  1906
Total image_id in training set:  35003
Total image_id in calibration set:  8764


In [5]:
# image id of all sample training set after split to calib and train set

train_img_ids = [id for k, v in splitter.trainset.items() for id in v]
print(len(train_img_ids))

35003


In [6]:
# splitter.trainset is the training set in format of {patient_id: [image_id, ...]}
train_ids = splitter.trainset.keys()
label_by_img = splitter.labels

# getting cross-validation folds from training set
RANDOM_STATE = 42
cv_patient, cv_img = splitter.get_cv(n_splits=5, random_state=RANDOM_STATE)

Splitting training set into 5 stratified k-folds...
--Fold:  0
----Total patient_id in training set:  6099
----Total image_id in training set:  28031
----Total patient_id in validation set:  1525
----Total image_id in validation set:  6972
--Fold:  1
----Total patient_id in training set:  6099
----Total image_id in training set:  28016
----Total patient_id in validation set:  1525
----Total image_id in validation set:  6987
--Fold:  2
----Total patient_id in training set:  6099
----Total image_id in training set:  27937
----Total patient_id in validation set:  1525
----Total image_id in validation set:  7066
--Fold:  3
----Total patient_id in training set:  6099
----Total image_id in training set:  28025
----Total patient_id in validation set:  1525
----Total image_id in validation set:  6978
--Fold:  4
----Total patient_id in training set:  6100
----Total image_id in training set:  28003
----Total patient_id in validation set:  1524
----Total image_id in validation set:  7000


## Image resolution 256 x 256

**Best hyperparameters**: 
- C = 1
- penalty = elasticnet
- tol = 1

**Constant hyperparameters**:
- warm_start = True,
- solver = 'saga',
- max_iter = 10000,
- l1_ratio = 0.5,
- random_state = 42

In [10]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (256, 256, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.021667630085779266, 'std': 0.010736457852631233}, 'brier_loss': {'mean': 0.020401348617941756, 'std': 0.006172635235136042}, 'roc_auc': {'mean': 0.39907377165581764, 'std': 0.10891700421073294}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.50773360491468}, 'accuracy': {'mean': 0.9792154566744731, 'std': 0.006380121404479906}}
---Saving model...
Fold:  1
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.02273620739525587, 'std': 0.007491900886106375}, 'brier_loss': {'mean': 0.021500180448096522, 'std': 0.004752503206677282}, 'roc_auc': {'mean': 0.43313495032104793, 'std': 0.08742923973232213}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.30056363432476}, 'accuracy': {'mean': 0.9784836065573772, 'std': 0.005181168038766934}}
---Saving model...
Fold:  2
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.03175287946721929, 'std': 0.02021357047327966}, 'brier_loss': {'mean': 0.01957904150319677, 'std': 0.006027228810294074}, 'roc_auc': {'mean': 0.4268828727608324, 'std': 0.12166587889661801}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.75123676073355}, 'accuracy': {'mean': 0.9800936768149884, 'std': 0.006264910585196917}}
---Saving model...
Fold:  3
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.03046643195473709, 'std': 0.014357433548758153}, 'brier_loss': {'mean': 0.01978518115060916, 'std': 0.005018088796958197}, 'roc_auc': {'mean': 0.5481745945278449, 'std': 0.07426700860501846}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.66651230989035}, 'accuracy': {'mean': 0.9798009367681498, 'std': 0.005244869106314082}}
---Saving model...
Fold:  4
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model


  _warn_prf(average, modifier, msg_start, len(result))


------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Training batch  35
------Training batch  36
------Training batch  37
------Training batch  38
------Training batch  39
------Trai

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.03567917723030305, 'std': 0.011048282998970886}, 'brier_loss': {'mean': 0.021245932873264227, 'std': 0.005814569881677547}, 'roc_auc': {'mean': 0.5681553691257266, 'std': 0.07676464068542015}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.2625496424379}, 'accuracy': {'mean': 0.9783372365339579, 'std': 0.005985084397031871}}
---Saving model...


  _warn_prf(average, modifier, msg_start, len(result))


## Image Resolution 512 x 512

In [None]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (512, 512, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.03894947230714062, 'std': 0.013906266673603585}, 'brier_loss': {'mean': 0.020582287677038187, 'std': 0.003970701250125691}, 'roc_auc': {'mean': 0.6071675678731193, 'std': 0.07978817423307522}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.50060827160584}, 'accuracy': {'mean': 0.9792154566744731, 'std': 0.004013849297658702}}
---Saving model...
Fold:  1
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (512, 512, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_512_0.pkl
Fold:  1
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_512_1.pkl
Fold:  2
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
-

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.025347789166085488, 'std': 0.016913123073958904}, 'brier_loss': {'mean': 0.02015488405657302, 'std': 0.007813384747079287}, 'roc_auc': {'mean': 0.43655911869561076, 'std': 0.08613030426651655}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.6367289316631}, 'accuracy': {'mean': 0.9796545667447308, 'std': 0.008066303122631659}}
---Saving model...
Fold:  3
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14


In [7]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (512, 512, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_512_0.pkl
Fold:  1
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_512_1.pkl
Fold:  2
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_512_2.pkl
Fold:  3
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
----

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.04032478611675145, 'std': 0.019825158807225476}, 'brier_loss': {'mean': 0.019781098012554475, 'std': 0.007757666603052304}, 'roc_auc': {'mean': 0.5497896098223, 'std': 0.1517866137916864}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.677625701693}, 'accuracy': {'mean': 0.9798009367681499, 'std': 0.00811793010764756}}
---Saving model...
Fold:  4
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.03590749795048946, 'std': 0.016774013242173785}, 'brier_loss': {'mean': 0.021390596475990873, 'std': 0.00613341738925443}, 'roc_auc': {'mean': 0.5625333423407778, 'std': 0.10396508195756543}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.26341960654224}, 'accuracy': {'mean': 0.9783372365339579, 'std': 0.006230619629330455}}
---Saving model...


  _warn_prf(average, modifier, msg_start, len(result))


## Image Resolution 1024 x 1024

In [None]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (1024, 1024, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.021504307285545025, 'std': 0.005301933176969506}, 'brier_loss': {'mean': 0.02024940483489433, 'std': 0.004308736595352871}, 'roc_auc': {'mean': 0.42494685485786043, 'std': 0.051548381377138144}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.54240090898173}, 'accuracy': {'mean': 0.9793618266978924, 'std': 0.004480453122076693}}
---Saving model...
Fold:  1
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (1024, 1024, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_0.pkl
Fold:  1
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.023702716592942007, 'std': 0.00870258642377039}, 'brier_loss': {'mean': 0.021083761717126396, 'std': 0.0072091283215981955}, 'roc_auc': {'mean': 0.4429084002285742, 'std': 0.10326355330808594}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.30908754502667}, 'accuracy': {'mean': 0.9784836065573768, 'std': 0.007499232117944666}}
---Saving model...
Fold:  2
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31
------Training batch  32
------Training batch  33
------Training batch  34
------Train

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.02351405035159504, 'std': 0.011395395275484136}, 'brier_loss': {'mean': 0.019665662863566524, 'std': 0.004547450478666455}, 'roc_auc': {'mean': 0.436558437738127, 'std': 0.0876142254302138}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.70566698215057}, 'accuracy': {'mean': 0.9799473067915692, 'std': 0.004722560247336556}}
---Saving model...
Fold:  3
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
------Training batch  19
------Training batch  20
------Training batch  21
------Training batch  22
------Training batch  23
------Training batch  24
------Training batch  25
------Training batch  26
------Training batch  27
------Training batch  28
------Training batch  29
------Training batch  30
------Training batch  31


In [None]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (1024, 1024, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_0.pkl
Fold:  1
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_1.pkl
Fold:  2
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_2.pkl
Fold:  3
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
------Training batch  14
------Training batch  15
------Training batch  16
------Training batch  17
------Training batch  18
-

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.027940375236861114, 'std': 0.013479122418815272}, 'brier_loss': {'mean': 0.019994700578916554, 'std': 0.00513330998417368}, 'roc_auc': {'mean': 0.5022200614484407, 'std': 0.1123097002528762}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.6261343867971}, 'accuracy': {'mean': 0.9796545667447306, 'std': 0.005335985402095483}}
---Saving model...
Fold:  4
---Loading feature extractor...


  _warn_prf(average, modifier, msg_start, len(result))


---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2


In [7]:
BATCH_SIZE = 488 # losing as less samples as possible
IMG_SIZE = (1024, 1024, 1)

BASEPATH = '' # directory of images preprocessed as numpy arrays
EXTRACTOR_PATH = '../feature_extractors' # directory storing the feature extractor
SAVEPATH = '../predictive_models' #directory to save the trained models

RANDOM_STATE = 42
HP = {'C': 1, 'penalty': 'elasticnet', 'tol': 1, 'class_weight': None}


nmf_logit_512 = rm.run_cv_training(
    
        model_name = 'logit',
        model_params = HP,
    
        cv_img = cv_img,
        label_img_dict = splitter.labels,
        patient_img_dict = splitter.trainset,
        
        from_numpy = True,
        img_basepath = BASEPATH,
        batch_size = BATCH_SIZE, 
        img_size = IMG_SIZE,
        shuffle = True,
        normalize = (0, 1),

        feature_extractor_name = 'NMF',
        CNN_preprocess=None,
        n_components = 26,
        extractor_path = EXTRACTOR_PATH,
        savepath = SAVEPATH,
        random_state = RANDOM_STATE,
 
        verbose = 1,
        return_model = False
        )

Fold:  0
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_0.pkl
Fold:  1
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_1.pkl
Fold:  2
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_2.pkl
Fold:  3
logit model trained on this cross-validation fold already exist
--> /home/irinb/models/NMF_26_logit_42_1024_3.pkl
Fold:  4
---Loading feature extractor...
---Creating training and validation data generators...
---Initializing logistic regression model...
---Start training model
------Training batch  0
------Training batch  1
------Training batch  2
------Training batch  3
------Training batch  4
------Training batch  5
------Training batch  6
------Training batch  7
------Training batch  8
------Training batch  9
------Training batch  10
------Training batch  11
------Training batch  12
------Training batch  13
---

  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  1
------Calculating scores of batch  1


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  2
------Calculating scores of batch  2


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  3
------Calculating scores of batch  3


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  4
------Calculating scores of batch  4


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  5
------Calculating scores of batch  5


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  6
------Calculating scores of batch  6


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  7
------Calculating scores of batch  7


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  8
------Calculating scores of batch  8


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  9
------Calculating scores of batch  9


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  10
------Calculating scores of batch  10


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  11
------Calculating scores of batch  11


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  12
------Calculating scores of batch  12


  _warn_prf(average, modifier, msg_start, len(result))


------Getting predictions for batch  13
------Calculating scores of batch  13
>>> Finish training model!
>>> Mean scores over batches: 
{'pr_auc': {'mean': 0.027077405226249764, 'std': 0.011171722270626008}, 'brier_loss': {'mean': 0.020804857840321413, 'std': 0.005183450861480266}, 'roc_auc': {'mean': 0.5035043297745914, 'std': 0.056418229607374255}, 'f1-score': {'mean': 0.0, 'std': 0.0}, 'recall': {'mean': 0.0, 'std': 0.0}, 'precision': {'mean': 0.0, 'std': 0.0}, 'confusion_matrix': {'mean': 122.0, 'std': 205.38265680014396}, 'accuracy': {'mean': 0.9787763466042154, 'std': 0.0054551068950773555}}
---Saving model...


  _warn_prf(average, modifier, msg_start, len(result))
