In [None]:
%run default-imports.ipynb

In [None]:
%run run-experiment-cv.ipynb

In [None]:
# Cohort name -> CSV file of that cohort; looked up through params['cohort']
# when the data is loaded.
filenames = {
    'MIMIC': "~/cohorts/hs_mimic.csv",
    'SINAI': "~/cohorts/hs_sinai_preprocessed.csv",
    'DHZB': "~/cohorts/hs_dhzb.csv",
}

# File the experiment results are read from (unpickle) and written back to (pickle).
save_path = './experiments/mimic_model_results_calibration.d'

In [None]:
''' define options to run experiments on '''
# Each key maps to a list of candidate values. The experiment loop below runs
# one experiment per element of the Cartesian product of these lists, so the
# key order here is also the order of the values inside each combination.
options = {
    'target': ["AKI"],                            # label column split off from the features
    'cohort': ['MIMIC'],                          # key into `filenames`
    'test_size': [0.2],                           # forwarded to Split().execute(...)
    'imputation_method': [imputers.DEFAULT],
    'algorithm': [
        # Uncomment an entry to add that algorithm to the grid.
        # algorithms.LR,
        # algorithms.DT,
        # algorithms.ADABOOST,
        # algorithms.ENLR,
        # algorithms.GNBAYES,
        algorithms.GBDT,
        algorithms.RF,
    ],
    'sampling_method': [samplers.SMOTE],
    'sampling_strategy': [0.25],
    'scale_method': [None],
    'calibration_method': ['isotonic', 'sigmoid'],  # dropped for the k-fold runs
    'save_pipeline': [False],
    'optimize_mode': [False],
    'n_splits': [10],                             # number of StratifiedKFold folds
}

''' load previously stored experiments (if any) so this run is added to them '''
# NOTE(review): `unpickle` / `pickle` are not the stdlib module here; they are
# presumably helpers defined by the %run notebooks above -- confirm.
experiments = unpickle(save_path) or {}
n_experiments = 0

# Discrimination metrics that are summarized over the cross-validation folds.
metrics = ['precision', 'recall', 'f1-score', 'auc', 'dor']

with Timer() as t:

    ''' iterate over different options '''
    for combination in product(*options.values()):

        ''' initialize parameters '''
        params = dict(zip(options.keys(), combination))
        print(f"Running experiment with following parameters: {params}")
        exp_id = str(uuid.uuid1())

        ''' load the data '''
        data = Load().execute(filename=filenames[params['cohort']])

        ''' split the data '''
        train, test = Split().execute(data, test_size=params['test_size'])

        ''' train model on subset of data '''
        experiment = run_experiment(params, train, test)

        ''' after training, our data for subsequent steps becomes train for computing CV statistics '''
        data = train.copy()

        features, labels = data.drop(params['target'], axis=1), data[params['target']]

        # No need to calibrate within the k-folds, so the folds get the
        # parameters without 'calibration_method'. This must be a *copy*:
        # deleting the key through an alias of `params` would also strip it
        # from the parameters stored with the experiment further down.
        k_params = {key: value for key, value in params.items() if key != 'calibration_method'}

        ''' for each of the k-folds '''
        cv_experiments = []
        skf = StratifiedKFold(n_splits=params['n_splits'], random_state=None, shuffle=False)
        for train_index, test_index in skf.split(features, labels):
            train_data = data.iloc[train_index]
            test_data = data.iloc[test_index]
            cv_experiments.append(run_experiment(k_params, train_data, test_data))

        ''' summarize results of crossvalidation '''
        # A plain dict is used on purpose: a defaultdict with a lambda factory
        # cannot be serialized by the standard pickle module, which would make
        # the final save fail if the `pickle` helper relies on it.
        cv_performance = {}
        for metric in metrics:
            measurements = [exp['performance']['discrimination'][metric] for exp in cv_experiments]
            spread = np.std(measurements)
            cv_performance[metric] = {
                'mean': np.mean(measurements),
                'std': spread,
                # Two standard deviations of the per-fold scores: a rough ~95%
                # band of the fold scores, not a standard-error based
                # confidence interval of the mean.
                'ci': spread * 2,
            }

        ''' save everything '''
        experiment['parameters'] = params  # still contains 'calibration_method'
        experiment['performance']['cv'] = cv_performance
        experiment['exp_id'] = exp_id

        experiments[exp_id] = experiment

        n_experiments += 1

print(f'Running {n_experiments} experiments took {t.interval:.03f} sec.')

''' store everything '''
if pickle(experiments, save_path):
    print('Successfully saved.')
else:
    # Make a failed save visible instead of ending the run silently.
    print(f'WARNING: experiments could not be saved to {save_path}.')