In [16]:
%run default-imports.ipynb

In [17]:
def run_experiment(params):
    """Run one end-to-end modelling experiment described by ``params``.

    The steps are: load the cohort, optionally restrict it to selected
    features, split into train/test, impute, optionally scale and resample
    the training set, train the requested algorithm, evaluate it on the test
    set, optionally compute explanations, and collect everything into one
    result dictionary.

    Keys read from ``params``:
        cohort: 'MIMIC' or 'SINAI'; selects the CSV file to load.
        target: name of the target column.
        test_size: forwarded to ``Split``.
        imputation_method: forwarded to ``Impute``.
        algorithm: single algorithm name forwarded to ``Train``.
        optimize_mode: forwarded to ``Train`` as ``optimize``.
        features_to_select (optional): feature columns to keep. When absent
            or empty, all non-target columns are kept and the list is
            written back into ``params``.
        scaling_method (optional): when set, train and test data are scaled.
        sampling_method (optional): when set, the training set is resampled.
        crossval (optional): when set, forwarded to ``Train`` and
            cross-validation metrics are collected.
        save_pipeline (optional): when set, the fitted model, imputer and
            scaler are returned under 'pipeline'.
        explainers (optional): when set, explanations are computed.
        num_features (optional): forwarded to the explainers.

    Note that ``params`` is mutated: 'features_to_select' may be filled in
    and 'datetime' is always set to the time the run finished.

    Returns:
        dict with keys 'parameters', 'pipeline', 'performance',
        'explanations', 'weighted_explanations', 'feature_importances'
        and 'runtime'.
    """
    print("Running experiment with following parameters: ")
    print(params)

    filenames = {'MIMIC' : "~/cohorts/hs_mimic.csv", 'SINAI' : "~/cohorts/hs_sinai_preprocessed.csv"}

    # First load the data for the requested cohort.
    data = Load().execute(filename=filenames[params['cohort']])

    target = params['target']

    # Keep only features that were explicitly chosen (plus the target).
    if params.get('features_to_select'):
        keep = {target, *params['features_to_select']}
        unwanted = [column for column in data.columns if column not in keep]
        if unwanted:
            # Drop everything in a single call rather than column by column.
            data.drop(unwanted, axis=1, inplace=True)
    else:
        params['features_to_select'] = [column for column in data.columns if column != target]

    train, test = Split().execute(data, test_size=params['test_size'])
    train, imputer = Impute().execute(train.copy(), imputation_method=params['imputation_method'])

    # The scaler fitted on the training data is reused below for the test
    # set and stored in the saved pipeline, so it must not be reset later.
    scaler = None
    if params.get('scaling_method'):
        train, scaler = Scale().execute(train.copy(), target, transform_method=params['scaling_method'])

    if params.get('sampling_method'):
        train = Sample().execute(train.copy(), target, sampling_method=params['sampling_method'])

    algorithm = params['algorithm']
    runtime = {}

    # Fit models, timing the training step.
    crossval_metrics = {}
    with Timer() as t:
        if params.get('crossval'):
            models, crossval_metrics = Train().execute(train, target=target, algorithms=[algorithm], optimize=params['optimize_mode'], crossval=params.get('crossval'))
        else:
            models = Train().execute(train, target=target, algorithms=[algorithm], optimize=params['optimize_mode'])

    runtime['train'] = t.interval

    # Evaluate models: apply the *fitted* imputer and scaler to the test set
    # so it goes through the same preprocessing as the training set.
    test, _ = Impute().execute(test.copy(), imputation_method=params['imputation_method'], imputer=imputer)
    if params.get('scaling_method'):
        test, _ = Scale().execute(test.copy(), target, scaler=scaler)

    results = Evaluate().execute(test, target=target, models=models)
    algorithm_results = results[algorithm]

    # Obtain performance metrics.
    performance = {}
    if params.get('crossval'):
        performance['crossval_metrics'] = crossval_metrics[algorithm]
    else:
        # No cross-validation was run, so there is nothing to look up.
        performance['crossval_metrics'] = {}
    performance['discrimination'] = get_discrimination_metrics(**algorithm_results)
    performance['calibration'] = get_calibration_metrics(algorithm_results['y_true'], algorithm_results['y_probs'])
    performance['clinical_usefulness'] = get_clinical_usefulness_metrics(performance['discrimination'])

    # Save pipeline for later reproducibility.
    pipeline = None
    if params.get('save_pipeline'):
        pipeline = {'model': models[algorithm].clf, 'imputer' : imputer, 'scaler': scaler}

    # Interpret explanations, timing the explanation step.
    explanations = {}
    weighted_explanations = {}
    feature_importances = None

    with Timer() as t:
        if params.get('explainers'):
            explanations = Explain().execute(train, models=models, target=target, explainers=params['explainers'], exp_kwargs={'test':test, 'sample_size':200, 'num_features': params.get('num_features'), 'num_exps_desired':20})
            weighted_explanations = get_weighted_explanations(explanations[algorithm])
            # (feature, contribution) pairs sorted by ascending contribution.
            contributions = explanations[algorithm]['FeatContribExplainer']
            feature_importances = sorted(contributions.items(), key=lambda x: x[1])

    runtime['explain'] = t.interval

    params['datetime'] = datetime.now()

    # Summarize results.
    experiment = {'parameters' : params,
                  'pipeline': pipeline,
                  'performance' : performance,
                  'explanations' : explanations,
                  'weighted_explanations' : weighted_explanations,
                  'feature_importances' : feature_importances,
                  'runtime' : runtime}

    return experiment
    
    