In [6]:
%run default-imports.ipynb

In [7]:
def run_experiment(params, train, test):
    """Run one train / evaluate / explain experiment and return a summary dict.

    Steps, in order: optional feature selection, imputation, optional
    scaling, optional resampling of the training set, model fitting
    (optionally with cross-validation), evaluation on the test set,
    optional calibration, metric computation and optional explanation.

    Args:
        params: Experiment configuration dict. Keys read here:
            required -- 'target', 'algorithm', 'imputation_method',
            'optimize_mode'; optional -- 'features_to_select',
            'scaling_method', 'sampling_method', 'sampling_strategy',
            'crossval', 'calibration_method', 'save_pipeline',
            'explainers', 'mimic', 'num_features', 'num_exps_desired'.
            NOTE: this dict is mutated in place -- 'model_features' and
            'datetime' are added -- and is returned under 'parameters'.
        train: Training data; indexed by a list of column labels and
            exposing ``.columns`` / ``.copy()`` (presumably a pandas
            DataFrame -- confirm against callers).
        test: Held-out data with the same interface as ``train``.

    Returns:
        dict with keys 'parameters', 'pipeline', 'results',
        'calibrated_results', 'performance', 'explanations',
        'weighted_explanations', 'feature_importances' and 'runtime'.
    """
    target = params['target']
    algorithm = params['algorithm']

    # Keep only the explicitly requested features (plus the target). Requested
    # names that are missing from a frame are silently ignored. Columns are
    # filtered in each frame's existing order so the selection is
    # deterministic (intersecting sets yields an arbitrary order that can
    # differ between runs and between train and test).
    if params.get('features_to_select'):
        wanted = set([target] + params['features_to_select'])
        train = train[[col for col in train.columns if col in wanted]]
        test = test[[col for col in test.columns if col in wanted]]

    params['model_features'] = list(train.columns)

    train, imputer = Impute().execute(train.copy(), imputation_method=params['imputation_method'])

    scaler = None
    if params.get('scaling_method'):
        train, scaler = Scale().execute(train.copy(), target, transform_method=params['scaling_method'])

    if params.get('sampling_method'):
        train = Sample().execute(train.copy(), target, sampling_method=params['sampling_method'], sampling_strategy=params.get('sampling_strategy'))

    runtime = {}

    # Fit models; cross-validation metrics are only returned when requested.
    crossval_metrics = {}
    with Timer() as t:
        if params.get('crossval'):
            models, crossval_metrics = Train().execute(train, target=target, algorithms=[algorithm], optimize=params['optimize_mode'], crossval=params.get('crossval'))
        else:
            models = Train().execute(train, target=target, algorithms=[algorithm], optimize=params['optimize_mode'])

    runtime['train'] = t.interval

    # Evaluate models: push the test set through the imputer (and scaler)
    # obtained from the training set.
    test, _ = Impute().execute(test.copy(), imputer=imputer)

    # Must be gated on the same key as the training-side scaling above; the
    # previous check used 'scale_method', so the test set was never scaled.
    if params.get('scaling_method'):
        test, _ = Scale().execute(test.copy(), target, scaler=scaler)

    results = Evaluate().execute(test.copy(), target=target, models=models)

    calibrated_results = {}
    if params.get('calibration_method'):
        calibration_method = params.get('calibration_method')
        calibrated_models = Calibrate().execute(train.copy(), target, models=models, calibration_method=calibration_method)
        calibrated_results = Evaluate().execute(test.copy(), target, models=calibrated_models)

    # Obtain performance metrics for the selected algorithm.
    algorithm_results = results[algorithm]
    performance = {}
    performance['crossval_metrics'] = crossval_metrics.get(algorithm)
    performance['discrimination'] = get_discrimination_metrics(**algorithm_results)
    performance['calibration'] = get_calibration_metrics(algorithm_results['y_true'], algorithm_results['y_probs'])
    performance['clinical_usefulness'] = get_clinical_usefulness_metrics(performance['discrimination'])

    # Save the fitted pipeline for later reproducibility.
    pipeline = None
    if params.get('save_pipeline'):
        pipeline = {'model': models[algorithm], 'imputer': imputer, 'scaler': scaler}

    # Interpret explanations.
    explanations = {}
    weighted_explanations = {}
    feature_importances = None

    with Timer() as t:
        if params.get('explainers'):
            explanations = Explain().execute(train, models=models, target=target, explainers=params['explainers'], exp_kwargs={'test': test, 'sample_size': 200, 'mimic': params.get('mimic'), 'num_features': params.get('num_features'), 'num_exps_desired': params.get('num_exps_desired')})
            # Convert to plain dicts: need to remove lambda for pickling.
            explanations = {k: dict(v) for k, v in explanations.items()}
            weighted_explanations = get_weighted_explanations(explanations[algorithm])
            # (feature, contribution) pairs in ascending order of contribution.
            feature_importances = sorted(
                [(k, v) for k, v in explanations[algorithm]['FeatContribExplainer'].items()],
                key=lambda pair: pair[1],
            )

    runtime['explain'] = t.interval
    params['datetime'] = datetime.now()

    # Summarize results.
    experiment = {'parameters': params,
                  'pipeline': pipeline,
                  'results': algorithm_results,
                  'calibrated_results': calibrated_results.get(algorithm),
                  'performance': performance,
                  'explanations': explanations,
                  'weighted_explanations': weighted_explanations,
                  'feature_importances': feature_importances,
                  'runtime': runtime}

    return experiment
    
    