# Step 2 - Evaluate Modeling Approaches on Validation Data
1. Get processed modeling data from database
2. Model Stage 1 - level_binary
3. Model Stage 2 - multiclass level
4. Calculate performance metrics

### import packages

In [None]:
import numpy as np
import pandas as pd
import psycopg2 as pg
import datetime as dt
from sklearn import preprocessing
from collections import OrderedDict
from collections import defaultdict
from pprint import pprint
import cPickle as pickle
import gc
import socket
import boto3
from boto.utils import get_instance_metadata
import ast
from Segments import Segments
from Times import Times
from Cluster import Cluster
import time
import datetime
import os
import shutil
import joblib
import string
from AWS import AWS
from Utility import Utility

# clustering
from scipy.sparse import csr_matrix
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sqlalchemy import create_engine

# modeling
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict

%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# wall-clock start of the whole run; read by the final 'entire pipeline took' print
start_time = time.time()

### set inputs

In [None]:
# set environment

# aws stays None unless util.isAWS() is true further down, where it becomes an AWS helper
aws = None
# S3 bucket name and data directory that are passed to the AWS helper
s3_bucket_name = 'dse-cohort3-group3'
s3_dat_dir = 'PreprocessedWazeData'

# assume connection file is always present
# conn_str_file holds the string passed to pg.connect(); sampling_args_file holds the
# pipeline arguments that are parsed into file_args
conn_str_file = '../conf/db_conn_str.txt'
sampling_args_file = '../conf/pipeline_args.txt'

In [None]:
# Load the pipeline arguments. The file is expected to contain a Python literal
# (a dict, judging by the key lookups on file_args below); ast.literal_eval parses
# it without executing arbitrary code.
# The context manager closes the file handle, which was previously left open.
with open(sampling_args_file, 'r') as fr:
    fa = fr.read()
file_args = ast.literal_eval(fa)
# echo the parsed arguments as the cell output
file_args

In [None]:
# assume save_dir already exists
# directory into which the fitted models and val_results_dict are dumped with joblib
save_dir = file_args['save_dir']
# echo the value as the cell output
save_dir

# 1. get data from db

In [None]:
# start timer for the data-loading section (reported by the 'getting data took' print)
get_data_start = time.time()

### create AWS object and helper methods

In [None]:
# project helper built from the pipeline arguments; used below to load the modeling
# data, build validation splits, unskew data and plot metrics
util = Utility(file_args)

In [None]:
# build the S3 helper only when the Utility helper reports an AWS environment;
# otherwise aws keeps whatever value it had before this cell
if util.isAWS():
    aws = AWS(s3_bucket_name, s3_dat_dir)

# read the database connection string; the context manager closes the file handle,
# which the bare open(...).read() form left open
with open(conn_str_file, 'r') as conn_file:
    pg_conn_str = conn_file.read()

### connect to database and get data

In [None]:
# open the PostgreSQL connection and attach it to the helper, which presumably
# uses it inside get_modeling_data() -- TODO confirm in Utility
conn = pg.connect(pg_conn_str) 
util.conn = conn

In [None]:
# load the train and test frames through the Utility helper
# (presumably read over util.conn -- confirm in Utility)
train_data, test_data = util.get_modeling_data()

In [None]:
# report the seconds elapsed since get_data_start
print('--- getting data took {0:.1f} seconds ---'.format(time.time() - get_data_start))

# 2. Modeling

In [None]:
# start timer for the whole modeling section
# NOTE(review): this value is not read again in the visible part of the notebook
modeling_start = time.time()

## 2.1.  first modeling stage - level_binary

In [None]:
# start timer for stage 1 (reported by the 'first modeling stage took' print)
first_modeling_stage_start = time.time()

In [None]:
# prediction frames: the identifying columns plus every column whose name starts with 'level'
targets = [col for col in train_data.columns if col.startswith('level')]
id_cols = ['date','time','date_idx','time_idx','segment_id','day_of_week','cluster']
train_preds = train_data[id_cols + targets].copy()
test_preds = test_data[id_cols + targets].copy()

In [None]:
# dicts to track validation results
# one sub-dict per modeling stage; each stage later receives one entry per model
# variant (a dict of cluster -> mean validation score) plus a 'cluster_counts' entry
val_results_dict = {
    'stage_1': {},
    'stage_2': {}
}

In [None]:
# validation splitter for the full training frame; consumed as ps.split() when
# file_args['train_test_method'] == 'date' and as ps.split(X, y) otherwise
ps = util.get_validation_splits(train_data)

In [None]:
# 'average' argument for f1_score: a scoring metric of plain 'f1' means binary
# averaging; for any other value every 'f1_' occurrence is removed from the metric
# name and the remainder is used
if file_args['scoring_metric'] == 'f1':
    f1_avg = 'binary'
else:
    f1_avg = str.replace(file_args['scoring_metric'], 'f1_', '')

### stage 1 avg baseline

In [None]:
# calculate validation f1 scores for choosing best model for cluster ensemble
#
# Baseline "model": for every (time_idx, segment_id, day_of_week) group, predict the
# rounded mean of the stage-1 target over the training rows of that group; groups that
# do not occur in the training rows get a prediction of 0.
if file_args['model_avg_baseline']:
    print('calculating validation scores for average baseline...')
    pred_suffix = '_preds_avg_baseline'
    target_col = file_args['target_first_stage']
    pred_col = target_col + pred_suffix
    group_cols = ['time_idx', 'segment_id', 'day_of_week']

    # create dict to store local results (cluster -> array of per-fold f1 scores)
    model_val_results = {k:np.array([]) for k in train_data['cluster'].unique()}

    # split data into trn and val and calculate prediction scores
    if file_args['train_test_method'] == 'date':
        splits = ps.split()
    else:
        splits = ps.split(train_data, train_data[target_col])
    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        X_t, X_v = train_data.iloc[trn_idx,:], train_data.iloc[val_idx,:]

        # calculate average of target for time, segment, dow groups
        y_trn_avg = X_t.groupby(group_cols, as_index=False)[target_col].mean()

        # make prediction for time/segment based on target average
        y_preds_avg = y_trn_avg[group_cols + [target_col]].copy()
        y_preds_avg = y_preds_avg.round({target_col: 0})
        y_preds_avg.rename(columns={target_col: pred_col}, inplace=True)

        # join predictions to train and val dataframes
        train_preds_avg = X_t.merge(y_preds_avg, how='left', on=group_cols)
        val_preds_avg = X_v.merge(y_preds_avg, how='left', on=group_cols)

        # fill null predictions with 0; assign the result back instead of calling
        # fillna(inplace=True) on a column selection, which is chained assignment and
        # is not guaranteed to write through to the frame
        train_preds_avg[pred_col] = train_preds_avg[pred_col].fillna(value=0)
        val_preds_avg[pred_col] = val_preds_avg[pred_col].fillna(value=0)

        # calculate f1 scores for individual clusters for this val fold
        for clust in X_t['cluster'].unique():
            tmp_val_clust = val_preds_avg[val_preds_avg['cluster']==clust]
            # the target column holds the actuals and the suffixed column the predictions;
            # these two names used to be swapped, which passed (y_pred, y_true) to
            # f1_score and changes the result for support-weighted averaging
            tmp_val_actuals = tmp_val_clust[target_col]
            tmp_val_preds = tmp_val_clust[pred_col]

            val_f1 = f1_score(tmp_val_actuals, tmp_val_preds, average=f1_avg)

            # update model_val_metrics
            model_val_results[clust] = np.append(model_val_results[clust], val_f1)

    # calculate avg f1 scores from multiple validation sets
    for key in model_val_results:
        model_val_results[key] = model_val_results[key].mean()

    # add model val results to dict
    val_results_dict['stage_1']['model_avg_baseline'] = model_val_results

    # save averages "model" (dataframe for merging later) for full training data
    y_trn_avg = train_data.groupby(group_cols, as_index=False)[target_col].mean()
    y_preds_avg = y_trn_avg[group_cols + [target_col]].copy()
    y_preds_avg = y_preds_avg.round({target_col: 0})
    y_preds_avg.rename(columns={target_col: pred_col}, inplace=True)
    fn = os.path.join(save_dir, 'stage1_model_avg_baseline.pkl')
    joblib.dump(y_preds_avg, fn)

### stage 1 non-baseline models

In [None]:
# candidate estimators, keyed by the name that also appears in the
# 'model_full_{key}' / 'model_clusters_{key}' switches of file_args;
# the shared seed is passed to every estimator constructed with random_state
seed = file_args['seed']
model_dict = {
    'random_forest': RandomForestClassifier(random_state=seed),
    'knn': KNeighborsClassifier(),
    'extra_trees': ExtraTreesClassifier(random_state=seed),
    'gradient_boosting': GradientBoostingClassifier(random_state=seed),
    'logistic_regression': LogisticRegression(random_state=seed),
    'gaussian_nb': GaussianNB()
}

In [None]:
def fit_stage1_model_on_full(model_key):
    """Validate and save the stage-1 model ``model_key`` trained on all clusters at once.

    Nothing happens unless ``file_args['model_full_{model_key}']`` is truthy.
    For each validation fold the estimator from ``model_dict`` is fit on the fold's
    training rows (without the 'cluster' column) and scored with ``f1_score``
    separately for every cluster of ``train_data``. On the first fold only, the
    estimator is also refit on the fold's train+validation rows and dumped to
    ``save_dir``. The per-cluster mean of the fold scores is stored under
    ``val_results_dict['stage_1']['model_full_{model_key}']``.

    Args:
        model_key: key into ``model_dict``.

    Returns:
        None.
    """
    if not file_args['model_full_{}'.format(model_key)]:
        return

    print('training {} on full data...'.format(model_key))
    estimator = model_dict[model_key]

    # everything except date, time and the 'level*' columns is a feature;
    # 'cluster' is kept for now because it is needed to score per cluster
    non_features = ['date', 'time'] + [c for c in train_data.columns if c.startswith('level')]
    features = train_data.drop(labels=non_features, axis=1)
    target_values = train_data.loc[:, file_args['target_first_stage']].values.ravel()

    # cluster -> array of per-fold f1 scores
    clusters = train_data['cluster'].unique()
    fold_scores = {k: np.array([]) for k in clusters}

    if file_args['train_test_method'] == 'date':
        splits = ps.split()
    else:
        splits = ps.split(features, target_values)

    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        val_clusters = features.iloc[val_idx, :]['cluster']
        X_t = features.iloc[trn_idx, :].drop(labels='cluster', axis=1)
        X_v = features.iloc[val_idx, :].drop(labels='cluster', axis=1)
        Y_t = target_values[trn_idx]
        Y_v = target_values[val_idx]

        estimator.fit(X_t, Y_t)
        val_preds = estimator.predict(X_v)

        # score this fold separately for every cluster
        for clust in clusters:
            in_cluster = (val_clusters == clust).values
            score = f1_score(Y_v[in_cluster], val_preds[in_cluster], average=f1_avg)
            fold_scores[clust] = np.append(fold_scores[clust], score)

        # first fold only: refit on train+val rows and persist that model
        if idx == 0:
            estimator.fit(pd.concat([X_t, X_v]), np.append(Y_t, Y_v))
            fn = os.path.join(save_dir, 'stage1_model_full_{}.pkl'.format(model_key))
            joblib.dump(estimator, fn)

    # average the fold scores per cluster and record them
    for clust in fold_scores:
        fold_scores[clust] = fold_scores[clust].mean()
    val_results_dict['stage_1']['model_full_{}'.format(model_key)] = fold_scores

In [None]:
def fit_stage1_model_on_clusters(model_key):
    """Validate and save one stage-1 model per cluster for ``model_key``.

    Nothing happens unless ``file_args['model_clusters_{model_key}']`` is truthy.
    For every validation fold and every cluster of ``train_data``, the estimator from
    ``model_dict`` is fit on that cluster's training rows and scored with ``f1_score``
    on the cluster's validation rows. When the corresponding flags are set, the
    cluster's train / validation rows are first passed through ``util.unskew_data``.
    On the first fold only, the estimator is also refit on the cluster's
    train+validation rows (after any unskewing) and dumped to ``save_dir``.
    The per-cluster mean of the fold scores is stored under
    ``val_results_dict['stage_1']['model_clusters_{model_key}']``.

    Args:
        model_key: key into ``model_dict``.

    Returns:
        None.
    """
    if not file_args['model_clusters_{}'.format(model_key)]:
        return

    print('training {} on clustered data...'.format(model_key))
    model = model_dict[model_key]
    target_col = file_args['target_first_stage']
    clusters = train_data['cluster'].unique()

    # columns that are not model features; loop-invariant, so computed once
    level_cols = [col for col in train_data.columns if col.startswith('level')]
    drop_cols = ['date', 'time', 'cluster'] + level_cols

    # create dict to store local results (cluster -> array of per-fold f1 scores)
    model_val_results = {k: np.array([]) for k in clusters}

    # split data into trn and val and calculate prediction scores
    if file_args['train_test_method'] == 'date':
        splits = ps.split()
    else:
        splits = ps.split(train_data, train_data[target_col])

    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        trn_data = train_data.iloc[trn_idx, :]
        val_data = train_data.iloc[val_idx, :]

        for clust in clusters:
            # subset data to cluster
            train_clust = trn_data[trn_data['cluster'] == clust]
            val_clust = val_data[val_data['cluster'] == clust]

            # calculate negative to positive ratio for each cluster
            trn_clust_ratio = util.get_neg_pos_ratio(train_clust)
            val_clust_ratio = util.get_neg_pos_ratio(val_clust)

            # unskew individual clusters; compare the ratios computed just above
            # (the conditions used to reference the undefined names trn_ratio and
            # val_ratio, which raised NameError as soon as an unskew flag was enabled)
            if file_args['unskew_train_clusters'] and trn_clust_ratio > file_args['unskew_ratio']:
                print('unskewing train data to negative positive ratio of {}...'.format(file_args['unskew_ratio']))
                train_clust = util.unskew_data(train_clust, file_args['unskew_ratio'])
            if file_args['unskew_test'] and val_clust_ratio > file_args['unskew_ratio']:
                print('unskewing val data to negative positive ratio of {}...'.format(file_args['unskew_ratio']))
                val_clust = util.unskew_data(val_clust, file_args['unskew_ratio'])

            # split features and targets
            X_trn = train_clust.drop(labels=drop_cols, axis=1)
            Y_trn = train_clust.loc[:, target_col].values.ravel()
            X_val = val_clust.drop(labels=drop_cols, axis=1)
            Y_val = val_clust.loc[:, target_col].values.ravel()

            # fit model
            model.fit(X_trn, Y_trn)

            # make predictions
            val_preds = model.predict(X_val)

            # calculate f1 score for cluster and append it to the cluster's fold scores
            val_f1_clust = f1_score(Y_val, val_preds, average=f1_avg)
            model_val_results[clust] = np.append(model_val_results[clust], val_f1_clust)

            # first fold only: fit model on train+val data of this cluster and save it
            if idx == 0:
                X = pd.concat([X_trn, X_val])
                Y = np.append(Y_trn, Y_val)
                model.fit(X, Y)
                fn = os.path.join(save_dir, 'stage1_model_clusters_{}_cluster_{}.pkl'.format(model_key, clust))
                joblib.dump(model, fn)

    # calculate avg f1 scores from multiple validation sets
    for key in model_val_results:
        model_val_results[key] = model_val_results[key].mean()

    # add model val results to dict
    val_results_dict['stage_1']['model_clusters_{}'.format(model_key)] = model_val_results

In [None]:
# run both stage-1 variants (all clusters together, then one model per cluster)
# for every candidate estimator; each function checks its own file_args switch
for model_key in model_dict:
    fit_stage1_model_on_full(model_key)
    fit_stage1_model_on_clusters(model_key)

In [None]:
# record how many training rows fall into each cluster
clust_count_dict = {}
cluster_labels = train_data['cluster']
for clust in cluster_labels.unique():
    clust_count_dict[clust] = len(train_data[cluster_labels == clust])

val_results_dict['stage_1']['cluster_counts'] = clust_count_dict

In [None]:
# report the seconds elapsed since first_modeling_stage_start
print('--- first modeling stage took {0:.1f} seconds ---'.format(time.time() - first_modeling_stage_start))

## 2.2  second modeling stage - non-binary level

In [None]:
# start timer for stage 2 (reported by the 'second modeling stage took' print)
second_modeling_stage_start = time.time()

In [None]:
# subset train data to only include existence of traffic
# i.e. keep only the rows whose 'level_binary' value equals 1
train_data_pos = train_data[train_data['level_binary'] == 1]

In [None]:
# get predefined splits for positive data only
# (same helper call as for ps, applied to the level_binary == 1 subset)
ps_pos = util.get_validation_splits(train_data_pos)

### stage 2 average baseline

In [None]:
# calculate validation scores
#
# Baseline "model" for stage 2: for every (time_idx, segment_id, day_of_week) group,
# predict the rounded mean of the stage-2 target over the training rows of that group
# (rows of train_data_pos only); groups absent from the training rows predict 0.
if file_args['model_avg_baseline']:
    print('calculating validation scores for average baseline...')
    pred_suffix = '_preds_avg_baseline'
    target_col = file_args['target_second_stage']
    pred_col = target_col + pred_suffix
    group_cols = ['time_idx', 'segment_id', 'day_of_week']

    # create dict to store local results (cluster -> array of per-fold f1 scores)
    model_val_results = {k:np.array([]) for k in train_data_pos['cluster'].unique()}

    # split data into trn and val and calculate prediction scores
    if file_args['train_test_method'] == 'date':
        splits = ps_pos.split()
    else:
        splits = ps_pos.split(train_data_pos, train_data_pos[target_col])
    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        X_t, X_v = train_data_pos.iloc[trn_idx,:], train_data_pos.iloc[val_idx,:]

        # calculate average of target for time, segment, dow groups
        y_trn_avg = X_t.groupby(group_cols, as_index=False)[target_col].mean()

        # make prediction for time/segment based on target average
        y_preds_avg = y_trn_avg[group_cols + [target_col]].copy()
        y_preds_avg = y_preds_avg.round({target_col: 0})
        y_preds_avg.rename(columns={target_col: pred_col}, inplace=True)

        # join predictions to train and val dataframes
        train_preds_avg = X_t.merge(y_preds_avg, how='left', on=group_cols)
        val_preds_avg = X_v.merge(y_preds_avg, how='left', on=group_cols)

        # fill null predictions with 0; assign the result back instead of calling
        # fillna(inplace=True) on a column selection, which is chained assignment and
        # is not guaranteed to write through to the frame
        train_preds_avg[pred_col] = train_preds_avg[pred_col].fillna(value=0)
        val_preds_avg[pred_col] = val_preds_avg[pred_col].fillna(value=0)

        # set stage 2 predictions to 0 if stage 'level_binary' prediction was 0
        # NOTE(review): train_data_pos is filtered to level_binary == 1, so these group
        # means look like they are always 1 -- confirm whether this gate was meant to
        # use the stage-1 predictions instead
        y_trn_bin_avg = X_t.groupby(group_cols, as_index=False)['level_binary'].mean()
        y_preds_bin_avg = y_trn_bin_avg[group_cols + ['level_binary']].copy()
        # round the binary average itself; the round used to be keyed on the stage-2
        # target column, which this frame does not contain, so nothing was rounded
        y_preds_bin_avg = y_preds_bin_avg.round({'level_binary': 0})
        y_preds_bin_avg.rename(columns={'level_binary':'level_binary_pred'}, inplace=True)
        train_preds_avg = train_preds_avg.merge(y_preds_bin_avg, how='left', on=group_cols)
        val_preds_avg = val_preds_avg.merge(y_preds_bin_avg, how='left', on=group_cols)
        train_preds_avg['level_binary_pred'] = train_preds_avg['level_binary_pred'].fillna(value=0)
        val_preds_avg['level_binary_pred'] = val_preds_avg['level_binary_pred'].fillna(value=0)

        train_preds_avg.loc[:, pred_col] \
            = train_preds_avg[pred_col] \
            * train_preds_avg['level_binary_pred']
        val_preds_avg.loc[:, pred_col] \
            = val_preds_avg[pred_col] \
            * val_preds_avg['level_binary_pred']

        # calculate f1 scores for individual clusters for this val fold
        for clust in X_t['cluster'].unique():
            tmp_val_clust = val_preds_avg[val_preds_avg['cluster']==clust]
            # the target column holds the actuals and the suffixed column the predictions;
            # these two names used to be swapped, which passed (y_pred, y_true) to
            # f1_score and changes the result for support-weighted averaging
            tmp_val_actuals = tmp_val_clust[target_col]
            tmp_val_preds = tmp_val_clust[pred_col]

            val_f1 = f1_score(tmp_val_actuals, tmp_val_preds, average=f1_avg)

            # update model_val_metrics
            model_val_results[clust] = np.append(model_val_results[clust], val_f1)

    # calculate avg f1 scores from multiple validation sets
    for key in model_val_results:
        model_val_results[key] = model_val_results[key].mean()

    # add model val results to dict
    val_results_dict['stage_2']['model_avg_baseline'] = model_val_results

    # save averages "model" (dataframe for merging later) for full training data
    y_trn_avg = train_data_pos.groupby(group_cols, as_index=False)[target_col].mean()
    y_preds_avg = y_trn_avg[group_cols + [target_col]].copy()
    y_preds_avg = y_preds_avg.round({target_col: 0})
    y_preds_avg.rename(columns={target_col: pred_col}, inplace=True)
    fn = os.path.join(save_dir, 'stage2_model_avg_baseline.pkl')
    joblib.dump(y_preds_avg, fn)

### stage 2 non-baseline models

In [None]:
def fit_stage2_model_on_full(model_key):
    """Validate and save the stage-2 model ``model_key`` trained on all clusters at once.

    Nothing happens unless ``file_args['model_full_{model_key}']`` is truthy.
    Works on ``train_data_pos`` with the ``ps_pos`` splitter. For each validation
    fold the estimator from ``model_dict`` is fit on the fold's training rows
    (without the 'cluster' column) and scored with ``f1_score`` separately for every
    cluster of ``train_data_pos``. On the first fold only, the estimator is also
    refit on the fold's train+validation rows and dumped to ``save_dir``. The
    per-cluster mean of the fold scores is stored under
    ``val_results_dict['stage_2']['model_full_{model_key}']``.

    Args:
        model_key: key into ``model_dict``.

    Returns:
        None.
    """
    if not file_args['model_full_{}'.format(model_key)]:
        return

    print('training {} on full data...'.format(model_key))
    estimator = model_dict[model_key]

    # everything except date, time and the 'level*' columns is a feature;
    # 'cluster' is kept for now because it is needed to score per cluster
    non_features = ['date', 'time'] + [c for c in train_data_pos.columns if c.startswith('level')]
    features = train_data_pos.drop(labels=non_features, axis=1)
    target_values = train_data_pos.loc[:, file_args['target_second_stage']].values.ravel()

    # cluster -> array of per-fold f1 scores
    clusters = train_data_pos['cluster'].unique()
    fold_scores = {k: np.array([]) for k in clusters}

    if file_args['train_test_method'] == 'date':
        splits = ps_pos.split()
    else:
        splits = ps_pos.split(features, target_values)

    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        val_clusters = features.iloc[val_idx, :]['cluster']
        X_t = features.iloc[trn_idx, :].drop(labels='cluster', axis=1)
        X_v = features.iloc[val_idx, :].drop(labels='cluster', axis=1)
        Y_t = target_values[trn_idx]
        Y_v = target_values[val_idx]

        estimator.fit(X_t, Y_t)
        val_preds = estimator.predict(X_v)

        # score this fold separately for every cluster
        for clust in clusters:
            in_cluster = (val_clusters == clust).values
            score = f1_score(Y_v[in_cluster], val_preds[in_cluster], average=f1_avg)
            fold_scores[clust] = np.append(fold_scores[clust], score)

        # first fold only: refit on train+val rows and persist that model
        if idx == 0:
            estimator.fit(pd.concat([X_t, X_v]), np.append(Y_t, Y_v))
            fn = os.path.join(save_dir, 'stage2_model_full_{}.pkl'.format(model_key))
            joblib.dump(estimator, fn)

    # average the fold scores per cluster and record them
    for clust in fold_scores:
        fold_scores[clust] = fold_scores[clust].mean()
    val_results_dict['stage_2']['model_full_{}'.format(model_key)] = fold_scores

In [None]:
def fit_stage2_model_on_clusters(model_key):
    """Validate and save one stage-2 model per cluster for ``model_key``.

    Nothing happens unless ``file_args['model_clusters_{model_key}']`` is truthy.
    Works on ``train_data_pos`` with the ``ps_pos`` splitter. For every validation
    fold and every cluster, the estimator from ``model_dict`` is fit on that cluster's
    training rows and scored with ``f1_score`` on the cluster's validation rows. When
    the corresponding flags are set, the cluster's train / validation rows are first
    passed through ``util.unskew_data``. On the first fold only, the estimator is also
    refit on the cluster's train+validation rows (after any unskewing) and dumped to
    ``save_dir``. The per-cluster mean of the fold scores is stored under
    ``val_results_dict['stage_2']['model_clusters_{model_key}']``.

    Args:
        model_key: key into ``model_dict``.

    Returns:
        None.
    """
    if not file_args['model_clusters_{}'.format(model_key)]:
        return

    print('training {} on clustered data...'.format(model_key))
    model = model_dict[model_key]
    target_col = file_args['target_second_stage']
    clusters = train_data_pos['cluster'].unique()

    # columns that are not model features; loop-invariant, so computed once
    level_cols = [col for col in train_data_pos.columns if col.startswith('level')]
    drop_cols = ['date', 'time', 'cluster'] + level_cols

    # create dict to store local results (cluster -> array of per-fold f1 scores)
    model_val_results = {k: np.array([]) for k in clusters}

    # split data into trn and val and calculate prediction scores
    if file_args['train_test_method'] == 'date':
        splits = ps_pos.split()
    else:
        splits = ps_pos.split(train_data_pos, train_data_pos[target_col])

    for idx, (trn_idx, val_idx) in enumerate(splits):
        print('validation fold {}...'.format(idx))
        trn_data = train_data_pos.iloc[trn_idx, :]
        val_data = train_data_pos.iloc[val_idx, :]

        for clust in clusters:
            # subset data to cluster
            train_clust = trn_data[trn_data['cluster'] == clust]
            val_clust = val_data[val_data['cluster'] == clust]

            # calculate negative to positive ratio for each cluster
            trn_clust_ratio = util.get_neg_pos_ratio(train_clust)
            val_clust_ratio = util.get_neg_pos_ratio(val_clust)

            # unskew individual clusters; compare the ratios computed just above
            # (the conditions used to reference the undefined names trn_ratio and
            # val_ratio, which raised NameError as soon as an unskew flag was enabled)
            if file_args['unskew_train_clusters'] and trn_clust_ratio > file_args['unskew_ratio']:
                print('unskewing train data to negative positive ratio of {}...'.format(file_args['unskew_ratio']))
                train_clust = util.unskew_data(train_clust, file_args['unskew_ratio'])
            if file_args['unskew_test'] and val_clust_ratio > file_args['unskew_ratio']:
                print('unskewing val data to negative positive ratio of {}...'.format(file_args['unskew_ratio']))
                val_clust = util.unskew_data(val_clust, file_args['unskew_ratio'])

            # split features and targets
            X_trn = train_clust.drop(labels=drop_cols, axis=1)
            Y_trn = train_clust.loc[:, target_col].values.ravel()
            X_val = val_clust.drop(labels=drop_cols, axis=1)
            Y_val = val_clust.loc[:, target_col].values.ravel()

            # fit model
            model.fit(X_trn, Y_trn)

            # make predictions
            val_preds = model.predict(X_val)

            # calculate f1 score for cluster and append it to the cluster's fold scores
            val_f1_clust = f1_score(Y_val, val_preds, average=f1_avg)
            model_val_results[clust] = np.append(model_val_results[clust], val_f1_clust)

            # first fold only: fit model on train+val data of this cluster and save it
            if idx == 0:
                X = pd.concat([X_trn, X_val])
                Y = np.append(Y_trn, Y_val)
                model.fit(X, Y)
                fn = os.path.join(save_dir, 'stage2_model_clusters_{}_cluster_{}.pkl'.format(model_key, clust))
                joblib.dump(model, fn)

    # calculate avg f1 scores from multiple validation sets
    for key in model_val_results:
        model_val_results[key] = model_val_results[key].mean()

    # add model val results to dict
    val_results_dict['stage_2']['model_clusters_{}'.format(model_key)] = model_val_results

In [None]:
# run both stage-2 variants (all clusters together, then one model per cluster)
# for every candidate estimator; each function checks its own file_args switch
for model_key in model_dict:
    fit_stage2_model_on_full(model_key)
    fit_stage2_model_on_clusters(model_key)

In [None]:
# record how many positive training rows fall into each cluster
clust_count_dict = {}
cluster_labels = train_data_pos['cluster']
for clust in cluster_labels.unique():
    clust_count_dict[clust] = len(train_data_pos[cluster_labels == clust])

val_results_dict['stage_2']['cluster_counts'] = clust_count_dict

In [None]:
# report the seconds elapsed since second_modeling_stage_start
print('--- second modeling stage took {0:.1f} seconds ---'.format(time.time() - second_modeling_stage_start))

# 3. evaluate results

In [None]:
# start timer for the evaluation section (reported by the 'evaluating results took' print)
eval_results_start = time.time()

In [None]:
# the return value is discarded, so the helper presumably adds the best-model
# entries to val_results_dict in place -- TODO confirm in Utility
util.add_best_models(val_results_dict)

In [None]:
# save val_results_dict
# written with joblib into save_dir, next to the model files dumped earlier
fn = os.path.join(save_dir, 'val_results_dict.pkl')
joblib.dump(val_results_dict, fn)

In [None]:
# plot the stage-1 validation results with the Utility plotting helper
util.metrics_plot_model(val_results_dict, stage='stage_1', score_metric=file_args['scoring_metric'], 
                        sort=True, title_prefix='validation')

In [None]:
# plot the stage-2 validation results with the Utility plotting helper
util.metrics_plot_model(val_results_dict, stage='stage_2', score_metric=file_args['scoring_metric'], 
                        sort=True, title_prefix='validation')

In [None]:
# report the seconds elapsed since eval_results_start
print('--- evaluating results took {0:.1f} seconds ---'.format(time.time() - eval_results_start))

In [None]:
# report the seconds elapsed since start_time, i.e. for the whole run
print('--- entire pipeline took {0:.1f} seconds ---'.format(time.time() - start_time))