In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from bokeh.io import show, output_notebook
output_notebook()

from scripts_viz.visualization_utils import *
from scripts_viz.visualization_perfgrid import *
from scripts_viz.visualization_utils import TTQcolor
from scripts_mlflow.mlflow_utils import *

from bokeh.layouts import gridplot, row, column

import mlflow


In [2]:
# Let pandas print up to 101 rows before truncating a displayed frame.
pd.options.display.max_rows = 101

In [3]:
# Collect the names of the experiments known to the MLflow tracking server.
# `MlflowClient.list_experiments` was removed in MLflow 2.0, so prefer
# `search_experiments` when the installed client offers it and fall back to the
# legacy call on older installations.
_client = mlflow.tracking.MlflowClient()
_list_experiments = getattr(_client, 'search_experiments', None) or _client.list_experiments
expnames = {exp.name for exp in _list_experiments()}
expnames

{'MLP_enriched_time_seq24000_6000_imp',
 'MLP_enriched_time_seq24000_6000_p180',
 'MLP_enriched_time_seq24000_6000_p90',
 'benchmarks_shuffle_imp',
 'benchmarks_shuffle_opt_imp',
 'benchmarks_shuffle_opt_p180',
 'benchmarks_shuffle_opt_p90',
 'benchmarks_shuffle_p180',
 'benchmarks_shuffle_p90',
 'benchmarks_time_imp',
 'benchmarks_time_opt_imp',
 'benchmarks_time_opt_p180',
 'benchmarks_time_opt_p90',
 'benchmarks_time_p180',
 'benchmarks_time_p90',
 'enriched_shuffle_imp',
 'enriched_shuffle_p180',
 'enriched_shuffle_p90',
 'enriched_time_imp',
 'enriched_time_p180',
 'enriched_time_p90',
 'enriched_time_seq24000_6000_imp',
 'enriched_time_seq24000_6000_p180',
 'enriched_time_seq24000_6000_p90'}

In [4]:
# Name of the MLflow experiment to visualise; must be one of `expnames`.
experiment = "benchmarks_time_p180"

In [5]:
# Build the visualisation input for the selected experiment. `create_exp_df`
# comes from one of the star-imported project modules (presumably
# scripts_mlflow.mlflow_utils — TODO confirm); the result is passed on to
# `performance_grid` and to the spreadsheet settings as `viz_dict`.
viz = create_exp_df(experiment)

In [6]:
# Columns shown per model family. Every list ends with the same two AUC
# metrics, so those are kept in one place and appended.
_auc_cols = ['val_auc', 'test_auc']

# SGDClassifier columns.
sgd_cols = [
    'eta0', 'shuffle', 'fit_intercept', 'n_iter_no_change',
    'penalty', 'max_iter', 'alpha', 'early_stopping',
    'learning_rate', 'loss',
] + _auc_cols

# RandomForestClassifier columns.
rf_cols = [
    'n_estimators', 'max_depth', 'max_features', 'max_leaf_nodes',
    'min_samples_leaf', 'min_samples_split', 'bootstrap', 'criterion',
] + _auc_cols

# Neural-network columns.
ann_cols = [
    'batch_size', 'optimizer', 'epochs_actual', 'class_1_weight',
    'early_stopping', 'early_stopping_metric', 'hidden_layers_no',
    'hidden_nodes', 'hl_out_activations', 'loss_func', 'dropout',
    'tr_accuracy',
] + _auc_cols

In [7]:
# Colour values looked up from the project's TTQcolor mapping, in this fixed order.
_color_keys = ('richOrange', 'azureBlue', 'algae', 'yell', 'redBrown', 'bloodRed')
colors = [TTQcolor[key] for key in _color_keys]

In [8]:
#grid = performance_grid(viz, model_filter=None, single_row_folds=False, folds_p_width=600, single_spider=True, spider_p_width=600, spider_p_height=600,
#                       spreadsheet_cols = [])

In [9]:
#spiders_params = ['test_auc', 'val_auc', 'max_depth', 'max_features', 'n_estimators']
#viz.loc[spiders_params]

In [10]:
#viz.loc[spiders_params].apply(lambda x:(x-viz.loc[spiders_params].min(axis=1))/(viz.loc[spiders_params].max(axis=1)-viz.loc[spiders_params].min(axis=1)))

In [11]:
def _spreadsheet_settings(metric_list, model_type, color, index_header):
    """Build one settings dict for the `spreadsheet_settings` list of `performance_grid`.

    Only the metric columns, the model filter, the colour and the index header
    differ between the spreadsheets in this notebook; the layout values
    (width, height, index width, row height) are shared.

    Args:
        metric_list: column names to include (e.g. `rf_cols`).
        model_type: model class name used as the single-entry model filter.
        color: single colour value, wrapped into the `colors` list.
        index_header: text stored under the `index_header` key.

    Returns:
        dict with the same keys, in the same order, as the hand-written
        settings it replaces.
    """
    return {
        'viz_dict': viz,
        'metric_list': metric_list,
        'model_type': [model_type],
        'color_cells': True,
        'colors': [color],
        'index_header': index_header,
        'width': 1200,
        'height': 60,
        'index_width': 150,
        'row_height': 25,
    }


spr_settings_rf = _spreadsheet_settings(
    rf_cols, 'RandomForestClassifier', TTQcolor['azureBlue'], 'RF')

spr_settings_sgd = _spreadsheet_settings(
    sgd_cols, 'SGDClassifier', TTQcolor['richOrange'], 'SGD')

In [12]:
#from bokeh.io import output_file
#output_file(experiment+'.html')

In [15]:
# Assemble the full performance dashboard for the selected experiment.
# Keyword values are unchanged; the palette now reuses the `colors` list
# defined in cell In [7] instead of repeating the same six TTQcolor lookups.
grid = performance_grid(
    viz,
    model_filter=None,
    legend_font_size='12pt',
    fpr_font_size='9pt',
    bestFprOnly=True,
    # ROC plot sizing
    rocs_p_width=600,
    rocs_p_height=600,
    rocs_line_width=2,
    # spider plot options
    single_spider=True,
    add_spider=True,
    spider_in_row=2,
    spiders_params=['tp_rate', 'tn_rate', 'fp_rate', 'fn_rate', 'test_auc', 'val_auc'],
    spider_p_width=600,
    spider_p_height=600,
    spider_text_size='12pt',
    spider_line_width=4.5,
    spider_fill_alpha=0.1,
    spider_margin_distance=0.25,
    normalize_spider=False,
    # per-fold plot options
    single_row_folds=True,
    folds_p_width=600,
    folds_p_height=600,
    folds_xlabelorientation=1.55,
    folds_group_text_font_size='6pt',
    folds_in_row=2,
    # one spreadsheet per settings dict
    spreadsheet_settings=[spr_settings_rf, spr_settings_sgd],
    # feature-importance options
    plot_feat_importance=True,
    normalize_importance=True,
    fimp_text_group_size='10pt',
    colors=colors,
)

In [16]:
# Render the assembled grid with bokeh's `show`; `output_notebook()` was
# called in the import cell, so the output is directed to the notebook.
show(grid)

In [13]:
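# Optional static export of the grid (kept disabled). The layout object in
# this notebook is `grid`, not `l`.
#from bokeh.io import export_png
#export_png(grid, filename=experiment+'.png')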