In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from bokeh.io import show, output_notebook
output_notebook()

from scripts_viz.visualization_utils import *
from scripts_viz.visualization_perfgrid import *
from scripts_viz.visualization_utils import TTQcolor
from scripts_mlflow.mlflow_utils import *

from bokeh.layouts import gridplot, row, column

import mlflow


In [2]:
# Let pandas render up to 101 rows before truncating a displayed frame.
pd.options.display.max_rows = 101

In [3]:
# Collect the names of every experiment known to the MLflow tracking backend.
# `MlflowClient.list_experiments` was removed in MLflow 2.0 in favour of
# `search_experiments`, so use the newer call when the client offers it and
# fall back to the legacy one on older installations.
# NOTE(review): `search_experiments` is paginated; with its default page size
# this presumably returns every experiment for a small registry -- confirm if
# the number of experiments grows large.
_client = mlflow.tracking.MlflowClient()
_fetch_experiments = getattr(_client, "search_experiments", None) or _client.list_experiments
expnames = {exp.name for exp in _fetch_experiments()}
expnames

{'MLP_enriched_time_seq24000_6000_imp',
 'MLP_enriched_time_seq24000_6000_p180',
 'MLP_enriched_time_seq24000_6000_p90',
 'benchmarks_shuffle_imp',
 'benchmarks_shuffle_opt_imp',
 'benchmarks_shuffle_opt_p180',
 'benchmarks_shuffle_opt_p90',
 'benchmarks_shuffle_p180',
 'benchmarks_shuffle_p90',
 'benchmarks_time_imp',
 'benchmarks_time_opt_imp',
 'benchmarks_time_opt_p180',
 'benchmarks_time_opt_p90',
 'benchmarks_time_p180',
 'benchmarks_time_p90',
 'enriched_shuffle_imp',
 'enriched_shuffle_p180',
 'enriched_shuffle_p90',
 'enriched_time_imp',
 'enriched_time_p180',
 'enriched_time_p90',
 'enriched_time_seq24000_6000_imp',
 'enriched_time_seq24000_6000_p180',
 'enriched_time_seq24000_6000_p90'}

In [4]:
# Name of the MLflow experiment inspected by the rest of this notebook.
experiment = "MLP_enriched_time_seq24000_6000_imp"

In [5]:
# Load the runs of the selected experiment through the project helper
# `create_exp_df` (brought in by one of the star imports at the top).
# Judging by how `viz` is used below, the result is a table with one column per
# model file and one row per logged run attribute (params, metrics, metadata).
viz = create_exp_df(experiment)

In [12]:
# Keep only the two model runs that should be compared; the result is an
# independent copy, so later edits cannot touch the frame it was taken from.
_selected_models = [
    'sequential_116_190819_040.h5',
    'sequential_36_190818_1913.h5',
]
viz = viz[_selected_models].copy()

In [13]:
# Attribute names to list in the summary spreadsheet, one set per model family.
# Every set ends with the same two AUC scores, so they are spelled once here.
_auc_cols = ['val_auc', 'test_auc']

# SGD-style linear models.
sgd_cols = [
    'eta0',
    'shuffle',
    'fit_intercept',
    'n_iter_no_change',
    'penalty',
    'max_iter',
    'alpha',
    'early_stopping',
    'learning_rate',
    'loss',
] + _auc_cols

# Random forests.
rf_cols = [
    'n_estimators',
    'max_depth',
    'max_features',
    'max_leaf_nodes',
    'min_samples_leaf',
    'min_samples_split',
    'bootstrap',
    'criterion',
] + _auc_cols

# Neural networks (MLP).
ann_cols = [
    'batch_size',
    'optimizer',
    'epochs_actual',
    'class_1_weight',
    'early_stopping',
    'early_stopping_metric',
    'hidden_layers_no',
    'hidden_nodes',
    'hl_out_activations',
    'loss_func',
    'dropout',
    'tr_accuracy',
] + _auc_cols

In [14]:
# Show every attribute (row label) available for the selected runs, alphabetically.
sorted(viz.index)

['artifact_uri',
 'batch_and_steps',
 'batch_size',
 'bias_init',
 'class_1_weight',
 'dropout',
 'early_stopping',
 'early_stopping_metric',
 'end_time',
 'epochs_actual',
 'epochs_settings',
 'experiment_id',
 'experiment_type',
 'fn_rate',
 'fp_rate',
 'hidden_layers_no',
 'hidden_nodes',
 'hl_out_activations',
 'indexes_file_path',
 'kernel_init',
 'kernel_regularizers',
 'lifecycle_stage',
 'loss_func',
 'model_filename',
 'model_filepath',
 'model_type',
 'optimizer',
 'optimizer_settings',
 'pred_threshold',
 'roc_test_fpr',
 'roc_test_tpr',
 'roc_val_fpr',
 'roc_val_tpr',
 'run_id',
 'run_uuid',
 'start_time',
 'status',
 'test_auc',
 'test_file_path',
 'test_fn',
 'test_fnr',
 'test_fp',
 'test_fpr',
 'test_size',
 'test_tn',
 'test_tnr',
 'test_tp',
 'test_tpr',
 'tn_rate',
 'tp_rate',
 'tr_accuracy',
 'tr_auc_10',
 'tr_auc_11',
 'tr_auc_12',
 'tr_auc_13',
 'tr_auc_14',
 'tr_auc_15',
 'tr_auc_2',
 'tr_auc_22',
 'tr_auc_28',
 'tr_auc_3',
 'tr_auc_4',
 'tr_auc_6',
 'tr_auc_7',


In [15]:
# Palette of six entries looked up by name in the project's TTQcolor mapping.
_palette_names = ('azureBlue', 'richOrange', 'algae', 'yell', 'redBrown', 'bloodRed')
colors = [TTQcolor[name] for name in _palette_names]

In [16]:
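# Disabled alternative: a shorter performance_grid call kept for reference only.
# The fully parameterised call in the following cell is the one that is executed.
#grid = performance_grid(viz, model_filter=None, single_row_folds=False, folds_p_width=600, single_spider=True, spider_p_width=600, spider_p_height=600,
#                       spreadsheet_cols = [])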

In [17]:
# Build the performance report for the selected runs with the project helper
# `performance_grid`. Arguments are grouped by their name prefix for readability.
# The palette is taken from `colors` (defined in an earlier cell) instead of
# being spelled out a second time here, so the two cannot drift apart.
grid = performance_grid(
    viz,
    model_filter=None,
    # legend / FPR / rocs_* options
    legend_font_size='9pt',
    fpr_font_size='9pt',
    bestFprOnly=True,
    rocs_p_width=800,
    rocs_p_height=800,
    rocs_line_width=2,
    # spider_* options
    single_spider=True,
    spider_in_row=2,
    spiders_params=['test_auc', 'val_auc', 'tr_accuracy', 'epochs_actual'],
    spider_p_width=800,
    spider_p_height=800,
    spider_text_size='12pt',
    spider_line_width=4.5,
    spider_fill_alpha=0.1,
    spider_margin_distance=0.25,
    normalize_spider=True,
    # folds_* options
    single_row_folds=True,
    folds_p_width=800,
    folds_p_height=800,
    folds_xlabelorientation=1.55,
    folds_group_text_font_size='3pt',
    folds_in_row=2,
    # spreadsheet (spr_*) options; ann_cols lists the attributes shown for the MLP runs
    spreadsheet_cols=ann_cols,
    spr_index_header='MLP',
    spr_height=200,
    spr_width=1600,
    spr_index_width=25,
    spr_row_height=25,
    # feature-importance options (the plot itself is switched off)
    plot_feat_importance=False,
    normalize_importance=True,
    fimp_text_group_size='10pt',
    colors=colors,
)

In [18]:
# Render the assembled grid inline; output_notebook() was called in the import cell.
show(grid)

In [13]:
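# Optional static PNG export of the report (disabled).
# NOTE(review): `l` is not defined in any visible cell; `grid` is presumably the
# intended object -- confirm before re-enabling.
#from bokeh.io import export_png
#export_png(l, experiment+'.png')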