In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from bokeh.io import show, output_notebook
# Configure Bokeh to render inline in the notebook; must run before any show() call below.
output_notebook()

from scripts_viz.visualization_utils import *
from scripts_viz.visualization_perfgrid import *
from scripts_viz.visualization_utils import TTQcolor
from scripts_mlflow.mlflow_utils import *

from bokeh.layouts import gridplot, row, column

import mlflow


In [2]:
# Allow up to 101 rows before pandas truncates a displayed frame.
pd.options.display.max_rows = 101

In [3]:
# Collect the names of all experiments known to the MLflow tracking server.
# MlflowClient.list_experiments() was removed in MLflow 2.0 in favour of
# search_experiments(); prefer the new call when it exists and fall back to the
# old one so the cell runs on either side of that change.
mlflow_client = mlflow.tracking.MlflowClient()
fetch_experiments = getattr(mlflow_client, "search_experiments", None) or mlflow_client.list_experiments
expnames = {exp.name for exp in fetch_experiments()}
expnames

{'MLP_enriched_time_seq24000_6000_imp',
 'MLP_enriched_time_seq24000_6000_p180',
 'MLP_enriched_time_seq24000_6000_p90',
 'benchmarks_shuffle_imp',
 'benchmarks_shuffle_p180',
 'benchmarks_shuffle_p90',
 'benchmarks_time_imp',
 'benchmarks_time_old_imp',
 'benchmarks_time_old_p180',
 'benchmarks_time_old_p90',
 'benchmarks_time_p180',
 'benchmarks_time_p90',
 'enriched_shuffle_imp',
 'enriched_time_seq26000_2000_imp'}

In [4]:
# MLflow experiment to visualise; this name appears in the `expnames` listing printed above.
experiment = 'enriched_shuffle_imp'

In [5]:
# Build the visualisation data for the selected experiment. `create_exp_df` is not defined in this
# notebook — presumably it comes from one of the wildcard imports in the first cell (TODO confirm).
# The result is later passed to performance_grid and stored under 'viz_dict' in the spreadsheet settings.
viz = create_exp_df(experiment)

In [10]:
# Two-colour palette, later stored under 'colors' in the spreadsheet settings.
colors = [TTQcolor[shade] for shade in ('richOrange', 'azureBlue')]

#--------------------------------------------------

# ---- SGD classifier ------------------------------
# Columns used as the spreadsheet `metric_list` for SGD runs.
sgd_cols = [
    'eta0', 'shuffle', 'fit_intercept', 'n_iter_no_change',
    'penalty', 'max_iter', 'alpha', 'early_stopping',
    'learning_rate', 'loss',
    'val_auc', 'test_auc',
]

# Spider-plot columns flagged for normalisation (passed on as `spiders_params_to_norm`).
sgd_spider_cols_to_norm = ['eta0', 'max_iter', 'alpha']

# Full set of spider-plot columns: the normalised ones followed by the two AUC columns.
sgd_spider_cols = sgd_spider_cols_to_norm + ['test_auc', 'val_auc']

# ---- Random forest -------------------------------
# Columns used as the spreadsheet `metric_list` for random-forest runs.
rf_cols = [
    'n_estimators', 'max_depth', 'max_features', 'max_leaf_nodes',
    'min_samples_leaf', 'min_samples_split', 'bootstrap', 'criterion',
    'val_auc', 'test_auc',
]

# Spider-plot columns flagged for normalisation (passed on as `spiders_params_to_norm`).
rf_spider_cols_to_norm = [
    'n_estimators', 'max_leaf_nodes', 'min_samples_leaf', 'min_samples_split',
]

# Full set of spider-plot columns: the normalised ones followed by the two AUC columns.
rf_spider_cols = rf_spider_cols_to_norm + ['test_auc', 'val_auc']

# ---- Neural network ------------------------------
# Column list for ANN runs; not referenced by any other cell visible in this notebook.
ann_cols = [
    'batch_size', 'optimizer', 'epochs_actual', 'class_1_weight',
    'early_stopping', 'early_stopping_metric',
    'hidden_layers_no', 'hidden_nodes', 'hl_out_activations',
    'loss_func', 'dropout', 'tr_accuracy',
    'val_auc', 'test_auc',
]

In [12]:
# Classifier family to visualise. Supported prefixes: 'rf_' (RandomForestClassifier)
# and 'sgd_' (SGDClassifier).
clf = 'sgd_'

# Pick the model filter, column lists and header label that belong to the chosen classifier.
if clf == 'rf_':
    model_filter = ['RandomForestClassifier']
    metric_cols = rf_cols
    spider_cols = rf_spider_cols
    spider_cols_to_norm = rf_spider_cols_to_norm
    index_header = 'RF'
elif clf == 'sgd_':
    model_filter = ['SGDClassifier']
    metric_cols = sgd_cols
    spider_cols = sgd_spider_cols
    spider_cols_to_norm = sgd_spider_cols_to_norm
    # Previously 'RF' here (copy-paste slip); the SGD spreadsheet settings already used 'SGD'.
    index_header = 'SGD'
else:
    # Fail loudly instead of silently reusing variables left over from an earlier run of this cell.
    raise ValueError("Unsupported clf {!r}; expected 'rf_' or 'sgd_'".format(clf))

# Spreadsheet settings are built once from the selection above so the two branches cannot drift apart.
spr_settings = {
    'viz_dict': viz,
    'metric_list': metric_cols,
    'model_type': list(model_filter),  # separate list object, as in the original literal
    'color_cells': True,
    'colors': colors,
    'index_header': index_header,
    'width': 1200,
    'height': 120,
    'index_width': 40,
    'row_height': 25,
}

In [16]:
# Six-colour palette for the grid; deliberately a separate name so the two-colour
# `colors` list used by the spreadsheet settings is not overwritten.
grid_palette = [
    TTQcolor[shade]
    for shade in ('azureBlue', 'richOrange', 'algae', 'yell', 'redBrown', 'bloodRed')
]

# Keyword arguments for performance_grid, kept in the order they are passed.
grid_options = {
    # which models to include
    'model_filter': model_filter,

    # legend / FPR / ROC options
    'legend_font_size': '9pt',
    'fpr_font_size': '9pt',
    'bestFprOnly': True,
    'rocs_p_width': 600,
    'rocs_p_height': 600,
    'rocs_line_width': 2,

    # spider-plot options
    'single_spider': False,
    'add_spider': True,
    'spider_in_row': 3,
    'spiders_params': spider_cols,
    'spiders_params_to_norm': spider_cols_to_norm,
    'spider_p_width': 600,
    'spider_p_height': 600,
    'spider_text_size': '6pt',
    'spider_line_width': 4.5,
    'spider_fill_alpha': 0.1,
    'spider_margin_distance': 0.25,
    'add_corr_scatter': False,
    'normalize_spider': True,

    # per-fold plot options
    'single_row_folds': True,
    'folds_p_width': 1200,
    'folds_p_height': 600,
    'folds_xlabelorientation': 1.55,  # NOTE(review): looks like ~pi/2 radians (vertical labels) — confirm
    'folds_group_text_font_size': '3pt',
    'folds_in_row': 3,

    # spreadsheet and feature-importance options
    'spreadsheet_settings': [spr_settings],
    'plot_feat_importance': True,
    'normalize_importance': False,
    'fimp_text_group_size': '10pt',

    'colors': grid_palette,
}

grid = performance_grid(viz, **grid_options)



In [17]:
# Render the assembled grid; output goes inline because output_notebook() was called in the first cell.
show(grid)

In [13]:
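# Optional: save the grid as a PNG (Bokeh's PNG export needs extra dependencies such as selenium and a webdriver).
#from bokeh.io import export_png
#export_png(grid, filename=experiment + '.png')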