In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from bokeh.io import show, output_notebook
output_notebook()

from scripts_viz.visualization_utils import *
from scripts_viz.visualization_perfgrid import *
from scripts_viz.visualization_utils import TTQcolor
from scripts_mlflow.mlflow_utils import *

from bokeh.layouts import gridplot, row, column

import mlflow


In [2]:
# Let pandas render up to 101 rows before it truncates displayed output.
pd.options.display.max_rows = 101

In [3]:
# Collect the names of every experiment returned by the MLflow tracking client.
# `MlflowClient.list_experiments` was removed in MLflow 2.0 in favour of
# `search_experiments`, so prefer the newer call and only fall back to the
# legacy one on installations that do not provide it yet.
_mlflow_client = mlflow.tracking.MlflowClient()
_fetch_experiments = (
    getattr(_mlflow_client, 'search_experiments', None)
    or _mlflow_client.list_experiments
)
expnames = {exp.name for exp in _fetch_experiments()}
expnames

{'benchmarks_shuffle_imp',
 'benchmarks_shuffle_opt_imp',
 'benchmarks_time_opt_imp',
 'enriched_shuffle_imp',
 'enriched_shuffle_p180',
 'enriched_shuffle_p90',
 'enriched_time_imp',
 'enriched_time_p180',
 'enriched_time_p90',
 'enriched_time_seq24000_6000_imp',
 'enriched_time_seq24000_6000_p180',
 'enriched_time_seq24000_6000_p90'}

In [4]:
# Name of the MLflow experiment that the remaining cells load and visualise.
experiment = "benchmarks_shuffle_opt_imp"

In [5]:
# Load the selected experiment through the project helper `create_exp_df`.
# NOTE(review): `viz.index` (shown in a later cell) lists parameter/metric names,
# so runs are presumably stored as columns — confirm against create_exp_df.
viz = create_exp_df(experiment)

In [18]:
# Row labels shown in the hyper-parameter spreadsheet for SGDClassifier runs.
sgd_cols = [
    'eta0',
    'shuffle',
    'fit_intercept',
    'n_iter_no_change',
    'penalty',
    'max_iter',
    'alpha',
    'early_stopping',
    'learning_rate',
    'loss',
]

# Row labels for random-forest runs; unlike `sgd_cols`, this list also carries
# the two AUC metrics.
rf_cols = [
    'n_estimators',
    'max_depth',
    'max_features',
    'max_leaf_nodes',
    'min_samples_leaf',
    'min_samples_split',
    'bootstrap',
    'criterion',
    'val_auc',
    'test_auc',
]

In [19]:
# Display the row labels of `viz` to see which fields are available
# (e.g. for choosing spreadsheet columns such as `sgd_cols` / `rf_cols`).
viz.index

Index(['experiment_type', 'roc_val_fpr', 'eta0', 'shuffle', 'train_file_path',
       'fit_intercept', 'n_iter_no_change', 'criterion', 'verbose',
       'roc_test_fpr', 'test_file_path', 'penalty', 'model_filename',
       'average', 'train_file_name', 'max_iter', 'test_size', 'alpha',
       'n_estimators', 'model_filepath', 'early_stopping', 'power_t',
       'class_weight', 'min_impurity_split', 'roc_test_tpr', 'train_size',
       'roc_val_tpr', 'test_file_name', 'min_samples_split', 'learning_rate',
       'max_features', 'max_depth', 'min_weight_fraction_leaf',
       'min_impurity_decrease', 'min_samples_leaf', 'random_state',
       'oob_score', 'l1_ratio', 'model_type', 'n_jobs', 'epsilon', 'bootstrap',
       'warm_start', 'validation_fraction', 'loss', 'max_leaf_nodes', 'tol',
       'f_d_we_payment_share', 'f_c_impaired1_c', 'test_tpr',
       'f_c_pastdue90_c', 'test_tn', 'f_c_lent_c', 'test_fpr', 'f_c_trend_a',
       'f_currency_Britisches Pfund', 'test_tp', 'f_d_repaid

In [20]:
# Palette for the plots: look up each named entry of the project colour map,
# keeping the order fixed.
colors = [
    TTQcolor[color_name]
    for color_name in ('azureBlue', 'richOrange', 'algae', 'yell', 'redBrown', 'bloodRed')
]

In [21]:
# Quick look at the SGDClassifier runs, leaving every option not listed here
# at the default chosen by performance_grid.
grid = performance_grid(
    viz,
    model_filter=['SGDClassifier'],
    single_row_folds=False,
    folds_p_width=600,
    single_spider=True,
    spider_p_width=600,
    spider_p_height=600,
)

['SGDClassifier_190819_152447', 'SGDClassifier_190819_152436', 'SGDClassifier_190819_152458', 'SGDClassifier_190819_152451', 'SGDClassifier_190819_152440']


In [24]:
# Fully configured performance grid for the SGDClassifier runs.
# Keyword groups follow the prefixes used by performance_grid
# (rocs_*, spider_*, folds_*, spr_*, fimp_*); their exact effect is defined in
# scripts_viz.visualization_perfgrid.
grid = performance_grid(
    viz,
    model_filter=['SGDClassifier'],
    legend_font_size='9pt',
    fpr_font_size='9pt',
    bestFprOnly=True,
    rocs_p_width=600,
    rocs_p_height=600,
    rocs_line_width=2,
    single_spider=True,
    spider_in_row=2,
    spiders_params=['tp_rate', 'tn_rate', 'fp_rate', 'fn_rate', 'test_auc', 'val_auc'],
    spider_p_width=600,
    spider_p_height=600,
    spider_text_size='12pt',
    spider_line_width=4.5,
    spider_fill_alpha=0.1,
    spider_margin_distance=0.25,
    single_row_folds=False,
    folds_p_width=600,
    folds_p_height=600,
    folds_xlabelorientation=1.55,
    folds_group_text_font_size='6pt',
    folds_in_row=2,
    spreadsheet_cols=sgd_cols,
    # The spreadsheet lists SGD runs and SGD hyper-parameters, so label it
    # accordingly (it previously read 'RF', a leftover from a random-forest cell).
    spr_index_header='SGD',
    spr_height=160,
    spr_width=1200,
    spr_index_width=25,
    spr_row_height=25,
    plot_feat_importance=True,
    normalize_importance=False,
    fimp_text_group_size='10pt',
    # Reuse the palette defined once in the `colors` cell instead of repeating
    # the same six TTQcolor look-ups inline.
    colors=colors,
)

['SGDClassifier_190819_152447', 'SGDClassifier_190819_152436', 'SGDClassifier_190819_152458', 'SGDClassifier_190819_152451', 'SGDClassifier_190819_152440']


In [25]:
# Render the most recently built `grid`; output goes to the notebook because
# output_notebook() was called in the first cell.
show(grid)

In [13]:
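# Optional: export the rendered grid to a PNG named after the experiment.
# NOTE(review): the original call passed `l`, which is not defined anywhere in
# this notebook; `grid` is presumably the intended object — confirm before enabling.
#from bokeh.io import export_png
#export_png(grid, filename=experiment+'.png')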