# mlbox automl to test out models

In [1]:
import pandas as pd
import numpy as np
import os
import os.path
import sys
import tempfile
import shutil

import mlflow
from sklearn.model_selection import train_test_split


In [2]:
sys.path.append('..')
from utils.kaggle import get_global_parameters
from utils.mlflow_experiments import retrieve_artifacts, extract_run_data_for_experiment
global_parms = get_global_parameters()

In [3]:
global_parms

{'PROJ_DIR': '/opt/project'}

In [4]:
import h2o
from h2o.automl import H2OAutoML

h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "1.8.0_222"; OpenJDK Runtime Environment (build 1.8.0_222-8u222-b10-1~deb9u1-b10); OpenJDK 64-Bit Server VM (build 25.222-b10, mixed mode)
  Starting server from /opt/conda/lib/python3.7/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpljuafy9e
  JVM stdout: /tmp/tmpljuafy9e/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpljuafy9e/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O cluster uptime:,02 secs
H2O cluster timezone:,Etc/UTC
H2O data parsing timezone:,UTC
H2O cluster version:,3.26.0.2
H2O cluster version age:,15 days
H2O cluster name:,H2O_from_python_unknownUser_yypcxi
H2O cluster total nodes:,1
H2O cluster free memory:,3.042 Gb
H2O cluster total cores:,7
H2O cluster allowed cores:,7


In [5]:
TMPDIR= tempfile.mkdtemp()

## Retrieve training data

In [6]:
# retrieve run_id for desired feature set to test
run_info = extract_run_data_for_experiment('feature_set')
RUN_ID = run_info.loc[run_info['mlflow.runName'] == 'feature_set_0'].run_id.values[0]

retrieve_artifacts(RUN_ID, 'fs_train_df.pkl', TMPDIR)

In [7]:
os.listdir(TMPDIR)

['fs_train_df.pkl']

## training data

In [8]:
train_df = pd.read_pickle(os.path.join(TMPDIR,'fs_train_df.pkl'))

In [9]:
print(train_df.columns.tolist())

['isFraud', 'TransactionID', 'TransactionDT', 'M1', 'M2', 'M3', 'M4', 'M6', 'M7', 'M8', 'M9', 'ProductCD', 'id_12', 'id_15', 'id_16', 'id_28', 'id_29', 'id_35', 'id_36', 'id_37', 'id_38', 'C12', 'C4', 'C7', 'C8', 'D3', 'D5', 'D7', 'D8', 'V1', 'V101', 'V107', 'V108', 'V111', 'V112', 'V113', 'V117', 'V119', 'V120', 'V122', 'V123', 'V125', 'V126', 'V128', 'V132', 'V133', 'V134', 'V15', 'V16', 'V167', 'V17', 'V170', 'V171', 'V176', 'V177', 'V178', 'V18', 'V186', 'V187', 'V188', 'V189', 'V190', 'V191', 'V192', 'V193', 'V195', 'V196', 'V197', 'V198', 'V199', 'V200', 'V201', 'V202', 'V211', 'V212', 'V213', 'V217', 'V218', 'V219', 'V221', 'V222', 'V229', 'V23', 'V231', 'V232', 'V233', 'V24', 'V242', 'V243', 'V244', 'V245', 'V252', 'V253', 'V254', 'V255', 'V256', 'V257', 'V258', 'V259', 'V263', 'V264', 'V273', 'V274', 'V275', 'V283', 'V302', 'V305', 'V308', 'V33', 'V34', 'V43', 'V44', 'V45', 'V52', 'V57', 'V58', 'V59', 'V60', 'V64', 'V71', 'V72', 'V73', 'V74', 'V80', 'V81', 'V86', 'V87', 'V94',

In [10]:
train, _ = train_test_split(train_df, train_size=1000, random_state=91, shuffle=True, stratify=train_df['isFraud'])

train.shape



(1000, 135)

In [11]:
train_h2o = h2o.H2OFrame(train)

Parse progress: |█████████████████████████████████████████████████████████| 100%


In [12]:
train_h2o

isFraud,TransactionID,TransactionDT,M1,M2,M3,M4,M6,M7,M8,M9,ProductCD,id_12,id_15,id_16,id_28,id_29,id_35,id_36,id_37,id_38,C12,C4,C7,C8,D3,D5,D7,D8,V1,V101,V107,V108,V111,V112,V113,V117,V119,V120,V122,V123,V125,V126,V128,V132,V133,V134,V15,V16,V167,V17,V170,V171,V176,V177,V178,V18,V186,V187,V188,V189,V190,V191,V192,V193,V195,V196,V197,V198,V199,V200,V201,V202,V211,V212,V213,V217,V218,V219,V221,V222,V229,V23,V231,V232,V233,V24,V242,V243,V244,V245,V252,V253,V254,V255,V256,V257,V258,V259,V263,V264,V273,V274,V275,V283,V302,V305,V308,V33,V34,V43,V44,V45,V52,V57,V58,V59,V60,V64,V71,V72,V73,V74,V80,V81,V86,V87,V94,addr2,id_04,id_10,D9,V194,V50,addr1
0,3028660.0,1018780.0,T,T,T,M0,F,,,,W,,,,,,,,,,0,0,0,0,,,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,1,0,1,0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,231
0,3031500.0,1068600.0,,,,,,,,,S,NotFound,New,NotFound,New,NotFound,T,F,T,T,0,3,0,3,,,,,,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,,,0.0,,1.0,1.0,1.0,0.0,0.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,1,0,,,,,,,,,,,,,,,,,,,,,87,,,,1.0,,330
0,3064560.0,1701580.0,T,T,T,,F,F,T,T,W,,,,,,,,,,0,0,0,0,,173.0,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,1,0,1,0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,204
0,3364840.0,9463390.0,T,T,T,,T,F,F,T,W,,,,,,,,,,0,0,0,0,24.0,24.0,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,1,0,1,0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,204
0,3323030.0,8275500.0,,,,,F,,,,W,,,,,,,,,,0,0,0,0,,,,,,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,1,0,1,0,0.0,0.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,,205
0,3503710.0,13540600.0,T,T,T,M0,F,F,F,T,W,,,,,,,,,,0,0,0,0,0.0,0.0,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,59,59,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,2,0,1,59,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,204
0,3038280.0,1205320.0,T,F,F,M0,F,F,F,F,W,,,,,,,,,,0,0,0,0,2.0,2.0,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,0,54,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,1,0,1,54,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,264
0,3104980.0,2308360.0,,,,M0,F,,,,W,,,,,,,,,,0,0,0,0,1.0,,,,,0,1,1,1,1,1,1,1,1,1,1,1,77,77,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,2,0,1,77,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,231
0,3445960.0,11754800.0,T,T,T,,F,F,T,T,W,,,,,,,,,,0,0,0,0,48.0,9.0,,,1.0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0.0,0.0,,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,1.0,,,,,,,,,,,,,,,,,,0,0,1,0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,87,,,,,0.0,184
0,3011090.0,609031.0,,,,,,,,,S,NotFound,New,NotFound,New,NotFound,T,T,F,T,0,1,0,1,,,,,,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,,,0.0,,1.0,1.0,1.0,0.0,0.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,,0.0,0.0,0.0,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0,1,1,0,,,,,,,,,,,,,,,,,,,,,87,,,,1.0,,330




In [13]:
train_h2o['isFraud'] = train_h2o['isFraud'].asfactor()

In [15]:

aml = H2OAutoML(max_models=20, seed=1, exclude_algos=['DeepLearning'])
aml.train( y='isFraud', training_frame=train_h2o)

AutoML progress: |████████████████████████████████████████████████████████| 100%


In [16]:
lb = aml.leaderboard
lb.head(rows=lb.nrows)

model_id,auc,logloss,mean_per_class_error,rmse,mse
XGBoost_grid_1_AutoML_20190811_220904_model_6,0.746188,0.136099,0.357883,0.176149,0.0310285
XGBoost_grid_1_AutoML_20190811_220904_model_1,0.743286,0.133796,0.366987,0.176299,0.0310813
XGBoost_grid_1_AutoML_20190811_220904_model_4,0.739704,0.132634,0.380755,0.174842,0.0305696
XGBoost_grid_1_AutoML_20190811_220904_model_2,0.734952,0.135109,0.360474,0.176605,0.0311893
XGBoost_grid_1_AutoML_20190811_220904_model_3,0.725774,0.134466,0.355292,0.175852,0.030924
XGBoost_1_AutoML_20190811_220841,0.725418,0.134409,0.355292,0.17675,0.0312406
XGBoost_1_AutoML_20190811_220904,0.725418,0.134409,0.355292,0.17675,0.0312406
XGBoost_3_AutoML_20190811_220904,0.719423,0.133247,0.366469,0.17418,0.0303388
XGBoost_3_AutoML_20190811_220841,0.719423,0.133247,0.366469,0.17418,0.0303388
XRT_1_AutoML_20190811_220904,0.698505,0.178651,0.363064,0.181953,0.0331071




In [17]:
# Get model ids for all models in the AutoML Leaderboard
model_ids = list(aml.leaderboard['model_id'].as_data_frame().iloc[:,0])

In [18]:
model_ids

['XGBoost_grid_1_AutoML_20190811_220904_model_6',
 'XGBoost_grid_1_AutoML_20190811_220904_model_1',
 'XGBoost_grid_1_AutoML_20190811_220904_model_4',
 'XGBoost_grid_1_AutoML_20190811_220904_model_2',
 'XGBoost_grid_1_AutoML_20190811_220904_model_3',
 'XGBoost_1_AutoML_20190811_220841',
 'XGBoost_1_AutoML_20190811_220904',
 'XGBoost_3_AutoML_20190811_220904',
 'XGBoost_3_AutoML_20190811_220841',
 'XRT_1_AutoML_20190811_220904',
 'DRF_1_AutoML_20190811_220904',
 'GLM_grid_1_AutoML_20190811_220904_model_1',
 'XGBoost_2_AutoML_20190811_220841',
 'XGBoost_2_AutoML_20190811_220904',
 'StackedEnsemble_BestOfFamily_AutoML_20190811_220904',
 'GBM_5_AutoML_20190811_220904',
 'GBM_4_AutoML_20190811_220904',
 'GBM_1_AutoML_20190811_220904',
 'GBM_3_AutoML_20190811_220904',
 'GBM_grid_1_AutoML_20190811_220904_model_1',
 'GBM_2_AutoML_20190811_220904',
 'XGBoost_grid_1_AutoML_20190811_220904_model_5',
 'GBM_grid_1_AutoML_20190811_220904_model_3',
 'GBM_grid_1_AutoML_20190811_220904_model_2']

In [19]:
[mid for mid in model_ids if "GBM" in mid]

['GBM_5_AutoML_20190811_220904',
 'GBM_4_AutoML_20190811_220904',
 'GBM_1_AutoML_20190811_220904',
 'GBM_3_AutoML_20190811_220904',
 'GBM_grid_1_AutoML_20190811_220904_model_1',
 'GBM_2_AutoML_20190811_220904',
 'GBM_grid_1_AutoML_20190811_220904_model_3',
 'GBM_grid_1_AutoML_20190811_220904_model_2']

In [21]:
# Get the "All Models" Stacked Ensemble model
se = h2o.get_model([mid for mid in model_ids if "StackedEnsemble_AllModels" in mid][0])

IndexError: list index out of range

In [22]:
# Get the Stacked Ensemble metalearner model
metalearner = h2o.get_model(se.metalearner()['name'])

NameError: name 'se' is not defined

In [24]:
mdl = h2o.get_model('XGBoost_grid_1_AutoML_20190811_220904_model_6')

In [25]:
print(mdl)

Model Details
H2OXGBoostEstimator :  XGBoost
Model Key:  XGBoost_grid_1_AutoML_20190811_220904_model_6

Model Summary: 


0,1
,number_of_trees
,103.0




ModelMetricsBinomial: xgboost
** Reported on train data. **

MSE: 0.0059752581483292115
RMSE: 0.07729979397339434
LogLoss: 0.030269519343287636
Mean Per-Class Error: 0.0
AUC: 1.0
pr_auc: 0.9714285714285714
Gini: 1.0
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.19777031242847443: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,965.0,0.0,0.0,(0.0/965.0)
1,0.0,35.0,0.0,(0.0/35.0)
Total,965.0,35.0,0.0,(0.0/1000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.1977703,1.0,34.0
max f2,0.1977703,1.0,34.0
max f0point5,0.1977703,1.0,34.0
max accuracy,0.1977703,1.0,34.0
max precision,0.9656714,1.0,0.0
max recall,0.1977703,1.0,34.0
max specificity,0.9656714,1.0,0.0
max absolute_mcc,0.1977703,1.0,34.0
max min_per_class_accuracy,0.1977703,1.0,34.0


Gains/Lift Table: Avg response rate:  3.50 %, avg score:  3.61 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.7547129,28.5714286,28.5714286,1.0,0.8935814,1.0,0.8935814,0.2857143,0.2857143,2757.1428571,2757.1428571
,2,0.02,0.6268649,28.5714286,28.5714286,1.0,0.7049984,1.0,0.7992899,0.2857143,0.5714286,2757.1428571,2757.1428571
,3,0.03,0.3909585,28.5714286,28.5714286,1.0,0.5268752,1.0,0.7084850,0.2857143,0.8571429,2757.1428571,2757.1428571
,4,0.04,0.1022621,14.2857143,25.0000000,0.5,0.2383092,0.875,0.5909410,0.1428571,1.0,1328.5714286,2400.0000000
,5,0.05,0.0566517,0.0,20.0000000,0.0,0.0761581,0.7,0.4879845,0.0,1.0,-100.0,1900.0000000
,6,0.1,0.0303629,0.0,10.0000000,0.0,0.0405046,0.35,0.2642445,0.0,1.0,-100.0,900.0000000
,7,0.15,0.0229090,0.0,6.6666667,0.0,0.0258469,0.2333333,0.1847787,0.0,1.0,-100.0,566.6666667
,8,0.2,0.0186610,0.0,5.0000000,0.0,0.0208001,0.175,0.1437840,0.0,1.0,-100.0,400.0000000
,9,0.3,0.0137132,0.0,3.3333333,0.0,0.0158102,0.1166667,0.1011261,0.0,1.0,-100.0,233.3333333




ModelMetricsBinomial: xgboost
** Reported on cross-validation data. **

MSE: 0.0310285357831
RMSE: 0.17614918615508843
LogLoss: 0.13609851949362703
Mean Per-Class Error: 0.2946706143597335
AUC: 0.7461880088823094
pr_auc: 0.1869303932990314
Gini: 0.4923760177646188
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.09644685685634613: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,936.0,29.0,0.0301,(29.0/965.0)
1,24.0,11.0,0.6857,(24.0/35.0)
Total,960.0,40.0,0.053,(53.0/1000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.0964469,0.2933333,39.0
max f2,0.0725301,0.3061224,55.0
max f0point5,0.5139326,0.3968254,6.0
max accuracy,0.5139326,0.968,6.0
max precision,0.9189045,1.0,0.0
max recall,0.0059843,1.0,359.0
max specificity,0.9189045,1.0,0.0
max absolute_mcc,0.5139326,0.3103339,6.0
max min_per_class_accuracy,0.0183080,0.6963731,212.0


Gains/Lift Table: Avg response rate:  3.50 %, avg score:  2.71 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.3648427,14.2857143,14.2857143,0.5,0.6228217,0.5,0.6228217,0.1428571,0.1428571,1328.5714286,1328.5714286
,2,0.02,0.1677810,2.8571429,8.5714286,0.1,0.2344985,0.3,0.4286601,0.0285714,0.1714286,185.7142857,757.1428571
,3,0.03,0.1141361,5.7142857,7.6190476,0.2,0.1371207,0.2666667,0.3314803,0.0571429,0.2285714,471.4285714,661.9047619
,4,0.04,0.0959603,8.5714286,7.8571429,0.3,0.1060868,0.275,0.2751319,0.0857143,0.3142857,757.1428571,685.7142857
,5,0.05,0.0813334,0.0,6.2857143,0.0,0.0882574,0.22,0.2377570,0.0,0.3142857,-100.0,528.5714286
,6,0.1,0.0454427,0.5714286,3.4285714,0.02,0.0583092,0.12,0.1480331,0.0285714,0.3428571,-42.8571429,242.8571429
,7,0.15,0.0302382,1.1428571,2.6666667,0.04,0.0367276,0.0933333,0.1109313,0.0571429,0.4,14.2857143,166.6666667
,8,0.2,0.0241350,1.1428571,2.2857143,0.04,0.0269649,0.08,0.0899397,0.0571429,0.4571429,14.2857143,128.5714286
,9,0.3,0.0188787,2.0,2.1904762,0.07,0.0211004,0.0766667,0.0669932,0.2,0.6571429,100.0,119.0476190



Cross-Validation Metrics Summary: 


0,1,2,3,4,5,6,7
,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.876,0.1419401,0.975,0.475,0.96,0.98,0.99
auc,0.7300846,0.0748035,0.8398438,0.6041366,0.7405468,0.6147959,0.8510998
err,0.124,0.1419401,0.025,0.525,0.04,0.02,0.01
err_count,24.8,28.388025,5.0,105.0,8.0,4.0,2.0
f0point5,0.4946007,0.1570242,0.6944444,0.1070664,0.5405405,0.4166667,0.7142857
f1,0.432,0.1216699,0.6666667,0.16,0.5,0.3333333,0.5
f2,0.4169982,0.0911282,0.6410257,0.3164557,0.4651163,0.2777778,0.3846154
lift_top_group,18.88889,8.370289,25.0,0.0,11.111111,25.0,33.333332
logloss,0.1360985,0.0424018,0.1198581,0.2402356,0.1568427,0.0981966,0.0653596


Scoring History: 


0,1,2,3,4,5,6,7,8,9
,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_pr_auc,training_lift,training_classification_error
,2019-08-11 22:10:07,20.171 sec,0.0,0.5,0.6931472,0.5,0.0,1.0,0.965
,2019-08-11 22:10:08,20.233 sec,5.0,0.3988401,0.5085146,0.8125833,0.5541959,25.7142857,0.025
,2019-08-11 22:10:08,20.286 sec,10.0,0.3225793,0.3873722,0.8383864,0.6151308,28.5714286,0.02
,2019-08-11 22:10:08,20.343 sec,15.0,0.2658090,0.3036547,0.8486306,0.6494421,28.5714286,0.019
,2019-08-11 22:10:08,20.396 sec,20.0,0.2245352,0.2443473,0.8489563,0.6580803,28.5714286,0.018
---,---,---,---,---,---,---,---,---,---
,2019-08-11 22:10:09,21.336 sec,85.0,0.0871425,0.0393001,0.9999408,0.9698627,28.5714286,0.001
,2019-08-11 22:10:09,21.427 sec,90.0,0.0834040,0.0361845,0.9999704,0.9706236,28.5714286,0.001
,2019-08-11 22:10:09,21.519 sec,95.0,0.0813838,0.0336461,0.9999704,0.9706236,28.5714286,0.001



See the whole table with table.as_data_frame()
Variable Importances: 


0,1,2,3
variable,relative_importance,scaled_importance,percentage
TransactionID,161.4985199,1.0,0.0900993
addr1,90.9828949,0.5633667,0.0507589
TransactionDT,61.3004379,0.3795728,0.0341992
V44,58.1841202,0.3602765,0.0324606
V308,56.8192596,0.3518253,0.0316992
---,---,---,---
D8.260.58334350585943,0.0414741,0.0002568,0.0000231
D5.44.0,0.0184128,0.0001140,0.0000103
D9.0.7083330154418945,0.0164975,0.0001022,0.0000092



See the whole table with table.as_data_frame()



In [26]:
mdl_params = mdl.get_params()

In [27]:
print([k for k in mdl_params.keys()])

['model_id', 'training_frame', 'validation_frame', 'nfolds', 'keep_cross_validation_models', 'keep_cross_validation_predictions', 'keep_cross_validation_fold_assignment', 'score_each_iteration', 'fold_assignment', 'fold_column', 'response_column', 'ignored_columns', 'ignore_const_cols', 'offset_column', 'weights_column', 'stopping_rounds', 'stopping_metric', 'stopping_tolerance', 'max_runtime_secs', 'seed', 'distribution', 'tweedie_power', 'categorical_encoding', 'quiet_mode', 'export_checkpoints_dir', 'ntrees', 'max_depth', 'min_rows', 'min_child_weight', 'learn_rate', 'eta', 'sample_rate', 'subsample', 'col_sample_rate', 'colsample_bylevel', 'col_sample_rate_per_tree', 'colsample_bytree', 'max_abs_leafnode_pred', 'max_delta_step', 'monotone_constraints', 'score_tree_interval', 'min_split_improvement', 'gamma', 'nthread', 'max_bins', 'max_leaves', 'min_sum_hessian_in_leaf', 'min_data_in_leaf', 'sample_type', 'normalize_type', 'rate_drop', 'one_drop', 'skip_drop', 'tree_method', 'grow_

In [28]:
mdl_params['max_depth']

{'__meta': {'schema_version': 3,
  'schema_name': 'ModelParameterSchemaV3',
  'schema_type': 'Iced'},
 'name': 'max_depth',
 'label': 'max_depth',
 'help': 'Maximum tree depth.',
 'required': False,
 'type': 'int',
 'default_value': 6,
 'actual_value': 15,
 'level': 'critical',
 'values': [],
 'is_member_of_frames': [],
 'is_mutually_exclusive_with': [],
 'gridable': True}

In [29]:
mdl_params['ntrees']

{'__meta': {'schema_version': 3,
  'schema_name': 'ModelParameterSchemaV3',
  'schema_type': 'Iced'},
 'name': 'ntrees',
 'label': 'ntrees',
 'help': '(same as n_estimators) Number of trees.',
 'required': False,
 'type': 'int',
 'default_value': 50,
 'actual_value': 103,
 'level': 'critical',
 'values': [],
 'is_member_of_frames': [],
 'is_mutually_exclusive_with': [],
 'gridable': True}

In [30]:
print([k for k in mdl_params.keys() if 'drop' in k])

['rate_drop', 'one_drop', 'skip_drop']


## Clean-up

In [31]:
shutil.rmtree(TMPDIR)

In [32]:
pd.__version__

'0.24.2'

In [None]:
dir(aml)