# Import Modules

In [1]:
import os

import h2o
import pandas

In [2]:
from h2o.estimators.deepwater import H2ODeepWaterEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator

In [None]:
# Root directory that holds the workshop's data/ folder.
# Defaults to the original "/gtc-2017" location; set the GTC_PROJECT_PATH
# environment variable to run the notebook from a different checkout
# without editing this cell.
project_path = os.environ.get("GTC_PROJECT_PATH", "/gtc-2017")

# Connect or Start H2O

In [3]:
# Attach this Python session to an H2O cluster. In the recorded run the call
# found and connected to an instance already running at http://localhost:54321.
# NOTE(review): per the section heading, a local instance is presumably started
# when none is found -- confirm against the h2o.init documentation.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O cluster uptime:,2 hours 38 mins
H2O cluster version:,3.11.0.226
H2O cluster version age:,2 days
H2O cluster name:,root
H2O cluster total nodes:,1
H2O cluster free memory:,25.89 Gb
H2O cluster total cores:,20
H2O cluster allowed cores:,20
H2O cluster status:,"locked, healthy"
H2O connection url:,http://localhost:54321


# Import Data

Import data into H2O.  This is a modified MNIST dataset for binomial classification.  The target is no longer the digit, but whether the digit is odd or even.

In [4]:
# Build the two file locations under the project root, then load each one
# into its own H2O frame. The validation frame is read from the "test" file.
train_path = project_path + "/data/train-odd.csv.gz"
valid_path = project_path + "/data/test-odd.csv.gz"

train = h2o.import_file(train_path)
valid = h2o.import_file(valid_path)

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


This data set is a standard one (versus the image schema).  There are 785 columns, where 784 are features and the 785th column is the target.  We're working with this schema because non-Deep Water algorithms cannot work with the image schema (i.e. the standard schema is the common denominator among all H2O algorithms).

In [5]:
# Column positions (0-based): the response sits at index 784 and every
# column before it (indices 0..783) is a feature.
target = 784
features = list(range(target))

Features are scaled (divided by 255) outside of the algorithms to ensure the scaling is the same across the various algorithms.  The "target" column is converted to a factor (i.e. a categorical column).

In [6]:
# Apply the same preparation to both frames, training frame first:
#   1. divide every feature column by 255,
#   2. convert the response column to a factor.
# The frames are modified in place, so re-running this cell divides the
# features by 255 a second time -- re-import the data before re-running.
for frame in (train, valid):
    frame[features] = frame[features] / 255
    frame[target] = frame[target].asfactor()

# Explore Data

In [7]:
# Preview the first rows of the (already scaled) training frame; as the last
# expression in the cell, the result is rendered as the cell output.
train.head()

C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26,C27,C28,C29,C30,C31,C32,C33,C34,C35,C36,C37,C38,C39,C40,C41,C42,C43,C44,C45,C46,C47,C48,C49,C50,C51,C52,C53,C54,C55,C56,C57,C58,C59,C60,C61,C62,C63,C64,C65,C66,C67,C68,C69,C70,C71,C72,C73,C74,C75,C76,C77,C78,C79,C80,C81,C82,C83,C84,C85,C86,C87,C88,C89,C90,C91,C92,C93,C94,C95,C96,C97,C98,C99,C100,C101,C102,C103,C104,C105,C106,C107,C108,C109,C110,C111,C112,C113,C114,C115,C116,C117,C118,C119,C120,C121,C122,C123,C124,C125,C126,C127,C128,C129,C130,C131,C132,C133,C134,C135,C136,C137,C138,C139,C140,C141,C142,C143,C144,C145,C146,C147,C148,C149,C150,C151,C152,C153,C154,C155,C156,C157,C158,C159,C160,C161,C162,C163,C164,C165,C166,C167,C168,C169,C170,C171,C172,C173,C174,C175,C176,C177,C178,C179,C180,C181,C182,C183,C184,C185,C186,C187,C188,C189,C190,C191,C192,C193,C194,C195,C196,C197,C198,C199,C200
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0980392,0.411765,0.411765,0.411765,0.482353,0.992157,0.992157,0.992157,0.992157,1.0,0.992157,0.611765,0.215686,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.717647,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.992157,0.988235,0.988235,0.905882,0.643137,0.0705882,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.815686,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.988235,0.992157,0.988235,0.988235,0.988235,0.988235,0.290196,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.027451,0.823529,0.992157,0.803922,0.305882,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.34902,0.988235,0.988235,0.988235,0.898039,0.192157,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.835294,0.988235,0.988235,0.988235,0.992157,0.831373,0.0666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.219608,0.356863,0.792157,0.996078,0.996078,0.996078,0.419608,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0156863,0.0666667,0.235294,0.882353,0.992157,0.992157,0.992157,0.992157,0.992157,0.945098,0.376471,0.0117647,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.207843,0.529412,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.12549,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0392157,0.847059,0.639216,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.682353,0.996078,0.635294,0.156863,0.580392,0.72549,0.156863,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.00784314,0.498039,0.988235,0.709804,0.521569,0.913725,0.996078,0.996078,0.886275,0.152941,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.301961,0.521569,0.592157,0.996078,0.996078,0.862745,0.364706,0.00784314,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0431373,0.32549,0.996078,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.184314,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.431373,0.992157,0.996078,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.666667,0.0156863,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.431373,0.992157,0.996078,0.992157,0.807843,0.329412,0.666667,0.94902,0.992157,0.992157,0.0941176,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.827451,0.996078,0.996078,0.996078,0.996078,1.0,0.87451,0.552941,0.054902,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.976471,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.25098,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.164706,0.462745,0.560784,1.0,0.996078,0.996078,0.996078,0.823529,0.462745,0.137255,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.4,0.945098,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.992157,0.85098,0.145098,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.305882,0.694118,0.937255,0.992157,0.992157,0.823529,0.694118,0.815686,0.992157,0.992157,0.992157,0.533333,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0196078,0.0509804,0.72549,0.772549,0.992157,0.992157,0.329412,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.427451,0.988235,0.988235,0.992157,0.988235,0.988235,0.329412,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.501961,0.937255,0.988235,0.988235,0.992157,0.905882,0.552941,0.12549,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.054902,0.839216,0.988235,0.988235,0.807843,0.870588,0.141176,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0745098,0.113725,0.501961,0.8,0.894118,0.407843,0.0156863,0.0,0.0,0.0,0.0,0,0.027451,0.309804,0.0156863,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.176471,0.882353,0.988235,0.988235,0.992157,0.988235,0.988235,0.109804,0.0,0.0,0.0,0.0,0.0745098,0.733333,0.988235,0.109804,0,0,0,0,0,0




# Cross-Validation

Set five folds for cross-validation, which is required for ensembles.

In [8]:
# Number of cross-validation folds. The same value is passed as `nfolds` to
# the GBM, GLM and Deep Water estimators built in the following sections.
nfolds = 5

# Build GBM Model

In [9]:
# Gradient boosting base model (bernoulli distribution, 100 trees).
#
# Cross-validation settings (nfolds, fold_assignment="Modulo",
# keep_cross_validation_predictions=True) are the same ones used by the GLM
# and Deep Water models in this notebook; all three are later passed as base
# models to the stacked ensemble.
# ignore_const_cols=False is likewise set on every base model
# (presumably so that all models see the identical column set -- confirm).
#
# seed is fixed to the same value the Deep Water model uses, so that every
# base model which accepts a seed is pinned and a fresh "Restart & Run All"
# does not depend on a randomly drawn seed.
gbm_model = H2OGradientBoostingEstimator(distribution="bernoulli",
                                        ntrees=100,
                                        seed=1234,
                                        nfolds=nfolds,
                                        ignore_const_cols=False,
                                        keep_cross_validation_predictions=True,
                                        fold_assignment="Modulo")
gbm_model.train(x=features, y=target, training_frame=train, model_id="gbm_model")
# Print the model summary (training and cross-validation metrics).
gbm_model.show()

gbm Model Build progress: |███████████████████████████████████████████████| 100%
Model Details
H2OGradientBoostingEstimator :  Gradient Boosting Machine
Model Key:  gbm_model


ModelMetricsBinomial: gbm
** Reported on train data. **

MSE: 0.0184588138676
RMSE: 0.135863217493
LogLoss: 0.0798629002033
Mean Per-Class Error: 0.0219261048203
AUC: 0.997690378855
Gini: 0.995380757711
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.489774795004: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,28843.0,649.0,0.022,(649.0/29492.0)
1,668.0,29840.0,0.0219,(668.0/30508.0)
Total,29511.0,30489.0,0.022,(1317.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.4897748,0.9784088,201.0
max f2,0.2507256,0.9828546,263.0
max f0point5,0.6845326,0.9832142,154.0
max accuracy,0.4897748,0.97805,201.0
max precision,0.9971834,1.0,0.0
max recall,0.0167044,1.0,381.0
max specificity,0.9971834,1.0,0.0
max absolute_mcc,0.4897748,0.9560881,201.0
max min_per_class_accuracy,0.4897748,0.9779940,201.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9960332,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0196670,96.6697260,96.6697260
,2,0.02,0.9953539,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0393339,96.6697260,96.6697260
,3,0.03,0.9947663,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0590009,96.6697260,96.6697260
,4,0.04,0.9941981,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0786679,96.6697260,96.6697260
,5,0.05,0.9936576,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0983349,96.6697260,96.6697260
,6,0.1,0.9912823,1.9666973,1.9666973,1.0,1.0,0.0983349,0.1966697,96.6697260,96.6697260
,7,0.15,0.9891700,1.9666973,1.9666973,1.0,1.0,0.0983349,0.2950046,96.6697260,96.6697260
,8,0.2,0.9864508,1.9666973,1.9666973,1.0,1.0,0.0983349,0.3933395,96.6697260,96.6697260
,9,0.3,0.9761045,1.9663695,1.9665880,0.9998333,0.9999444,0.1966369,0.5899764,96.6369477,96.6587999




ModelMetricsBinomial: gbm
** Reported on cross-validation data. **

MSE: 0.0237437971294
RMSE: 0.15409022399
LogLoss: 0.0950504193947
Mean Per-Class Error: 0.0300979991223
AUC: 0.9956335263
Gini: 0.991267052601
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.527442472654: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,28706.0,786.0,0.0267,(786.0/29492.0)
1,1026.0,29482.0,0.0336,(1026.0/30508.0)
Total,29732.0,30268.0,0.0302,(1812.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.5274425,0.9701856,191.0
max f2,0.2666596,0.9769350,257.0
max f0point5,0.7337273,0.9775606,137.0
max accuracy,0.5536466,0.9698,185.0
max precision,0.9970609,1.0,0.0
max recall,0.0115552,1.0,386.0
max specificity,0.9970609,1.0,0.0
max absolute_mcc,0.5536466,0.9396763,185.0
max min_per_class_accuracy,0.4900991,0.9692540,199.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9960620,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0196670,96.6697260,96.6697260
,2,0.02,0.9952707,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0393339,96.6697260,96.6697260
,3,0.03,0.9946557,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0590009,96.6697260,96.6697260
,4,0.04,0.9940933,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0786679,96.6697260,96.6697260
,5,0.05,0.9935549,1.9666973,1.9666973,1.0,1.0,0.0196670,0.0983349,96.6697260,96.6697260
,6,0.1,0.9911696,1.9666973,1.9666973,1.0,1.0,0.0983349,0.1966697,96.6697260,96.6697260
,7,0.15,0.9888268,1.9666973,1.9666973,1.0,1.0,0.0983349,0.2950046,96.6697260,96.6697260
,8,0.2,0.9859304,1.9660417,1.9665334,0.9996667,0.9999167,0.0983021,0.3933067,96.6041694,96.6533368
,9,0.3,0.9754819,1.9653861,1.9661510,0.9993333,0.9997222,0.1965386,0.5898453,96.5386128,96.6150955



Cross-Validation Metrics Summary: 


0,1,2,3,4,5,6,7
,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.97025,0.0015051,0.9664167,0.9705833,0.9698333,0.9721667,0.97225
auc,0.9956313,0.0002925,0.995079,0.9954283,0.9956071,0.9963411,0.995701
err,0.02975,0.0015051,0.0335833,0.0294167,0.0301667,0.0278333,0.02775
err_count,357.0,18.061008,403.0,353.0,362.0,334.0,333.0
f0point5,0.9709338,0.0030639,0.9636276,0.9682862,0.9737145,0.9746295,0.9744114
f1,0.9707509,0.0012852,0.9675810,0.9710062,0.9703667,0.9717619,0.9730386
f2,0.9705863,0.0016571,0.9715670,0.9737414,0.9670419,0.9689112,0.9716697
lift_top_group,1.967142,0.0210402,1.9439495,1.9805249,1.9534429,2.0191822,1.9386107
logloss,0.0950504,0.0024770,0.1005785,0.0963374,0.0955798,0.0904067,0.0923497


Scoring History: 


0,1,2,3,4,5,6,7,8
,timestamp,duration,number_of_trees,training_rmse,training_logloss,training_auc,training_lift,training_classification_error
,2017-05-08 21:37:59,3 min 3.240 sec,0.0,0.4999283,0.6930038,0.5,1.0,0.4915333
,2017-05-08 21:37:59,3 min 3.808 sec,1.0,0.4706960,0.6361105,0.9242358,1.8650539,0.136
,2017-05-08 21:38:00,3 min 4.252 sec,2.0,0.4438396,0.5862645,0.9459456,1.8735796,0.1076
,2017-05-08 21:38:00,3 min 4.678 sec,3.0,0.4190427,0.5419202,0.9535394,1.8826286,0.1015
,2017-05-08 21:38:00,3 min 5.105 sec,4.0,0.3966224,0.5029592,0.9567257,1.8798151,0.09645
,2017-05-08 21:38:01,3 min 5.528 sec,5.0,0.3775109,0.4703404,0.9608239,1.9320723,0.0826
,2017-05-08 21:38:01,3 min 5.964 sec,6.0,0.3601035,0.4410499,0.9636597,1.9284347,0.0810833
,2017-05-08 21:38:02,3 min 6.405 sec,7.0,0.3458712,0.4170579,0.9651846,1.9384672,0.08045
,2017-05-08 21:38:02,3 min 6.889 sec,8.0,0.3320683,0.3939698,0.9667601,1.9381944,0.0791


Variable Importances: 


0,1,2,3
variable,relative_importance,scaled_importance,percentage
C514,11590.8330078,1.0,0.1630561
C486,6738.7817383,0.5813889,0.0947990
C516,3642.8352051,0.3142859,0.0512462
C455,3529.8803711,0.3045407,0.0496572
C456,2601.3500977,0.2244317,0.0365949
---,---,---,---
C780,0.0,0.0,0.0
C781,0.0,0.0,0.0
C782,0.0,0.0,0.0



See the whole table with table.as_data_frame()


# Build GLM Model

In [10]:
# Binomial GLM base model. The estimator settings are collected in one place
# and then unpacked into the constructor.
# The cross-validation entries (nfolds, Modulo fold assignment, kept CV
# predictions) match the other base models that feed the stacked ensemble.
glm_params = dict(
    family="binomial",
    lambda_=0.0001,
    alpha=0.5,
    nfolds=nfolds,
    ignore_const_cols=False,
    keep_cross_validation_predictions=True,
    fold_assignment="Modulo",
)
glm_model = H2OGeneralizedLinearEstimator(**glm_params)

# Fit on the training frame under a fixed model id, then print the summary.
glm_model.train(x=features, y=target, training_frame=train, model_id="glm_model")
glm_model.show()

glm Model Build progress: |███████████████████████████████████████████████| 100%
Model Details
H2OGeneralizedLinearEstimator :  Generalized Linear Modeling
Model Key:  glm_model


ModelMetricsBinomialGLM: glm
** Reported on train data. **

MSE: 0.072250756588
RMSE: 0.268795008488
LogLoss: 0.247229730062
Null degrees of freedom: 59999
Residual degrees of freedom: 59378
Null deviance: 83160.4565782
Residual deviance: 29667.5676074
AIC: 30911.5676074
AUC: 0.963036495056
Gini: 0.926072990111
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.465765213777: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,26240.0,3252.0,0.1103,(3252.0/29492.0)
1,2510.0,27998.0,0.0823,(2510.0/30508.0)
Total,28750.0,31250.0,0.096,(5762.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.4657652,0.9067003,210.0
max f2,0.2460309,0.9295763,278.0
max f0point5,0.7028055,0.9196037,136.0
max accuracy,0.5122778,0.9046167,196.0
max precision,0.9998524,0.9972727,0.0
max recall,0.0001227,1.0,399.0
max specificity,0.9998524,0.9997966,0.0
max absolute_mcc,0.5122778,0.8091859,196.0
max min_per_class_accuracy,0.5147032,0.9042452,195.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9999705,1.9568638,1.9568638,0.995,0.995,0.0195686,0.0195686,95.6863773,95.6863773
,2,0.02,0.9998785,1.9666973,1.9617805,1.0,0.9975,0.0196670,0.0392356,96.6697260,96.1780517
,3,0.03,0.9997311,1.9601416,1.9612342,0.9966667,0.9972222,0.0196014,0.0588370,96.0141602,96.1234212
,4,0.04,0.9995233,1.9568638,1.9601416,0.995,0.9966667,0.0195686,0.0784057,95.6863773,96.0141602
,5,0.05,0.9991997,1.9634194,1.9607972,0.9983333,0.997,0.0196342,0.0980399,96.3419431,96.0797168
,6,0.1,0.9948017,1.9529304,1.9568638,0.993,0.995,0.0976465,0.1956864,95.2930379,95.6863773
,7,0.15,0.9875684,1.9450636,1.9529304,0.989,0.993,0.0972532,0.2929396,94.5063590,95.2930379
,8,0.2,0.9763884,1.9299856,1.9471942,0.9813333,0.9900833,0.0964993,0.3894388,92.9985578,94.7194179
,9,0.3,0.9321712,1.8998296,1.9314060,0.966,0.9820556,0.1899830,0.5794218,89.9829553,93.1405970




ModelMetricsBinomialGLM: glm
** Reported on cross-validation data. **

MSE: 0.0758146924522
RMSE: 0.275344679361
LogLoss: 0.261132933933
Null degrees of freedom: 59999
Residual degrees of freedom: 59418
Null deviance: 83168.2584022
Residual deviance: 32683.666844
AIC: 33847.666844
AUC: 0.959288417562
Gini: 0.918576835125
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.470825920915: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,26122.0,3370.0,0.1143,(3370.0/29492.0)
1,2694.0,27814.0,0.0883,(2694.0/30508.0)
Total,28816.0,31184.0,0.1011,(6064.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.4708259,0.9017052,211.0
max f2,0.2209487,0.9262023,290.0
max f0point5,0.6972485,0.9146640,142.0
max accuracy,0.5182366,0.8996833,197.0
max precision,0.9997707,0.9949749,0.0
max recall,0.0001266,1.0,399.0
max specificity,0.9997707,0.9995253,0.0
max absolute_mcc,0.5182366,0.7993293,197.0
max min_per_class_accuracy,0.5182366,0.8993707,197.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9999736,1.9470303,1.9470303,0.99,0.99,0.0194703,0.0194703,94.7030287,94.7030287
,2,0.02,0.9998821,1.9568638,1.9519470,0.995,0.9925,0.0195686,0.0390389,95.6863773,95.1947030
,3,0.03,0.9997400,1.9601416,1.9546786,0.9966667,0.9938889,0.0196014,0.0586404,96.0141602,95.4678554
,4,0.04,0.9995389,1.9601416,1.9560443,0.9966667,0.9945833,0.0196014,0.0782418,96.0141602,95.6044316
,5,0.05,0.9992192,1.9535859,1.9555526,0.9933333,0.9943333,0.0195359,0.0977776,95.3585945,95.5552642
,6,0.1,0.9948675,1.9496526,1.9526026,0.9913333,0.9928333,0.0974826,0.1952603,94.9652550,95.2602596
,7,0.15,0.9876386,1.9385079,1.9479044,0.9856667,0.9904444,0.0969254,0.2921857,93.8507932,94.7904375
,8,0.2,0.9766595,1.9221188,1.9414580,0.9773333,0.9871667,0.0961059,0.3882916,92.2118789,94.1457978
,9,0.3,0.9329620,1.8926183,1.9251781,0.9623333,0.9788889,0.1892618,0.5775534,89.2618330,92.5178095



Cross-Validation Metrics Summary: 


0,1,2,3,4,5,6,7
,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.8991666,0.0021769,0.9018334,0.8956667,0.8951667,0.9011667,0.902
auc,0.9593012,0.0011965,0.9598312,0.9576408,0.9570988,0.9616288,0.9603065
err,0.1008333,0.0021769,0.0981667,0.1043333,0.1048333,0.0988333,0.098
err_count,1210.0,26.122787,1178.0,1252.0,1258.0,1186.0,1176.0
f0point5,0.8952141,0.0021577,0.8993229,0.8899655,0.8951444,0.8949853,0.8966522
---,---,---,---,---,---,---,---
r2,0.6965847,0.0047312,0.7000098,0.6895126,0.6876364,0.7040451,0.7017197
recall,0.913794,0.0048005,0.9165722,0.9125269,0.9033046,0.9123339,0.9242327
residual_deviance,6536.7334,326.32788,6284.9424,6405.1924,6421.0576,7436.7417,6135.733



See the whole table with table.as_data_frame()
Scoring History: 


0,1,2,3,4,5
,timestamp,duration,iteration,negative_log_likelihood,objective
,2017-05-08 21:38:50,0.000 sec,0,41580.2282891,0.6930038
,2017-05-08 21:38:50,0.453 sec,1,20250.4765864,0.3381756
,2017-05-08 21:38:51,0.741 sec,2,16328.4722372,0.2732749
,2017-05-08 21:38:51,0.993 sec,3,15147.2338329,0.2540176
,2017-05-08 21:38:51,1.243 sec,4,14883.9992216,0.2499047
,2017-05-08 21:38:51,1.507 sec,5,14841.3651674,0.2493125
,2017-05-08 21:38:52,1.753 sec,6,14836.4664856,0.2492581
,2017-05-08 21:38:52,2.035 sec,7,14834.4282934,0.2492477
,2017-05-08 21:38:52,2.541 sec,8,14833.7838037,0.2492410


# Build Deep Water Model

In [11]:
# Deep Water base model using the "lenet" network for 3 epochs.
# The flat feature columns are described to the network as a single-channel
# 28x28 image (image_shape / channels). standardize=False because the
# features were already divided by 255 earlier in the notebook.
# The cross-validation entries match the other base models that feed the
# stacked ensemble.
dw_params = dict(
    epochs=3,
    network="lenet",
    ignore_const_cols=False,
    image_shape=[28, 28],
    channels=1,
    standardize=False,
    seed=1234,
    nfolds=nfolds,
    keep_cross_validation_predictions=True,
    fold_assignment="Modulo",
)
dw_model = H2ODeepWaterEstimator(**dw_params)

# Fit on the training frame under a fixed model id, then print the summary.
dw_model.train(x=features, y=target, training_frame=train, model_id="dw_model")
dw_model.show()

deepwater Model Build progress: |█████████████████████████████████████████| 100%
Model Details
H2ODeepWaterEstimator :  Deep Water
Model Key:  dw_model


ModelMetricsBinomial: deepwater
** Reported on train data. **

MSE: 0.012592653366
RMSE: 0.112216992323
LogLoss: 0.0470116000858
Mean Per-Class Error: 0.016362262688
AUC: 0.998386460325
Gini: 0.99677292065
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.568616807461: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,4924.0,67.0,0.0134,(67.0/4991.0)
1,96.0,4878.0,0.0193,(96.0/4974.0)
Total,5020.0,4945.0,0.0164,(163.0/9965.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.5686168,0.9835669,176.0
max f2,0.2332578,0.9870790,255.0
max f0point5,0.8573869,0.9887427,108.0
max accuracy,0.5686168,0.9836427,176.0
max precision,0.9999677,1.0,0.0
max recall,0.0003146,1.0,397.0
max specificity,0.9999677,1.0,0.0
max absolute_mcc,0.5686168,0.9673015,176.0
max min_per_class_accuracy,0.4891050,0.9831697,192.0


Gains/Lift Table: Avg response rate: 49.91 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0103362,0.9999989,2.0034178,2.0034178,1.0,1.0,0.0207077,0.0207077,100.3417772,100.3417772
,2,0.0202709,0.9999976,2.0034178,2.0034178,1.0,1.0,0.0199035,0.0406112,100.3417772,100.3417772
,3,0.0303061,0.9999955,2.0034178,2.0034178,1.0,1.0,0.0201045,0.0607157,100.3417772,100.3417772
,4,0.0403412,0.9999923,2.0034178,2.0034178,1.0,1.0,0.0201045,0.0808203,100.3417772,100.3417772
,5,0.0502760,0.9999889,2.0034178,2.0034178,1.0,1.0,0.0199035,0.1007238,100.3417772,100.3417772
,6,0.1000502,0.9999519,2.0034178,2.0034178,1.0,1.0,0.0997185,0.2004423,100.3417772,100.3417772
,7,0.1500251,0.9998172,2.0034178,2.0034178,1.0,1.0,0.1001206,0.3005629,100.3417772,100.3417772
,8,0.2,0.9995008,2.0034178,2.0034178,1.0,1.0,0.1001206,0.4006836,100.3417772,100.3417772
,9,0.3000502,0.9981212,2.0034178,2.0034178,1.0,1.0,0.2004423,0.6011259,100.3417772,100.3417772




ModelMetricsBinomial: deepwater
** Reported on cross-validation data. **

MSE: 0.0152526853587
RMSE: 0.123501762573
LogLoss: 0.0571221179783
Mean Per-Class Error: 0.0192123133405
AUC: 0.997677971965
Gini: 0.995355943929
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.526489247878: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,28953.0,539.0,0.0183,(539.0/29492.0)
1,615.0,29893.0,0.0202,(615.0/30508.0)
Total,29568.0,30432.0,0.0192,(1154.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.5264892,0.9810633,190.0
max f2,0.2759339,0.9840750,256.0
max f0point5,0.7901253,0.9851598,118.0
max accuracy,0.5308414,0.9807667,189.0
max precision,0.9999522,1.0,0.0
max recall,0.0000390,1.0,399.0
max specificity,0.9999522,1.0,0.0
max absolute_mcc,0.5308414,0.9615291,189.0
max min_per_class_accuracy,0.5049330,0.9806049,195.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0100833,0.9999970,1.9666973,1.9666973,1.0,1.0,0.0198309,0.0198309,96.6697260,96.6697260
,2,0.0201833,0.9999924,1.9666973,1.9666973,1.0,1.0,0.0198636,0.0396945,96.6697260,96.6697260
,3,0.0301,0.9999862,1.9666973,1.9666973,1.0,1.0,0.0195031,0.0591976,96.6697260,96.6697260
,4,0.0400333,0.9999779,1.9666973,1.9666973,1.0,1.0,0.0195359,0.0787334,96.6697260,96.6697260
,5,0.05,0.9999666,1.9666973,1.9666973,1.0,1.0,0.0196014,0.0983349,96.6697260,96.6697260
,6,0.1,0.9998583,1.9666973,1.9666973,1.0,1.0,0.0983349,0.1966697,96.6697260,96.6697260
,7,0.15,0.9996168,1.9666973,1.9666973,1.0,1.0,0.0983349,0.2950046,96.6697260,96.6697260
,8,0.2000167,0.9992248,1.9660419,1.9665334,0.9996668,0.9999167,0.0983349,0.3933395,96.6041912,96.6533382
,9,0.3,0.9974094,1.9660416,1.9663695,0.9996666,0.9998333,0.1965714,0.5899108,96.6041585,96.6369477



Cross-Validation Metrics Summary: 


0,1,2,3,4,5,6,7
,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
accuracy,0.9819334,0.0012491,0.9809167,0.98,0.9833333,0.9846666,0.98075
auc,0.9979531,0.0002369,0.9977207,0.9977161,0.9981530,0.9985207,0.9976552
err,0.0180667,0.0012491,0.0190833,0.02,0.0166667,0.0153333,0.01925
err_count,216.8,14.989329,229.0,240.0,200.0,184.0,231.0
f0point5,0.9827713,0.0011687,0.9818677,0.9806644,0.9842828,0.9851013,0.9819406
f1,0.9822294,0.0011406,0.9814379,0.9801784,0.9837054,0.984504,0.9813213
f2,0.981688,0.0011142,0.9810085,0.9796929,0.9831287,0.9839073,0.9807027
lift_top_group,1.967142,0.0210402,1.9439495,1.9805249,1.9534429,2.0191822,1.9386107
logloss,0.0571221,0.0052239,0.0569221,0.0686369,0.0515407,0.0474482,0.0610626


Scoring History: 


0,1,2,3,4,5,6,7,8,9,10,11
,timestamp,duration,training_speed,epochs,iterations,samples,training_rmse,training_logloss,training_auc,training_lift,training_classification_error
,2017-05-08 21:40:02,0.000 sec,,0.0,0,0.0,,,,,
,2017-05-08 21:40:02,1 min 9.668 sec,9061 obs/sec,0.0170667,1,1024.0,0.4849709,0.6634370,0.8097165,1.9833836,0.2849975
,2017-05-08 21:40:09,1 min 16.869 sec,13963 obs/sec,1.536,90,92160.0,0.1606915,0.0930246,0.9969276,2.0034178,0.0216759
,2017-05-08 21:40:16,1 min 23.070 sec,15018 obs/sec,3.0208,177,181248.0,0.1122170,0.0470116,0.9983865,2.0034178,0.0163573


# Ensemble Models

In [12]:
# Stack the three cross-validated base models (GBM, GLM, Deep Water),
# referenced by their model ids in that order.
base_model_ids = [
    gbm_model.model_id,
    glm_model.model_id,
    dw_model.model_id,
]
stack_all = H2OStackedEnsembleEstimator(base_models=base_model_ids)

# Train the ensemble on the training frame, supplying the validation frame too.
stack_all.train(x=features,
                y=target,
                training_frame=train,
                validation_frame=valid,
                model_id="stack_all")

# Called without arguments, the recorded output is "Reported on train data".
# NOTE(review): to judge the ensemble on the hold-out frame, consider
# requesting the validation metrics instead -- confirm the intended report.
stack_all.model_performance()

stackedensemble Model Build progress: |███████████████████████████████████| 100%

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.228869541099
RMSE: 0.478403115687
LogLoss: 0.639728211497
Null degrees of freedom: 59999
Residual degrees of freedom: 59997
Null deviance: 83160.4565782
Residual deviance: 76767.3853797
AIC: 76773.3853797
AUC: 0.963031815936
Gini: 0.926063631872
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.299870261979: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,26240.0,3252.0,0.1103,(3252.0/29492.0)
1,2510.0,27998.0,0.0823,(2510.0/30508.0)
Total,28750.0,31250.0,0.096,(5762.0/60000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.2998703,0.9067003,222.0
max f2,0.2587160,0.9295814,294.0
max f0point5,0.3452208,0.9196037,147.0
max accuracy,0.3085181,0.9046167,208.0
max precision,0.4064302,0.9972838,0.0
max recall,0.2215005,1.0,399.0
max specificity,0.4064302,0.9997966,0.0
max absolute_mcc,0.3085181,0.8091859,208.0
max min_per_class_accuracy,0.3089728,0.9042452,207.0


Gains/Lift Table: Avg response rate: 50.85 %



0,1,2,3,4,5,6,7,8,9,10,11
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,cumulative_response_rate,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.4064554,1.9568638,1.9568638,0.995,0.995,0.0195686,0.0195686,95.6863773,95.6863773
,2,0.02,0.4064358,1.9666973,1.9617805,1.0,0.9975,0.0196670,0.0392356,96.6697260,96.1780517
,3,0.03,0.4064046,1.9601416,1.9612342,0.9966667,0.9972222,0.0196014,0.0588370,96.0141602,96.1234212
,4,0.04,0.4063606,1.9568638,1.9601416,0.995,0.9966667,0.0195686,0.0784057,95.6863773,96.0141602
,5,0.05,0.4062920,1.9634194,1.9607972,0.9983333,0.997,0.0196342,0.0980399,96.3419431,96.0797168
,6,0.1,0.4053604,1.9529304,1.9568638,0.993,0.995,0.0976465,0.1956864,95.2930379,95.6863773
,7,0.15,0.4038297,1.9450636,1.9529304,0.989,0.993,0.0972532,0.2929396,94.5063590,95.2930379
,8,0.2,0.4014675,1.9299856,1.9471942,0.9813333,0.9900833,0.0964993,0.3894388,92.9985578,94.7194179
,9,0.3,0.3921706,1.8998296,1.9314060,0.966,0.9820556,0.1899830,0.5794218,89.9829553,93.1405970





