In [1]:
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch


In [2]:
# Connect to an H2O instance at the default local address; if none is running,
# a local H2O server is started and connected to.
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 25.201-b09, mixed mode)
  Starting server from C:\ProgramData\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\HarshithaGS\AppData\Local\Temp\tmpoxz2gfkm
  JVM stdout: C:\Users\HarshithaGS\AppData\Local\Temp\tmpoxz2gfkm\h2o_HarshithaGS_started_from_python.out
  JVM stderr: C:\Users\HarshithaGS\AppData\Local\Temp\tmpoxz2gfkm\h2o_HarshithaGS_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,02 secs
H2O cluster timezone:,America/New_York
H2O data parsing timezone:,UTC
H2O cluster version:,3.22.1.6
H2O cluster version age:,15 days
H2O cluster name:,H2O_from_python_HarshithaGS_g7qivf
H2O cluster total nodes:,1
H2O cluster free memory:,3.499 Gb
H2O cluster total cores:,12
H2O cluster allowed cores:,12


In [3]:
# Import a sample binary outcome train/test set into H2O
# (the file names indicate 10k training rows and 5k test rows).
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")
test = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv")

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [4]:
# Name the response column first, then use every remaining column as a predictor.
y = "response"
x = list(train.columns)
x.remove(y)

In [5]:
# For binary classification, response should be a factor.
# Apply the same conversion to the training and the test frame.
for frame in (train, test):
    frame[y] = frame[y].asfactor()

In [6]:
# Number of CV folds (to generate level-one data for stacking).
# The same value is passed to every base model trained below.
nfolds = 5

In [7]:
# There are a few ways to assemble a list of models to stack together:
# 1. Train individual models and put them in a list
# 2. Train a grid of models
# 3. Train several grids of models
# Note: All base models must have the same cross-validation folds and
# the cross-validated predicted values must be kept.

In [8]:
# 1. Generate a 2-model ensemble (GBM + RF)

# Cross-validation settings shared by both base learners, so that they use the
# same folds and keep their cross-validated predictions for stacking.
base_cv_args = dict(
    nfolds=nfolds,
    fold_assignment="Modulo",
    keep_cross_validation_predictions=True,
    seed=1,
)

# Train and cross-validate a GBM
my_gbm = H2OGradientBoostingEstimator(
    distribution="bernoulli",
    ntrees=10,
    max_depth=3,
    min_rows=2,
    learn_rate=0.2,
    **base_cv_args
)
my_gbm.train(x=x, y=y, training_frame=train)

# Train and cross-validate a RF
my_rf = H2ORandomForestEstimator(ntrees=50, **base_cv_args)
my_rf.train(x=x, y=y, training_frame=train)

gbm Model Build progress: |███████████████████████████████████████████████| 100%
drf Model Build progress: |███████████████████████████████████████████████| 100%


In [18]:
# Compare to base learner performance on the test set:
# score the GBM on the test frame.
perf_gbm_test = my_gbm.model_performance(test)

In [19]:
# Print the GBM's test-set metrics. show() is the explicit display call;
# wrapping the metrics object in str() only printed as a side effect and
# left a stray '' as the cell result.
perf_gbm_test.show()


ModelMetricsBinomial: gbm
** Reported on test data. **

MSE: 0.20579980230216902
RMSE: 0.45365163099251504
LogLoss: 0.5991607617229083
Mean Per-Class Error: 0.3132779098342524
AUC: 0.7522686229794354
pr_auc: 0.7687783743434573
Gini: 0.5045372459588708
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.42008076282776335: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,948.0,1367.0,0.5905,(1367.0/2315.0)
1,325.0,2360.0,0.121,(325.0/2685.0)
Total,1273.0,3727.0,0.3384,(1692.0/5000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.4200808,0.7361198,265.0
max f2,0.2710360,0.8594103,354.0
max f0point5,0.5443464,0.7160750,160.0
max accuracy,0.4843246,0.6828,209.0
max precision,0.7880804,0.9777778,4.0
max recall,0.1433346,1.0,396.0
max specificity,0.8039157,0.9995680,0.0
max absolute_mcc,0.5443464,0.3747276,160.0
max min_per_class_accuracy,0.5061039,0.6768898,192.0


Gains/Lift Table: Avg response rate: 53.70 %, avg score: 52.40 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.0178,0.7880971,1.8203503,1.8203503,0.9775281,0.7932167,0.9775281,0.7932167,0.0324022,0.0324022,82.0350260,82.0350260
,2,0.0316,0.7790391,1.7002672,1.7679089,0.9130435,0.7797825,0.9493671,0.7873498,0.0234637,0.0558659,70.0267185,76.7908917
,3,0.0406,0.7716384,1.7794331,1.7704635,0.9555556,0.7766063,0.9507389,0.7849683,0.0160149,0.0718808,77.9433064,77.0463531
,4,0.0532,0.7663790,1.5074931,1.7081811,0.8095238,0.7679393,0.9172932,0.7809351,0.0189944,0.0908752,50.7493128,70.8181067
,5,0.1,0.7284882,1.6393704,1.6759777,0.8803419,0.7457716,0.9,0.7644786,0.0767225,0.1675978,63.9370354,67.5977654
,6,0.1502,0.6927837,1.4689844,1.6067962,0.7888446,0.7139049,0.8628495,0.7475758,0.0737430,0.2413408,46.8984398,60.6796153
,7,0.2014,0.6700969,1.5130354,1.5829602,0.8125,0.6767689,0.8500497,0.7295752,0.0774674,0.3188082,51.3035382,58.2960247
,8,0.3022,0.6245242,1.2968875,1.4875395,0.6964286,0.6478638,0.7988087,0.7023200,0.1307263,0.4495345,29.6887470,48.7539546
,9,0.4002,0.5717529,1.1895261,1.4145627,0.6387755,0.5979999,0.7596202,0.6767744,0.1165736,0.5661080,18.9526090,41.4562737





''

In [20]:
# Score the random forest on the same test frame.
perf_rf_test = my_rf.model_performance(test)

In [21]:
# Print the random forest's test-set metrics. show() is the explicit display
# call; wrapping the metrics object in str() only printed as a side effect and
# left a stray '' as the cell result.
perf_rf_test.show()


ModelMetricsBinomial: drf
** Reported on test data. **

MSE: 0.19688703075675026
RMSE: 0.44371954065237
LogLoss: 0.5772693100340657
Mean Per-Class Error: 0.29644855227224276
AUC: 0.7697982150254795
pr_auc: 0.7939742395344842
Gini: 0.539596430050959
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.38868789450688795: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,962.0,1353.0,0.5844,(1353.0/2315.0)
1,302.0,2383.0,0.1125,(302.0/2685.0)
Total,1264.0,3736.0,0.331,(1655.0/5000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.3886879,0.7422520,279.0
max f2,0.2619649,0.8588397,344.0
max f0point5,0.5250260,0.7289438,202.0
max accuracy,0.5131663,0.7038,209.0
max precision,0.9771504,1.0,0.0
max recall,0.0828127,1.0,391.0
max specificity,0.9771504,1.0,0.0
max absolute_mcc,0.5250260,0.4059950,202.0
max min_per_class_accuracy,0.5168985,0.7019438,207.0


Gains/Lift Table: Avg response rate: 53.70 %, avg score: 52.32 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9077049,1.7504655,1.7504655,0.94,0.9331315,0.94,0.9331315,0.0175047,0.0175047,75.0465549,75.0465549
,2,0.02,0.8786037,1.7504655,1.7504655,0.94,0.8909492,0.94,0.9120403,0.0175047,0.0350093,75.0465549,75.0465549
,3,0.0306,0.86,1.7567900,1.7526564,0.9433962,0.8696806,0.9411765,0.8973667,0.0186220,0.0536313,75.6789993,75.2656370
,4,0.04,0.8481222,1.7037125,1.7411546,0.9148936,0.8543995,0.935,0.8872694,0.0160149,0.0696462,70.3712508,74.1154562
,5,0.05,0.8313960,1.7877095,1.7504655,0.96,0.8381898,0.94,0.8774535,0.0178771,0.0875233,78.7709497,75.0465549
,6,0.1,0.7754190,1.6759777,1.7132216,0.9,0.7997436,0.92,0.8385986,0.0837989,0.1713222,67.5977654,71.3221601
,7,0.15,0.7346817,1.5493482,1.6585971,0.832,0.7534243,0.8906667,0.8102071,0.0774674,0.2487896,54.9348231,65.8597145
,8,0.2,0.6960847,1.5567970,1.6331471,0.836,0.7140283,0.877,0.7861624,0.0778399,0.3266294,55.6797020,63.3147114
,9,0.3,0.6290259,1.3109870,1.5257604,0.704,0.6626385,0.8193333,0.7449878,0.1310987,0.4577281,31.0986965,52.5760397





''

In [9]:
# Train a stacked ensemble using the GBM and RF above.
# No metalearner is specified here, so H2O's default is used
# (NOTE(review): presumably a GLM, given the GLM-style test metrics — confirm).
ensemble = H2OStackedEnsembleEstimator(model_id="my_ensemble_binomial",
                                       base_models=[my_gbm, my_rf])
ensemble.train(x=x, y=y, training_frame=train)

stackedensemble Model Build progress: |███████████████████████████████████| 100%


In [None]:
# Evaluate ensemble performance on the test data
perf_stack_test = ensemble.model_performance(test)

In [28]:
# Print the stacked ensemble's test-set metrics. show() is the explicit display
# call; wrapping the metrics object in str() only printed as a side effect and
# left a stray '' as the cell result.
perf_stack_test.show()


ModelMetricsBinomialGLM: stackedensemble
** Reported on test data. **

MSE: 0.19312363680099362
RMSE: 0.4394583447847971
LogLoss: 0.5671382268190297
Null degrees of freedom: 4999
Residual degrees of freedom: 4997
Null deviance: 6905.196403260961
Residual deviance: 5671.382268190297
AIC: 5677.382268190297
AUC: 0.7735482703283179
pr_auc: 0.796927497555573
Gini: 0.5470965406566357
Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.3677843916096537: 


0,1,2,3,4
,0.0,1.0,Error,Rate
0,1138.0,1177.0,0.5084,(1177.0/2315.0)
1,402.0,2283.0,0.1497,(402.0/2685.0)
Total,1540.0,3460.0,0.3158,(1579.0/5000.0)


Maximum Metrics: Maximum metrics at their respective thresholds



0,1,2,3
metric,threshold,value,idx
max f1,0.3677844,0.7430431,266.0
max f2,0.1298463,0.8592392,372.0
max f0point5,0.6071355,0.7338109,154.0
max accuracy,0.4908473,0.7044,208.0
max precision,0.9337883,1.0,0.0
max recall,0.0756540,1.0,391.0
max specificity,0.9337883,1.0,0.0
max absolute_mcc,0.5294496,0.4110972,189.0
max min_per_class_accuracy,0.5048412,0.7032397,201.0


Gains/Lift Table: Avg response rate: 53.70 %, avg score: 52.17 %



0,1,2,3,4,5,6,7,8,9,10,11,12,13
,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain
,1,0.01,0.9142942,1.7877095,1.7877095,0.96,0.9215329,0.96,0.9215329,0.0178771,0.0178771,78.7709497,78.7709497
,2,0.02,0.9053093,1.7132216,1.7504655,0.92,0.9092916,0.94,0.9154123,0.0171322,0.0350093,71.3221601,75.0465549
,3,0.03,0.8938733,1.8621974,1.7877095,1.0,0.8996858,0.96,0.9101701,0.0186220,0.0536313,86.2197393,78.7709497
,4,0.04,0.8881068,1.7504655,1.7783985,0.94,0.8907679,0.955,0.9053195,0.0175047,0.0711359,75.0465549,77.8398510
,5,0.05,0.8801271,1.7132216,1.7653631,0.92,0.8846009,0.948,0.9011758,0.0171322,0.0882682,71.3221601,76.5363128
,6,0.1,0.8413675,1.6685289,1.7169460,0.896,0.8601703,0.922,0.8806730,0.0834264,0.1716946,66.8528864,71.6945996
,7,0.15,0.8039281,1.5716946,1.6685289,0.844,0.8223962,0.896,0.8612474,0.0785847,0.2502793,57.1694600,66.8528864
,8,0.2,0.7683339,1.4972067,1.6256983,0.804,0.7850165,0.873,0.8421897,0.0748603,0.3251397,49.7206704,62.5698324
,9,0.3,0.6884214,1.3035382,1.5183116,0.7,0.7297989,0.8153333,0.8047261,0.1303538,0.4554935,30.3538175,51.8311608





''

In [22]:
# Best test AUC achieved by any single base learner.
baselearner_best_auc_test = max(
    perf.auc() for perf in (perf_gbm_test, perf_rf_test)
)
# Test AUC of the stacked ensemble.
stack_auc_test = perf_stack_test.auc()

In [23]:
# Show the best base-learner test AUC (str() renders it as a quoted string).
str(baselearner_best_auc_test)

'0.7697982150254795'

In [24]:
# Show the ensemble test AUC (str() renders it as a quoted string).
str(stack_auc_test)

'0.7735482703283179'

In [25]:
# Print both test AUCs together for a direct base-learner vs. ensemble comparison.
print("Best Base-learner Test AUC:  {0}".format(baselearner_best_auc_test))
print("Ensemble Test AUC:  {0}".format(stack_auc_test))

Best Base-learner Test AUC:  0.7697982150254795
Ensemble Test AUC:  0.7735482703283179


In [26]:
# Generate predictions on a test set (if necessary)
pred = ensemble.predict(test)

stackedensemble prediction progress: |████████████████████████████████████| 100%


In [27]:
# Display the prediction frame: the predicted class plus the p0 and p1 columns.
pred

predict,p0,p1
0,0.667059,0.332941
1,0.58339,0.41661
1,0.605868,0.394132
1,0.19097,0.80903
1,0.453361,0.546639
1,0.314529,0.685471
1,0.281925,0.718075
0,0.663363,0.336637
0,0.712379,0.287621
1,0.604028,0.395972


