In this notebook, we will not need many of the standard Python libraries. The H2O library provides its own functions and models, which are designed to work together.

In [None]:
import h2o
from h2o.automl import H2OAutoML
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import roc_auc_score
import os
# Show which files are available in the input directory.
input_files = os.listdir("../input")
print(input_files)

In [None]:
## Start H2O and load the data:
h2o.init(max_mem_size='16G')

# Read each CSV into an H2O frame and discard the 'ID_code' column right away
# (presumably a row identifier rather than a feature -- verify against the data).
htrain = h2o.import_file('../input/train.csv').drop(['ID_code'])
htest_sub = h2o.import_file('../input/test.csv').drop(['ID_code'])

# y is the response column; x holds every other training column (predictors).
y = "target"
x = htrain.columns
x.remove(y)

# For binary classification, the response must be a factor (categorical).
htrain[y] = htrain[y].asfactor()

train_shape = htrain.shape
test_shape = htest_sub.shape
print("Train set size:", train_shape, "Test set size:", test_shape)

In [None]:
## Configure and run H2O AutoML:
folds = 5
automl_settings = dict(
    seed=334103,
    max_models=12,            # cap the run at 12 models
    max_runtime_secs=31000,   # time budget, in case the models take too long to run
    stopping_metric="AUC",    # Yes/No problem, so AUC is the metric we care about
    nfolds=folds,             # number of cross-validation folds
)
aml = H2OAutoML(**automl_settings)

# Train the models on the training frame.
aml.train(x=x, y=y, training_frame=htrain)

# The leaderboard lists every model that was tried together with its
# evaluation metrics; request all rows instead of the default 10.
lb = aml.leaderboard
lb.head(rows=lb.nrows)



In [None]:
## Build the submission file:

# Score the test frame with the leading model and convert the result
# to a local data frame.
sub_pred = aml.leader.predict(htest_sub).as_data_frame()
print('predict shape:', sub_pred.shape)

# Fill the sample submission's 'target' column with the 'p1' prediction column
# and write the result to disk.
sample_submission = pd.read_csv('../input/sample_submission.csv')
sample_submission['target'] = sub_pred['p1']
sample_submission.to_csv('h2o_AutoML_submission_v3.csv', index=False)