In [None]:
# Common libraries
import pandas as pd
from sklearn.model_selection import train_test_split


In [None]:
# Framework library 
import h2o 
from h2o.automl import H2OAutoML
# Connect to an H2O cluster (h2o.init starts a local one when none is reachable).
# This has to run before any H2OFrame is created further down.
h2o.init()

In [None]:
# Load the bank dataset (path is relative to the working directory) into a DataFrame
data = pd.read_csv(filepath_or_buffer='bank.csv')

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=25,
)

In [None]:
# Create datasets in the right format for H2OAutoML.
# `train_data` / `test_data` are rebound from pandas DataFrames to H2OFrames here,
# so the conversion is guarded: on a second run of this cell the names already hold
# H2OFrames, and the H2OFrame constructor only accepts native Python/pandas/numpy
# input. The guard makes the cell safe to re-run.
if not isinstance(train_data, h2o.H2OFrame):
    train_data = h2o.H2OFrame(train_data)
if not isinstance(test_data, h2o.H2OFrame):
    test_data = h2o.H2OFrame(test_data)

In [None]:
# Role definition: `y` is the response column, every other column is a predictor.
y = 'y'
x = [column for column in train_data.columns if column != y]

# H2O chooses classification vs. regression from the response column's type.
# Cast the target to a factor in both frames so a numerically-encoded label
# (e.g. 0/1) is still treated as a class; the accuracy/AUC reporting later in
# this notebook assumes a classifier. asfactor() on an already-categorical
# column leaves it unchanged, so this cell is safe to re-run.
train_data[y] = train_data[y].asfactor()
test_data[y] = test_data[y].asfactor()

In [None]:
# Set up the AutoML run with a 300-second training budget and a fixed seed.
# Without `sort_metric` the leaderboard is ranked by H2O's default metric
# (AUC here, per the original note).
# NOTE(review): the original note suggested sort_metric="accuracy" to rank by
# accuracy; that value does not appear among the documented sort_metric
# options — confirm against the H2O AutoML docs before using it.
automl = H2OAutoML(
    max_runtime_secs=300,
    seed=1,
)

In [None]:
# Run the AutoML search on the training frame, using the predictor/response roles set above
automl.train(
    x=x,
    y=y,
    training_frame=train_data,
)

In [None]:
# Show the complete leaderboard: ask head() for as many rows as the leaderboard has
lb = automl.leaderboard
total_models = lb.nrows
lb.head(rows=total_models)

In [None]:
# Show best model: bind the AutoML leader to `best_model`; the bare expression
# on the last line makes the notebook render it as the cell output.
best_model = automl.leader
best_model

In [None]:
# Score the leader on the training frame and on the held-out test frame.
# model_performance receives the frame through `test_data` in both cases;
# the tuple order (train first, then test) matches the unpacking targets.
performance_train, performance_test = (
    best_model.model_performance(test_data=frame)
    for frame in (train_data, test_data)
)

In [None]:
# Report accuracy & AUC for both sets.
# accuracy() returns a list of [threshold, value] pairs; with no `thresholds`
# argument it holds a single pair taken at the threshold that maximises accuracy
# on that frame. Unpack it so the score is printed instead of a nested list, and
# show the threshold too, because the train and test values may come from
# different thresholds.
train_threshold, train_accuracy = performance_train.accuracy()[0]
test_threshold, test_accuracy = performance_test.accuracy()[0]

print("Train Accuracy Score:", train_accuracy, "at threshold", train_threshold)
print("Train AUC Score:", performance_train.auc())
print("Test Accuracy Score:", test_accuracy, "at threshold", test_threshold)
print("Test AUC Score:", performance_test.auc())

In [None]:
# Save the best model to disk.
# The target directory is relative instead of an absolute path under one user's
# home directory, so the notebook runs unchanged on other machines.
# NOTE(review): the path is resolved by the H2O server; for a cluster started
# locally by h2o.init() that is expected to be the notebook's working
# directory — confirm if connecting to a remote cluster.
MODEL_DIR = 'h2omodel'
model_path = h2o.save_model(model=best_model, path=MODEL_DIR, force=True)
# save_model returns the location of the written model; display it as the cell output
model_path