# Automatic Machine Learning with H2O AutoML

### Importing Packages

In [None]:
import pandas as pd
# Let pandas print up to 999 rows before truncating displayed frames.
pd.set_option('display.max_rows', 999)
import numpy as np
import matplotlib.pyplot as plt

### Loading and Exploring the Data

In [None]:
# Open the workbook once; the handle is reused by the following cells to list
# and parse the individual sheets.
xls = pd.ExcelFile(
    '/mnt/share/datasets/h2o-automl/bank_term_deposit_marketing_analysis.xlsx'
)

In [None]:
# List the worksheet names contained in the workbook.
xls.sheet_names

In [None]:
# Parse the four worksheets used in this analysis into separate DataFrames,
# all from the already-opened workbook handle.
client_info, loan_history, marketing_history, subscription_history = (
    pd.read_excel(xls, sheet)
    for sheet in (
        'CLIENT_INFO',
        'LOAN_HISTORY',
        'MARKETING HISTORY',
        'SUBSCRIPTION HISTORY',
    )
)

In [None]:
# Preview the first rows of the CLIENT_INFO sheet.
client_info.head()

In [None]:
# Preview the first rows of the LOAN_HISTORY sheet.
loan_history.head()

In [None]:
# Preview the first rows of the MARKETING HISTORY sheet.
marketing_history.head()

In [None]:
# Preview the first rows of the SUBSCRIPTION HISTORY sheet.
subscription_history.head()

In [None]:
# Join the four tables on the shared ID column into one modelling table.
# The default join is "inner", so only IDs present in every table are kept.
df = (
    client_info
    .merge(loan_history, on=['ID'])
    .merge(marketing_history, on=['ID'])
    .merge(subscription_history, on=['ID'])
)
df.head()

In [None]:
# ID was only needed as the join key; remove it from the modelling table.
df = df.drop(columns=['ID'])

### Data Prep & Start H2O

In [None]:
import h2o
# Start H2O; every h2o.* call in the rest of the notebook goes through it.
h2o.init()

In [None]:
# Convert the merged pandas DataFrame into an H2OFrame for use with H2O.
h2o_df = h2o.H2OFrame(df)

In [None]:
# Show a per-column summary of the H2O frame.
h2o_df.describe()

In [None]:
# Split the H2O frame into roughly 75% training rows and 25% hold-out test
# rows. The split is random, so pass a seed to make it (and every metric
# computed on `test`) reproducible; seed=1 matches the seed given to H2OAutoML.
train, test = h2o_df.split_frame(ratios=[.75], seed=1)

# Identify predictors and response: every column except the target is a
# predictor.
# NOTE(review): AutoML treats a numeric response as a regression target. If
# TERM_DEPOSIT is not parsed as a categorical column (check the describe()
# output), convert it with asfactor() before training -- confirm.
y = "TERM_DEPOSIT"
x = [col for col in train.columns if col != y]

### Run H2O AutoML

In [None]:
from h2o.automl import H2OAutoML

In [None]:
aml = H2OAutoML(max_runtime_secs=600,
                #exclude_algos=['DeepLearning'],
                seed=1,
                #stopping_metric='logloss',
                #sort_metric='logloss',
                balance_classes=False,
                project_name='Completed'
)
%time aml.train(x=x, y=y, training_frame=train)

### AutoML Leaderboard and Ensemble Exploration

In [None]:
# View the AutoML Leaderboard
# Fetch the AutoML leaderboard and display every row of it; head() would
# otherwise stop at its default of 10 rows.
lb = aml.leaderboard
lb.head(rows=lb.shape[0])

In [None]:
# Get the "All Models" Stacked Ensemble model
se = aml.leader

# Get the Stacked Ensemble metalearner model
metalearner = h2o.get_model(se.metalearner().model_id)

In [None]:
# Show the metalearner's variable importances.
# NOTE(review): presumably each "variable" here is a base model of the
# ensemble, so this shows how much the ensemble relies on each one -- confirm.
metalearner.varimp()

### Baselearner Model Exploration

In [None]:
# Pick the best-ranked XGBoost base learner from the AutoML leaderboard.
# AutoML model ids embed the run's timestamp, so a hard-coded id only exists
# in the session that produced it; selecting from the leaderboard keeps this
# cell runnable after every retrain.
# To inspect a specific model instead, pass its id from the leaderboard
# directly: model = h2o.get_model('<model_id>')
candidate_ids = aml.leaderboard['model_id'].as_data_frame().iloc[:, 0].tolist()
xgboost_ids = [mid for mid in candidate_ids if mid.startswith('XGBoost')]
if not xgboost_ids:
    raise ValueError(
        'No XGBoost model found in the AutoML leaderboard; choose another '
        'model_id from the leaderboard and load it with h2o.get_model().'
    )
# Leaderboard rows are ranked best-first, so the first match is the best one.
model = h2o.get_model(xgboost_ids[0])

In [None]:
# Evaluate the selected model on the held-out test frame.
model.model_performance(test)

In [None]:
# Plot the model's variable importances, limited to 20 features.
model.varimp_plot(num_of_features=20)

In [None]:
# Partial dependence plot for the DURATION column, computed on the training
# frame. The trailing semicolon keeps the notebook from echoing the return
# value below the plot.
model.partial_plot(train, cols=["DURATION"], figsize=(5,5));

In [None]:
# save the model, change the path accordingly
model_path = h2o.save_model(model=model, path='save_model', force=True)
print(model_path)