In [26]:
import autosklearn.classification
import pandas as pd
import numpy as np
import sklearn.model_selection
import sklearn.metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
import h2o
from h2o.automl import H2OAutoML
from tpot import TPOTClassifier
from sklearn.metrics import log_loss
from sklearn.metrics import classification_report
import time

In [27]:
def autosklearn_classification(X_train, y_train, X_test):
    
    #time_left_for_this_task=3600, per_run_time_limit=360. Use these arguments for a faster less accurate search
    automl = autosklearn.classification.AutoSklearnClassifier() 
    automl.fit(X_train, y_train)
    predictions = automl.predict(X_test)
    predictions_proba = automl.predict_proba(X_test)[:,1]
    text_file = open("models_tried.txt", "w+")
    n = text_file.write(str(automl.show_models()))
    n = text_file.write(str(automl.sprint_statistics()))
    text_file.close()
    return (predictions,predictions_proba)
    

In [5]:
def tpot_classification(X_train, y_train, X_test):
    
    tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=1)
    tpot.fit(X_train, y_train)
    predictions = tpot.predict(X_test)
    predictions_proba = tpot.predict_proba(X_test)[:,1]
    return (predictions, predictions_proba)


In [28]:
def h2o_classifications(X_train, y_train, X_test, target):
    
    h2o.init()
    aml = H2OAutoML()
    train_hf = pd.concat([X_train, y_train], axis=1)
    train_hf = h2o.H2OFrame(train_hf)
    train_hf[target] = train_hf[target].asfactor()
    aml.train(y = target, training_frame = train_hf)
    response = aml.predict(h2o.H2OFrame(X_test))
    print("various models tested:")
    lb = h2o.automl.get_leaderboard(aml, extra_columns = 'ALL')
    print(lb)
    return response

In [41]:
def metric_calculator(framework, dataset, y_test, predictions):
    
    if framework =="H2O":   
        print("Framework: ", framework)
        ll = log_loss(y_test, predictions.as_data_frame().iloc[:,1:])
        print("log loss: ",ll)
        accuracy = sklearn.metrics.accuracy_score(predictions.as_data_frame().iloc[:,0], y_test.astype('int64'))
        print("accuracy: ",accuracy)
        print("Classification report")
        print(classification_report(y_test.astype('int64'), predictions.as_data_frame().iloc[:,0]))
    else:
        print("Framework: ", framework)
        ll = log_loss(y_test, predictions[1])
        print("log loss: ",ll)
        accuracy = sklearn.metrics.accuracy_score(predictions[0], y_test)
        print("accuracy: ",accuracy)
        print("Classification report")
        print(classification_report(y_test, predictions[0]))
            

### Dataset 1 - Wine quality dataset

In [30]:
ds1_df = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",sep=";")

X, y = ds1_df.iloc[:,:-1], ds1_df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

<b> Model Auto-sklearn </b>

In [None]:
start_time = time.time()
ds1_autosklearn_predictions = autosklearn_classification(X_train, y_train, X_test)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 44905 instead
  http_address["port"], self.http_server.port




In [20]:
metric_calculator("auto-sklearn", "wine quality", y_test, ds1_autosklearn_predictions)
end_time = time.time()
print("total time elapsed: ",end_time - start_time)

Framework:  auto-sklearn
log loss: 0.4268143501
accuracy:  0.6579591836734694
Classification report
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.56      0.14      0.22        37
           5       0.72      0.63      0.67       368
           6       0.61      0.84      0.71       544
           7       0.78      0.44      0.56       233
           8       0.91      0.24      0.38        41

    accuracy                           0.66      1225
   macro avg       0.60      0.38      0.42      1225
weighted avg       0.68      0.66      0.64      1225

total time elapsed:  4493.640509843826


  _warn_prf(average, modifier, msg_start, len(result))


<b> Model TPOT </b>

In [7]:
start_time = time.time()
ds1_tpot_predictions = tpot_classification(X_train, y_train, X_test)

HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=300.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.6553237316725056

Generation 2 - Current best internal CV score: 0.6553237316725056

Generation 3 - Current best internal CV score: 0.6555928747520807

Generation 4 - Current best internal CV score: 0.6555928747520807

Generation 5 - Current best internal CV score: 0.6555928747520807

Best pipeline: KNeighborsClassifier(RobustScaler(input_matrix), n_neighbors=70, p=1, weights=distance)


In [18]:
metric_calculator("tpot", "wine quality", y_test, ds1_tpot_predictions)
end_time = time.time()
print("total time elapsed: ",end_time - start_time)

Framework:  tpot
accuracy:  0.6644897959183673
Classification report


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       1.00      0.03      0.05        37
           5       0.73      0.60      0.66       368
           6       0.61      0.85      0.71       544
           7       0.76      0.50      0.60       233
           8       1.00      0.32      0.48        41

    accuracy                           0.66      1225
   macro avg       0.68      0.38      0.42      1225
weighted avg       0.70      0.66      0.65      1225

total time elapsed:  2303.344337940216


<b> Model H2O </b>

In [31]:
start_time = time.time()
ds1_h2o_predictions = h2o_classifications(X_train, y_train, X_test, ds1_df.columns[-1])
end_time = time.time()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,7 hours 20 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.0.2
H2O_cluster_version_age:,"28 days, 1 hour and 30 minutes"
H2O_cluster_name:,H2O_from_python_jupyter_htc3nt
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,6.260 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
gbm prediction progress: |████████████████████████████████████████████████| 100%
various models tested:


model_id,mean_per_class_error,logloss,rmse,mse,training_time_ms,predict_time_per_row_ms
GBM_grid__1_AutoML_20201215_170856_model_11,0.621339,0.94484,0.534286,0.285462,960,0.10901
GBM_grid__1_AutoML_20201215_170856_model_2,0.633987,0.933119,0.529272,0.280129,1010,0.144423
DRF_1_AutoML_20201215_170856,0.634961,1.32625,0.542834,0.294669,802,0.087003
GBM_grid__1_AutoML_20201215_170856_model_7,0.639168,1.06498,0.551942,0.30464,1683,0.102785
StackedEnsemble_BestOfFamily_AutoML_20201215_170856,0.639593,0.861119,0.533881,0.285029,20783,0.116453
XRT_1_AutoML_20201215_170856,0.640323,1.3555,0.543687,0.295595,1076,0.087979
GBM_grid__1_AutoML_20201215_170856_model_6,0.644557,0.966353,0.541955,0.293715,1141,0.097296
GBM_1_AutoML_20201215_170856,0.644745,0.950081,0.544774,0.296778,702,0.100604
GBM_grid__1_AutoML_20201215_170856_model_3,0.644906,0.949204,0.536944,0.288308,996,0.114888
GBM_grid__1_AutoML_20201215_170856_model_8,0.645515,0.933002,0.530349,0.28127,873,0.148182





In [34]:
metric_calculator("H2O", "wine quality",y_test, ds1_h2o_predictions)
print("total time elapsed: ",end_time - start_time)

Framework:  H2O
accuracy:  0.6538775510204081
Classification report
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.54      0.19      0.28        37
           5       0.72      0.61      0.66       368
           6       0.62      0.79      0.69       544
           7       0.67      0.53      0.59       233
           8       0.82      0.34      0.48        41

    accuracy                           0.65      1225
   macro avg       0.56      0.41      0.45      1225
weighted avg       0.66      0.65      0.64      1225

total time elapsed:  2548.4360597133636


### Dataset 2 - Spambase


In [43]:
X, y = fetch_openml('spambase', version=1, return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

<b> Model Auto-sklearn </b>

In [12]:
start_time = time.time()
ds2_autosklearn_predictions = autosklearn_classification(X_train, y_train, X_test)
end_time = time.time()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 43301 instead
  http_address["port"], self.http_server.port
  self._dask_client.shutdown()




In [30]:
metric_calculator("auto-sklearn", "spambase", y_test, ds2_autosklearn_predictions)
print("total time elapsed: ",end_time - start_time)

Framework:  auto-sklearn
log loss: 0.3834920926
accuracy:  0.9591659426585578
Classification report
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       701
           1       0.97      0.93      0.95       450

    accuracy                           0.96      1151
   macro avg       0.96      0.95      0.96      1151
weighted avg       0.96      0.96      0.96      1151

total time elapsed:  3627.5266301631927


<b> Model TPOT </b>

In [20]:
start_time = time.time()
ds2_tpot_predictions = tpot_classification(X_train, y_train, X_test)
end_time = time.time()

HBox(children=(HTML(value='Optimization Progress'), FloatProgress(value=0.0, max=300.0), HTML(value='')))


Generation 1 - Current best internal CV score: 0.946376811594203

Generation 2 - Current best internal CV score: 0.9489855072463769

Generation 4 - Current best internal CV score: 0.9515942028985507

Generation 5 - Current best internal CV score: 0.952463768115942

Best pipeline: GradientBoostingClassifier(BernoulliNB(input_matrix, alpha=0.01, fit_prior=True), learning_rate=0.1, max_depth=6, max_features=0.5, min_samples_leaf=15, min_samples_split=5, n_estimators=100, subsample=0.7500000000000001)


In [21]:
metric_calculator("tpot", "spambase", y_test,ds2_tpot_predictions)
print("total time elapsed: ",end_time - start_time)

Framework:  tpot
log loss:  0.12303342451402823
accuracy:  0.9548218940052129
Classification report
              precision    recall  f1-score   support

           0       0.96      0.97      0.96       701
           1       0.95      0.94      0.94       450

    accuracy                           0.95      1151
   macro avg       0.95      0.95      0.95      1151
weighted avg       0.95      0.95      0.95      1151

total time elapsed:  1716.766785621643


<b> Model H2O </b>

In [24]:
start_time = time.time()
ds2_h2o_predictions = h2o_classifications(X_train, y_train, X_test, y.name)
end_time = time.time()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,5 hours 47 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.0.2
H2O_cluster_version_age:,27 days
H2O_cluster_name:,H2O_from_python_jupyter_htc3nt
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,6.481 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |█████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
stackedensemble prediction progress: |████████████████████████████████████| 100%
various models tested:


model_id,auc,logloss,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms
StackedEnsemble_BestOfFamily_AutoML_20201215_153559,0.987964,0.139223,0.97895,0.0487678,0.192587,0.0370897,1482,0.293166
GBM_grid__1_AutoML_20201215_153559_model_8,0.98789,0.131962,0.981636,0.0480269,0.191442,0.0366501,2075,0.040763
GBM_grid__1_AutoML_20201215_153559_model_6,0.987889,0.134067,0.98187,0.0514181,0.194017,0.0376426,1371,0.041011
GBM_grid__1_AutoML_20201215_153559_model_7,0.987771,0.138903,0.982409,0.0519122,0.197647,0.0390644,2181,0.037368
GBM_4_AutoML_20201215_153559,0.987762,0.133117,0.981817,0.0509013,0.191474,0.0366624,2778,0.035418
GBM_grid__1_AutoML_20201215_153559_model_12,0.987742,0.132913,0.981618,0.0532595,0.191936,0.0368396,1599,0.040924
StackedEnsemble_AllModels_AutoML_20201215_153559,0.987596,0.140303,0.979251,0.0476822,0.193475,0.0374324,2013,0.346716
GBM_grid__1_AutoML_20201215_153559_model_10,0.987505,0.132808,0.978592,0.0505196,0.191338,0.0366101,1865,0.036145
GBM_grid__1_AutoML_20201215_153559_model_2,0.987416,0.134459,0.981273,0.0494716,0.192618,0.0371015,11159,0.039521
GBM_grid__1_AutoML_20201215_153559_model_4,0.987188,0.135127,0.981163,0.0493145,0.193593,0.0374782,1210,0.040797





In [44]:
metric_calculator("H2O", "spambase",y_test,ds2_h2o_predictions )
print("total time elapsed: ",end_time - start_time)

Framework:  H2O
log loss:  0.1360053817
accuracy:  0.9635099913119027
Classification report
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       701
           1       0.95      0.95      0.95       450

    accuracy                           0.96      1151
   macro avg       0.96      0.96      0.96      1151
weighted avg       0.96      0.96      0.96      1151

total time elapsed:  2644.824376106262


### Dataset 3 - Bank Marketing

In [None]:
Marketing = pd.read_csv("bank-marketing.csv",sep=",")

X, y = Marketing.iloc[:,:-1], Marketing.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

<b> Model Auto-sklearn </b>

In [25]:
start_time = time.time()

for column in Marketing.columns:
    if str(Marketing[column].dtypes) =="object":
        Marketing[column] = Marketing[column].astype("category")
        
X, y = Marketing.iloc[:,:-1], Marketing.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

ds3_autosklearn_predictions = autosklearn_classification(X_train, y_train, X_test)

In [53]:
metric_calculator("auto-sklearn", "spambase", y_test, ds3_autosklearn_predictions)
print("total time elapsed: ",end_time - start_time)

Framework:  auto-sklearn
log loss: 0.3834920926
accuracy:  0.9181314946100806
Classification report
              precision    recall  f1-score   support

          no       0.94      0.97      0.95      9132
         yes       0.68      0.52      0.59      1165

    accuracy                           0.92     10297
   macro avg       0.81      0.75      0.77     10297
weighted avg       0.91      0.92      0.91     10297

total time elapsed:  3608.513154029846


<b> Model TPOT </b>

In [None]:
start_time = time.time()

Marketing.rename(columns={'y': 'class'}, inplace=True)

Preprocessing: 

In [8]:
for cat in ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'poutcome' ,'class']:
    print("Number of levels in category '{0}': \b {1:2.2f} ".format(cat, Marketing[cat].unique().size))

Number of levels in category 'job': 12.00 
Number of levels in category 'marital': 4.00 
Number of levels in category 'education': 8.00 
Number of levels in category 'default': 3.00 
Number of levels in category 'housing': 3.00 
Number of levels in category 'loan': 3.00 
Number of levels in category 'contact': 2.00 
Number of levels in category 'month': 10.00 
Number of levels in category 'day_of_week': 5.00 
Number of levels in category 'poutcome': 3.00 
Number of levels in category 'class': 2.00 


In [9]:
for cat in ['contact', 'poutcome','class', 'marital', 'default', 'housing', 'loan']:
    print("Levels for catgeory '{0}': {1}".format(cat, Marketing[cat].unique()))

Levels for catgeory 'contact': ['telephone' 'cellular']
Levels for catgeory 'poutcome': ['nonexistent' 'failure' 'success']
Levels for catgeory 'class': ['no' 'yes']
Levels for catgeory 'marital': ['married' 'single' 'divorced' 'unknown']
Levels for catgeory 'default': ['no' 'unknown' 'yes']
Levels for catgeory 'housing': ['no' 'yes' 'unknown']
Levels for catgeory 'loan': ['no' 'yes' 'unknown']


In [10]:
Marketing['marital'] = Marketing['marital'].map({'married':0,'single':1,'divorced':2,'unknown':3})
Marketing['default'] = Marketing['default'].map({'no':0,'yes':1,'unknown':2})
Marketing['housing'] = Marketing['housing'].map({'no':0,'yes':1,'unknown':2})
Marketing['loan'] = Marketing['loan'].map({'no':0,'yes':1,'unknown':2})
Marketing['contact'] = Marketing['contact'].map({'telephone':0,'cellular':1})
Marketing['poutcome'] = Marketing['poutcome'].map({'nonexistent':0,'failure':1,'success':2})
Marketing['class'] = Marketing['class'].map({'no':0,'yes':1})

In [11]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()

job_Trans = mlb.fit_transform([{str(val)} for val in Marketing['job'].values])
education_Trans = mlb.fit_transform([{str(val)} for val in Marketing['education'].values])
month_Trans = mlb.fit_transform([{str(val)} for val in Marketing['month'].values])
day_of_week_Trans = mlb.fit_transform([{str(val)} for val in Marketing['day_of_week'].values])

In [18]:
marketing_new = Marketing.drop(['marital','default','housing','loan','contact','poutcome','class','job','education','month','day_of_week'], axis=1)
marketing_new = np.hstack((marketing_new.values, job_Trans, education_Trans, month_Trans, day_of_week_Trans))

In [26]:
X, y = marketing_new[:,:-1], marketing_new[:,-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [32]:
ds3_tpot_predictions = tpot_classification(X_train, y_train, X_test)
end_time = time.time()

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=300.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: 1.0

Generation 2 - Current best internal CV score: 1.0

Generation 3 - Current best internal CV score: 1.0

Generation 4 - Current best internal CV score: 1.0

Generation 5 - Current best internal CV score: 1.0

Best pipeline: DecisionTreeClassifier(input_matrix, criterion=gini, max_depth=6, min_samples_leaf=20, min_samples_split=14)


In [35]:
metric_calculator("auto-sklearn", "spambase", y_test, ds3_tpot_predictions)
print("total time elapsed: ",end_time - start_time)

Framework:  auto-sklearn
log loss:  9.992007221626413e-16
accuracy:  1.0
Classification report
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      8233
         1.0       1.00      1.00      1.00      2064

    accuracy                           1.00     10297
   macro avg       1.00      1.00      1.00     10297
weighted avg       1.00      1.00      1.00     10297

total time elapsed:  6585.911735057831


<b> Model H2O </b>

In [36]:
start_time = time.time()
ds3_h2o_predictions = h2o_classifications(X_train, y_train, X_test, y.name)
end_time = time.time()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,8 hours 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.0.2
H2O_cluster_version_age:,"28 days, 2 hours and 39 minutes"
H2O_cluster_name:,H2O_from_python_jupyter_htc3nt
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,6.068 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
xgboost prediction progress: |████████████████████████████████████████████| 100%
various models tested:


model_id,mean_per_class_error,logloss,rmse,mse,training_time_ms,predict_time_per_row_ms
XGBoost_grid__1_AutoML_20201215_181738_model_14,0.627128,1.01138,0.535489,0.286748,903,0.027723
XGBoost_grid__1_AutoML_20201215_181738_model_10,0.629865,1.00557,0.53418,0.285348,898,0.030828
XGBoost_grid__1_AutoML_20201215_181738_model_12,0.631833,0.962851,0.535378,0.28663,1037,0.029354
XGBoost_grid__1_AutoML_20201215_181738_model_3,0.634195,1.00199,0.540761,0.292423,1314,0.041379
GBM_grid__1_AutoML_20201215_181738_model_9,0.634416,0.951743,0.528679,0.279501,919,0.12812
GBM_grid__1_AutoML_20201215_181738_model_6,0.635169,1.0008,0.541092,0.29278,1411,0.123829
GBM_2_AutoML_20201215_181738,0.636868,0.934192,0.541759,0.293503,815,0.120627
DRF_1_AutoML_20201215_181738,0.63918,1.37008,0.54246,0.294262,812,0.092236
XGBoost_grid__1_AutoML_20201215_181738_model_16,0.639245,0.970909,0.5375,0.288906,1129,0.032678
StackedEnsemble_BestOfFamily_AutoML_20201215_181738,0.641548,0.863639,0.5342,0.285369,20652,0.132155





In [40]:
metric_calculator("H2O", "bank-marketing",y_test,ds3_h2o_predictions )

print("total time elapsed: ",end_time - start_time)

Framework:  H2O
log loss:  0.3944518914
accuracy:  0.6473469387755102
Classification report
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.42      0.22      0.29        37
           5       0.66      0.64      0.65       368
           6       0.63      0.76      0.69       544
           7       0.69      0.55      0.61       233
           8       0.68      0.32      0.43        41

    accuracy                           0.65      1225
   macro avg       0.52      0.41      0.44      1225
weighted avg       0.65      0.65      0.64      1225

total time elapsed:  2644.824376106262
