In [8]:
import autosklearn.classification
import pandas as pd
import numpy as np
import sklearn.model_selection
import sklearn.metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml
import h2o
from h2o.automl import H2OAutoML
from tpot import TPOTClassifier

In [9]:
def autosklearn_classification(X_train, y_train, X_test,
                               time_left_for_this_task=180,
                               per_run_time_limit=180):
    """Fit an auto-sklearn classifier and return its predictions for ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed unchanged to ``fit``.
    X_test
        Features passed unchanged to ``predict``.
    time_left_for_this_task : int, default 180
        Forwarded to ``AutoSklearnClassifier`` (overall search budget).
    per_run_time_limit : int, default 180
        Forwarded to ``AutoSklearnClassifier`` (budget for one model fit).

    Returns
    -------
    Whatever ``AutoSklearnClassifier.predict(X_test)`` returns.
    """
    # NOTE(review): with both defaults equal, a single candidate run is allowed
    # to use the whole search budget -- confirm this is intended.
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=time_left_for_this_task,
        per_run_time_limit=per_run_time_limit,
    )
    automl.fit(X_train, y_train)
    return automl.predict(X_test)
    

In [10]:
def tpot_classification(X_train, y_train, X_test):
    """Fit a TPOT classifier and return its predictions for ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed unchanged to ``fit``.
    X_test
        Features passed unchanged to ``predict``.

    Returns
    -------
    Whatever ``TPOTClassifier.predict(X_test)`` returns.
    """
    tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=1)
    tpot.fit(X_train, y_train)
    # Predict with the estimator fitted just above; it is the only fitted
    # TPOT object in this scope.
    return tpot.predict(X_test)


# Backward-compatible alias for the original, misspelled function name.
topt_classification = tpot_classification


In [22]:
def h2o_classifications(X_train, y_train, X_test, target, max_runtime_secs=1800):
    """Train H2O AutoML on the training data and return predicted labels for ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; they are concatenated column-wise into
        one frame before being handed to H2O.
    X_test
        Features to predict on.
    target : str
        Name of the label column inside the concatenated training frame.
        Presumably this must equal the column name ``y_train`` contributes
        (e.g. its Series name) -- verify against the caller.
    max_runtime_secs : int, default 1800
        Forwarded to ``H2OAutoML``.

    Returns
    -------
    numpy.ndarray
        1-D array built from the ``predict`` column of the H2O prediction frame.
    """
    h2o.init()
    aml = H2OAutoML(max_runtime_secs=max_runtime_secs)

    train = pd.concat([X_train, y_train], axis=1)
    train_hf = h2o.H2OFrame(train)
    # Convert the target to a factor so H2O treats the task as classification.
    train_hf[target] = train_hf[target].asfactor()

    aml.train(y=target, training_frame=train_hf)
    response = aml.predict(h2o.H2OFrame(X_test))
    # Take the predicted label by column name rather than a positional column:
    # the remaining columns of the prediction frame are per-class
    # probabilities, whose count depends on the number of classes.
    predictions = np.asarray(response["predict"].as_data_frame()).ravel()

    lb = h2o.automl.get_leaderboard(aml, extra_columns='ALL')
    print(lb)
    return predictions

In [13]:
def metric_calculator(framework, dataset, predictions, y_test):
    """Compute log loss and accuracy and append them as one row to ``results.csv``.

    Parameters
    ----------
    framework : str
        Label written in the first column of the row.
    dataset : str
        Label written in the second column of the row.
    predictions
        Model output compared against ``y_test``.
    y_test
        Ground-truth labels.

    The row is written as ``framework, dataset, accuracy, log_loss`` followed
    by a newline, so that repeated calls produce one row per line.
    """
    # NOTE(review): log_loss is defined on probability estimates, while the
    # classifier helpers in this notebook return predict() output -- confirm
    # the intended input before relying on this column.
    ll = sklearn.metrics.log_loss(y_test, predictions)
    accuracy = sklearn.metrics.accuracy_score(y_test, predictions)

    # Append mode: the file accumulates results across frameworks and datasets.
    with open('results.csv', 'a') as fout:
        fout.write('{0}, {1}, {2}, {3}\n'.format(framework, dataset, accuracy, ll))
            

### Dataset 1: white wine quality (UCI)

In [14]:

# Dataset 1: semicolon-separated white-wine quality table from the UCI archive.
ds1_df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",
    sep=";",
)

# Every column except the last is a feature; the last column is the target.
X = ds1_df.iloc[:, :-1]
y = ds1_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
# Run the auto-sklearn helper on the Dataset 1 train/test split.
ds1_autosklearn_predictions = autosklearn_classification(X_train, y_train, X_test)

In [None]:
# Score the auto-sklearn predictions against y_test and append the row to results.csv.
metric_calculator("auto-sklearn", "wine quality", ds1_autosklearn_predictions, y_test)

In [None]:
# Run the TPOT helper on the Dataset 1 train/test split.
ds1_tpot_predictions = tpot_classification(X_train, y_train, X_test)

In [None]:
# Score the TPOT predictions against y_test and append the row to results.csv.
metric_calculator("tpot", "wine quality", ds1_tpot_predictions, y_test)

In [None]:
# Run the H2O AutoML helper; the last column name of ds1_df is passed as the target.
ds1_h2o_predictions = h2o_classifications(X_train, y_train, X_test, ds1_df.columns[-1])

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,5 hours 12 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.0.2
H2O_cluster_version_age:,27 days
H2O_cluster_name:,H2O_from_python_jupyter_rh2145
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.453 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |███████████████████████████████████████████████████████

In [None]:
# Score the H2O predictions against y_test and append the row to results.csv.
metric_calculator("H2O", "wine quality", ds1_h2o_predictions, y_test)