In [15]:
import pandas as pd
import numpy as np
import sklearn

In [16]:
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [17]:
from hypster import HyPSTERRegressor

In [18]:
# Single seed reused by the train/test split and the HyPSTER regressor for reproducibility.
SEED: int = 42

# Load Boston Housing Dataset

In [19]:
# Load the Boston housing data: X is a DataFrame with one named column per feature,
# y holds the matching target values.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn -- confirm the pinned version.
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target

In [20]:
# Hold out 33% of the rows for the final evaluation; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=SEED,
)

# Fit HyPSTER On the Data

In [21]:
# Libraries and model families handed to HyPSTERRegressor.
frameworks = [
    "sklearn",
    "xgboost",
    "lightgbm",
]
model_types = [
    "tree_based",
    "linear",
]

In [22]:
# Build the HyPSTER regressor over the chosen frameworks and model types.
# Scoring uses the "neg_mean_squared_error" metric with cv=5, all available
# cores (n_jobs=-1), and the shared seed.
reg = HyPSTERRegressor(
    frameworks=frameworks,
    model_types=model_types,
    scoring="neg_mean_squared_error",
    cv=5,
    max_iter=1000,
    n_jobs=-1,
    random_state=SEED,
)

In [23]:
%%time
# Fit on the training split only, with n_trials=100; %%time reports how long the cell takes.
reg.fit(X_train, y_train, n_trials=100)

LightGBM Regressor Score: 27.76872
LightGBM Regressor Score: 30.18914
XGBoost Linear Regressor Score: 30.66527
LightGBM Regressor Score: 30.61751
XGBoost Tree-Based Regressor Score: 17.40761
LightGBM Regressor Score: 36.95402
LightGBM Regressor Score: 33.44547
LightGBM Regressor Score: 26.91017
XGBoost Tree-Based Regressor Score: 18.85306
XGBoost Tree-Based Regressor Score: 22.30604
XGBoost Tree-Based Regressor Score: 21.71341
XGBoost Tree-Based Regressor Score: 21.77465
XGBoost Tree-Based Regressor Score: 21.87899
XGBoost Tree-Based Regressor Score: 25.85964
XGBoost Tree-Based Regressor Score: 31.49441
XGBoost Tree-Based Regressor Score: 16.14125
XGBoost Tree-Based Regressor Score: 17.38471
XGBoost Tree-Based Regressor Score: 14.693
XGBoost Tree-Based Regressor Score: 14.58885
XGBoost Tree-Based Regressor Score: 15.16528
XGBoost Tree-Based Regressor Score: 17.83203
XGBoost Tree-Based Regressor Score: 14.51442
XGBoost Tree-Based Regressor Score: 12.72301
XGBoost Tree-Based Regressor Sc

# Review Results on the Test Set

In [24]:
# Square root of the best score recorded during fitting; with the MSE-based scoring
# used for `reg` this reads as an RMSE -- presumably cross-validated (cv=5), TODO confirm.
np.sqrt(reg.best_score_)

3.566932409718397

In [25]:
# Evaluate on the held-out test split: RMSE of the fitted model's predictions.
# mean_squared_error is imported explicitly from sklearn.metrics rather than reached
# through the bare `sklearn` package, which only works when some other import has
# already loaded the `sklearn.metrics` submodule.
preds = reg.predict(X_test)
np.sqrt(mean_squared_error(y_test, preds))

3.159247269783644