# Installation

In [None]:
# System-level prerequisite for the pip installs in the next cell.
# Optional: uncomment to refresh the package index first if swig cannot be found.
# ! apt-get update
# Installs SWIG non-interactively (-y).
# NOTE(review): swig is presumably needed to build the pyrfr bindings — confirm.
# NOTE(review): --allow-unauthenticated permits packages whose signatures cannot
# be verified; confirm it is still required before keeping it.
! apt-get install -y --allow-unauthenticated swig

In [None]:
# Install the Python packages, one pip command per package, in this order.
# NOTE(review): pyrfr and Cython are presumably installed first because the
# auto-sklearn build needs them already present — confirm before merging these
# into a single pip command.
# NOTE(review): no versions are pinned, so a fresh run may pull different releases
# than the ones that produced the outputs saved in this notebook.
! pip install pyrfr
! pip install Cython
! pip install auto-sklearn

# Hello World

In [17]:
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import autosklearn.regression
import numpy as np
from joblib import dump, load
import warnings
# Suppress every warning for the rest of the kernel session. This keeps the cell
# output short, but it also hides deprecation notices raised by the libraries
# imported above.
warnings.filterwarnings('ignore')


def main(time_left_for_this_task=36, per_run_time_limit=30, model_path='model.joblib'):
    """Fit an auto-sklearn regressor on the Boston housing data and save it.

    Loads the Boston data set, splits it into train and test parts
    (``random_state=1``), fits an ``AutoSklearnRegressor`` on the training
    part, prints the resulting models plus the R2 / MAE / RMSE scores on the
    test part, and finally serialises the fitted estimator with joblib.

    Parameters
    ----------
    time_left_for_this_task : int, default 36
        Forwarded unchanged to ``AutoSklearnRegressor`` (overall search
        budget; seconds according to the auto-sklearn documentation).
    per_run_time_limit : int, default 30
        Forwarded unchanged to ``AutoSklearnRegressor`` (budget for a single
        model fit; seconds according to the auto-sklearn documentation).
    model_path : str, default 'model.joblib'
        File the fitted estimator is written to via ``joblib.dump``.

    Returns
    -------
    None
        Results are printed and the model is written to ``model_path``.
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
    # 1.2, so this cell needs an older scikit-learn — confirm the pinned version.
    X, y = sklearn.datasets.load_boston(return_X_y=True)

    # 13 feature columns in total: column index 3 is declared categorical,
    # every other column numerical.
    feature_types = (['numerical'] * 3) + ['categorical'] + (['numerical'] * 9)

    # Fixed random_state so the same split can be recreated when the saved
    # model is evaluated later.
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)

    automl = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=time_left_for_this_task,
        per_run_time_limit=per_run_time_limit,
    )
    automl.fit(X_train, y_train, dataset_name='boston', feat_type=feature_types)

    print(automl.show_models())

    # Score the fitted estimator on the held-out part only.
    predictions = automl.predict(X_test)
    print("R2 score:", sklearn.metrics.r2_score(y_test, predictions))
    print("MAE score:", sklearn.metrics.mean_absolute_error(y_test, predictions))
    # RMSE is the square root of the mean squared error.
    print("RMSE score:", np.sqrt(sklearn.metrics.mean_squared_error(y_test, predictions)))

    # Persist the whole fitted estimator so another cell can reload it.
    dump(automl, model_path)

In [18]:
# Train, print the evaluation scores and write model.joblib (see main above).
main()

[(1.000000, SimpleRegressionPipeline({'regressor:random_forest:min_weight_fraction_leaf': 0.0, 'regressor:random_forest:min_samples_leaf': 1, 'regressor:random_forest:min_impurity_decrease': 0.0, 'regressor:__choice__': 'random_forest', 'regressor:random_forest:max_depth': 'None', 'categorical_encoding:one_hot_encoding:minimum_fraction': 0.01, 'regressor:random_forest:bootstrap': 'True', 'regressor:random_forest:max_leaf_nodes': 'None', 'preprocessor:__choice__': 'no_preprocessing', 'rescaling:__choice__': 'standardize', 'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True', 'regressor:random_forest:n_estimators': 100, 'regressor:random_forest:criterion': 'mse', 'categorical_encoding:__choice__': 'one_hot_encoding', 'regressor:random_forest:min_samples_split': 2, 'regressor:random_forest:max_features': 1.0, 'imputation:strategy': 'mean'},
dataset_properties={
  'sparse': False,
  'multiclass': False,
  'multilabel': False,
  'task': 4,
  'target_type': 'regression',
  's

In [11]:
def test_joblib():
    """Reload the model saved as 'model.joblib' and print its test predictions.

    The Boston data is loaded and split again with ``random_state=1`` (the
    same seed the training cell uses), the estimator is restored with
    ``joblib.load``, and its predictions for the test features are printed.

    Returns
    -------
    The estimator object restored from 'model.joblib'.
    """
    features, targets = sklearn.datasets.load_boston(return_X_y=True)

    # train_test_split yields [X_train, X_test, y_train, y_test]; only the
    # test feature matrix is needed here.
    parts = sklearn.model_selection.train_test_split(features, targets, random_state=1)
    X_test = parts[1]

    # NOTE(review): joblib.load unpickles the file, so only load files that
    # this notebook wrote itself.
    restored = load('model.joblib')
    print(restored.predict(X_test))
    return restored

In [12]:
# Round-trip check: reload model.joblib (written by main) and keep the restored
# estimator in `model` for further use.
model = test_joblib()

[31.93320679 28.00055138 18.16911995 20.18052711 21.23598923 19.53523979
 31.19931969 18.1902433  20.10526726 24.37034248 27.94699951 31.7838662
 20.57941277 20.52455956 20.06424049 19.82341904 12.09422947 40.75170822
 24.75791977 15.43895897 19.31015961 17.56243286 25.37583931 23.86864944
 27.35204559 10.21085278 14.18272331 19.39023941 36.63186737 12.57532921
 25.74746857 17.80457748 45.99836426 15.01874821 22.6590007  20.6357103
 14.84772945 31.92104019 13.0055241  19.09192055 23.73333931 24.48852051
 26.4352578  15.69578873 15.13011959 10.30772198 48.27728729 13.78410984
 20.24770798 18.50683144 20.83745392 20.68928001 25.31775703 20.37415504
 12.04224358 24.35852615 12.47096027 25.67648384 18.50561378 40.53051559
 14.44738232 27.54619972 13.44244015 15.35492653 17.37219467 34.65542892
 41.62956131 24.72120018 20.48512077 19.88568047 23.88447968  8.92400002
 18.23648064 19.80686516 19.58072052 19.96345398 44.3084726  23.59571236
 29.075784   34.99973145 17.33505981 20.32866371 32.7