In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, KFold
import autosklearn.regression
import autosklearn.metrics

In [10]:
# Input files: edit these two names to load a different train / test pair.
train_csv = 'train_shuffled_16features.csv'
test_csv = 'test_shuffled_16features.csv'

train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

In [11]:
# Separate features from the target for the training data.
# Features are taken positionally (every column except the last), while the
# target is taken by name.
# NOTE(review): this assumes 'FUEL_CONSUMPTION' is the last column of the CSV;
# if it is not, the target would end up among the features -- confirm.
features = train.iloc[:, :-1]
y = train['FUEL_CONSUMPTION']

# Split into train and validation sets BEFORE fitting the scaler, so that the
# min/max statistics are learned from the training rows only and no
# information from the validation rows leaks into preprocessing.
# (Same random_state and row count as before, so the row assignment is unchanged.)
X_train_raw, X_valid_raw, y_train, y_valid = train_test_split(
    features, y, test_size=0.3, random_state=42
)

# Normalize: fit on the training portion, then apply the same transform everywhere.
norm = MinMaxScaler().fit(X_train_raw)
X_train = norm.transform(X_train_raw)
X_valid = norm.transform(X_valid_raw)

# Kept for backward compatibility with cells that use the full scaled matrix.
# Validation rows may now fall slightly outside [0, 1].
X = norm.transform(features)

In [12]:
# Prepare the held-out test set the same way as the training data:
# the target is read by name, every column but the last is a feature,
# and the features are scaled with the already-fitted `norm`.
y_test = test['FUEL_CONSUMPTION']
X_test = norm.transform(test.iloc[:, :-1])

In [13]:
# AutoML search configuration.
# Change the name inside `include` to run the same 1h search with a
# different regressor (3600 matches the "1h" budget described here).
automl_settings = dict(
    seed=42,
    time_left_for_this_task=3600,
    per_run_time_limit=180,
    metric=autosklearn.metrics.root_mean_squared_error,
    include={'regressor': ["adaboost"]},
)
automl = autosklearn.regression.AutoSklearnRegressor(**automl_settings)

In [14]:
# Run the AutoML search on the training split (the validation split is held out).
automl.fit(X=X_train, y=y_train)

In [15]:
# Score the fitted ensemble on the held-out validation split.
# `metrics` is scikit-learn's metrics module (`from sklearn import metrics` in
# the imports cell); without that import these lines raise NameError.
# The name `x` is kept for the predictions in case later cells refer to it.
x = automl.predict(X_valid)

# RMSE is the square root of the mean squared error.
print("automl RMSE", np.sqrt(metrics.mean_squared_error(y_true=y_valid, y_pred=x)))
# sklearn reports MAPE as a fraction (0.05 == 5%), not as a percentage.
print("automl MAPE", metrics.mean_absolute_percentage_error(y_true=y_valid, y_pred=x))

In [16]:
# Display the leaderboard of the fitted AutoML run; left as the cell's last
# expression so the notebook renders the returned object.
automl.leaderboard()

In [17]:
# Display the description returned by show_models() for the fitted AutoML run
# (presumably the models in the final ensemble -- confirm against the
# installed auto-sklearn version).
automl.show_models()