In [2]:
import pandas as pd
import numpy as np

In [11]:
# Download OpenML dataset 44061 and unpack it into the feature frame, the
# target column and the list of attribute names.
import openml

dataset = openml.datasets.get_dataset(44061)
X, y, _, attribute_names = dataset.get_data(
    target=dataset.default_target_attribute,
)



In [12]:
# Cast both the feature frame and the target to 64-bit floating point.
X = X.astype("float64")
y = y.astype("float64")

In [13]:
# One-hot encode the listed columns; every other column is passed through
# unchanged by get_dummies.
columns_to_encode = ['X3', 'X4', 'X6']
# dtype=float keeps the indicator columns in the same float dtype the frame
# was cast to beforehand; without it get_dummies emits bool/uint8 columns and
# the feature matrix becomes mixed-dtype again.
X = pd.get_dummies(X, columns=columns_to_encode, dtype=float)

In [14]:
# Hold out 30% of the rows for testing, then keep only the first 400 training
# rows so that the hyperparameter searches stay cheap.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_values, y_values = X_train.iloc[:400], y_train.iloc[:400]
print(X.shape)

(4209, 379)


In [15]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor

In [6]:
# Candidate hyperparameter values handed to the randomized search.
param_grid = {
    'hidden_layer_sizes': [
        (379, 150, 379, 200),
        (450, 379, 150),
        (379, 650, 379, 379),
        (450, 150, 379, 150),
        (379, 379, 379),
        (150, 650, 379),
        (450, 150, 379, 150, 200),
    ],
    'activation': ['relu', 'tanh', 'logistic'],
    'alpha': [0.0001, 0.001, 0.01],
    'solver': ['adam'],
    'max_iter': [500],
}

In [7]:
# Randomized hyperparameter search over `param_grid` on the 400-row tuning
# subset, scored with negative mean squared error under 3-fold CV.
# Both the network and the parameter sampler are seeded so that re-running the
# notebook reproduces the same search; 42 matches the train/test split seed.
nn = MLPRegressor(random_state=42)
random_grid = RandomizedSearchCV(
    nn,
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
    random_state=42,
)
# fit() returns the fitted search object, so `nn_random` and `random_grid`
# refer to the same object.
nn_random = random_grid.fit(X_values, y_values)



In [8]:
# Show the best parameter combination found by the randomized search.
nn_random.best_params_

{'solver': 'adam',
 'max_iter': 500,
 'hidden_layer_sizes': (379, 150, 379, 200),
 'alpha': 0.0001,
 'activation': 'tanh'}

In [9]:
def get_accuracy(model, X_test, y_test):
    """Print and return a percentage-error-based accuracy for a regressor.

    The score is ``100 - MAPE``, where MAPE is the mean absolute percentage
    error of ``model.predict(X_test)`` measured against ``y_test``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X_test : array-like
        Features forwarded unchanged to ``model.predict``.
    y_test : array-like
        True target values, aligned element-wise with the predictions.

    Returns
    -------
    float
        ``100 - MAPE``. The value is negative when the mean relative error
        exceeds 100%; targets equal to zero make the relative error infinite.
    """
    predictions = model.predict(X_test)
    errors = np.abs(predictions - y_test)
    # Normalise by the magnitude of the target: dividing by the signed value
    # turns errors on negative targets into negative percentages, which would
    # push the reported accuracy above 100.
    avg_perf_error = 100 * np.mean(errors / np.abs(y_test))
    accuracy = 100 - avg_perf_error

    print('Average Error: {:0.4f}'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    return accuracy

In [82]:
# Baseline for the comparison printed at the end of this cell.
# NOTE(review): `orig_accuracy` was not defined anywhere in the notebook, so
# this cell raised NameError on a fresh kernel. The recorded output shows two
# evaluations, so an untuned model was scored here before the tuned one. A
# default MLPRegressor fitted on the tuning subset is assumed -- confirm that
# this matches the baseline that was originally used.
baseline = MLPRegressor(random_state=42).fit(X_values, y_values)
orig_accuracy = get_accuracy(baseline, X_test.apply(pd.to_numeric), y_test)

# Score the refit winner of the randomized search on the held-out test split.
tuned = nn_random.best_estimator_
tuned_accuracy = get_accuracy(tuned, X_test.apply(pd.to_numeric), y_test)

# Relative change of the tuned score with respect to the baseline score.
print('Improvement: {:0.2f}%.'.format( 100 * (tuned_accuracy - orig_accuracy) / orig_accuracy))



Average Error: 8.9942
Accuracy = 91.31%.
Average Error: 29.9038
Accuracy = 71.53%.
Improvement: -21.66%.


In [16]:
from sklearn.model_selection import GridSearchCV



In [89]:
# Exhaustive grid search over smaller architectures on the tuning subset,
# scored with negative mean squared error under 3-fold CV.
# The parameter dict has its own name so that `grid` is only ever bound to the
# search object.
grid_params = {
    'solver': ['adam'],
    'max_iter': [500],
    'hidden_layer_sizes': [
        (100, 150, 100),
        (50, 100, 50, 100, 50),
        (100, 70, 100, 50),
        (100, 60),
        (50, 100),
        (100, 50, 100),
    ],
    'alpha': [0.0001, 0.001],
    'activation': ['relu', 'logistic', 'tanh'],
}

# Seed the network so that repeated runs of the search give the same scores;
# 42 matches the seed used for the train/test split.
nn = MLPRegressor(random_state=42)
grid = GridSearchCV(nn, grid_params, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
# NOTE(review): the recorded output of this cell lists `nan` test scores.
# Inspect `nn_grid.cv_results_` before trusting `best_params_` -- TODO confirm
# why the candidate scores are non-finite.
nn_grid = grid.fit(X_values, y_values)

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


In [90]:
# Show the best parameter combination found by the grid search.
nn_grid.best_params_

{'activation': 'relu',
 'alpha': 0.0001,
 'hidden_layer_sizes': (100, 150, 100),
 'max_iter': 500,
 'solver': 'adam'}

In [92]:
# Score the refit winner of the grid search on the held-out test split and
# display the resulting accuracy as the cell output.
best_grid_nn = nn_grid.best_estimator_
grid_accuracy = get_accuracy(
    model=best_grid_nn,
    X_test=X_test.apply(pd.to_numeric),
    y_test=y_test,
)
grid_accuracy

Average Error: 9.6977
Accuracy = 90.68%.


90.67773314869325

In [None]:
# Best parameters reported by the grid search above, kept for reference:
# {'activation': 'relu',
#  'alpha': 0.0001,
#  'hidden_layer_sizes': (100, 150, 100),
#  'max_iter': 500,
#  'solver': 'adam'}