In [1]:
import os
import time
import pandas as pd
import numpy as np
from scipy.stats import uniform
import matplotlib.pyplot as plt
import matplotlib_inline
import seaborn as sns
import mglearn

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, GridSearchCV, RandomizedSearchCV

from sklearn.neural_network import MLPRegressor

# import helperfunctions
from helperfunctions import fun_load_file, fun_preprocessing, fun_split_X_y
from helperfunctions import fun_convert_time, fun_fit_gridsearch_time
from helperfunctions import fun_train_score, fun_test_score, fun_best_model
from helperfunctions import plot_feature_importances

# start time count and load data
start_script = time.time()
data = fun_load_file(path='..\\01_data\\01_TSP', name='combined_train_instances_dennis.xlsx')
train_data = fun_preprocessing(data)
X, y = fun_split_X_y(train_data)

# create a train and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.99, random_state=0)

# save number of features and train sizes
n_features = X_train.shape[1]
train_size = f'{int(np.round(100 * len(X_train)/len(X)))} %'

# **Neural Networks - Multi Layer Perceptron**

In [3]:
# compare the scaling methods
best_MAPE = 100
for i in [StandardScaler(), MinMaxScaler(), RobustScaler()]:

    # scale the train set first
    scaler = i
    X_train_scaled = scaler.fit_transform(X_train)

    # create model and fit it on train set (default parameters)
    mlp = MLPRegressor(hidden_layer_sizes=(100,), alpha=0.0001,
                       activation='relu', solver='adam', max_iter=1000, random_state=0)

    # estimate model performance with cross validation on the train set (scoring: MAPE and RMSE)
    print('Method:', i)
    MAPE, RMSE, computation_time = fun_train_score(mlp, X_train_scaled, y_train, cv=3, return_results=True)

    # save best result
    if MAPE < best_MAPE:
        best_MAPE = MAPE
        best_RMSE = RMSE
        best_computation_time = computation_time

# save results to dictionary
results_dict = {}
results_dict['Neural Network'] = {'MAPE': MAPE, 'RMSE': RMSE, 'CV computation time': best_computation_time, 'Train size': train_size}

Method: StandardScaler()
  CV MAPE train data:  16.5366 %
  CV RMSE train data:  3.6718
  CV computation time: 4 sec
Method: MinMaxScaler()
  CV MAPE train data:  11.7943 %
  CV RMSE train data:  2.5538
  CV computation time: 3 sec
Method: RobustScaler()
  CV MAPE train data:  14.3652 %
  CV RMSE train data:  3.0809
  CV computation time: 3 sec


# **Compare Results**

In [4]:
display(pd.DataFrame(results_dict).sort_values(by='MAPE', axis=1))
print('Total script computation time:', fun_convert_time(start=start_script, end=time.time()))

Unnamed: 0,Neural Network
CV computation time,3 sec
MAPE,14.3652
RMSE,3.0809
Train size,1 %


Total script computation time: 28 min, 41 sec
