In [1]:
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GroupKFold, GridSearchCV

from sklearn.neural_network import MLPRegressor

# Import helper functions
from ML_functions import fun_load_settings, fun_load_data, fun_preprocessing, fun_fit_tuning
from ML_functions import fun_scaled_neg_MAPE, fun_tuning_results, fun_scores

# Default optimization problem, used when this notebook is executed manually (choose either "TSP" or "CVRP")
default_optimization_problem = "TSP"

# Resolve the optimization problem depending on how the notebook is executed:
# if the notebook is run by "main.ipynb", the value is loaded from "settings.json";
# otherwise the default optimization problem from above is used
optimization_problem = fun_load_settings(default_optimization_problem)

# Load the data for the selected optimization problem (the second return value is not needed in this notebook)
data, _ = fun_load_data(optimization_problem)

# The train/test split is done during the preprocessing (train_size=0.8 is passed as the share of the train set)
# NOTE(review): train_data presumably holds the unsplit train set (features + target) - confirm in ML_functions
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(data, train_size=0.8)

The notebook was executed by another notebook. :)
Optimization problem: 'TSP'


# **Neural Network - Multi-Layer Perceptron**
**Further tested parameters for the TSP:**
- "standardscaler": [StandardScaler(), MinMaxScaler()] -> StandardScaler() performed better
- "mlpregressor__hidden_layer_sizes": [(256, 128), (128, 64, 32), (256, 128, 64), (100, 100, 100)], <br>
-> (256, 128, 64) performed best; to tune faster it's done with (64, 32, 16)
- "mlpregressor__learning_rate": ["constant", "adaptive"] -> "adaptive" performed better
- "mlpregressor__warm_start": [True, False] -> Had no impact on the performance
- Higher values of "mlpregressor__learning_rate_init" may cause NaN scores

**Further tested parameters for the CVRP:**
- Simply increasing the number of neurons in the hidden layers did not work out. Therefore, tuning and test score computation are done with <br>
"mlpregressor__hidden_layer_sizes": (64, 32, 16) <br>

In [2]:
# Define a pipeline: standardize the features, then fit a multi-layer perceptron.
# The step names generated by make_pipeline ("standardscaler", "mlpregressor") are the prefixes used in param_grid.
# Note: scikit-learn only uses learning_rate="adaptive" when solver="sgd"; it is ignored for solver="adam".
pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(hidden_layer_sizes=(64, 32, 16), activation="relu", learning_rate="adaptive",
                                  max_iter=1000, random_state=42))

# Define the parameter grid for the selected optimization problem
if (optimization_problem == "TSP"):
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.0001, 0.001, 0.01, 0.1, 1],
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.001], # 0.01 did not perform
                  "mlpregressor__early_stopping": [True, False]}

elif (optimization_problem == "CVRP"):
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.001, 0.01, 0.1, 1],
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.0005], # 0.001 caused NaN scores
                  "mlpregressor__early_stopping": [False]} # True did not perform

else:
    # Fail early with a clear message instead of a NameError on param_grid further down
    raise ValueError(f"Unknown optimization problem {optimization_problem!r}: expected 'TSP' or 'CVRP'.")

# Group-aware cross validation splits: rows sharing the same "Instance ID" are never split across train and validation fold.
# The splits are materialized as a list because GroupKFold.split() returns a one-shot generator; a generator stored in
# grid_search.cv would be exhausted after the first fit and yield no splits if the search object is fitted or evaluated again.
cv_splits = list(GroupKFold(n_splits=3).split(X_train, y_train,
                                              groups=X_train.index.get_level_values(level="Instance ID")))

# Grid search (refit=False: no final model is refitted on the whole train set after the search)
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid,
                           cv=cv_splits,
                           scoring=fun_scaled_neg_MAPE, refit=False, verbose=True, n_jobs=-1)
tuning_details = fun_fit_tuning(grid_search, X_train, y_train, file_name=f"{optimization_problem}_NN")

# Estimate model performance with cross validation on the train set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(grid_search, X_train, y_train)
model_results_dict.update(tuning_details)

# View grid search CV scores of all parameter combinations
results_df = fun_tuning_results(grid_search, param_grid)

Fitting 3 folds for each of 120 candidates, totalling 360 fits


{'Search type': 'GridSearchCV',
 'Parameter combinations': 120,
 'Total tuning time': '1h, 37m',
 'Total tuning fit time': '6h, 26m',
 'Total tuning prediction time': '16s'}

CV MAPE (scaled) train data: 3.7600000000000002 %


**Best model / parameter combination:**

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

**Cross validation scores of different parameter combinations:**

Unnamed: 0,solver,alpha,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
0,sgd,0.1,32,0.001,False,-0.037593,"6m, 10s"
1,sgd,0.01,32,0.001,False,-0.038869,"6m, 8s"
2,sgd,0.1,64,0.001,False,-0.039058,"4m, 20s"
3,sgd,0.001,32,0.001,False,-0.039243,"4m, 44s"
4,sgd,0.0001,32,0.001,False,-0.039617,"4m, 48s"
5,sgd,0.0001,64,0.001,False,-0.040276,"3m, 3s"
6,sgd,0.01,64,0.001,False,-0.040297,"3m, 12s"
7,sgd,0.001,128,0.001,False,-0.040464,"2m, 58s"
8,sgd,0.1,32,0.0001,False,-0.040498,"6m, 37s"
9,sgd,0.01,32,0.0001,False,-0.040656,"8m, 21s"


Unnamed: 0,solver,alpha,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
60,adam,0.001,128,0.0001,False,-0.043999,"6m, 19s"
61,sgd,0.01,64,0.001,True,-0.044011,48s
62,adam,0.1,32,0.0001,False,-0.044018,"9m, 31s"
63,sgd,1.0,32,0.0001,False,-0.044046,"4m, 39s"
64,adam,1.0,128,0.0001,False,-0.044245,"4m, 56s"
65,sgd,0.0001,128,0.001,True,-0.044316,38s
66,sgd,0.01,128,0.001,True,-0.044727,33s
67,sgd,0.1,128,0.001,True,-0.044745,36s
68,sgd,1.0,64,0.001,True,-0.044788,45s
69,sgd,1.0,128,0.001,True,-0.044867,34s
