In [1]:
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GroupKFold, GridSearchCV

from sklearn.neural_network import MLPRegressor

# Import helper functions
from ML_functions import fun_load_settings, fun_load_data, fun_preprocessing, fun_fit_tuning
from ML_functions import fun_scaled_neg_MAPE, fun_tuning_results, fun_scores

# Default optimization problem used when this notebook is executed manually (choose either "TSP" or "CVRP")
default_optimization_problem = "CVRP"

# Resolve optimization_problem depending on how the notebook is executed:
# if the notebook is run by the script "main.ipynb", optimization_problem is loaded from "settings.json";
# otherwise the default optimization problem from above is used
optimization_problem = fun_load_settings(default_optimization_problem)

# Load the data of the selected optimization problem (the second return value is not needed in this notebook)
data, _ = fun_load_data(optimization_problem)

# Do the train test split during the preprocessing
# NOTE(review): train_size=0.8 presumably means 80 % of the data is used for training - confirm in ML_functions
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(data, train_size=0.8)

The notebook is executed directly. :)
Optimization Problem: 'CVRP'


# **Neural Network - Multi Layer Perceptron**
**Further tested parameters for the TSP:**
- "standardscaler": [StandardScaler(), MinMaxScaler()] -> StandardScaler() performed better
- "mlpregressor__hidden_layer_sizes": [(256, 128), (128, 64, 32), (256, 128, 64), (100, 100, 100)], <br>
-> (256, 128, 64) performed best; to speed up tuning, the grid search uses the smaller (64, 32, 16)
- "mlpregressor__learning_rate": ["constant", "adaptive"] -> "adaptive" performed better
- "mlpregressor__warm_start": [True, False] -> Had no impact on the performance
- higher values of the parameter "learning_rate_init" may cause NaN scores

**Further tested parameters for the CVRP:**
- Simply increasing the number of neurons in the hidden layers did not work out. Therefore, tuning and test score computation are done with <br>
"mlpregressor__hidden_layer_sizes": (128, 64, 32) <br>
-> The parameter "mlpregressor__early_stopping" is fixed to False to reduce the number of possible parameter combinations

In [2]:
# Define the problem-specific settings (hidden_layer_sizes and the parameter grid) in one place.
# An unknown optimization problem is rejected explicitly; otherwise "hls" and "param_grid" would
# stay undefined and the cell would fail further below with an unrelated NameError.
if optimization_problem == "TSP":
    hls = (64, 32, 16)
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.0001, 0.001, 0.01, 0.1, 1],
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.001], # 0.01 did not perform
                  "mlpregressor__early_stopping": [True, False]}

elif optimization_problem == "CVRP":
    hls = (128, 64, 32)
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.001, 0.01, 0.1, 1],
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.0005], # 0.001 caused NaN scores
                  "mlpregressor__early_stopping": [False]} # True did not perform

else:
    raise ValueError(f"Unknown optimization problem {optimization_problem!r}: choose either 'TSP' or 'CVRP'.")

print("Hidden layer sizes:", hls)

# Define a pipeline: scale the features, then fit the multi layer perceptron
# (the step names "standardscaler" and "mlpregressor" are the prefixes used in param_grid)
pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(hidden_layer_sizes=hls, activation="relu", learning_rate="adaptive",
                                  max_iter=1000, random_state=42))

# Grid search with grouped 3-fold cross validation: the folds are grouped by "Instance ID",
# so the same instance never appears in two different folds.
# refit=False: the grid search itself does not refit the best parameter combination on the whole train set
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid,
                           cv=GroupKFold(n_splits=3).split(X_train, y_train, groups=X_train["Instance ID"]),
                           scoring=fun_scaled_neg_MAPE, refit=False, verbose=True, n_jobs=-1)
tuning_details = fun_fit_tuning(grid_search, X_train, y_train, file_name=optimization_problem + "_NN")

# Estimate model performance with cross validation on the train set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(grid_search, X_train, y_train)
model_results_dict.update(tuning_details)

# View grid search CV scores of all parameter combinations
results_df = fun_tuning_results(grid_search, param_grid)

Hidden layer sizes: (64, 32, 16)
Fitting 3 folds for each of 120 candidates, totalling 360 fits


{'Parameter combinations': 120,
 'Total tuning time': '2h, 57m',
 'Grid search total fit time': '11h, 42m',
 'Grid search total prediction time': '24s'}

CV MAPE (scaled) train data:  3.7393 %


**Best model / parameter combination:**

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

**Cross validation scores of different parameter combinations:**

Unnamed: 0,alpha,solver,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
0,0.1,sgd,32,0.001,False,-0.037393,"10m, 38s"
1,0.1,sgd,64,0.001,False,-0.038645,"6m, 26s"
2,0.01,sgd,32,0.001,False,-0.038702,"9m, 40s"
3,0.001,sgd,32,0.001,False,-0.039165,"9m, 8s"
4,0.001,sgd,64,0.001,False,-0.039584,"5m, 20s"
5,0.0001,sgd,32,0.001,False,-0.039584,"8m, 27s"
6,0.0001,sgd,64,0.001,False,-0.039668,"5m, 56s"
7,0.01,sgd,64,0.001,False,-0.039676,"5m, 8s"
8,0.1,sgd,32,0.0001,False,-0.039938,"11m, 48s"
9,0.1,sgd,32,0.001,True,-0.039971,"2m, 18s"


Unnamed: 0,alpha,solver,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
60,0.0001,sgd,64,0.001,True,-0.043603,"1m, 21s"
61,1.0,adam,64,0.001,False,-0.043703,"3m, 34s"
62,1.0,sgd,32,0.0001,False,-0.043755,"7m, 51s"
63,0.001,adam,128,0.0001,False,-0.044077,"13m, 1s"
64,0.0001,adam,128,0.0001,False,-0.044204,"14m, 1s"
65,0.001,sgd,128,0.001,True,-0.044369,"1m, 5s"
66,1.0,sgd,64,0.001,True,-0.044649,"1m, 18s"
67,0.1,sgd,128,0.001,True,-0.044866,59s
68,0.01,adam,128,0.0001,False,-0.044893,"13m, 58s"
69,0.0001,sgd,128,0.001,True,-0.04501,57s
