In [1]:
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GroupKFold, GridSearchCV

from sklearn.neural_network import MLPRegressor

# Import helper functions
from ML_functions import fun_load_settings, fun_load_data, fun_preprocessing, fun_fit_tuning
from ML_functions import fun_scaled_neg_MAPE, fun_tuning_results, fun_scores

# Default optimization problem used when this notebook is executed manually (choose either "TSP" or "CVRP")
default_optimization_problem = "TSP"

# Determine optimization_problem based on how the notebook is executed:
# if the notebook is run by "main.ipynb", the value is loaded from "settings.json"; otherwise the default from above is used
optimization_problem = fun_load_settings(default_optimization_problem)

# Load the data for the selected optimization problem (the second return value is not needed in this notebook)
data, _ = fun_load_data(optimization_problem)

# The train/test split is done inside the preprocessing helper (train_size=0.8 is passed to it).
# train_data is returned in addition to the split feature/target sets and is used later to inspect the column names.
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(data, train_size=0.8)

The notebook is executed directly. :)
Optimization problem: 'TSP'


In [None]:
# Inspect the training features (the frame is indexed by "Index" and "Instance ID"; pandas truncates the display)
X_train

Unnamed: 0_level_0,Unnamed: 1_level_0,Number Customers,X Ratio,Y Ratio,X Depot,Y Depot,Depot Distance Ratio,Closest Customer Distance (CCD) Ratio,2nd CCD Ratio,3rd CCD Ratio,4th CCD Ratio,...,X Max,Y Max,X Min,Y Min,Correlation,Skewness X,Skewness Y,Savings Ratio,Marginal Cost Ratio,Total Cost
Index,Instance ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,1,6,0.259951,1.437562,2.380844,66.016752,0.320244,1.262503,1.068225,1.045933,1.274274,...,83.228495,58.267699,11.757432,0.942942,-0.026968,0.267574,-0.668709,0.022796,0.021584,227.291186
1,1,6,1.840139,1.174308,2.380844,66.016752,1.517015,1.419479,1.246244,1.253743,1.131794,...,83.228495,58.267699,11.757432,0.942942,-0.026968,0.267574,-0.668709,2.161204,2.365312,227.291186
2,1,6,0.730341,0.844653,2.380844,66.016752,0.851033,0.386302,0.876273,0.782974,0.736554,...,83.228495,58.267699,11.757432,0.942942,-0.026968,0.267574,-0.668709,0.186036,0.176144,227.291186
3,1,6,0.931506,0.869513,2.380844,66.016752,0.954254,0.386302,0.880063,0.735429,0.758701,...,83.228495,58.267699,11.757432,0.942942,-0.026968,0.267574,-0.668709,0.514850,0.487476,227.291186
4,1,6,1.196190,1.647306,2.380844,66.016752,0.939229,1.267963,0.985226,0.860173,0.895617,...,83.228495,58.267699,11.757432,0.942942,-0.026968,0.267574,-0.668709,0.101425,0.096032,227.291186
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87195,8800,14,0.197630,0.811400,38.864948,59.493924,0.752539,1.616903,1.515414,1.352940,1.396064,...,97.790503,96.315364,5.582894,8.787735,-0.073397,-0.368805,-0.649954,0.221144,0.734886,322.725634
87196,8800,14,1.379250,0.994184,38.864948,59.493924,0.979788,0.334684,0.341777,0.577589,0.540410,...,97.790503,96.315364,5.582894,8.787735,-0.073397,-0.368805,-0.649954,0.818470,0.713184,322.725634
87197,8800,14,1.709226,1.028454,38.864948,59.493924,1.436241,0.725161,0.591611,0.520134,0.595205,...,97.790503,96.315364,5.582894,8.787735,-0.073397,-0.368805,-0.649954,1.571307,1.369178,322.725634
87198,8800,14,1.520505,1.000961,38.864948,59.493924,1.175314,0.537908,0.460755,0.403243,0.426675,...,97.790503,96.315364,5.582894,8.787735,-0.073397,-0.368805,-0.649954,0.760859,0.662984,322.725634


# **Neural Network - Multi Layer Perceptron**
**Further tested parameters for the TSP:**
- "standardscaler": [StandardScaler(), MinMaxScaler()] -> StandardScaler() performed better
- "mlpregressor__hidden_layer_sizes": [(256, 128), (128, 64, 32), (256, 128, 64), (100, 100, 100)], <br>
-> (256, 128, 64) performed best; to speed up the tuning, the grid search is run with (64, 32, 16)
- "mlpregressor__learning_rate": ["constant", "adaptive"] -> "adaptive" performed better
- "mlpregressor__warm_start": [True, False] -> Had no impact on the performance
- "mlpregressor__learning_rate_init": higher values may cause NaN scores

**Further tested parameters for the CVRP:**
- Simply increasing the number of neurons in the hidden layers did not work out for the CVRP. Therefore, both the tuning and the test score computation are done with <br> 
"mlpregressor__hidden_layer_sizes": (64, 32, 16) <br>

In [None]:
# Define a pipeline: standardize the features, then fit a multi-layer perceptron.
# The small architecture (64, 32, 16) is used for the tuning to keep the grid search fast.
pipe = make_pipeline(StandardScaler(), 
                     MLPRegressor(hidden_layer_sizes=(64, 32, 16), activation="relu", learning_rate="adaptive", 
                                  max_iter=1000, random_state=42))

# Define parameter grid (the searched values differ per optimization problem)
if (optimization_problem == "TSP"):
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.0001, 0.001, 0.01, 0.1, 1],
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.001], # 0.01 did not perform
                  "mlpregressor__early_stopping": [True, False]}

elif (optimization_problem == "CVRP"):
    param_grid = {"mlpregressor__solver": ["sgd", "adam"],
                  "mlpregressor__alpha": [0.001, 0.01, 0.1, 1], 
                  "mlpregressor__batch_size": [32, 64, 128], # "auto" did not perform
                  "mlpregressor__learning_rate_init": [0.0001, 0.0005], # 0.001 caused NaN scores
                  "mlpregressor__early_stopping": [False]} # True did not perform

else:
    # Fail early with a clear message; otherwise param_grid would be undefined and
    # the grid search below would stop with a NameError that hides the real cause
    raise ValueError(f"Unknown optimization problem {optimization_problem!r}: expected 'TSP' or 'CVRP'")

# Grid search
# - The folds are grouped by the "Instance ID" index level, so rows sharing an ID are never split across folds
# - refit=False: the search only ranks the parameter combinations, no final model is refit afterwards
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, 
                           cv=GroupKFold(n_splits=3).split(X_train, y_train, groups=X_train.index.get_level_values(level="Instance ID")), 
                           scoring=fun_scaled_neg_MAPE, refit=False, verbose=True, n_jobs=-1)
tuning_details = fun_fit_tuning(grid_search, X_train, y_train, file_name=optimization_problem + "_NN")

# Estimate model performance with cross validation on the train set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(grid_search, X_train, y_train)
model_results_dict.update(tuning_details)

# View grid search CV scores of all parameter combinations
results_df = fun_tuning_results(grid_search, param_grid)

Fitting 3 folds for each of 120 candidates, totalling 360 fits


{'Search type': 'GridSearchCV',
 'Parameter combinations': 120,
 'Total tuning time': '1h, 36m',
 'Total tuning fit time': '6h, 20m',
 'Total tuning prediction time': '14s'}

CV MAPE (scaled) train data: 3.7293 %


**Best model / parameter combination:**

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

**Cross validation scores of different parameter combinations:**

Unnamed: 0,solver,alpha,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
0,sgd,0.1,32,0.001,False,-0.037293,"6m, 3s"
1,sgd,0.1,64,0.001,False,-0.038691,"4m, 30s"
2,sgd,0.01,32,0.001,False,-0.038822,"6m, 45s"
3,sgd,0.001,32,0.001,False,-0.038841,"5m, 9s"
4,sgd,0.0001,32,0.001,False,-0.038852,"4m, 52s"
5,sgd,0.1,32,0.001,True,-0.038928,"1m, 35s"
6,sgd,0.1,32,0.0001,False,-0.039743,"7m, 5s"
7,sgd,1.0,64,0.001,False,-0.039919,"3m, 24s"
8,sgd,0.1,128,0.001,False,-0.040047,"2m, 48s"
9,sgd,1.0,128,0.001,False,-0.040211,"2m, 43s"


Unnamed: 0,solver,alpha,batch_size,learning_rate_init,early_stopping,mean_test_score,converted_mean_fit_time
60,adam,0.001,128,0.001,False,-0.044044,"2m, 7s"
61,adam,0.01,128,0.0001,False,-0.044229,"6m, 38s"
62,adam,0.001,64,0.0001,False,-0.044237,"7m, 32s"
63,sgd,1.0,64,0.001,True,-0.044399,51s
64,sgd,0.1,128,0.001,True,-0.04453,44s
65,sgd,0.001,128,0.001,True,-0.044574,41s
66,adam,0.0001,128,0.001,False,-0.044619,"1m, 33s"
67,adam,1.0,32,0.0001,False,-0.04475,"8m, 8s"
68,sgd,0.1,32,0.0001,True,-0.044957,"1m, 36s"
69,sgd,0.01,128,0.001,True,-0.045005,43s


In [None]:
from ML_functions import fun_load_best_params

# Load best parameters of the model
best_params = fun_load_best_params(optimization_problem, model_abbreviation="NN")

# Choose the architecture of the final model.
# The notes of this notebook state that larger hidden layers did not work out for the CVRP and that its
# test scores are computed with (64, 32, 16); every other case keeps the larger network (256, 128, 64).
if optimization_problem == "CVRP":
    hidden_layer_sizes = (64, 32, 16)
else:
    hidden_layer_sizes = (256, 128, 64)

# Create pipeline and apply the tuned parameters on top of the fixed settings
pipe = make_pipeline(StandardScaler(), 
                     MLPRegressor(hidden_layer_sizes=hidden_layer_sizes, activation="relu", learning_rate="adaptive", 
                                  max_iter=1000, random_state=42))
pipe.set_params(**best_params)

# Estimate model performance with cross-validation on the train set and get scores on test set (scoring: MAPE and RMSE)
model_results_dict_all = fun_scores(pipe, X_train, y_train, X_test, y_test, compute_test_scores=True)

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

CV MAPE (scaled) train data: 3.4978000000000002 %
CV RMSE (scaled) train data: 0.9602
CV computation time: 12m, 34s

MAPE (scaled) test data: 3.2965 %
RMSE (scaled) test data: 0.8945
Model fit time: 21m, 32s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,2.2002,2.4206,2.6936,2.8096,3.0934,3.1978,3.5462,3.9624,4.252,3.2965
RMSE,0.8208,0.8352,0.8629,0.8284,0.8542,0.8708,0.8891,0.9865,0.9701,0.8945


**Exclude feature categories**

In [None]:
# List the column names of the training data
train_data.columns

Index(['Number Customers', 'X Ratio', 'Y Ratio', 'X Depot', 'Y Depot',
       'Depot Distance Ratio', 'Closest Customer Distance (CCD) Ratio',
       '2nd CCD Ratio', '3rd CCD Ratio', '4th CCD Ratio', '5th CCD Ratio',
       '6th CCD Ratio', '7th CCD Ratio', '8th CCD Ratio',
       'Mean Distance To Other Customers Ratio',
       'Gravity Center Distance Ratio', 'Cluster', 'Number Clusters',
       'Cluster Size', 'X Centroid', 'Y Centroid',
       'Centroid Distance To Depot Ratio', 'Cluster Area Ratio',
       'Cluster Density Ratio', 'X Std', 'Y Std', 'X Max', 'Y Max', 'X Min',
       'Y Min', 'Correlation', 'Skewness X', 'Skewness Y', 'Savings Ratio',
       'Marginal Cost Ratio', 'Total Cost', 'Shapley Value'],
      dtype='object')

In [None]:
# Show every column of the training data as a reference for the lists below
display(train_data.columns)

# Feature names grouped by category
instance_features = [
    "Number Customers", "X Ratio", "Y Ratio", "X Depot", "Y Depot",
]
distance_features = [
    "Depot Distance Ratio",
    "Closest Customer Distance (CCD) Ratio",
    "2nd CCD Ratio", "3rd CCD Ratio", "4th CCD Ratio", "5th CCD Ratio",
    "6th CCD Ratio", "7th CCD Ratio", "8th CCD Ratio",
    "Mean Distance To Other Customers Ratio",
    "Gravity Center Distance Ratio",
]
cluster_features = [
    "Cluster", "Number Clusters", "Cluster Size",
    "X Centroid", "Y Centroid",
    "Centroid Distance To Depot Ratio",
    "Cluster Area Ratio", "Cluster Density Ratio",
    # "Distance To Closest Other Cluster Ratio"
]
statistical_features = [
    "X Std", "Y Std", "X Max", "Y Max", "X Min", "Y Min",
    "Correlation", "Skewness X", "Skewness Y",
]
cost_features = [
    "Savings Ratio", "Marginal Cost Ratio", "Total Cost",
    # "Shapley Value"
]

# Complete feature list, keeping the category order used above
all_features = [
    *instance_features,
    *distance_features,
    *cluster_features,
    *statistical_features,
    *cost_features,
]

# Categories to leave out one at a time; comment entries in or out to select them
feature_categories_dict = {
    # "Distance features": distance_features,
    "Cluster features": cluster_features,
    # "Statistical features": statistical_features,
}

Index(['Number Customers', 'X Ratio', 'Y Ratio', 'X Depot', 'Y Depot',
       'Depot Distance Ratio', 'Closest Customer Distance (CCD) Ratio',
       '2nd CCD Ratio', '3rd CCD Ratio', '4th CCD Ratio', '5th CCD Ratio',
       '6th CCD Ratio', '7th CCD Ratio', '8th CCD Ratio',
       'Mean Distance To Other Customers Ratio',
       'Gravity Center Distance Ratio', 'Cluster', 'Number Clusters',
       'Cluster Size', 'X Centroid', 'Y Centroid',
       'Centroid Distance To Depot Ratio', 'Cluster Area Ratio',
       'Cluster Density Ratio', 'X Std', 'Y Std', 'X Max', 'Y Max', 'X Min',
       'Y Min', 'Correlation', 'Skewness X', 'Skewness Y', 'Savings Ratio',
       'Marginal Cost Ratio', 'Total Cost', 'Shapley Value'],
      dtype='object')

In [None]:
import numpy as np
from sklearn.base import clone
from IPython.display import Markdown

# Collect the results of all runs in one dictionary, starting with the reference run that uses all features
# (the reference run has no difference to itself, hence None)
model_results_dict_all["MAPE difference"] = None
model_results_dict_all["RMSE difference"] = None
model_results_dict_all["Used features"] = len(all_features)
results_dict1 = {"All features": model_results_dict_all}

# Leave out one feature category at a time (every entry of feature_categories_dict),
# then compute the cross-validation scores on the train set and the scores on the test set again
for key, excluded_features in feature_categories_dict.items():
    display(Markdown(f"**############### Excluded feature category: {key} ###############**"))
    
    # Select only the used features in the train and test set
    used_features = [i for i in all_features if i not in excluded_features]
    X_train_small = X_train[used_features]
    X_test_small = X_test[used_features]
    print("Number of excluded features:", len(excluded_features))
    print("Number of used features: {}\n".format(len(used_features)))
    
    # Clone the pipeline to get an unfitted version with the same parameters.
    # A separate name is used so that `pipe` (the full-feature pipeline) is not rebound by this loop.
    pipe_small = clone(pipe)

    # Estimate model performance with cross-validation on the train set and get scores on test set (scoring: MAPE and RMSE)
    model_results_dict_new = fun_scores(pipe_small, X_train_small, y_train, X_test_small, y_test, compute_test_scores=True)

    # Compare the new test scores with the test scores of the run with all features
    # (new minus all-features: a positive difference means the error increased without this category)
    MAPE_diff = np.round(model_results_dict_new["MAPE"]["Test data"] - model_results_dict_all["MAPE"]["Test data"], 4)
    RMSE_diff = np.round(model_results_dict_new["RMSE"]["Test data"] - model_results_dict_all["RMSE"]["Test data"], 4)
    model_results_dict_new["MAPE difference"] = MAPE_diff
    model_results_dict_new["RMSE difference"] = RMSE_diff
    print("\nMAPE difference: {} - {} = {} %".format(model_results_dict_new["MAPE"]["Test data"], model_results_dict_all["MAPE"]["Test data"], MAPE_diff))
    print("RMSE difference: {} - {} = {}\n".format(model_results_dict_new["RMSE"]["Test data"], model_results_dict_all["RMSE"]["Test data"], RMSE_diff))
    model_results_dict_new["Used features"] = len(used_features)

    # Add the dictionary to the results dictionary
    results_dict1[key] = model_results_dict_new

**############### Excluded feature category: Cluster features ###############**

Number of excluded features: 8
Number of used features: 28

CV MAPE (scaled) train data: 3.5343 %
CV RMSE (scaled) train data: 0.9578
CV computation time: 10m, 10s

MAPE (scaled) test data: 3.3369999999999997 %
RMSE (scaled) test data: 0.901
Model fit time: 16m, 40s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,2.2878,2.4678,2.713,2.8512,3.0606,3.2803,3.6413,4.0047,4.2511,3.337
RMSE,0.8159,0.8549,0.8561,0.8626,0.8695,0.8916,0.9162,0.9832,0.9432,0.901



MAPE difference: 3.3369999999999997 - 3.2965 = 0.0405 %
RMSE difference: 0.901 - 0.8945 = 0.0065

