In [None]:
import warnings
import pandas as pd
import numpy as np

from sklearn.exceptions import ConvergenceWarning
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPRegressor

# Import helper functions
from ML_functions import fun_load_data, fun_preprocessing, fun_load_best_params
from ML_functions import fun_scores

# Optimization problem to analyse: set this variable to either "TSP" or "CVRP"
optimization_problem = "TSP"

# Read the data of the selected problem (the second return value is not used here)
data, _ = fun_load_data(optimization_problem)

# Preprocess the data; the train/test split (train_size=0.8) is done as part of this step
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(
    data,
    train_size=0.8,
)

# **Feature Importance**
### **1. Importance of feature categories**
**Compute train and test scores with all features: Neural Network - Multi-Layer Perceptron**

In [None]:
# Fetch the best parameters saved for the neural network ("NN") of the selected problem
best_params = fun_load_best_params(optimization_problem, model_abbreviation="NN")

# Assemble the pipeline (feature scaling followed by the MLP) and apply the best parameters;
# set_params returns the pipeline itself, so the call can be chained directly
pipe = make_pipeline(
    StandardScaler(),
    MLPRegressor(
        hidden_layer_sizes=(256, 128, 64),
        activation="relu",
        learning_rate="adaptive",
        max_iter=1000,
        random_state=42,
    ),
).set_params(**best_params)

# Cross-validate on the train set and additionally score the test set (metrics: MAPE and RMSE)
model_results_dict_all = fun_scores(pipe, X_train, y_train, X_test, y_test, compute_test_scores=True)

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

CV MAPE (scaled) train data: 8.0 %
CV RMSE (scaled) train data: 2.2
CV computation time: 2s

MAPE (scaled) test data: 6.61 %
RMSE (scaled) test data: 1.84
Model fit time: 2s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,5.81,5.51,6.14,6.15,6.27,6.42,6.76,7.09,7.88,6.61
RMSE,2.19,1.93,1.95,1.87,1.81,1.77,1.69,1.82,1.78,1.84


### **3. Principal component analysis (PCA)**

In [None]:
# Suppress ConvergenceWarning (the MLP will probably not converge within max_iter=1000 iterations).
# NOTE: warnings.filterwarnings changes the global warnings filter of this kernel, so the warning
# stays suppressed in every cell that is executed after this one.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Create a dictionary to store all the dictionaries with the results (keyed by experiment name)
results_dict2 = {"All features": model_results_dict_all}

# Create pipeline: scaling -> PCA -> MLP
pipe = make_pipeline(StandardScaler(),
                     # Reduce the scaled features to n_components principal components.
                     # random_state seeds the randomized/arpack SVD solvers (should one of them be
                     # selected) so that the scores are reproducible, like the seeded MLP below.
                     PCA(n_components=10, random_state=42),
                     MLPRegressor(hidden_layer_sizes=(256, 128, 64), activation="relu", learning_rate="adaptive", 
                                  max_iter=1000, random_state=42))
pipe.set_params(**best_params)

# Estimate model performance with cross-validation on the train set and get scores on test set (scoring: MAPE and RMSE)
model_results_dict_new = fun_scores(pipe, X_train, y_train, X_test, y_test, compute_test_scores=True)

# Compare the new test scores with the test scores obtained with all features
# (positive difference = the PCA pipeline has a higher error than the all-features baseline)
MAPE_diff = np.round(model_results_dict_new["MAPE"]["Test data"] - model_results_dict_all["MAPE"]["Test data"], 2)
RMSE_diff = np.round(model_results_dict_new["RMSE"]["Test data"] - model_results_dict_all["RMSE"]["Test data"], 2)
model_results_dict_new["MAPE difference"] = MAPE_diff
model_results_dict_new["RMSE difference"] = RMSE_diff
print("\nMAPE difference: {} - {} = {} %".format(model_results_dict_new["MAPE"]["Test data"], model_results_dict_all["MAPE"]["Test data"], MAPE_diff))
print("RMSE difference: {} - {} = {}\n".format(model_results_dict_new["RMSE"]["Test data"], model_results_dict_all["RMSE"]["Test data"], RMSE_diff))

# Add the dictionary to the results dictionary
results_dict2["PCA"] = model_results_dict_new

CV MAPE (scaled) train data: 19.49 %
CV RMSE (scaled) train data: 5.12
CV computation time: 2s

MAPE (scaled) test data: 19.56 %
RMSE (scaled) test data: 4.9
Model fit time: 2s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,18.18,18.23,19.17,18.84,20.13,19.98,20.24,18.49,21.15,19.56
RMSE,6.07,5.78,5.77,5.02,4.87,4.95,4.4,4.21,4.16,4.9



MAPE difference: 19.56 - 6.61 = 12.95 %
RMSE difference: 4.9 - 1.84 = 3.06

