In [1]:
import os
import sys
import pandas as pd
import numpy as np

from sklearn.metrics import mean_absolute_percentage_error, root_mean_squared_error

# Add the parent directory to the Python path so that the helper functions can be imported from ML_functions
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
# Only add the entry once so that re-running this cell does not pile up duplicates in sys.path
if parent_directory not in sys.path:
    sys.path.append(parent_directory)

# Import helper functions
from ML_functions import fun_load_data, fun_preprocessing

# Identifier of the optimization problem: it is passed to fun_load_data and is used as the prefix
# of the result file names written further below.
# NOTE(review): an earlier comment listed "TSP" or "CVRP" as options while the value used here is
# "TSP_benchmarks" - confirm the accepted values in ML_functions.
optimization_problem = "TSP_benchmarks"

# Load data
data, start_script = fun_load_data(optimization_problem)

# Do the train test split during the preprocessing (80 % of the data is used for training).
# The proxy columns "SHAPO" and "Φ BLEND" are read from X_train / X_test below; presumably
# keep_SHAPO=True is what keeps them in the feature frames - verify in fun_preprocessing.
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(data, train_size=0.8, keep_SHAPO=True)

# **Evaluation of the SHAPO benchmark defined in Levinger et al., 2021**

In [2]:
# Display the Data Frame with all predictions of the two proxies SHAPO and Φ BLEND
display(train_data)

# Check for every instance whether its SHAPO values add up to the total costs of the instance
# (both numbers are compared after rounding them to two decimals).
print("Check whether all instances are scaled correctly:")

num_incorrect = 0

# Iterate over the instances that are actually present in the data. Grouping once avoids filtering
# the full frame twice per ID, and it cannot run into an empty selection when an ID between 1 and
# the maximum ID is missing from the frame.
for instance_id, instance_rows in train_data.groupby("Instance ID"):
    sum_shapo = np.round(instance_rows["SHAPO"].sum(), 2)
    # The total costs are read from the first row of the instance
    total_costs = np.round(instance_rows["Total Costs"].iloc[0], 2)
    if sum_shapo != total_costs:
        num_incorrect += 1
        print(" - Instance {}: Sum of predictions: {}, Total Costs: {}".format(instance_id, sum_shapo, total_costs))

print("Number of instances incorrect scaled (Sum of predictins is not equal to the total costs):", num_incorrect)

Unnamed: 0,Instance ID,Number Customers,X,Y,X Depot,Y Depot,Depot Distance,Total Costs,Shapley Value,SHAPO,Φ DEPOT,Φ MOAT,Φ BLEND
0,1,6,11.757432,50.848731,2.380844,66.016752,17.832253,227.291186,6.805996,6.847093,12.131437,30.438918,14.026705
1,1,6,83.228495,41.537025,2.380844,66.016752,84.472480,227.291186,73.361446,72.126416,57.467361,53.908965,69.969918
2,1,6,33.032921,29.876631,2.380844,66.016752,47.388376,227.291186,21.568006,21.751330,32.238724,31.852306,33.359091
3,1,6,42.131509,30.755973,2.380844,66.016752,53.136032,227.291186,25.980268,26.403603,36.148904,33.513918,37.963247
4,1,6,54.103013,58.267699,2.380844,66.016752,52.299433,227.291186,30.191952,30.346193,35.579758,31.791732,36.391426
...,...,...,...,...,...,...,...,...,...,...,...,...,...
89995,9000,14,35.411268,13.512220,83.308855,83.076767,84.459488,370.421950,32.946966,33.551630,38.970270,26.559562,36.319218
89996,9000,14,94.027698,4.059342,83.308855,83.076767,79.741125,370.421950,70.014213,69.654750,36.793181,32.775583,38.105393
89997,9000,14,15.703090,62.406463,83.308855,83.076767,70.695127,370.421950,37.349798,42.540654,32.619286,28.146275,31.451366
89998,9000,14,46.611090,57.482706,83.308855,83.076767,44.741277,370.421950,16.911138,12.089902,20.643976,26.437231,19.409313


Check whether all instances are scaled correctly:
 - Instance 4575: Sum of predictions: 342.24, Total Costs: 341.96
 - Instance 7130: Sum of predictions: 383.16, Total Costs: 382.87
 - Instance 7618: Sum of predictions: 344.32, Total Costs: 344.15
Number of instances incorrect scaled (Sum of predictins is not equal to the total costs): 3


In [3]:
# Column of X_train / X_test that holds the proxy predictions evaluated in this cell
proxy_column = "SHAPO"

# Compute train errors. The MAPE is converted to percent BEFORE rounding so that the stored value
# really has two decimals (rounding first and multiplying by 100 afterwards can bring back
# floating-point noise). The RMSE is rounded to four decimals.
MAPE_train = np.round(mean_absolute_percentage_error(y_true=y_train, y_pred=X_train[proxy_column]) * 100, 2)
RMSE_train = np.round(root_mean_squared_error(y_true=y_train, y_pred=X_train[proxy_column]), 4)

# Compute test errors
MAPE_test = np.round(mean_absolute_percentage_error(y_true=y_test, y_pred=X_test[proxy_column]) * 100, 2)
RMSE_test = np.round(root_mean_squared_error(y_true=y_test, y_pred=X_test[proxy_column]), 4)

# Connect the train and test scores in a dictionary for the MAPE and RMSE
mape_scores = {"Train data": MAPE_train, "Test data": MAPE_test}
rmse_scores = {"Train data": RMSE_train, "Test data": RMSE_test}

# Create a Data Frame with the train and test scores (one row per error measure)
scores_df = pd.DataFrame(
    data=[list(mape_scores.values()), list(rmse_scores.values())],
    columns=["Train Score", "Test Score"],
    index=["MAPE", "RMSE"],
)

# Compute error measures in the test set for each instance size group individually.
# Only the prediction column is grouped: the metrics need just the predictions and their row labels
# (used to pick the matching targets from y_test), and the grouping column is not handed to apply,
# which is deprecated for DataFrameGroupBy.apply in recent pandas versions.
predictions_by_size = X_test.groupby(by="Number Customers")[proxy_column]
MAPE_cat = predictions_by_size.apply(lambda preds: mean_absolute_percentage_error(y_true=y_test.loc[preds.index], y_pred=preds))
RMSE_cat = predictions_by_size.apply(lambda preds: root_mean_squared_error(y_true=y_test.loc[preds.index], y_pred=preds))

# Round results and merge them into a data frame (rows: error measures, columns: instance sizes)
MAPE_cat = np.round(MAPE_cat * 100, 2)
RMSE_cat = np.round(RMSE_cat, 4)
cat_scores_df = pd.DataFrame({"MAPE": MAPE_cat, "RMSE": RMSE_cat}).T
# The "Mean" column holds the scores on the complete test set, not the average of the group columns
cat_scores_df["Mean"] = [MAPE_test, RMSE_test]
display(scores_df, cat_scores_df)

# Save data frames with results into an excel file.
# The path is built with os.path.join so that it also resolves on non-Windows systems.
file_path = os.path.join(os.pardir, "04_test_results", optimization_problem + "_shapo_results.xlsx")

# Use ExcelWriter to write multiple DataFrames to the same file
with pd.ExcelWriter(file_path) as writer:
    scores_df.to_excel(writer, sheet_name="scores")
    cat_scores_df.to_excel(writer, sheet_name="cat_scores")
print("File saved succesfully.")

Unnamed: 0,Train Score,Test Score
MAPE,3.82,3.77
RMSE,1.6151,1.6016


Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,1.31,1.6,2.26,2.9,3.46,4.2,4.51,5.08,5.37,3.77
RMSE,0.9076,0.9762,1.1378,1.3578,1.5303,1.7732,1.7436,1.8843,1.9207,1.6016


File saved succesfully.


# **Evaluation of the Blended Proxy defined in Aziz et al., 2016**

In [4]:
# Column of X_train / X_test that holds the proxy predictions evaluated in this cell.
# NOTE(review): apart from this column and the output file name, the cell repeats the SHAPO
# evaluation cell - consider moving the shared logic into a helper function.
proxy_column = "Φ BLEND"

# Compute train errors. The MAPE is converted to percent BEFORE rounding so that the stored value
# really has two decimals (rounding first and multiplying by 100 afterwards can bring back
# floating-point noise). The RMSE is rounded to four decimals.
MAPE_train = np.round(mean_absolute_percentage_error(y_true=y_train, y_pred=X_train[proxy_column]) * 100, 2)
RMSE_train = np.round(root_mean_squared_error(y_true=y_train, y_pred=X_train[proxy_column]), 4)

# Compute test errors
MAPE_test = np.round(mean_absolute_percentage_error(y_true=y_test, y_pred=X_test[proxy_column]) * 100, 2)
RMSE_test = np.round(root_mean_squared_error(y_true=y_test, y_pred=X_test[proxy_column]), 4)

# Connect the train and test scores in a dictionary for the MAPE and RMSE
mape_scores = {"Train data": MAPE_train, "Test data": MAPE_test}
rmse_scores = {"Train data": RMSE_train, "Test data": RMSE_test}

# Create a Data Frame with the train and test scores (one row per error measure)
scores_df = pd.DataFrame(
    data=[list(mape_scores.values()), list(rmse_scores.values())],
    columns=["Train Score", "Test Score"],
    index=["MAPE", "RMSE"],
)

# Compute error measures in the test set for each instance size group individually.
# Only the prediction column is grouped: the metrics need just the predictions and their row labels
# (used to pick the matching targets from y_test), and the grouping column is not handed to apply,
# which is deprecated for DataFrameGroupBy.apply in recent pandas versions.
predictions_by_size = X_test.groupby(by="Number Customers")[proxy_column]
MAPE_cat = predictions_by_size.apply(lambda preds: mean_absolute_percentage_error(y_true=y_test.loc[preds.index], y_pred=preds))
RMSE_cat = predictions_by_size.apply(lambda preds: root_mean_squared_error(y_true=y_test.loc[preds.index], y_pred=preds))

# Round results and merge them into a data frame (rows: error measures, columns: instance sizes)
MAPE_cat = np.round(MAPE_cat * 100, 2)
RMSE_cat = np.round(RMSE_cat, 4)
cat_scores_df = pd.DataFrame({"MAPE": MAPE_cat, "RMSE": RMSE_cat}).T
# The "Mean" column holds the scores on the complete test set, not the average of the group columns
cat_scores_df["Mean"] = [MAPE_test, RMSE_test]
display(scores_df, cat_scores_df)

# Save data frames with results into an excel file.
# The path is built with os.path.join so that it also resolves on non-Windows systems.
# NOTE(review): "belended" looks like a typo for "blended"; the file name is kept unchanged because
# other code may rely on it.
file_path = os.path.join(os.pardir, "04_test_results", optimization_problem + "_belended_proxy_results.xlsx")

# Use ExcelWriter to write multiple DataFrames to the same file
with pd.ExcelWriter(file_path) as writer:
    scores_df.to_excel(writer, sheet_name="scores")
    cat_scores_df.to_excel(writer, sheet_name="cat_scores")
print("File saved succesfully.")

Unnamed: 0,Train Score,Test Score
MAPE,27.86,28.04
RMSE,9.0866,9.0903


Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,38.11,32.83,30.53,28.62,27.1,26.24,25.6,24.89,26.62,28.04
RMSE,11.4652,10.2759,9.3751,8.9125,8.7107,8.8522,8.5327,8.7449,8.4625,9.0903


File saved succesfully.
