In [1]:
import os
import time
import warnings

import numpy as np
import pandas as pd
import xgboost as xgb

from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, PolynomialFeatures, StandardScaler, MinMaxScaler
from sklearn.svm import LinearSVR, SVR
from sklearn.tree import DecisionTreeRegressor

# Import helper functions
from ML_functions import fun_load_settings, fun_load_data, fun_preprocessing, fun_load_best_params
from ML_functions import fun_convert_time
from ML_functions import fun_scores

# Fallback optimization problem used when this notebook is executed manually (choose either "TSP" or "CVRP")
default_optimization_problem = "TSP"

# Determine the optimization problem based on how the notebook is executed:
# if the notebook is run by the script "main.ipynb", optimization_problem is loaded from "settings.json";
# otherwise the default optimization problem from above is used (logic lives in fun_load_settings, not shown here)
optimization_problem = fun_load_settings(default_optimization_problem)

# Load the data for the selected problem. The helper also returns start_script, the start time of the
# script, which is passed to fun_convert_time at the end of the notebook to report the total run time
data, start_script = fun_load_data(optimization_problem)

# Preprocess the data and perform the train/test split (train_size=0.8) inside the helper
# NOTE(review): train_data is not used again in this notebook - confirm whether it is still needed
X_train, X_test, y_train, y_test, train_data = fun_preprocessing(data, train_size=0.8)

The notebook is executed directly. :)
Optimization Problem: 'TSP'


# **A. Instance-based models**
### **1. K-nearest Neighbor - KNN**

In [2]:
# Fetch the tuned hyperparameters for the KNN pipeline
best_params = fun_load_best_params(optimization_problem, model_abbreviation="KNN")

# Build the pipeline and apply best_params in one go; the "scaler" step starts as a
# None placeholder so that best_params can supply the scaler as a pipeline parameter
pipe = Pipeline(steps=[("scaler", None), ("knn", KNeighborsRegressor())]).set_params(**best_params)

# Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(
    model=pipe,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    compute_test_scores=True,
)

# Start the results collection; every following model cell adds its own entry
results_dict = {}
results_dict["KNN"] = model_results_dict

{'knn__n_neighbors': 8, 'scaler': StandardScaler()}

CV MAPE (scaled) train data:  23.7741 %
CV RMSE (scaled) train data: 4.6775
CV computation time: 30s

MAPE (scaled) test data:  22.416 %
RMSE (scaled) test data: 4.5123
Model fit time: 0s
Model prediction time: 2s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,26.9853,23.0869,23.0567,22.5353,23.1626,20.6273,21.4564,20.4195,23.2276,22.416
RMSE,6.9669,5.667,5.1174,4.7273,4.421,4.0579,3.7169,3.7094,3.6311,4.5123


# **B. Linear Models**
### **1. Linear Regression**

In [3]:
if optimization_problem != "TSP":
    print("This cell is only executed for the TSP!")
else:
    # Ordinary least-squares model without any tuned hyperparameters
    lr = LinearRegression()

    # Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
    model_results_dict = fun_scores(
        lr,
        X_train,
        y_train,
        X_test,
        y_test,
        apply_scaling=True,
        compute_test_scores=True,
    )
    results_dict["Linear Regression"] = model_results_dict

CV MAPE (scaled) train data:  20.6429 %
CV RMSE (scaled) train data: 4.2858
CV computation time: 3s

MAPE (scaled) test data:  21.0844 %
RMSE (scaled) test data: 4.3982
Model fit time: 0s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,22.1609,18.5325,18.6898,16.4951,16.6162,14.3913,19.0307,21.8103,35.7541,21.0844
RMSE,6.3504,5.2883,4.4824,3.8846,3.6128,3.1841,3.5297,4.0782,5.3381,4.3982


### **2. Ridge Regression (L2-Regularization)**

In [4]:
if optimization_problem != "TSP":
    print("This cell is only executed for the TSP!")
else:
    # Fetch the tuned hyperparameters for the Ridge model
    best_params = fun_load_best_params(optimization_problem, model_abbreviation="Ridge")

    # L2-regularized linear model; the solver is fixed to "svd", the rest comes from best_params
    ridge = Ridge(solver="svd", **best_params)

    # Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
    model_results_dict = fun_scores(
        ridge,
        X_train,
        y_train,
        X_test,
        y_test,
        apply_scaling=True,
        compute_test_scores=True,
    )
    results_dict["Ridge Regression"] = model_results_dict

{'alpha': 1}

CV MAPE (scaled) train data:  20.6427 %
CV RMSE (scaled) train data: 4.2856
CV computation time: 3s

MAPE (scaled) test data:  21.0891 %
RMSE (scaled) test data: 4.3983
Model fit time: 0s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,22.1643,18.5393,18.6895,16.4784,16.6164,14.3957,19.0336,21.814,35.7811,21.0891
RMSE,6.3478,5.2859,4.4821,3.8827,3.6128,3.1849,3.5308,4.0787,5.3406,4.3983


### **3. Polynomial Regression**
**Preprocessor to create interactions and polynomial features**

In [5]:
# Read the most important features from the file written by "b1_feature_selection.ipynb";
# the feature names are the column labels of that file
top_features = pd.read_csv(f"02_best_features/{optimization_problem}_top_features").columns.tolist()

# Branch 1: bin each top feature into 10 uniform-width bins and one-hot encode the bin index
binning_step = ("binning", KBinsDiscretizer(n_bins=10, encode="ordinal", strategy="uniform"))
onehot_step = ("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))
onehot_transformer = Pipeline(steps=[binning_step, onehot_step])

# Branch 2: polynomial and interaction terms up to degree 3 of the top features
# NOTE(review): include_bias=True adds a constant column on top of the intercept that
# LinearRegression fits by default - confirm this redundancy is intended
poly_step = ("poly", PolynomialFeatures(degree=3, interaction_only=False, include_bias=True))
poly_transformer = Pipeline(steps=[poly_step])

# Apply both branches to the top features; all remaining columns are passed through unchanged
preprocessor = ColumnTransformer(
    transformers=[
        ("onehot", onehot_transformer, top_features),
        ("poly", poly_transformer, top_features),
    ],
    remainder="passthrough",
)

# Linear regression on top of the engineered features
pipe = Pipeline(steps=[("preprocessor", preprocessor), ("lr", LinearRegression())])

# Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(
    pipe,
    X_train,
    y_train,
    X_test,
    y_test,
    apply_scaling=True,
    compute_test_scores=True,
)
results_dict["Polynomial Regression"] = model_results_dict

CV MAPE (scaled) train data:  7.362299999999999 %
CV RMSE (scaled) train data: 1.8885
CV computation time: 33s

MAPE (scaled) test data:  7.238700000000001 %
RMSE (scaled) test data: 1.8597
Model fit time: 18s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,5.2732,5.708,6.6465,6.6716,7.5816,6.5973,7.3335,7.8547,9.1552,7.2387
RMSE,2.0435,1.9491,1.9844,1.9201,1.9027,1.72,1.7111,1.8052,1.8649,1.8597


# **C. Decision Tree**

In [6]:
if optimization_problem != "TSP":
    print("This cell is only executed for the TSP!")
else:
    # Fetch the tuned hyperparameters for the decision tree
    best_params = fun_load_best_params(optimization_problem, model_abbreviation="DT")

    # Single regression tree with a fixed seed for reproducible results
    tree = DecisionTreeRegressor(random_state=42, **best_params)

    # Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
    model_results_dict = fun_scores(
        tree,
        X_train,
        y_train,
        X_test,
        y_test,
        compute_test_scores=True,
    )
    results_dict["Decision Tree"] = model_results_dict

{'min_samples_leaf': 19,
 'min_impurity_decrease': 0.0001,
 'max_leaf_nodes': 2000,
 'max_depth': 45}

CV MAPE (scaled) train data:  11.6332 %
CV RMSE (scaled) train data: 3.886
CV computation time: 2s

MAPE (scaled) test data:  11.4436 %
RMSE (scaled) test data: 3.7323
Model fit time: 2s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,11.3174,12.0265,11.5868,11.2419,11.625,10.9698,11.5128,11.13,11.7287,11.4436
RMSE,5.0715,4.907,4.2937,4.0477,3.7463,3.3705,3.2538,3.0274,2.9806,3.7323


# **D. Ensembles of Decision Trees**
### **1. Random Forest**

In [7]:
# Fetch the tuned hyperparameters for the random forest
best_params = fun_load_best_params(optimization_problem, model_abbreviation="RF")

# Forest of 500 trees, trained on all available cores with a fixed seed;
# the remaining hyperparameters come from best_params
forest = RandomForestRegressor(
    n_estimators=500,
    n_jobs=-1,
    random_state=42,
    **best_params,
)

# Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(
    forest,
    X_train,
    y_train,
    X_test,
    y_test,
    compute_test_scores=True,
)
results_dict["Random Forest"] = model_results_dict

{'min_samples_split': 11,
 'min_samples_leaf': 6,
 'min_impurity_decrease': 0.0001,
 'max_leaf_nodes': None,
 'max_features': 25,
 'max_depth': 13}

CV MAPE (scaled) train data:  7.9083 %
CV RMSE (scaled) train data: 2.3605
CV computation time: 4m, 20s

MAPE (scaled) test data:  7.7351 %
RMSE (scaled) test data: 2.2446
Model fit time: 2m, 26s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,6.5581,7.1463,7.41,7.2005,7.6177,7.3522,7.883,8.2568,8.837,7.7351
RMSE,2.7158,2.6226,2.5643,2.2481,2.2607,2.0956,2.0438,2.0515,2.0474,2.2446


### **2. Gradient Boosting Regression Trees**

In [8]:
if optimization_problem != "TSP":
    print("This cell is only executed for the TSP!")
else:
    # Fetch the tuned hyperparameters for the gradient boosting model
    best_params = fun_load_best_params(optimization_problem, model_abbreviation="GBRT")

    # Gradient boosted regression trees with a fixed seed for reproducible results
    gbrt = GradientBoostingRegressor(random_state=42, **best_params)

    # Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
    model_results_dict = fun_scores(
        gbrt,
        X_train,
        y_train,
        X_test,
        y_test,
        compute_test_scores=True,
    )
    results_dict["GBRT"] = model_results_dict

{'n_estimators': 120,
 'min_samples_split': 6,
 'min_samples_leaf': 23,
 'max_leaf_nodes': 1500,
 'max_features': 25,
 'max_depth': 10,
 'learning_rate': 0.1}

CV MAPE (scaled) train data:  6.1202 %
CV RMSE (scaled) train data: 1.8355
CV computation time: 2m, 39s

MAPE (scaled) test data:  5.811100000000001 %
RMSE (scaled) test data: 1.7169
Model fit time: 2m, 57s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,4.9207,5.2847,5.5931,5.4526,5.8007,5.3347,5.9932,6.1885,6.6861,5.8111
RMSE,2.0705,1.9768,1.9386,1.7253,1.7199,1.5545,1.5559,1.5991,1.6237,1.7169


### **3. Extreme Gradient Boosting: XGBoost-Package**

In [9]:
# Fetch the tuned hyperparameters for XGBoost
best_params = fun_load_best_params(optimization_problem, model_abbreviation="XGBoost")

# 750 boosting rounds with squared-error objective and a fixed seed;
# the remaining hyperparameters come from best_params
xgboost = xgb.XGBRegressor(
    n_estimators=750,
    objective="reg:squarederror",
    random_state=42,
    **best_params,
)

# Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(
    xgboost,
    X_train,
    y_train,
    X_test,
    y_test,
    compute_test_scores=True,
)
results_dict["XGBoost"] = model_results_dict

{'colsample_bytree': 1.0,
 'learning_rate': 0.05,
 'max_depth': 7,
 'subsample': 0.6}

CV MAPE (scaled) train data:  5.4142 %
CV RMSE (scaled) train data: 1.5627
CV computation time: 24s

MAPE (scaled) test data:  5.2724 %
RMSE (scaled) test data: 1.4827
Model fit time: 8s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,4.4762,4.8422,4.7988,4.9949,5.1514,4.9781,5.4432,5.6247,6.1217,5.2724
RMSE,1.7699,1.7315,1.6164,1.4454,1.4908,1.379,1.3398,1.4001,1.4194,1.4827


# **E. Linear Support Vector Machines (SVM) and Kernel Machines**
**Linear SVM**

In [10]:
if optimization_problem != "TSP":
    print("This cell is only executed for the TSP!")
else:
    # Fetch the tuned hyperparameters for the linear SVM pipeline
    best_params = fun_load_best_params(optimization_problem, model_abbreviation="SVM")

    # Pipeline with a None placeholder for the scaler; best_params supplies the
    # pipeline parameters (step names "scaler" and "SVM")
    svm_steps = [
        ("scaler", None),
        ("SVM", LinearSVR(max_iter=10000, random_state=42)),
    ]
    pipe = Pipeline(steps=svm_steps)
    pipe.set_params(**best_params)

    # Cross-validate on the train set and score on the test set (scoring: MAPE and RMSE)
    model_results_dict = fun_scores(
        pipe,
        X_train,
        y_train,
        X_test,
        y_test,
        compute_test_scores=True,
    )
    results_dict["SVM"] = model_results_dict

{'SVM__C': 10, 'SVM__epsilon': 0.1, 'scaler': StandardScaler()}

CV MAPE (scaled) train data:  19.3978 %
CV RMSE (scaled) train data: 4.0675
CV computation time: 31s

MAPE (scaled) test data:  19.7359 %
RMSE (scaled) test data: 4.174
Model fit time: 50s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,22.733,19.3968,19.2609,16.8381,16.5873,13.9153,17.7463,19.2937,29.6933,19.7359
RMSE,6.5312,5.4482,4.5922,3.9155,3.5635,2.9959,3.1994,3.5596,4.5067,4.174


**Kernel Machine with Gaussian Kernel**

In [11]:
# Load best parameters of the model
best_params = fun_load_best_params(optimization_problem, model_abbreviation="KM")

# Define the model pipeline; the "scaler" step is a None placeholder that best_params can replace
pipe = Pipeline(steps=[("scaler", None), 
                       ("SVM", SVR(kernel="rbf", cache_size=2000, max_iter=500000))])
pipe.set_params(**best_params)

# Estimate model performance with cross-validation on the train set and get scores on test set (scoring: MAPE and RMSE).
# The ConvergenceWarning is suppressed only while this model is evaluated (it will probably not converge
# within 500,000 iterations). Scoping the filter with catch_warnings() restores the previous warning
# settings afterwards, so convergence warnings of models in later cells are not silently hidden.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning)
    model_results_dict = fun_scores(pipe, X_train, y_train, X_test, y_test, compute_test_scores=True)
results_dict["Kernel Machine"] = model_results_dict

{'SVM__C': 100,
 'SVM__epsilon': 1,
 'SVM__gamma': 'scale',
 'scaler': MinMaxScaler()}

CV MAPE (scaled) train data:  4.3228 %
CV RMSE (scaled) train data: 1.1145
CV computation time: 4m, 57s

MAPE (scaled) test data:  4.1867 %
RMSE (scaled) test data: 1.0653
Model fit time: 6m, 17s
Model prediction time: 23s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,3.6736,3.0239,3.4447,3.4647,3.7622,3.9352,4.4039,4.8451,5.5795,4.1867
RMSE,1.2048,1.0055,1.008,0.9734,0.982,0.997,1.0383,1.1464,1.1679,1.0653


# **F. Neural Network - Multi Layer Perceptron**

In [12]:
# Load best parameters of the model
best_params = fun_load_best_params(optimization_problem, model_abbreviation="NN")

# Define the parameter hidden_layer_sizes depending on the problem setting
if optimization_problem == "TSP":
    hls = (256, 128, 64)
elif optimization_problem == "CVRP":
    hls = (128, 64, 32)
else:
    # Fail with a clear message instead of a NameError on the undefined hls below
    raise ValueError(f"Unknown optimization problem {optimization_problem!r}; expected 'TSP' or 'CVRP'.")
print("Hidden layer sizes:", hls)

# Create pipeline; make_pipeline names the steps "standardscaler" and "mlpregressor",
# which are the prefixes expected by the keys in best_params
pipe = make_pipeline(StandardScaler(), 
                     MLPRegressor(hidden_layer_sizes=hls, activation="relu", learning_rate="adaptive", 
                                  max_iter=1000, random_state=42))
pipe.set_params(**best_params)

# Estimate model performance with cross-validation on the train set and get scores on test set (scoring: MAPE and RMSE)
model_results_dict = fun_scores(pipe, X_train, y_train, X_test, y_test, compute_test_scores=True)
results_dict["Neural Network"] = model_results_dict

{'mlpregressor__alpha': 0.1,
 'mlpregressor__batch_size': 32,
 'mlpregressor__early_stopping': False,
 'mlpregressor__learning_rate_init': 0.001,
 'mlpregressor__solver': 'sgd'}

Hidden layer sizes: (256, 128, 64)
CV MAPE (scaled) train data:  3.5156 %
CV RMSE (scaled) train data: 0.9565
CV computation time: 17m, 6s

MAPE (scaled) test data:  3.3343 %
RMSE (scaled) test data: 0.8999
Model fit time: 27m, 2s
Model prediction time: 0s


**MAPE and RMSE on test data per instance size:**

Number Customers,6,7,8,9,10,11,12,13,14,Mean
MAPE,2.2156,2.3114,2.6244,2.8952,3.1252,3.2157,3.7293,3.934,4.3603,3.3343
RMSE,0.8039,0.8375,0.8582,0.8582,0.8375,0.8806,0.9356,0.9706,0.9739,0.8999


# **G. Compare Results**

In [13]:
# Get the model names and the per-model result dictionaries once, so that all tables below share the same model order
model_names = results_dict.keys()
model_results = list(results_dict.values())

# Extract the computation times and the scores for each model
cv_times = [result["CV computation time"] for result in model_results]
fit_times = [result["Model fit time"] for result in model_results]
prediction_times = [result["Model prediction time"] for result in model_results]
MAPE_train_scores = np.round([result["MAPE"]["Train data"] for result in model_results], 2) # Round MAPE scores
MAPE_test_scores = np.round([result["MAPE"]["Test data"] for result in model_results], 2) # Round MAPE scores
RMSE_train_scores = [result["RMSE"]["Train data"] for result in model_results]
RMSE_test_scores = [result["RMSE"]["Test data"] for result in model_results]

# Show train and test scores for each model and the computation times in a Data Frame
# (score tables are ordered by the test score, best model first)
run_times_df = pd.DataFrame([cv_times, fit_times, prediction_times], index=["CV times", "Fit times", "Prediction times"], columns=model_names)
MAPE_df = pd.DataFrame(data=[MAPE_train_scores, MAPE_test_scores], columns=model_names, index=["Train set", "Test set"]).sort_values(by="Test set", axis=1)
MAPE_df.columns.name = "MAPE scores"
RMSE_df = pd.DataFrame(data=[RMSE_train_scores, RMSE_test_scores], columns=model_names, index=["Train set", "Test set"]).sort_values(by="Test set", axis=1)
RMSE_df.columns.name = "RMSE scores"
display(run_times_df, MAPE_df, RMSE_df)

# Show scores per instance size for each model (ordered by the mean score over all instance sizes)
MAPE_cat_scores = [result["Scores per instance size"].loc["MAPE"].round(2) for result in model_results] # Round MAPE scores
RMSE_cat_scores = [result["Scores per instance size"].loc["RMSE"] for result in model_results]
MAPE_cat_scores_df = pd.DataFrame(data=MAPE_cat_scores, index=model_names).sort_values(by="Mean")
MAPE_cat_scores_df.columns.name = "MAPE scores per instance size"
RMSE_cat_scores_df = pd.DataFrame(data=RMSE_cat_scores, index=model_names).sort_values(by="Mean")
RMSE_cat_scores_df.columns.name = "RMSE scores per instance size"
display(MAPE_cat_scores_df, RMSE_cat_scores_df)

# Save data frames with results into an excel file
file_path = f"04_test_results/{optimization_problem}_results.xlsx"

# Make sure the output directory exists, so the results of the long-running script
# are not lost to a missing-directory error at the very last step
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Use ExcelWriter to write multiple DataFrames to the same file (one sheet per table)
with pd.ExcelWriter(file_path) as writer:
    run_times_df.to_excel(writer, sheet_name="run_times")
    MAPE_df.to_excel(writer, sheet_name="MAPE_scores")
    RMSE_df.to_excel(writer, sheet_name="RMSE_scores")
    MAPE_cat_scores_df.to_excel(writer, sheet_name="MAPE_cat_scores")
    RMSE_cat_scores_df.to_excel(writer, sheet_name="RMSE_cat_scores")

# Print total script run time (start_script was returned by fun_load_data at the top of the notebook)
print("Total script computation time:", fun_convert_time(start=start_script, end=time.time()))

Unnamed: 0,KNN,Linear Regression,Ridge Regression,Polynomial Regression,Decision Tree,Random Forest,Gradient Boosting Regression Trees,XGBoost,Linear SVM,Kernel Machine,Neural Network
CV times,27s,3s,3s,33s,2s,"4m, 20s","2m, 39s",24s,31s,"4m, 57s","17m, 6s"
Fit times,0s,0s,0s,18s,2s,"2m, 26s","2m, 57s",8s,50s,"6m, 17s","27m, 2s"
Prediction times,2s,0s,0s,0s,0s,0s,0s,0s,0s,23s,0s


MAPE Scores,Neural Network,Kernel Machine,XGBoost,Gradient Boosting Regression Trees,Polynomial Regression,Random Forest,Decision Tree,Linear SVM,Linear Regression,Ridge Regression,KNN
Train Set,3.52,4.32,5.41,6.12,7.36,7.91,11.63,19.4,20.64,20.64,23.77
Test Set,3.33,4.19,5.27,5.81,7.24,7.74,11.44,19.74,21.08,21.09,22.42


RMSE Scores,Neural Network,Kernel Machine,XGBoost,Gradient Boosting Regression Trees,Polynomial Regression,Random Forest,Decision Tree,Linear SVM,Linear Regression,Ridge Regression,KNN
Train Set,0.9565,1.1145,1.5627,1.8355,1.8885,2.3605,3.886,4.0675,4.2858,4.2856,4.6775
Test Set,0.8999,1.0653,1.4827,1.7169,1.8597,2.2446,3.7323,4.174,4.3982,4.3983,4.5123


MAPE Scores per instance size,6,7,8,9,10,11,12,13,14,Mean
Neural Network,2.22,2.31,2.62,2.9,3.13,3.22,3.73,3.93,4.36,3.33
Kernel Machine,3.67,3.02,3.44,3.46,3.76,3.94,4.4,4.85,5.58,4.19
XGBoost,4.48,4.84,4.8,4.99,5.15,4.98,5.44,5.62,6.12,5.27
Gradient Boosting Regression Trees,4.92,5.28,5.59,5.45,5.8,5.33,5.99,6.19,6.69,5.81
Polynomial Regression,5.27,5.71,6.65,6.67,7.58,6.6,7.33,7.85,9.16,7.24
Random Forest,6.56,7.15,7.41,7.2,7.62,7.35,7.88,8.26,8.84,7.74
Decision Tree,11.32,12.03,11.59,11.24,11.62,10.97,11.51,11.13,11.73,11.44
Linear SVM,22.73,19.4,19.26,16.84,16.59,13.92,17.75,19.29,29.69,19.74
Linear Regression,22.16,18.53,18.69,16.5,16.62,14.39,19.03,21.81,35.75,21.08
Ridge Regression,22.16,18.54,18.69,16.48,16.62,14.4,19.03,21.81,35.78,21.09


RMSE Scores per instance size,6,7,8,9,10,11,12,13,14,Mean
Neural Network,0.8039,0.8375,0.8582,0.8582,0.8375,0.8806,0.9356,0.9706,0.9739,0.8999
Kernel Machine,1.2048,1.0055,1.008,0.9734,0.982,0.997,1.0383,1.1464,1.1679,1.0653
XGBoost,1.7699,1.7315,1.6164,1.4454,1.4908,1.379,1.3398,1.4001,1.4194,1.4827
Gradient Boosting Regression Trees,2.0705,1.9768,1.9386,1.7253,1.7199,1.5545,1.5559,1.5991,1.6237,1.7169
Polynomial Regression,2.0435,1.9491,1.9844,1.9201,1.9027,1.72,1.7111,1.8052,1.8649,1.8597
Random Forest,2.7158,2.6226,2.5643,2.2481,2.2607,2.0956,2.0438,2.0515,2.0474,2.2446
Decision Tree,5.0715,4.907,4.2937,4.0477,3.7463,3.3705,3.2538,3.0274,2.9806,3.7323
Linear SVM,6.5312,5.4482,4.5922,3.9155,3.5635,2.9959,3.1994,3.5596,4.5067,4.174
Linear Regression,6.3504,5.2883,4.4824,3.8846,3.6128,3.1841,3.5297,4.0782,5.3381,4.3982
Ridge Regression,6.3478,5.2859,4.4821,3.8827,3.6128,3.1849,3.5308,4.0787,5.3406,4.3983


Total script computation time: 1h, 12m
