In [1]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib_inline

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline, make_pipeline

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

# import helperfunctions
from helperfunctions import fun_load_file, fun_preprocessing, fun_split_X_y
from helperfunctions import fun_convert_time
from helperfunctions import fun_train_score, fun_best_model, fun_test_score, fun_category_scores
from helperfunctions import fun_feature_weights, plot_feature_importances

# start time count for the whole script
start_script = time.time()

# load data: the folder is assembled with os.path.join so the separator matches the
# host OS (on Windows this is still '01_data\\01_TSP', elsewhere '01_data/01_TSP')
data = fun_load_file(path=os.path.join('01_data', '01_TSP'), name='combined_train_instances_dennis.xlsx')
train_data = fun_preprocessing(data)
X, y = fun_split_X_y(train_data)

# create a train and test set (25 % of the rows are held out; fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# create a smaller train set for svm (60 % held out, so roughly 40 % is used for training)
X_train_small, X_test_small, y_train_small, y_test_small = train_test_split(X, y, test_size=0.6, random_state=0)

# save number of features and train sizes (sizes are strings of the form 'NN %',
# i.e. the rounded share of all rows that ended up in the respective train set)
n_features = X_train.shape[1]
train_size = f'{int(np.round(100 * len(X_train)/len(X)))} %'
train_size_small = f'{int(np.round(100 * len(X_train_small)/len(X)))} %'

# **Neural Network**

In [6]:
# create pipeline (standardise features, then MLP) and fit it on train set
# random_state is fixed so that weight initialisation and batch sampling are
# deterministic -> scores are reproducible after "Restart Kernel & Run All"
pipe = make_pipeline(StandardScaler(), MLPRegressor(max_iter=1000, random_state=0))
pipe.fit(X_train, y_train)

# model evaluation - compare train and test set scores
# (cv=3 is passed through to the helper; it prints the CV scores and timing)
fun_train_score(pipe, X_train, y_train, cv=3, return_results=False)
fun_test_score(pipe, X_test, y_test, print_params=False)

# view error measures per instance size
MAPE, RMSE, df = fun_category_scores(pipe, X_test, y_test, display_df=True)

  CV MAPE train data:  3.9143 %
  CV RMSE train data:  0.9194
  CV computation time: 51 sec

MAPE test data: 3.5254 %
RMSE test data: 0.8443
MAPE and RMSE per instance size:


Number Customers,6,7,8,9,10,11,12,13,14
MAPE,2.3849,2.3641,2.8053,3.0264,3.373,3.6444,3.8261,3.993,4.6234
RMSE,0.7212,0.6851,0.7326,0.7617,0.778,0.8667,0.8875,0.9222,0.9751
