# Model 8b - MLPClassifier with standardization & hyperparameter tuning (5-fold cross-validation)

In [1]:
# import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
import math
import os

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV, train_test_split, cross_validate, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, f1_score, r2_score

from openpyxl import load_workbook

In [2]:
# get data
# the workbook lives in the "Data" folder next to the notebook's parent directory;
# os.path.join keeps the path valid on every operating system (not only Windows)
parentdir = os.path.dirname(os.getcwd())
data_path = os.path.join(parentdir, 'Data', 'SWedge Results.xlsx')
data = pd.read_excel(data_path, sheet_name="Probabilistic Values", engine='openpyxl')
data.head()

Unnamed: 0,Wedge ID,Safety Factor,Ln Safety Factor,Safety Factor Class,Safety Factor Class_1,Safety Factor Class_2,Safety Factor Class_3,Wedge Volume (m3),Wedge Weight (MN),Plunge Line of Intersection (°),...,Water Pressure Joint 2 (MPa),Water Pressure Basal Joint (MPa),Water Pressure Tension Crack (MPa),Ponded Water Depth (m),Seismic Alpha,Seismic Plunge (°),Seismic Trend (°),Maximum Persistence Joint 1 (m),Maximum Persistence Joint 2 (m),Maximum Persistence Basal Joint (m)
0,BFA 60 [0],1.082239,0.079032,11.0,6.0,1.05,1.05,6349.248707,171.429715,31.846178,...,,,,,,,,0,0,
1,BFA 60 [1],1.203906,0.185571,13.0,7.0,1.35,1.35,7701.148241,207.931003,31.905513,...,,,,,,,,0,0,
2,BFA 60 [2],0.896601,-0.109144,9.0,5.0,0.75,0.75,2341.641868,63.22433,36.973415,...,,,,,,,,0,0,
3,BFA 60 [3],0.680996,-0.384199,7.0,4.0,0.75,0.75,155.345062,4.194317,54.969435,...,,,,,,,,0,0,
4,BFA 60 [4],1.263948,0.23424,13.0,7.0,1.35,1.35,7468.340623,201.645197,29.688564,...,,,,,,,,0,0,


In [3]:
# (rows, columns) of the raw worksheet
print(data.shape)

(5000, 92)


In [4]:
# keep only the columns used for modelling:
# the two outputs (safety factor and its class) followed by the eight inputs
data_model = data[[
    "Safety Factor",
    "Safety Factor Class_2",
    "Dip of Joint 1 (°)",
    "Dip Direction of Joint 1 (°)",
    "Dip of Joint 2 (°)",
    "Dip Direction of Joint 2 (°)",
    "Dip of Slope (°)",
    "Dip Direction of Slope (°)",
    "Friction Angle of Joint 1 (°)",
    "Friction Angle of Joint 2 (°)",
]]
print(data_model.shape)
data_model.head()

(5000, 10)


Unnamed: 0,Safety Factor,Safety Factor Class_2,Dip of Joint 1 (°),Dip Direction of Joint 1 (°),Dip of Joint 2 (°),Dip Direction of Joint 2 (°),Dip of Slope (°),Dip Direction of Slope (°),Friction Angle of Joint 1 (°),Friction Angle of Joint 2 (°)
0,1.082239,1.05,39.265808,120.865923,51.646228,221.979277,58.840543,182.626968,29.567773,29.522638
1,1.203906,1.35,38.981309,128.836961,57.766382,235.428421,63.804918,181.820235,32.713619,29.079492
2,0.896601,0.75,42.032968,117.504566,62.427355,217.726775,58.134485,180.398207,29.660213,27.455866
3,0.680996,0.75,69.264568,137.90691,66.183726,246.195109,61.968796,182.439496,30.866657,34.401616
4,1.263948,1.35,46.728166,121.226945,50.803809,241.060589,60.832522,179.091174,28.789453,28.613525


In [5]:
# drop realizations that are not kinematically possible (rows with missing values),
# then drop duplicated rows
data_model = data_model.dropna().drop_duplicates()
print(data_model.shape)

(4992, 10)


In [6]:
# store the class labels as strings (they are cast back to float later for the pseudo-regression metrics)
data_model['Safety Factor Class_2'] = data_model['Safety Factor Class_2'].map(str)

### Hyperparameter tuning

In [7]:
# search space for the MLPClassifier hyperparameters

# hidden layer layouts, one tuple entry per hidden layer:
# (x,) -> 1 hidden layer, (x,y) -> 2 hidden layers, (x,y,z) -> 3 hidden layers, etc
hidden_layer_sizes = [(10,), (15,), (20,), (10, 10), (15, 10), (20, 10)]

# hidden layer activation function
activation = ['tanh', 'relu']

# weight optimization solver
solver = ['sgd', 'adam']

# strength of the L2 regularization term
# https://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_alpha.html
alpha = [0.0001, 0.001, 0.01, 0.1, 1]

# initial learning rate
learning_rate_init = [0.001, 0.01, 0.1]

# maximum number of iterations
max_iter = [200, 500]

# assemble the grid; every key carries the 'mlpclassifier__' prefix so that it
# addresses the MLPClassifier step of the pipeline
param_grid = dict([
    ('mlpclassifier__hidden_layer_sizes', hidden_layer_sizes),
    ('mlpclassifier__activation', activation),
    ('mlpclassifier__solver', solver),
    ('mlpclassifier__alpha', alpha),
    ('mlpclassifier__learning_rate_init', learning_rate_init),
    ('mlpclassifier__max_iter', max_iter),
])

print(param_grid)

{'mlpclassifier__hidden_layer_sizes': [(10,), (15,), (20,), (10, 10), (15, 10), (20, 10)], 'mlpclassifier__activation': ['tanh', 'relu'], 'mlpclassifier__solver': ['sgd', 'adam'], 'mlpclassifier__alpha': [0.0001, 0.001, 0.01, 0.1, 1], 'mlpclassifier__learning_rate_init': [0.001, 0.01, 0.1], 'mlpclassifier__max_iter': [200, 500]}


In [8]:
# random_state=123, early_stopping=True, validation_fraction=0.2

# helper: score an already fitted search object on one split (train or test)
def _score_split(search, x, y_class, y_regr):
    """Return (accuracy, macro F1, R2, RMSE) of `search` on a single split.

    Accuracy comes from `search.score` and F1 is macro-averaged over the classes.
    R2 and RMSE are a pseudo-regression: the predicted class labels are cast to
    float and compared with the actual safety factor values in `y_regr`.
    """
    y_pred = np.ravel(search.predict(x))
    y_pred_float = y_pred.astype(float)

    accuracy = search.score(x, y_class)
    f1 = f1_score(y_class, y_pred, average='macro')
    r2 = r2_score(y_regr, y_pred_float)
    rmse = math.sqrt(mean_squared_error(y_regr, y_pred_float))
    return accuracy, f1, r2, rmse


# function to get train & test accuracy, F1, R2 and RMSE for the specified dataset size
# where hyperparameter tuning was performed
def hyperparam_results(data, dataset_size, param_grid):
    """Tune and evaluate the standardized MLP classifier on a subsample of `data`.

    A subsample of `dataset_size` rows is drawn once (random_state=1) and then
    split 80/20 into train/test four times, with random_state 0, 1, 42 and 123.
    For every split a StandardScaler + MLPClassifier pipeline is tuned with a
    randomized search (100 candidates, 5-fold cross-validation) on the training
    part and scored on both parts.

    Parameters
    ----------
    data : DataFrame holding the eight input columns, "Safety Factor Class_2"
        (class labels, convertible to float) and "Safety Factor".
    dataset_size : number of rows to sample from `data`.
    param_grid : dict of pipeline parameter names -> candidate values, passed to
        RandomizedSearchCV as `param_distributions`.

    Returns
    -------
    Tuple of eight lists, each with one entry per split (in the order of the
    random_state values above): accuracy_train, f1_train, r2_train, rmse_train,
    accuracy_test, f1_test, r2_test, rmse_test.
    """
    random_state_val = [0, 1, 42, 123]
    feature_cols = ["Dip of Joint 1 (°)", "Dip Direction of Joint 1 (°)",
                    "Dip of Joint 2 (°)", "Dip Direction of Joint 2 (°)",
                    "Dip of Slope (°)", "Dip Direction of Slope (°)",
                    "Friction Angle of Joint 1 (°)", "Friction Angle of Joint 2 (°)"]
    class_col = "Safety Factor Class_2"
    regr_col = "Safety Factor"

    # get subsample of data: drawn from the `data` argument (not from a notebook global)
    # and only once, because the fixed random_state makes it identical for every split
    data_subsample = data.sample(n=dataset_size, random_state=1)

    train_scores = []
    test_scores = []
    for split_seed in random_state_val:
        # train/test split with different random_state values (0, 1, 42, and 123)
        train_subsample, test_subsample = train_test_split(data_subsample, test_size=0.2, random_state=split_seed)

        x_train_subsample = train_subsample[feature_cols]
        y_train_subsample = train_subsample[class_col].to_numpy()
        y_train_regr = train_subsample[regr_col].to_numpy()

        x_test_subsample = test_subsample[feature_cols]
        y_test_subsample = test_subsample[class_col].to_numpy()
        y_test_regr = test_subsample[regr_col].to_numpy()

        # train mlp model with hyperparameter tuning (5-fold cross-validation w/ randomized search)
        # make pipeline for mlp with pre-processing (standardizing the data)
        pipe_mlp = make_pipeline(StandardScaler(), MLPClassifier(random_state=123, early_stopping=True, validation_fraction=0.2))
        random_search = RandomizedSearchCV(estimator=pipe_mlp, param_distributions=param_grid, n_iter=100, n_jobs=-1, cv=5, random_state=123)
        random_search.fit(x_train_subsample, y_train_subsample)

        # training metrics first, then the tuned model is tested on the held-out part
        train_scores.append(_score_split(random_search, x_train_subsample, y_train_subsample, y_train_regr))
        test_scores.append(_score_split(random_search, x_test_subsample, y_test_subsample, y_test_regr))

    # transpose the per-split (accuracy, f1, r2, rmse) tuples into one list per metric
    accuracy_train_subsample_list, f1_train_subsample_list, r2_train_subsample_list, rmse_train_subsample_list = (list(values) for values in zip(*train_scores))
    accuracy_test_subsample_list, f1_test_subsample_list, r2_test_subsample_list, rmse_test_subsample_list = (list(values) for values in zip(*test_scores))

    return accuracy_train_subsample_list, f1_train_subsample_list, r2_train_subsample_list, rmse_train_subsample_list, accuracy_test_subsample_list, f1_test_subsample_list, r2_test_subsample_list, rmse_test_subsample_list
    

In [9]:
# tune and evaluate the MLP on a subsample of 100 data points (one list entry per train/test split)
(accuracy_train_100, f1_train_100, r2_train_100, rmse_train_100,
 accuracy_test_100, f1_test_100, r2_test_100, rmse_test_100) = hyperparam_results(
    data_model, 100, param_grid)



In [10]:
# accuracy for the 100-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_100, accuracy_test_100, sep='\n')

[0.8125, 0.825, 0.95, 0.9]
[0.45, 0.5, 0.4, 0.55]


In [11]:
# f1 for the 100-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_100, f1_test_100, sep='\n')

[0.5420677502310155, 0.6410880785880786, 0.7944698510114331, 0.6969327491504911]
[0.18727106227106227, 0.21036106750392464, 0.20476190476190473, 0.3068783068783069]


In [12]:
# r2 for the 100-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_100, r2_test_100, sep='\n')

[0.2120658951095975, 0.33304160095674074, 0.11668673569739607, 0.3541425517895398]
[0.2978687714729469, 0.03456543351328567, 0.3913809138840727, 0.6692524985102211]


In [13]:
# rmse for the 100-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_100, rmse_test_100, sep='\n')

[1.169699130057668, 1.0964819007128974, 1.257761005550985, 1.0820791151498201]
[0.565409196621737, 0.4242449580075721, 0.38802945260653465, 0.2250151772709672]


In [14]:
# accuracy gap (train minus test) for the 100-point subsample, one value per train/test split
accuracy_diff_100 = np.subtract(accuracy_train_100, accuracy_test_100)
accuracy_diff_100

array([0.3625, 0.325 , 0.55  , 0.35  ])

In [15]:
# f1 gap (train minus test) for the 100-point subsample, one value per train/test split
f1_diff_100 = np.subtract(f1_train_100, f1_test_100)
f1_diff_100

array([0.35479669, 0.43072701, 0.58970795, 0.39005444])

In [16]:
# r2 gap (train minus test) for the 100-point subsample, one value per train/test split
r2_diff_100 = np.subtract(r2_train_100, r2_test_100)
r2_diff_100

array([-0.08580288,  0.29847617, -0.27469418, -0.31510995])

In [17]:
# rmse gap (test minus train) for the 100-point subsample, one value per train/test split
rmse_diff_100 = np.subtract(rmse_test_100, rmse_train_100)
rmse_diff_100

array([-0.60428993, -0.67223694, -0.86973155, -0.85706394])

In [18]:
# tune and evaluate the MLP on a subsample of 150 data points (one list entry per train/test split)
(accuracy_train_150, f1_train_150, r2_train_150, rmse_train_150,
 accuracy_test_150, f1_test_150, r2_test_150, rmse_test_150) = hyperparam_results(
    data_model, 150, param_grid)



In [19]:
# accuracy for the 150-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_150, accuracy_test_150, sep='\n')

[0.8333333333333334, 0.9, 0.775, 0.9583333333333334]
[0.5666666666666667, 0.4666666666666667, 0.6, 0.7]


In [20]:
# f1 for the 150-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_150, f1_test_150, sep='\n')

[0.4681241411252576, 0.5533387445887445, 0.4307005494505494, 0.84158141304993]
[0.27308006535947715, 0.38936839557957575, 0.3384615384615385, 0.4313840155945419]


In [21]:
# r2 for the 150-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_150, r2_test_150, sep='\n')

[0.16142362453125791, 0.29312123312776317, 0.6374004881292118, 0.4160078694617573]
[0.44475838059332606, 0.8051201103542172, 0.07279361579955423, 0.650448485443785]


In [22]:
# rmse for the 150-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_150, rmse_test_150, sep='\n')

[1.0095659297732924, 0.9618700796634159, 0.29576470861873944, 0.8769249556438409]
[0.5333144741106971, 0.16445282686877985, 1.9973232851522682, 0.1778520925757154]


In [23]:
# accuracy gap (train minus test) for the 150-point subsample, one value per train/test split
accuracy_diff_150 = np.subtract(accuracy_train_150, accuracy_test_150)
accuracy_diff_150

array([0.26666667, 0.43333333, 0.175     , 0.25833333])

In [24]:
# f1 gap (train minus test) for the 150-point subsample, one value per train/test split
f1_diff_150 = np.subtract(f1_train_150, f1_test_150)
f1_diff_150

array([0.19504408, 0.16397035, 0.09223901, 0.4101974 ])

In [25]:
# r2 gap (train minus test) for the 150-point subsample, one value per train/test split
r2_diff_150 = np.subtract(r2_train_150, r2_test_150)
r2_diff_150

array([-0.28333476, -0.51199888,  0.56460687, -0.23444062])

In [26]:
# rmse gap (test minus train) for the 150-point subsample, one value per train/test split
rmse_diff_150 = np.subtract(rmse_test_150, rmse_train_150)
rmse_diff_150

array([-0.47625146, -0.79741725,  1.70155858, -0.69907286])

In [27]:
# tune and evaluate the MLP on a subsample of 200 data points (one list entry per train/test split)
(accuracy_train_200, f1_train_200, r2_train_200, rmse_train_200,
 accuracy_test_200, f1_test_200, r2_test_200, rmse_test_200) = hyperparam_results(
    data_model, 200, param_grid)



In [28]:
# accuracy for the 200-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_200, accuracy_test_200, sep='\n')

[0.78125, 0.825, 0.79375, 0.75]
[0.55, 0.625, 0.55, 0.725]


In [29]:
# f1 for the 200-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_200, f1_test_200, sep='\n')

[0.47465338891462094, 0.5815991838336704, 0.4662791707776228, 0.3689795015722702]
[0.4121594308351524, 0.25236870811905876, 0.24858870967741936, 0.4518518518518519]


In [30]:
# r2 for the 200-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_200, r2_test_200, sep='\n')

[0.3162380450079776, 0.411945071538376, 0.30030261178485196, 0.31252484755600174]
[0.5578740096622675, 0.8494890408482471, 0.6554242772671843, 0.5991875753742458]


In [31]:
# rmse for the 200-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_200, rmse_test_200, sep='\n')

[0.8448007111638296, 0.7722392000369038, 0.8424100824822758, 0.8485572411306079]
[0.23060231831892627, 0.18900583690233103, 0.2823866753363167, 0.1760982087655691]


In [32]:
# generalization gaps for the 200-point subsample, one value per train/test split

# accuracy: train minus test
accuracy_diff_200 = np.subtract(accuracy_train_200, accuracy_test_200)
print(accuracy_diff_200)

# f1: train minus test
f1_diff_200 = np.subtract(f1_train_200, f1_test_200)
print(f1_diff_200)

# r2: train minus test
r2_diff_200 = np.subtract(r2_train_200, r2_test_200)
print(r2_diff_200)

# rmse: test minus train
rmse_diff_200 = np.subtract(rmse_test_200, rmse_train_200)
print(rmse_diff_200)

[0.23125 0.2     0.24375 0.025  ]
[ 0.06249396  0.32923048  0.21769046 -0.08287235]
[-0.24163596 -0.43754397 -0.35512167 -0.28666273]
[-0.61419839 -0.58323336 -0.56002341 -0.67245903]


In [33]:
# tune and evaluate the MLP on a subsample of 250 data points (one list entry per train/test split)
(accuracy_train_250, f1_train_250, r2_train_250, rmse_train_250,
 accuracy_test_250, f1_test_250, r2_test_250, rmse_test_250) = hyperparam_results(
    data_model, 250, param_grid)



In [34]:
# accuracy for the 250-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_250, accuracy_test_250, sep='\n')

[0.735, 0.805, 0.725, 0.835]
[0.6, 0.64, 0.54, 0.68]


In [35]:
# f1 for the 250-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_250, f1_test_250, sep='\n')

[0.35741307749880924, 0.5212046055796056, 0.27333253702818916, 0.5537234474233498]
[0.2275081595004967, 0.31063988095238093, 0.2883838383838384, 0.2891158826118176]


In [36]:
# r2 for the 250-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_250, r2_test_250, sep='\n')

[0.4874107679690216, 0.41218527833964835, 0.2003807387449117, 0.42368028225967325]
[0.04034429911664417, 0.7809227371887649, 0.6676542643096885, 0.6317517897048427]


In [37]:
# rmse for the 250-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_250, rmse_test_250, sep='\n')

[0.3262864197954048, 0.7042384722826881, 0.8305347233465304, 0.6995681476658714]
[1.610682042638754, 0.21427131665069563, 0.19613637485906046, 0.26375260614027163]


In [38]:
# generalization gaps for the 250-point subsample, one value per train/test split

# accuracy: train minus test
accuracy_diff_250 = np.subtract(accuracy_train_250, accuracy_test_250)
print(accuracy_diff_250)

# f1: train minus test
f1_diff_250 = np.subtract(f1_train_250, f1_test_250)
print(f1_diff_250)

# r2: train minus test
r2_diff_250 = np.subtract(r2_train_250, r2_test_250)
print(r2_diff_250)

# rmse: test minus train
rmse_diff_250 = np.subtract(rmse_test_250, rmse_train_250)
print(rmse_diff_250)

[0.135 0.165 0.185 0.155]
[ 0.12990492  0.21056472 -0.0150513   0.26460756]
[ 0.44706647 -0.36873746 -0.46727353 -0.20807151]
[ 1.28439562 -0.48996716 -0.63439835 -0.43581554]


In [39]:
# tune and evaluate the MLP on a subsample of 750 data points (one list entry per train/test split)
(accuracy_train_750, f1_train_750, r2_train_750, rmse_train_750,
 accuracy_test_750, f1_test_750, r2_test_750, rmse_test_750) = hyperparam_results(
    data_model, 750, param_grid)



In [40]:
# accuracy for the 750-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_750, accuracy_test_750, sep='\n')

[0.87, 0.91, 0.9266666666666666, 0.9133333333333333]
[0.8733333333333333, 0.86, 0.8866666666666667, 0.8666666666666667]


In [41]:
# f1 for the 750-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_750, f1_test_750, sep='\n')

[0.6153132959073898, 0.6365934449085269, 0.630473993818772, 0.6832370958887254]
[0.7280659808381582, 0.61178646707102, 0.5631666327499661, 0.6806412623803928]


In [42]:
# r2 for the 750-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_750, r2_test_750, sep='\n')

[0.8870194439139656, 0.5968538641778821, 0.5847872630689986, 0.8998208568495705]
[0.3875574413614641, 0.884408955376615, 0.9207541939147925, 0.41696609227022896]


In [43]:
# rmse for the 750-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_750, rmse_test_750, sep='\n')

[0.15918901023245036, 0.4149537674482491, 0.4109225184336074, 0.14449556835391578]
[0.7840751784310942, 0.14910471932741823, 0.1471597361039898, 0.7887987955446003]


In [44]:
# generalization gaps for the 750-point subsample, one value per train/test split

# accuracy: train minus test
accuracy_diff_750 = np.subtract(accuracy_train_750, accuracy_test_750)
print(accuracy_diff_750)

# f1: train minus test
f1_diff_750 = np.subtract(f1_train_750, f1_test_750)
print(f1_diff_750)

# r2: train minus test
r2_diff_750 = np.subtract(r2_train_750, r2_test_750)
print(r2_diff_750)

# rmse: test minus train
rmse_diff_750 = np.subtract(rmse_test_750, rmse_train_750)
print(rmse_diff_750)

[-0.00333333  0.05        0.04        0.04666667]
[-0.11275268  0.02480698  0.06730736  0.00259583]
[ 0.499462   -0.28755509 -0.33596693  0.48285476]
[ 0.62488617 -0.26584905 -0.26376278  0.64430323]


In [45]:
# tune and evaluate the MLP on a subsample of 2000 data points (one list entry per train/test split)
(accuracy_train_2000, f1_train_2000, r2_train_2000, rmse_train_2000,
 accuracy_test_2000, f1_test_2000, r2_test_2000, rmse_test_2000) = hyperparam_results(
    data_model, 2000, param_grid)

In [46]:
# accuracy for the 2000-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(accuracy_train_2000, accuracy_test_2000, sep='\n')

[0.95125, 0.938125, 0.959375, 0.93125]
[0.9025, 0.9, 0.9225, 0.9125]


In [47]:
# f1 for the 2000-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(f1_train_2000, f1_test_2000, sep='\n')

[0.824761759917753, 0.6973564998181045, 0.8071732564346895, 0.6025404636017492]
[0.6776275909858499, 0.6514562925259408, 0.7156862717278015, 0.6644690414255633]


In [48]:
# r2 for the 2000-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(r2_train_2000, r2_test_2000, sep='\n')

[0.7305349009783579, 0.7226153125307601, 0.7219769808666169, 0.7853792095615689]
[0.8171585788735379, 0.8532447222509394, 0.9217845824745224, 0.42879713697474797]


In [49]:
# rmse for the 2000-point subsample: training (first line) and test (second line),
# one value per random_state of the train/test split
print(rmse_train_2000, rmse_test_2000, sep='\n')

[0.28031217482119547, 0.2923426634748405, 0.2957520336785653, 0.22243179616526523]
[0.23415388335780019, 0.18532214203844147, 0.12754552217218693, 0.557624155525232]


In [50]:
# generalization gaps for the 2000-point subsample, one value per train/test split

# accuracy: train minus test
accuracy_diff_2000 = np.subtract(accuracy_train_2000, accuracy_test_2000)
print(accuracy_diff_2000)

# f1: train minus test
f1_diff_2000 = np.subtract(f1_train_2000, f1_test_2000)
print(f1_diff_2000)

# r2: train minus test
r2_diff_2000 = np.subtract(r2_train_2000, r2_test_2000)
print(r2_diff_2000)

# rmse: test minus train
rmse_diff_2000 = np.subtract(rmse_test_2000, rmse_train_2000)
print(rmse_diff_2000)

[0.04875  0.038125 0.036875 0.01875 ]
[ 0.14713417  0.04590021  0.09148698 -0.06192858]
[-0.08662368 -0.13062941 -0.1998076   0.35658207]
[-0.04615829 -0.10702052 -0.16820651  0.33519236]
