# Regression with a multilayer perceptron (neural network)

In [1]:
import numpy as np
import scipy.sparse as sp
np.random.seed(12)

import warnings
# Silences every warning issued through the `warnings` module for the rest of
# the session. Comment this line out to see the warnings again.
warnings.filterwarnings('ignore')

from model_comparison import model_comparison
from resample import resample
import algorithms
import matplotlib.pyplot as plt
import seaborn as sns
import netCDF4 as n
from sklearn.linear_model import LinearRegression

from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib inline

from utils import train_test_split
from ann import NeuralNetMLP
#%matplotlib notebook

# Reading in the data

In [None]:
# Open the five NetCDF input files read-only.
path = "./files/"
filenames = [
    "specific_humidity_Europa_sp.nc",
    "relative_humidity_Europa_sp.nc",
    "pressure_Europa_sp.nc",
    "temperature_Europa_sp.nc",
    "total_cloud_cover_Europa_sp.nc",
]

# The unpacking order follows the order of `filenames` above.
specific, relative, pressure, temperature, cloud = [
    n.Dataset(path + filename, "r") for filename in filenames
]

In [12]:
# Display the metadata of the relative-humidity variable "r".
relative.variables["r"]

<class 'netCDF4._netCDF4.Variable'>
int16 r(time, level, latitude, longitude)
    scale_factor: 0.0020292186290296298
    add_offset: 56.78843174459784
    _FillValue: -32767
    missing_value: -32767
    units: %
    long_name: Relative humidity
    standard_name: relative_humidity
unlimited dimensions: time
current shape = (32, 6, 61, 77)
filling on

In [13]:
# Display the metadata of the specific-humidity variable "q".
specific.variables["q"]

<class 'netCDF4._netCDF4.Variable'>
int16 q(time, level, latitude, longitude)
    scale_factor: 2.1328394656708057e-07
    add_offset: 0.006964579015321184
    _FillValue: -32767
    missing_value: -32767
    units: kg kg**-1
    long_name: Specific humidity
    standard_name: specific_humidity
unlimited dimensions: time
current shape = (32, 6, 61, 77)
filling on

In [15]:
# Display the metadata of the total-cloud-cover variable "tcc".
cloud.variables["tcc"]

<class 'netCDF4._netCDF4.Variable'>
int16 tcc(time, latitude, longitude)
    scale_factor: 1.525948758640685e-05
    add_offset: 0.49999237025720666
    _FillValue: -32767
    missing_value: -32767
    units: (0 - 1)
    long_name: Total cloud cover
    standard_name: cloud_area_fraction
unlimited dimensions: time
current shape = (32, 61, 77)
filling on

In [16]:
# Display the metadata of the surface-pressure variable "sp".
pressure.variables["sp"]

<class 'netCDF4._netCDF4.Variable'>
int16 sp(time, latitude, longitude)
    scale_factor: 0.38392012421222893
    add_offset: 91282.2142899379
    _FillValue: -32767
    missing_value: -32767
    units: Pa
    long_name: Surface pressure
    standard_name: surface_air_pressure
unlimited dimensions: time
current shape = (32, 61, 77)
filling on

In [14]:
t = 0  # index of the time step to extract

# Total cloud cover for time step t.
tcc = cloud.variables["tcc"][t].data

# The humidity variables carry an extra `level` axis with six entries; level
# index 0 is used here (the original author treats it as the ground level --
# TODO confirm against the `level` coordinate of the files).
rel = relative.variables["r"][t, 0].data
spe = specific.variables["q"][t, 0].data

# Surface pressure and temperature ("t2m") for the same time step.
surf_pre = pressure.variables["sp"][t].data
temp = temperature.variables["t2m"][t].data

In [21]:
# Range check on specific humidity; this feature is passed through `logit`
# when the design matrix is built further down.
spe.max(), spe.min() # Use logit function

(0.011704174875934848, 0.0007891556264179329)

In [4]:
def logit_inv(x):
    """Map base-10 log-odds back to a fraction in [0, 1].

    This is the inverse of the notebook's ``logit``, which uses ``np.log10``;
    the previous natural-exponential form did not undo that transform. The tiny
    1e-12 offset that ``logit`` adds to avoid log(0) is not removed here, so
    round trips agree to roughly that precision.

    Parameters
    ----------
    x : float or array_like
        Values on the base-10 log-odds scale.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``10**x / (1 + 10**x)`` evaluated element-wise.
    """
    # Convert base-10 log-odds to natural log-odds.
    z = np.log(10.0) * np.asarray(x, dtype=float)
    # sigmoid(z) = exp(-log(1 + exp(-z))); logaddexp keeps this finite for large
    # |z|, where exp(z) / (1 + exp(z)) would evaluate to inf / inf = nan.
    return np.exp(-np.logaddexp(0.0, -z))

def logit(x):
    return np.log10((x + 1e-12)/(1+1e-12-x))

In [77]:
# Scratch check: log10 of the 1e-12 offset alone, i.e. roughly what `logit`
# returns for x = 0.
np.log10(0+1e-12)

-12.0

In [49]:
# Scratch check: how large log10 becomes when the denominator is tiny (1e-45).
np.log10(1./0.000000000000000000000000000000000000000000001)

45.0

In [71]:
# Preview the logit-transformed target as a column vector.
# `tcc` is already the 2-D field of a single time step, so the whole array is
# flattened; indexing `tcc[0]` would keep only one row of the grid, which
# cannot be reshaped to one row per grid point.
logit(tcc.flatten()).reshape((-1, 1))

array([[  4.5154168 ],
       [  4.5154168 ],
       [ 15.65355977],
       ...,
       [-11.69899892],
       [-11.69899892],
       [-11.69899892]])

In [5]:
# Build the regression target and design matrix for one time step.
# The fields `tcc`, `rel`, `spe`, `surf_pre` and `temp` were already reduced to
# a single time step (and a single level for the humidities) when they were
# extracted, so they are flattened directly. Indexing them again with
# `[t]` / `[t][0]` would select a single row or a scalar and fail.
t = 0

# Target: logit-transformed cloud cover, one row per grid point.
y = logit(tcc.flatten()).reshape((-1, 1))

# Features (one column each): relative humidity, logit of specific humidity,
# surface pressure, temperature.
# NOTE(review): the columns are on very different scales (pressure is in Pa);
# consider standardising them before training -- not changed here.
X = np.column_stack([
    rel.flatten(),
    logit(spe.flatten()),
    surf_pre.flatten(),
    temp.flatten(),
])

assert X.shape[0] == y.shape[0], "features and target must have one row per grid point"

In [25]:
# Split into train and test sets with the project's own `train_test_split`.
# With split_size=0.2 the recorded shapes are 3758 training rows and 939 test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, split_size=0.2)

In [74]:
# Shapes of the feature matrices after the split.
X_train.shape, X_test.shape

((3758, 4), (939, 4))

In [75]:
# Shapes of the target vectors after the split (column vectors).
y_train.shape, y_test.shape

((3758, 1), (939, 1))

# Two-layer MLP (neural network)

## Used to make the table in the report

In [76]:
# Grid search over number of epochs and learning rate for the project's own
# MLP with 30 hidden units and sigmoid activation.
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
epochs = [1, 10, 50, 100]
batchsize = [1]

# One report line per configuration; %s renders each value exactly like str().
report = "Sigmoid for nr of epochs %s and eta: %s  batchsize = %s   performance is %s"

for n_epochs in epochs:
    for learning_rate in eta:
        for batch_len in batchsize:
            ann = NeuralNetMLP(
                n_hidden=30,
                epochs=n_epochs,
                eta=learning_rate,
                shuffle=True,
                batch_size=batch_len,
                activation='sigmoid',
                tpe="regression",
            )

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)

            # Per the original note, eval_["valid_preform"] is a list of mean
            # MSE scores for the different epochs or batches.
            valid_scores = ann.eval_["valid_preform"]
            metric.append(valid_scores)
            # nanmean skips entries that became NaN during training.
            print(report % (n_epochs, learning_rate, batch_len, np.nanmean(valid_scores)))
        print("---------------------------")

Sigmoid for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 71.84957274944483
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 72.09490758139931
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 94.7770890143402
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 93.22126657108744
---------------------------
Sigmoid for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is nan
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 71.90269623657578
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 72.2658372708311
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 76.62190087295724
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.1  batchsize = 1  

# Performance with 10 neurons

In [20]:
# Grid search over number of epochs and learning rate for the project's own
# MLP with 10 hidden units.
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
#lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [1, 10, 50, 100]
batchsize = [1]

# If ELU is to be used, alpha has to be tuned as well.
# The activation is named once and reused for the printed label, so the report
# cannot claim a different activation than the one the network is built with
# (the label used to say "ELU" while the network used 'sigmoid').
activation = 'sigmoid'


for epoch in epochs:
    for e in eta:
        for batch in batchsize:
            ann = NeuralNetMLP(n_hidden=10,
                               epochs=epoch,
                               eta=e,
                               alpha = 0.0001,
                               shuffle=True,
                               batch_size=batch,
                               activation=activation,
                               tpe = "regression")

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)
            # Per the original note, eval_["valid_preform"] is a list of mean
            # MSE scores for the different epochs or batches.
            metric.append(ann.eval_["valid_preform"])
            # nanmean skips entries that became NaN during training.
            print(activation.capitalize() + " for nr of epochs "+str(epoch) + " and eta: " + str(e) + "  batchsize = " + str(batch) +"   performance is " + str(np.nanmean(ann.eval_["valid_preform"])))
        print("---------------------------")

ELU for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 0.14517574693599355
---------------------------
ELU for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 0.13781533835655296
---------------------------
ELU for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 0.16857858768692663
---------------------------
ELU for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 0.22421847112002705
---------------------------
ELU for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is nan
---------------------------
ELU for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 0.13766677974848643
---------------------------
ELU for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 0.13793272898120018
---------------------------
ELU for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 0.14065829446965844
---------------------------
ELU for nr of epochs 10 and eta: 0.1  batchsize = 1   performance is 0.20

# Performance with 5 neurons

In [21]:
# Grid search over number of epochs and learning rate for the project's own
# MLP with 5 hidden units.
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
#lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [1, 10, 50, 100]
batchsize = [1]

# If ELU is to be used, alpha has to be tuned as well.
# The activation is named once and reused for the printed label, so the report
# cannot claim a different activation than the one the network is built with
# (the label used to say "ELU" while the network used 'sigmoid').
activation = 'sigmoid'


for epoch in epochs:
    for e in eta:
        for batch in batchsize:
            ann = NeuralNetMLP(n_hidden=5,
                               epochs=epoch,
                               eta=e,
                               alpha = 0.0001,
                               shuffle=True,
                               batch_size=batch,
                               activation=activation,
                               tpe = "regression")

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)
            # Per the original note, eval_["valid_preform"] is a list of mean
            # MSE scores for the different epochs or batches.
            metric.append(ann.eval_["valid_preform"])
            # nanmean skips entries that became NaN during training.
            print(activation.capitalize() + " for nr of epochs "+str(epoch) + " and eta: " + str(e) + "  batchsize = " + str(batch) +"   performance is " + str(np.nanmean(ann.eval_["valid_preform"])))
        print("---------------------------")

ELU for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 0.19290120272586073
---------------------------
ELU for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 0.13747038903084544
---------------------------
ELU for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 0.13782926760583414
---------------------------
ELU for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 0.21863717437325492
---------------------------
ELU for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is 2863.0176969732825
---------------------------
ELU for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 0.1422118117514488
---------------------------
ELU for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 0.1377025627708945
---------------------------
ELU for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 0.1398838336441726
---------------------------
ELU for nr of epochs 10 and eta: 0.1  batchsize = 1   perform

In [None]:
# NOTE(review): `test_m` is not assigned in any cell visible in this notebook;
# it has to be defined before this cell can run on a fresh kernel.
len(test_m)

In [None]:
# Plot test and train MSE curves for the first two learning rates.
# NOTE(review): `test_m` and `train_m` are not assigned in any cell visible in
# this notebook; they have to be defined before this cell can run.
plt.figure(figsize=(10, 6))
epoch_axis = np.arange(len(test_m[0]))

for run in range(2):
    test_label = "testdata, eta = %.4f" % eta[run]
    train_label = "traindata, eta = %.4f" % eta[run]
    plt.plot(epoch_axis, test_m[run], label=test_label)
    plt.plot(epoch_axis, train_m[run], label=train_label)

plt.title(" Performance of MLPRegressor, sigmoid  ", fontsize=20)
# Fix both axes to the range 0..50 and keep the limits pyplot reports back.
xmin, xmax, ymin, ymax = plt.axis([0, 50, 0, 50])
plt.xlabel(" Epoch ", fontsize=15)
plt.ylabel("  MSE  ", fontsize=15)
plt.legend()
plt.savefig("./results/figures/MLPRegressor_sigmoid_MSE_50_epochs.png")

# Exploding gradients?

In [None]:
# Single run with a comparatively large learning rate, to inspect how the
# validation and training scores develop over the epochs.
ann = NeuralNetMLP(batch_size=10,
                   epochs=50,
                   n_hidden=30,
                   eta = 0.1,
                   activation="sigmoid",
                   tpe = "regression")

# The split above only produces X_test / y_test, which the grid-search cells
# already pass as the validation set. `X_valid` / `y_valid` were never defined,
# so the held-out set is used here as well.
ann.fit(X_train, y_train, X_test, y_test)
ann.predict(X_test)

# Display the recorded validation and training scores.
ann.eval_["valid_preform"], ann.eval_["train_preform"]

# Comparing to scikit-learn's MLPRegressor

In [22]:
from sklearn.neural_network import MLPRegressor
from utils import mean_squared_error

In [32]:
# Grid search with scikit-learn's MLPRegressor: SGD solver, logistic (sigmoid)
# activation, over hidden-layer size, learning rate and L2 penalty.
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

# scikit-learn expects a 1-D target for single-output regression; flattening
# also keeps predictions and targets the same shape when they are scored.
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

# The loop variable is deliberately not called `n`: that name is the netCDF4
# module alias imported at the top and would be overwritten by an integer.
for n_hidden in n_nodes:
    for learning_rate in eta:
        for penalty in lmd:
            mlp = MLPRegressor(hidden_layer_sizes=(n_hidden, ),
                               activation = 'logistic', # this is the sigmoid activation function
                               solver = "sgd",
                               alpha = penalty,
                               batch_size =10,
                               learning_rate_init=learning_rate)

            mlp.fit(X_train, y_train_1d)
            y_pred = np.ravel(mlp.predict(X_test))
            # Both arguments are 1-D. NOTE(review): utils.mean_squared_error is
            # not visible here; if it subtracts element-wise, a (n,) prediction
            # against a (n, 1) target would broadcast to an (n, n) matrix.
            mse = mean_squared_error(y_pred, y_test_1d)
            print(" for nr nodes:   " + str(n_hidden) + " eta : " + str(learning_rate) + " penalty l  " + str(penalty) + "   mse is " + str(mse))

 for nr nodes:   10 eta : 0.0001 penalty l  0.0001   mse is 45.28235982501334
 for nr nodes:   10 eta : 0.0001 penalty l  0.001   mse is 45.27609185643172
 for nr nodes:   10 eta : 0.0001 penalty l  0.01   mse is 45.27837742054369
 for nr nodes:   10 eta : 0.0001 penalty l  0.1   mse is 45.32869467048834
 for nr nodes:   10 eta : 0.0001 penalty l  1.0   mse is 45.31525862456218
 for nr nodes:   10 eta : 0.0001 penalty l  10   mse is 45.299083129186826
 for nr nodes:   10 eta : 0.001 penalty l  0.0001   mse is 45.20905899652674
 for nr nodes:   10 eta : 0.001 penalty l  0.001   mse is 45.22069079191869
 for nr nodes:   10 eta : 0.001 penalty l  0.01   mse is 45.237094655853035
 for nr nodes:   10 eta : 0.001 penalty l  0.1   mse is 45.24567057196377
 for nr nodes:   10 eta : 0.001 penalty l  1.0   mse is 45.21839460367592
 for nr nodes:   10 eta : 0.001 penalty l  10   mse is 45.47477546800019
 for nr nodes:   10 eta : 0.01 penalty l  0.0001   mse is 46.08555636371623
 for nr nodes:   1



 for nr nodes:   10 eta : 1.0 penalty l  0.0001   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  0.001   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  0.01   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  1.0   mse is nan
 for nr nodes:   10 eta : 1.0 penalty l  10   mse is 59.07243007589483
 for nr nodes:   30 eta : 0.0001 penalty l  0.0001   mse is 45.2072806687496
 for nr nodes:   30 eta : 0.0001 penalty l  0.001   mse is 45.258980869113024
 for nr nodes:   30 eta : 0.0001 penalty l  0.01   mse is 45.29970557546171
 for nr nodes:   30 eta : 0.0001 penalty l  0.1   mse is 45.26845078437098
 for nr nodes:   30 eta : 0.0001 penalty l  1.0   mse is 45.254644652403236
 for nr nodes:   30 eta : 0.0001 penalty l  10   mse is 45.22886727928217
 for nr nodes:   30 eta : 0.001 penalty l  0.0001   mse is 45.232433900162654
 for nr nodes:   30 eta : 0.001 penalty l  0.001   mse is 45.24347303633534
 for nr nodes:   30 eta : 0.001 penalty l  0.01   mse is 45.21745396053589
 for nr nodes:   30 eta : 0.001 penalty l  0.1   mse is 45.78438736892228
 for nr nodes:   30 eta : 0.001 penalty l  1.0   mse is 45.29776328802606
 for nr nodes:   30 eta : 0.001 penalt



 for nr nodes:   30 eta : 0.1 penalty l  0.0001   mse is nan




 for nr nodes:   30 eta : 0.1 penalty l  0.001   mse is nan




 for nr nodes:   30 eta : 0.1 penalty l  0.01   mse is nan




 for nr nodes:   30 eta : 0.1 penalty l  0.1   mse is nan
 for nr nodes:   30 eta : 0.1 penalty l  1.0   mse is 54.67161780848756
 for nr nodes:   30 eta : 0.1 penalty l  10   mse is 50.66711636483213




 for nr nodes:   30 eta : 1.0 penalty l  0.0001   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  0.001   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  0.01   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  1.0   mse is nan
 for nr nodes:   30 eta : 1.0 penalty l  10   mse is 48.19783036028535
 for nr nodes:   50 eta : 0.0001 penalty l  0.0001   mse is 45.21105345287795
 for nr nodes:   50 eta : 0.0001 penalty l  0.001   mse is 45.20737708409501
 for nr nodes:   50 eta : 0.0001 penalty l  0.01   mse is 45.28074939380891
 for nr nodes:   50 eta : 0.0001 penalty l  0.1   mse is 45.34820424873189
 for nr nodes:   50 eta : 0.0001 penalty l  1.0   mse is 45.39538087947466
 for nr nodes:   50 eta : 0.0001 penalty l  10   mse is 45.38471539447498
 for nr nodes:   50 eta : 0.001 penalty l  0.0001   mse is 45.20728926347032
 for nr nodes:   50 eta : 0.001 penalty l  0.001   mse is 45.37254263896045
 for nr nodes:   50 eta : 0.001 penalty l  0.01   mse is 45.623233636653794
 for nr nodes:   50 eta : 0.001 penalty l  0.1   mse is 45.20129324423518
 for nr nodes:   50 eta : 0.001 penalty l  1.0   mse is 45.20299164049076
 for nr nodes:   50 eta : 0.001 penalty



 for nr nodes:   50 eta : 0.1 penalty l  0.0001   mse is nan




 for nr nodes:   50 eta : 0.1 penalty l  0.001   mse is nan




 for nr nodes:   50 eta : 0.1 penalty l  0.01   mse is nan




 for nr nodes:   50 eta : 0.1 penalty l  0.1   mse is nan




 for nr nodes:   50 eta : 0.1 penalty l  1.0   mse is nan
 for nr nodes:   50 eta : 0.1 penalty l  10   mse is 46.47044122726747




 for nr nodes:   50 eta : 1.0 penalty l  0.0001   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  0.001   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  0.01   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  1.0   mse is nan
 for nr nodes:   50 eta : 1.0 penalty l  10   mse is 47.26953348052187
 for nr nodes:   100 eta : 0.0001 penalty l  0.0001   mse is 45.484149168019044
 for nr nodes:   100 eta : 0.0001 penalty l  0.001   mse is 45.2075989763467
 for nr nodes:   100 eta : 0.0001 penalty l  0.01   mse is 45.353004033872125
 for nr nodes:   100 eta : 0.0001 penalty l  0.1   mse is 45.39416538632533
 for nr nodes:   100 eta : 0.0001 penalty l  1.0   mse is 45.306281659661494
 for nr nodes:   100 eta : 0.0001 penalty l  10   mse is 45.37117271509164
 for nr nodes:   100 eta : 0.001 penalty l  0.0001   mse is 45.44187192917479
 for nr nodes:   100 eta : 0.001 penalty l  0.001   mse is 45.328553527994586
 for nr nodes:   100 eta : 0.001 penalty l  0.01   mse is 45.60726955974989
 for nr nodes:   100 eta : 0.001 penalty l  0.1   mse is 47.09627000437789
 for nr nodes:   100 eta : 0.001 penalty l  1.0   mse is 46.01167608072215
 for nr nodes:   100 eta :



 for nr nodes:   100 eta : 0.1 penalty l  0.0001   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  0.001   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  0.01   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  0.1   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  0.0001   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  0.001   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  0.01   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  1.0   mse is nan
 for nr nodes:   100 eta : 1.0 penalty l  10   mse is 134.81167268715743
 for nr nodes:   500 eta : 0.0001 penalty l  0.0001   mse is 45.39024484746799
 for nr nodes:   500 eta : 0.0001 penalty l  0.001   mse is 45.28034248050086
 for nr nodes:   500 eta : 0.0001 penalty l  0.01   mse is 45.19910914294125
 for nr nodes:   500 eta : 0.0001 penalty l  0.1   mse is 46.29838414312053
 for nr nodes:   500 eta : 0.0001 penalty l  1.0   mse is 45.62904153358959
 for nr nodes:   500 eta : 0.0001 penalty l  10   mse is 45.21302131782577
 for nr nodes:   500 eta : 0.001 penalty l  0.0001   mse is 49.80359781794127
 for nr nodes:   500 eta : 0.001 penalty l  0.001   mse is 45.201040006628716
 for nr nodes:   500 eta : 0.001 penalty l  0.01   mse is 49.30923285286054
 for nr nodes:   500 eta : 0.001 penalty l  0.1   mse is 48.29965763509984
 for nr nodes:   500 eta : 0.001 penalty l  1.0   mse is 49.823817105821846
 for nr nodes:   500 eta



 for nr nodes:   500 eta : 0.01 penalty l  0.0001   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  0.001   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  0.01   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  0.1   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  1.0   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  10   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  0.0001   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  0.001   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  0.01   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  0.1   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   500 eta : 1.0 penalty l  0.0001   mse is nan




 for nr nodes:   500 eta : 1.0 penalty l  0.001   mse is nan




 for nr nodes:   500 eta : 1.0 penalty l  0.01   mse is nan




 for nr nodes:   500 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   500 eta : 1.0 penalty l  1.0   mse is nan
 for nr nodes:   500 eta : 1.0 penalty l  10   mse is 47.04548228108013


# Testing the Adam optimizer: converges every time, but the results are no better

In [33]:
# Grid search with scikit-learn's MLPRegressor: Adam solver, logistic (sigmoid)
# activation, over hidden-layer size, learning rate and L2 penalty.
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

# scikit-learn expects a 1-D target for single-output regression; flattening
# also keeps predictions and targets the same shape when they are scored.
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

# The loop variable is deliberately not called `n`: that name is the netCDF4
# module alias imported at the top and would be overwritten by an integer.
for n_hidden in n_nodes:
    for learning_rate in eta:
        for penalty in lmd:
            mlp = MLPRegressor(hidden_layer_sizes=(n_hidden, ),
                               activation = 'logistic', # this is the sigmoid activation function
                               solver = "adam",
                               alpha = penalty,
                               batch_size =10,
                               learning_rate_init=learning_rate)

            mlp.fit(X_train, y_train_1d)
            y_pred = np.ravel(mlp.predict(X_test))
            # Both arguments are 1-D. NOTE(review): utils.mean_squared_error is
            # not visible here; if it subtracts element-wise, a (n,) prediction
            # against a (n, 1) target would broadcast to an (n, n) matrix.
            mse = mean_squared_error(y_pred, y_test_1d)
            print(" for nr nodes:   " + str(n_hidden) + " eta : " + str(learning_rate) + " penalty l  " + str(penalty) + "   mse is " + str(mse))

 for nr nodes:   10 eta : 0.0001 penalty l  0.0001   mse is 45.373028928000636
 for nr nodes:   10 eta : 0.0001 penalty l  0.001   mse is 46.790845347155745
 for nr nodes:   10 eta : 0.0001 penalty l  0.01   mse is 45.306195905067604
 for nr nodes:   10 eta : 0.0001 penalty l  0.1   mse is 47.11456317774389
 for nr nodes:   10 eta : 0.0001 penalty l  1.0   mse is 46.39152481277635
 for nr nodes:   10 eta : 0.0001 penalty l  10   mse is 46.422861923336974
 for nr nodes:   10 eta : 0.001 penalty l  0.0001   mse is 45.3863877977941
 for nr nodes:   10 eta : 0.001 penalty l  0.001   mse is 45.28696605337305
 for nr nodes:   10 eta : 0.001 penalty l  0.01   mse is 45.289459583545224
 for nr nodes:   10 eta : 0.001 penalty l  0.1   mse is 45.262205747144485
 for nr nodes:   10 eta : 0.001 penalty l  1.0   mse is 45.379169578739074
 for nr nodes:   10 eta : 0.001 penalty l  10   mse is 45.61156267328043
 for nr nodes:   10 eta : 0.01 penalty l  0.0001   mse is 45.202582385961385
 for nr nodes

 for nr nodes:   100 eta : 0.1 penalty l  0.01   mse is 47.3790662378246
 for nr nodes:   100 eta : 0.1 penalty l  0.1   mse is 46.81784490130995
 for nr nodes:   100 eta : 0.1 penalty l  1.0   mse is 45.61315572571533
 for nr nodes:   100 eta : 0.1 penalty l  10   mse is 51.52674061199963
 for nr nodes:   100 eta : 1.0 penalty l  0.0001   mse is 59.54545841601985
 for nr nodes:   100 eta : 1.0 penalty l  0.001   mse is 49.42107671307506
 for nr nodes:   100 eta : 1.0 penalty l  0.01   mse is 50.381632152702224
 for nr nodes:   100 eta : 1.0 penalty l  0.1   mse is 58.703410390392186
 for nr nodes:   100 eta : 1.0 penalty l  1.0   mse is 76.34668290668958
 for nr nodes:   100 eta : 1.0 penalty l  10   mse is 45.201123388161214
 for nr nodes:   500 eta : 0.0001 penalty l  0.0001   mse is 45.25586663412331
 for nr nodes:   500 eta : 0.0001 penalty l  0.001   mse is 45.38037247252661
 for nr nodes:   500 eta : 0.0001 penalty l  0.01   mse is 45.310122269383925
 for nr nodes:   500 eta : 0

# Testing the ReLU activation function

In [None]:
# Grid search with scikit-learn's MLPRegressor: SGD solver, ReLU activation,
# over hidden-layer size, learning rate and L2 penalty.
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

# scikit-learn expects a 1-D target for single-output regression; flattening
# also keeps predictions and targets the same shape when they are scored.
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

# The loop variable is deliberately not called `n`: that name is the netCDF4
# module alias imported at the top and would be overwritten by an integer.
for n_hidden in n_nodes:
    for learning_rate in eta:
        for penalty in lmd:
            mlp = MLPRegressor(hidden_layer_sizes=(n_hidden, ),
                               activation = 'relu', # rectified linear unit
                               solver = "sgd",
                               alpha = penalty,
                               batch_size =10,
                               learning_rate_init=learning_rate)

            mlp.fit(X_train, y_train_1d)
            y_pred = np.ravel(mlp.predict(X_test))
            # Both arguments are 1-D. NOTE(review): utils.mean_squared_error is
            # not visible here; if it subtracts element-wise, a (n,) prediction
            # against a (n, 1) target would broadcast to an (n, n) matrix.
            mse = mean_squared_error(y_pred, y_test_1d)
            print(" for nr nodes:   " + str(n_hidden) + " eta : " + str(learning_rate) + " penalty l  " + str(penalty) + "   mse is " + str(mse))

 for nr nodes:   10 eta : 0.0001 penalty l  0.0001   mse is 9.308603093307949e+23
 for nr nodes:   10 eta : 0.0001 penalty l  0.001   mse is 1.9802784596634045e+18
 for nr nodes:   10 eta : 0.0001 penalty l  0.01   mse is 130.95670985343872
 for nr nodes:   10 eta : 0.0001 penalty l  0.1   mse is 48.79353598916291




 for nr nodes:   10 eta : 0.0001 penalty l  1.0   mse is 45.28680461905824
 for nr nodes:   10 eta : 0.0001 penalty l  10   mse is 45.2783091138014
 for nr nodes:   10 eta : 0.001 penalty l  0.0001   mse is 3990899017764778.0
 for nr nodes:   10 eta : 0.001 penalty l  0.001   mse is 2.9405664378847686e+17
 for nr nodes:   10 eta : 0.001 penalty l  0.01   mse is 82527217878.32169




 for nr nodes:   10 eta : 0.001 penalty l  0.1   mse is 45.28564459858244
 for nr nodes:   10 eta : 0.001 penalty l  1.0   mse is 45.32109249542633
 for nr nodes:   10 eta : 0.001 penalty l  10   mse is 45.28876889844545
 for nr nodes:   10 eta : 0.01 penalty l  0.0001   mse is 45.79193200334896
 for nr nodes:   10 eta : 0.01 penalty l  0.001   mse is 45.26555979361497




 for nr nodes:   10 eta : 0.01 penalty l  0.01   mse is 45.43496666459698
 for nr nodes:   10 eta : 0.01 penalty l  0.1   mse is 45.19913521039744
 for nr nodes:   10 eta : 0.01 penalty l  1.0   mse is 45.27638339763154




 for nr nodes:   10 eta : 0.01 penalty l  10   mse is nan
 for nr nodes:   10 eta : 0.1 penalty l  0.0001   mse is 45.440836669721506




 for nr nodes:   10 eta : 0.1 penalty l  0.001   mse is 45.201294739528514




 for nr nodes:   10 eta : 0.1 penalty l  0.01   mse is 46.22849119100998
 for nr nodes:   10 eta : 0.1 penalty l  0.1   mse is 45.23295709202111




 for nr nodes:   10 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   10 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  0.0001   mse is 45.37286058846592




 for nr nodes:   10 eta : 1.0 penalty l  0.001   mse is 55.947977117784696
 for nr nodes:   10 eta : 1.0 penalty l  0.01   mse is 77.82869130157012




 for nr nodes:   10 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  1.0   mse is nan




 for nr nodes:   10 eta : 1.0 penalty l  10   mse is nan
 for nr nodes:   30 eta : 0.0001 penalty l  0.0001   mse is 3.28354401272616e+23
 for nr nodes:   30 eta : 0.0001 penalty l  0.001   mse is 1.0717767202806046e+25
 for nr nodes:   30 eta : 0.0001 penalty l  0.01   mse is 9.983680515537177e+18
 for nr nodes:   30 eta : 0.0001 penalty l  0.1   mse is 3.3584921136102875e+23




 for nr nodes:   30 eta : 0.0001 penalty l  1.0   mse is 45.2709315049567
 for nr nodes:   30 eta : 0.0001 penalty l  10   mse is 4.492480537166175e+28
 for nr nodes:   30 eta : 0.001 penalty l  0.0001   mse is 414955932789.8378
 for nr nodes:   30 eta : 0.001 penalty l  0.001   mse is 2.4808006696552614e+17
 for nr nodes:   30 eta : 0.001 penalty l  0.01   mse is 1.6828876813916418e+16




 for nr nodes:   30 eta : 0.001 penalty l  0.1   mse is 45.26241102967534
 for nr nodes:   30 eta : 0.001 penalty l  1.0   mse is 45.30755696017007
 for nr nodes:   30 eta : 0.001 penalty l  10   mse is 3.756490565118118e+135
 for nr nodes:   30 eta : 0.01 penalty l  0.0001   mse is 45.678523824484934
 for nr nodes:   30 eta : 0.01 penalty l  0.001   mse is 45.20834086894912




 for nr nodes:   30 eta : 0.01 penalty l  0.01   mse is 46.87061098726147
 for nr nodes:   30 eta : 0.01 penalty l  0.1   mse is 45.90875020271552
 for nr nodes:   30 eta : 0.01 penalty l  1.0   mse is 170059.71524443425




 for nr nodes:   30 eta : 0.01 penalty l  10   mse is nan
 for nr nodes:   30 eta : 0.1 penalty l  0.0001   mse is 52.403694483944896




 for nr nodes:   30 eta : 0.1 penalty l  0.001   mse is 49.062912512331614




 for nr nodes:   30 eta : 0.1 penalty l  0.01   mse is 45.24128076202347
 for nr nodes:   30 eta : 0.1 penalty l  0.1   mse is 380233.2790726699




 for nr nodes:   30 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   30 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  0.0001   mse is 50.784262723523945
 for nr nodes:   30 eta : 1.0 penalty l  0.001   mse is 45.28074150914871
 for nr nodes:   30 eta : 1.0 penalty l  0.01   mse is 45.57498295630577




 for nr nodes:   30 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  1.0   mse is nan




 for nr nodes:   30 eta : 1.0 penalty l  10   mse is nan
 for nr nodes:   50 eta : 0.0001 penalty l  0.0001   mse is 4.5403112975424323e+24
 for nr nodes:   50 eta : 0.0001 penalty l  0.001   mse is 2.4340009709811096e+22
 for nr nodes:   50 eta : 0.0001 penalty l  0.01   mse is 1.4427443931744347e+24
 for nr nodes:   50 eta : 0.0001 penalty l  0.1   mse is 7.492551697498456e+23




 for nr nodes:   50 eta : 0.0001 penalty l  1.0   mse is 45.282616544963425
 for nr nodes:   50 eta : 0.0001 penalty l  10   mse is 1.8242140441953017e+51
 for nr nodes:   50 eta : 0.001 penalty l  0.0001   mse is 1.033263400705195e+16
 for nr nodes:   50 eta : 0.001 penalty l  0.001   mse is 4471787933524.834
 for nr nodes:   50 eta : 0.001 penalty l  0.01   mse is 1577639278651110.2




 for nr nodes:   50 eta : 0.001 penalty l  0.1   mse is 45.23550699991762
 for nr nodes:   50 eta : 0.001 penalty l  1.0   mse is 45.30451868142538




 for nr nodes:   50 eta : 0.001 penalty l  10   mse is nan
 for nr nodes:   50 eta : 0.01 penalty l  0.0001   mse is 45.51018288085773
 for nr nodes:   50 eta : 0.01 penalty l  0.001   mse is 45.199208994923154




 for nr nodes:   50 eta : 0.01 penalty l  0.01   mse is 45.208223277919636
 for nr nodes:   50 eta : 0.01 penalty l  0.1   mse is 45.29065596451068
 for nr nodes:   50 eta : 0.01 penalty l  1.0   mse is 45.20528183098614




 for nr nodes:   50 eta : 0.01 penalty l  10   mse is nan
 for nr nodes:   50 eta : 0.1 penalty l  0.0001   mse is 46.38187692004034




 for nr nodes:   50 eta : 0.1 penalty l  0.001   mse is 45.19918722849603
 for nr nodes:   50 eta : 0.1 penalty l  0.01   mse is 45.340499918501266
 for nr nodes:   50 eta : 0.1 penalty l  0.1   mse is 45.58433302319376




 for nr nodes:   50 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   50 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  0.0001   mse is 53.15560941885829
 for nr nodes:   50 eta : 1.0 penalty l  0.001   mse is 64.55600223771137
 for nr nodes:   50 eta : 1.0 penalty l  0.01   mse is 47.149574491777614




 for nr nodes:   50 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  1.0   mse is nan




 for nr nodes:   50 eta : 1.0 penalty l  10   mse is nan
 for nr nodes:   100 eta : 0.0001 penalty l  0.0001   mse is 4.5796822390451474e+24
 for nr nodes:   100 eta : 0.0001 penalty l  0.001   mse is 6.136956640756174e+22
 for nr nodes:   100 eta : 0.0001 penalty l  0.01   mse is 1.6300822500015616e+24
 for nr nodes:   100 eta : 0.0001 penalty l  0.1   mse is 1.3394876491939755e+22




 for nr nodes:   100 eta : 0.0001 penalty l  1.0   mse is 45.27585195430644
 for nr nodes:   100 eta : 0.0001 penalty l  10   mse is inf
 for nr nodes:   100 eta : 0.001 penalty l  0.0001   mse is 3571035900249674.5
 for nr nodes:   100 eta : 0.001 penalty l  0.001   mse is 535022239114808.75
 for nr nodes:   100 eta : 0.001 penalty l  0.01   mse is 3819721019367405.5




 for nr nodes:   100 eta : 0.001 penalty l  0.1   mse is 45.22219506688872
 for nr nodes:   100 eta : 0.001 penalty l  1.0   mse is 45.202263966394256




 for nr nodes:   100 eta : 0.001 penalty l  10   mse is nan
 for nr nodes:   100 eta : 0.01 penalty l  0.0001   mse is 45.25759235074141
 for nr nodes:   100 eta : 0.01 penalty l  0.001   mse is 45.23443326395183




 for nr nodes:   100 eta : 0.01 penalty l  0.01   mse is 45.44326979934911
 for nr nodes:   100 eta : 0.01 penalty l  0.1   mse is 1.2287463169063014e+107
 for nr nodes:   100 eta : 0.01 penalty l  1.0   mse is 1.5907833589752722e+125




 for nr nodes:   100 eta : 0.01 penalty l  10   mse is nan
 for nr nodes:   100 eta : 0.1 penalty l  0.0001   mse is 46.15159583967677




 for nr nodes:   100 eta : 0.1 penalty l  0.001   mse is 49.07717801236209
 for nr nodes:   100 eta : 0.1 penalty l  0.01   mse is 45.20356509895192




 for nr nodes:   100 eta : 0.1 penalty l  0.1   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  1.0   mse is nan




 for nr nodes:   100 eta : 0.1 penalty l  10   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  0.0001   mse is 51.26544557400048
 for nr nodes:   100 eta : 1.0 penalty l  0.001   mse is 122.5583335522208
 for nr nodes:   100 eta : 1.0 penalty l  0.01   mse is 46.93557530255838




 for nr nodes:   100 eta : 1.0 penalty l  0.1   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  1.0   mse is nan




 for nr nodes:   100 eta : 1.0 penalty l  10   mse is nan
 for nr nodes:   500 eta : 0.0001 penalty l  0.0001   mse is 9.706788120335861e+19
 for nr nodes:   500 eta : 0.0001 penalty l  0.001   mse is 3.432057675342151e+34
 for nr nodes:   500 eta : 0.0001 penalty l  0.01   mse is 1.5786908387526802e+18
 for nr nodes:   500 eta : 0.0001 penalty l  0.1   mse is 4.355852608994095e+19




 for nr nodes:   500 eta : 0.0001 penalty l  1.0   mse is 45.27237866890797




 for nr nodes:   500 eta : 0.0001 penalty l  10   mse is nan
 for nr nodes:   500 eta : 0.001 penalty l  0.0001   mse is 574511600805647.8
 for nr nodes:   500 eta : 0.001 penalty l  0.001   mse is 1723876367372286.2
 for nr nodes:   500 eta : 0.001 penalty l  0.01   mse is 30766180772154.566




 for nr nodes:   500 eta : 0.001 penalty l  0.1   mse is 45.30491382560577




 for nr nodes:   500 eta : 0.001 penalty l  1.0   mse is nan




 for nr nodes:   500 eta : 0.001 penalty l  10   mse is nan
 for nr nodes:   500 eta : 0.01 penalty l  0.0001   mse is 45.2064831301744
 for nr nodes:   500 eta : 0.01 penalty l  0.001   mse is 45.39653672931652




 for nr nodes:   500 eta : 0.01 penalty l  0.01   mse is 46.3435071510835




 for nr nodes:   500 eta : 0.01 penalty l  0.1   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  1.0   mse is nan




 for nr nodes:   500 eta : 0.01 penalty l  10   mse is nan
 for nr nodes:   500 eta : 0.1 penalty l  0.0001   mse is 46.28310845271867




 for nr nodes:   500 eta : 0.1 penalty l  0.001   mse is 45.47867434581389




 for nr nodes:   500 eta : 0.1 penalty l  0.01   mse is nan




 for nr nodes:   500 eta : 0.1 penalty l  0.1   mse is nan


# ADAM + RELU

In [None]:
# Grid search with scikit-learn's MLPRegressor: Adam solver, ReLU activation,
# over hidden-layer size, learning rate and L2 penalty.
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]

# scikit-learn expects a 1-D target for single-output regression; flattening
# also keeps predictions and targets the same shape when they are scored.
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

# The loop variable is deliberately not called `n`: that name is the netCDF4
# module alias imported at the top and would be overwritten by an integer.
for n_hidden in n_nodes:
    for learning_rate in eta:
        for penalty in lmd:
            mlp = MLPRegressor(hidden_layer_sizes=(n_hidden, ),
                               activation = 'relu', # rectified linear unit
                               solver = "adam", # was misspelled "atam", which is not a valid solver name
                               alpha = penalty,
                               batch_size =10,
                               learning_rate_init=learning_rate)

            mlp.fit(X_train, y_train_1d)
            y_pred = np.ravel(mlp.predict(X_test))
            # Both arguments are 1-D. NOTE(review): utils.mean_squared_error is
            # not visible here; if it subtracts element-wise, a (n,) prediction
            # against a (n, 1) target would broadcast to an (n, n) matrix.
            mse = mean_squared_error(y_pred, y_test_1d)
            print(" for nr nodes:   " + str(n_hidden) + " eta : " + str(learning_rate) + " penalty l  " + str(penalty) + "   mse is " + str(mse))