# Part d: regression on a multilayer perceptron (neural network)

In [9]:
import numpy as np
import scipy.sparse as sp
np.random.seed(12)

import warnings
#Comment this to turn on warnings
warnings.filterwarnings('ignore')

from model_comparison import model_comparison
from resample import resample
import algorithms
import matplotlib.pyplot as plt
import seaborn as sns
import netCDF4 as n
from sklearn.linear_model import LinearRegression

from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib inline

from utils import train_test_split
from ann import NeuralNetMLP
#%matplotlib notebook

# Reading in the data

In [30]:
# reading test
path = "./files/"
filenames = ["specific_humidity_Europa_sp.nc", "relative_humidity_Europa_sp.nc", "pressure_Europa_sp.nc",  
             "temperature_Europa_sp.nc", "total_cloud_cover_Europa_sp.nc"]


cloud = n.Dataset(path + filenames[-1], "r")
relative = n.Dataset(path + filenames[1], "r")
specific = n.Dataset(path + filenames[0], "r")
pressure = n.Dataset(path + filenames[2], "r")
temperature = n.Dataset(path + filenames[3], "r")

#print(cloud.variables)
tcc = cloud.variables["tcc"][:].data

# Retriving ground values, these are available at six different pressure levels. 
rel = relative.variables["r"][:].data
#level = relative.variables["level"][:][0].data
spe = specific.variables["q"][:].data

surf_pre = pressure.variables["sp"][:].data
temp = temperature.variables["t2m"][:].data

In [33]:
rel[0][0][:][:].shape

(61, 77)

In [70]:
def logit_inv(x): # sigmoid?
    return np.exp(x)/(1+np.exp(x))

def logit(x):
    return np.log10((x + 1e-12)/(1+1e-12-x))

In [77]:
np.log10(0+1e-12)

-12.0

In [49]:
np.log10(1./0.000000000000000000000000000000000000000000001)

45.0

In [71]:
logit(tcc[0].flatten()).reshape((4697, 1))

array([[  4.5154168 ],
       [  4.5154168 ],
       [ 15.65355977],
       ...,
       [-11.69899892],
       [-11.69899892],
       [-11.69899892]])

In [72]:
# for one certain timestep 
t = 6
# tcc[t].flatten().reshape((4697, 1))
y = logit(tcc[t].flatten()).reshape((4697, 1))
X = np.array([rel[t][0][:][:].flatten(), spe[t][0][:][:].flatten(), surf_pre[t].flatten(), temp[t].flatten()]).transpose()

In [73]:
X_train, X_test, y_train, y_test = train_test_split(X, y, split_size=0.2)

In [74]:
X_train.shape, X_test.shape

((3758, 4), (939, 4))

In [75]:
y_train.shape, y_test.shape

((3758, 1), (939, 1))

# Two layer MLP (Neural network )

## Used to make table in report. 

In [76]:
# Experimental setup
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
#lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [1, 10, 50, 100]
batchsize = [1]


for epoch in epochs:
    for e in eta:
        for batch in batchsize:
            ann = NeuralNetMLP(n_hidden=30, 
                               epochs=epoch, 
                               eta=e, 
                               shuffle=True,
                               batch_size=batch, 
                               activation='sigmoid', 
                               tpe = "regression")

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)
            # returns a list of the mean mse score for different epochs or batches
            metric.append(ann.eval_["valid_preform"])
            print("Sigmoid for nr of epochs "+str(epoch) + " and eta: " + str(e) + "  batchsize = " + str(batch) +"   performance is " + str(np.nanmean(ann.eval_["valid_preform"])))
        print("---------------------------")

Sigmoid for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 71.84957274944483
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 72.09490758139931
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 94.7770890143402
---------------------------
Sigmoid for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 93.22126657108744
---------------------------
Sigmoid for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is nan
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 71.90269623657578
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 72.2658372708311
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 76.62190087295724
---------------------------
Sigmoid for nr of epochs 10 and eta: 0.1  batchsize = 1  

# Performance with 10 neurons

In [20]:
# Experimental setup
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
#lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [1, 10, 50, 100]
batchsize = [1]

# Om vi skal bruke elu så må vi tweeke alpha i tilegg.


for epoch in epochs:
    for e in eta:
        for batch in batchsize:
            ann = NeuralNetMLP(n_hidden=10, 
                               epochs=epoch, 
                               eta=e, 
                               alpha = 0.0001,
                               shuffle=True,
                               batch_size=batch, 
                               activation='sigmoid', 
                               tpe = "regression")

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)
            # returns a list of the mean mse score for different epochs or batches
            metric.append(ann.eval_["valid_preform"])
            print("ELU for nr of epochs "+str(epoch) + " and eta: " + str(e) + "  batchsize = " + str(batch) +"   performance is " + str(np.nanmean(ann.eval_["valid_preform"])))
        print("---------------------------")

ELU for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 0.14517574693599355
---------------------------
ELU for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 0.13781533835655296
---------------------------
ELU for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 0.16857858768692663
---------------------------
ELU for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 0.22421847112002705
---------------------------
ELU for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is nan
---------------------------
ELU for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 0.13766677974848643
---------------------------
ELU for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 0.13793272898120018
---------------------------
ELU for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 0.14065829446965844
---------------------------
ELU for nr of epochs 10 and eta: 0.1  batchsize = 1   performance is 0.20

# Performance with 5 neurons

In [21]:
# Experimental setup
metric = []

eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
#lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [1, 10, 50, 100]
batchsize = [1]

# Om vi skal bruke elu så må vi tweeke alpha i tilegg.


for epoch in epochs:
    for e in eta:
        for batch in batchsize:
            ann = NeuralNetMLP(n_hidden=5, 
                               epochs=epoch, 
                               eta=e, 
                               alpha = 0.0001,
                               shuffle=True,
                               batch_size=batch, 
                               activation='sigmoid', 
                               tpe = "regression")

            ann.fit(X_train, y_train, X_test, y_test)
            ann.predict(X_test)
            # returns a list of the mean mse score for different epochs or batches
            metric.append(ann.eval_["valid_preform"])
            print("ELU for nr of epochs "+str(epoch) + " and eta: " + str(e) + "  batchsize = " + str(batch) +"   performance is " + str(np.nanmean(ann.eval_["valid_preform"])))
        print("---------------------------")

ELU for nr of epochs 1 and eta: 0.0001  batchsize = 1   performance is 0.19290120272586073
---------------------------
ELU for nr of epochs 1 and eta: 0.001  batchsize = 1   performance is 0.13747038903084544
---------------------------
ELU for nr of epochs 1 and eta: 0.01  batchsize = 1   performance is 0.13782926760583414
---------------------------
ELU for nr of epochs 1 and eta: 0.1  batchsize = 1   performance is 0.21863717437325492
---------------------------
ELU for nr of epochs 1 and eta: 1.0  batchsize = 1   performance is 2863.0176969732825
---------------------------
ELU for nr of epochs 10 and eta: 0.0001  batchsize = 1   performance is 0.1422118117514488
---------------------------
ELU for nr of epochs 10 and eta: 0.001  batchsize = 1   performance is 0.1377025627708945
---------------------------
ELU for nr of epochs 10 and eta: 0.01  batchsize = 1   performance is 0.1398838336441726
---------------------------
ELU for nr of epochs 10 and eta: 0.1  batchsize = 1   perform

In [None]:
len(test_m)

In [None]:
plt.figure(figsize=(10,6))
x = np.arange(len(test_m[0]))

for i in range(2):
    plt.plot(x, test_m[i], label = "testdata, eta = %.4f" %eta[i])
    plt.plot(x, train_m[i], label = "traindata, eta = %.4f" %eta[i])
    
plt.title(" Performance of MLPRegressor, sigmoid  ", fontsize = 20)
xmin, xmax, ymin, ymax = plt.axis([0,50,0,50])
plt.xlabel(" Epoch ", fontsize=15)
plt.ylabel("  MSE  ", fontsize=15)
plt.legend()
plt.savefig("./results/figures/MLPRegressor_sigmoid_MSE_50_epochs.png")

# Exploding gradients?

In [None]:
ann = NeuralNetMLP(batch_size=10,
                   epochs=50,
                   n_hidden=30,
                   eta = 0.1,
                   activation="sigmoid",
                   tpe = "regression")

ann.fit(X_train, y_train, X_valid, y_valid)
ann.predict(X_valid)

ann.eval_["valid_preform"], ann.eval_["train_preform"]

# Comparing to scikitlearn MLPregression

In [None]:
from sklearn.neural_network import MLPRegressor
from utils import mean_squared_error

In [None]:
mlp = MLPRegressor(hidden_layer_sizes=(30, ), 
                   activation = 'logistic', # this is the sigmoid activation function
                   solver = "sgd", 
                   alpha = 0.0001, 
                   batch_size =10, 
                   learning_rate_init=0.0001)

mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_valid)
#logistic activation uses the sigmoid function 
mean_squared_error(y_pred, y_valid)

In [None]:
mlp = MLPRegressor(hidden_layer_sizes=(30, ), 
                   activation = 'relu', # similar to both elu and lrelu but it is zero for negative values.
                   solver = "sgd", 
                   alpha = 0.0001, # out lmd
                   batch_size =10, 
                   learning_rate_init=0.0001)

mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_valid)

In [None]:
mean_squared_error(y_pred, y_valid)