# Predicting cloud cover using neural networks

In [11]:
import numpy as np
import scipy.sparse as sp
np.random.seed(12)

import warnings
#Comment this to turn on warnings
warnings.filterwarnings('ignore')

from model_comparison import model_comparison
from resample import resample
import algorithms
import matplotlib.pyplot as plt
import seaborn as sns
import netCDF4 as n
from sklearn.linear_model import LinearRegression
#from mpl_toolkits.axes_grid1 import make_axes_locatable

from utils import train_test_split
%matplotlib inline
#%matplotlib notebook

from deepNN import NeuralNetRegressor

from sklearn.neural_network import MLPRegressor
from utils import mean_squared_error

In [12]:
# reading test
path = "./files/"
filenames = ["specific_humidity_Europa_sp.nc", "relative_humidity_Europa_sp.nc", "pressure_Europa_sp.nc",  
             "temperature_Europa_sp.nc", "total_cloud_cover_Europa_sp.nc"]


cloud = n.Dataset(path + filenames[-1], "r")
relative = n.Dataset(path + filenames[1], "r")
specific = n.Dataset(path + filenames[0], "r")
pressure = n.Dataset(path + filenames[2], "r")
temperature = n.Dataset(path + filenames[3], "r")

In [13]:
#print(cloud.variables)
tcc = cloud.variables["tcc"][:][:][:].data

# Retriving ground values, these are available at six different pressure levels. 
rel = relative.variables["r"][:][:][:][:].data
#level = relative.variables["level"][:][0].data
spe = specific.variables["q"][:][:][:][:].data

surf_pre = pressure.variables["sp"][:][:][:].data
temp = temperature.variables["t2m"][:][:][:].data

In [14]:
def logit_inv(x): # sigmoid?
    return np.exp(x)/(1+np.exp(x))

def logit(x):
    return np.log((x + 1e-12)/(1+1e-12 - x))

In [31]:
# r2_score(y_true, y_pred)
def A_R2(y_true, y_pred, n, p):
    return 1 - (1 - r2_score( y_true, y_pred))*((n-1)/(n-p-1))

def NRMSE(y_true, y_pred):
    return np.sqrt(mean_squared_error(y_true, y_pred))/np.mean(y_true)

In [16]:
# for one certain timestep 

n_days = 0.25

TCC = []
REL = []
SPE = []
PRE = []
TEMP = []


for t in range(int(n_days*4)):
    TCC.append(tcc[t].flatten())
    REL.append(rel[t][0].flatten())
    SPE.append(spe[t][0].flatten())
    PRE.append(surf_pre[t].flatten())
    TEMP.append(temp[t].flatten())


In [17]:
y =(np.array(TCC).flatten())
temp = y[y<1]
y[y>1] = temp.max()
X = np.array([np.array(REL).flatten(), np.array(SPE).flatten(), np.array(PRE).flatten(), np.array(TEMP).flatten()])
y = logit(np.array(TCC).flatten())

In [18]:
np.array(TCC).min(), np.array(TCC).max()

(9.99866855977416e-13, 1.0000000000009999)

In [19]:
y.min(), y.max()

(-26.93794050959591, 36.04365338911916)

In [20]:
np.shape(X), np.shape(y)

((4, 4697), (4697,))

In [21]:
y = y.reshape((len(y),1))
X = X.T

In [22]:
np.shape(X), np.shape(y)

((4697, 4), (4697, 1))

In [23]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, split_size = 0.2)
import sklearn.model_selection as s
X_train, X_test, y_train, y_test = s.train_test_split(X,y,test_size = 0.2)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
fit = scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [24]:
np.shape(X_train), np.shape(y_train), np.shape(X_test), np.shape(y_test)

((3757, 4), (3757, 1), (940, 4), (940, 1))

In [25]:
y_train.max(), y_train.min()

(36.04365338911916, -26.93794050959591)

In [26]:
logit(0), logit(1)

(-27.63102111592955, 27.63093221929863)

In [27]:
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [10,50,100]
batch_s = [1,10,50]

"""
for e in epochs:
    for b in batch_s:
        for et in eta:
            for n in n_nodes:
                model = NeuralNetRegressor(n_hidden = [30, n, 20],  
                                           epochs=e, 
                                           eta=et, 
                                           shuffle=True, 
                                           batch_size=b,
                                           seed=None, 
                                           alpha=0.0001, 
                                           activation='sigmoid')

                p = model.fit(X_train, y_train, X_test, y_test)
                l = model.predict(X_test)
                print(" ")
                print( " for epochs :" + str(e) + " for bactsize : " + str(b) + " learningrat e : " + str(et) + "noden in middle layer n: " + str(n) + " traininperformance ois " + str(p.eval_['train_preform']) + "validation performance is "+ str(p.eval_['valid_preform']))
                print(" ")
                
"""                

'\nfor e in epochs:\n    for b in batch_s:\n        for et in eta:\n            for n in n_nodes:\n                model = NeuralNetRegressor(n_hidden = [30, n, 20],  \n                                           epochs=e, \n                                           eta=et, \n                                           shuffle=True, \n                                           batch_size=b,\n                                           seed=None, \n                                           alpha=0.0001, \n                                           activation=\'sigmoid\')\n\n                p = model.fit(X_train, y_train, X_test, y_test)\n                l = model.predict(X_test)\n                print(" ")\n                print( " for epochs :" + str(e) + " for bactsize : " + str(b) + " learningrat e : " + str(et) + "noden in middle layer n: " + str(n) + " traininperformance ois " + str(p.eval_[\'train_preform\']) + "validation performance is "+ str(p.eval_[\'valid_preform\']))\n       

In [28]:
#p.eval_['train_preform']

In [19]:
# p.eval_['valid_preform']

In [20]:
y_train = y_train.ravel()

In [21]:
y_train

array([  6.02530017, -26.93794051, -26.93794051, ..., -26.93794051,
        -0.72208271,   3.77878821])

# Scikit MLP Regressor using several layers of depth 

In [35]:
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [10,50,100]
batch_s = [1,10,50]

n, p = X_train.shape

#for e in epochs:

"""for b in batch_s:
    for et in eta:
        for l in lmd:"""

mlp = MLPRegressor(hidden_layer_sizes=(30,), 
                   activation = 'logistic', # this is the sigmoid activation function
                   solver = "adam", 
                   alpha =0.01, # penalty
                   batch_size = 10, 
                   learning_rate_init = 0.001)

mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
#logistic activation uses the sigmoid function 
mse = NRMSE(y_pred, y_test, n, p)
print(" eta : " + str(et) +   " lmd :   "   + str(l) + "    batch size : " +  str(b)   + "   mse is " + str(mse))

TypeError: NRMSE() takes 2 positional arguments but 4 were given

# Adding regularization may result in a better preformance despite of the network architecture ..? Doen't apper to be so, but check it out.