# Predicting cloud cover using neural networks

In [21]:
import numpy as np
import scipy.sparse as sp
np.random.seed(12)

import warnings
#Comment this to turn on warnings
warnings.filterwarnings('ignore')

from model_comparison import model_comparison
from resample import resample
import algorithms
import matplotlib.pyplot as plt
import seaborn as sns
import netCDF4 as n
from sklearn.linear_model import LinearRegression
#from mpl_toolkits.axes_grid1 import make_axes_locatable

from utils import train_test_split
%matplotlib inline
#%matplotlib notebook

from deepNN import NeuralNetRegressor

from sklearn.neural_network import MLPRegressor
from utils import mean_squared_error

In [22]:
# reading test
path = "./files/"
filenames = ["specific_humidity_Europa_sp.nc", "relative_humidity_Europa_sp.nc", "pressure_Europa_sp.nc",  
             "temperature_Europa_sp.nc", "total_cloud_cover_Europa_sp.nc"]


cloud = n.Dataset(path + filenames[-1], "r")
relative = n.Dataset(path + filenames[1], "r")
specific = n.Dataset(path + filenames[0], "r")
pressure = n.Dataset(path + filenames[2], "r")
temperature = n.Dataset(path + filenames[3], "r")

In [23]:
#print(cloud.variables)
tcc = cloud.variables["tcc"][:][:][:].data

# Retriving ground values, these are available at six different pressure levels. 
rel = relative.variables["r"][:][:][:][:].data
#level = relative.variables["level"][:][0].data
spe = specific.variables["q"][:][:][:][:].data

surf_pre = pressure.variables["sp"][:][:][:].data
temp = temperature.variables["t2m"][:][:][:].data

In [24]:
def logit_inv(x): # sigmoid?
    return np.exp(x)/(1+np.exp(x))

def logit(x):
    return np.log((x + 1e-12)/(1+1e-12 - x))

In [25]:
# for one certain timestep 

n_days = 0.25

TCC = []
REL = []
SPE = []
PRE = []
TEMP = []


for t in range(int(n_days*4)):
    TCC.append(tcc[t].flatten())
    REL.append(rel[t][0].flatten())
    SPE.append(spe[t][0].flatten())
    PRE.append(surf_pre[t].flatten())
    TEMP.append(temp[t].flatten())


In [26]:
y =(np.array(TCC).flatten())
temp = y[y<1]
y[y>1] = temp.max()
X = np.array([np.array(REL).flatten(), np.array(SPE).flatten(), np.array(PRE).flatten(), np.array(TEMP).flatten()])
y = logit(np.array(TCC).flatten())

In [27]:
np.array(TCC).min(), np.array(TCC).max()

(9.99866855977416e-13, 1.0000000000009999)

In [28]:
y.min(), y.max()

(-26.93794050959591, 36.04365338911916)

In [8]:
np.shape(X), np.shape(y)

((4, 4697), (4697,))

In [9]:
y = y.reshape((len(y),1))
X = X.T

In [10]:
np.shape(X), np.shape(y)

((4697, 4), (4697, 1))

In [11]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, split_size = 0.2)
import sklearn.model_selection as s
X_train, X_test, y_train, y_test = s.train_test_split(X,y,test_size=0.2)

In [12]:
np.shape(X_train), np.shape(y_train), np.shape(X_test), np.shape(y_test)

((3757, 4), (3757, 1), (940, 4), (940, 1))

In [13]:
y_train.max(), y_train.min()

(36.04365338911916, -26.93794050959591)

In [14]:
logit(0), logit(1)

(-27.63102111592955, 27.63093221929863)

In [15]:
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [10,50,100]
batch_s = [1,10,50]


for e in epochs:
    for b in batch_s:
        for et in eta:
            for n in n_nodes:
                model = NeuralNetRegressor(n_hidden = [30, n, 20],  
                                           epochs=e, 
                                           eta=et, 
                                           shuffle=True, 
                                           batch_size=b,
                                           seed=None, 
                                           alpha=0.0001, 
                                           activation='sigmoid')

                p = model.fit(X_train, y_train, X_test, y_test)
                l = model.predict(X_test)
                print( " for epochs :" + str(e) + " for bactsize : " + str(b) + " learningrat e : " + str(et) + "noden in middle layer n: " + str(n) + " traininperformance ois " + str(p.eval_['train_preform']) + "validation performance is "+ str(p.eval_['valid_preform']))
                
                
                

 for epochs :10 for bactsize : 1 learningrat e : 0.0001noden in middle layer n: 10 traininperformance ois [233.22948743034272]validation performance is [222.4940143207615]
 for epochs :10 for bactsize : 1 learningrat e : 0.0001noden in middle layer n: 30 traininperformance ois [232.732010869807]validation performance is [221.33154647789004]
 for epochs :10 for bactsize : 1 learningrat e : 0.0001noden in middle layer n: 50 traininperformance ois [232.6610075451657]validation performance is [221.68941506347142]
 for epochs :10 for bactsize : 1 learningrat e : 0.0001noden in middle layer n: 100 traininperformance ois [232.52082629528945]validation performance is [221.3239737728288]
 for epochs :10 for bactsize : 1 learningrat e : 0.0001noden in middle layer n: 500 traininperformance ois [232.58158399053363]validation performance is [221.54633097206732]
 for epochs :10 for bactsize : 1 learningrat e : 0.001noden in middle layer n: 10 traininperformance ois [234.8339240383847]validation per

 for epochs :10 for bactsize : 50 learningrat e : 0.0001noden in middle layer n: 100 traininperformance ois [232.71949352986886]validation performance is [221.7837256718339]
 for epochs :10 for bactsize : 50 learningrat e : 0.0001noden in middle layer n: 500 traininperformance ois [233.83935003705787]validation performance is [223.25934246764598]
 for epochs :10 for bactsize : 50 learningrat e : 0.001noden in middle layer n: 10 traininperformance ois [232.7539188622462]validation performance is [221.83682545480866]
 for epochs :10 for bactsize : 50 learningrat e : 0.001noden in middle layer n: 30 traininperformance ois [236.7431440781679]validation performance is [224.53341059540926]
 for epochs :10 for bactsize : 50 learningrat e : 0.001noden in middle layer n: 50 traininperformance ois [235.03871009638235]validation performance is [223.06676701672717]
 for epochs :10 for bactsize : 50 learningrat e : 0.001noden in middle layer n: 100 traininperformance ois [236.15234344771417]validat

 for epochs :50 for bactsize : 10 learningrat e : 0.001noden in middle layer n: 10 traininperformance ois [237.28287874473517]validation performance is [227.2280702425372]
 for epochs :50 for bactsize : 10 learningrat e : 0.001noden in middle layer n: 30 traininperformance ois [233.80015070970865]validation performance is [223.21141395409416]
 for epochs :50 for bactsize : 10 learningrat e : 0.001noden in middle layer n: 50 traininperformance ois [232.60760888202753]validation performance is [221.28978876478368]
 for epochs :50 for bactsize : 10 learningrat e : 0.001noden in middle layer n: 100 traininperformance ois [233.9108794727925]validation performance is [223.3464734970596]
 for epochs :50 for bactsize : 10 learningrat e : 0.001noden in middle layer n: 500 traininperformance ois [233.61902493119157]validation performance is [221.9206758927666]
 for epochs :50 for bactsize : 10 learningrat e : 0.01noden in middle layer n: 10 traininperformance ois [nan]validation performance is [

 for epochs :100 for bactsize : 1 learningrat e : 0.001noden in middle layer n: 100 traininperformance ois [232.56004895340817]validation performance is [221.50112263508169]
 for epochs :100 for bactsize : 1 learningrat e : 0.001noden in middle layer n: 500 traininperformance ois [233.83561658816805]validation performance is [222.0871790151195]
 for epochs :100 for bactsize : 1 learningrat e : 0.01noden in middle layer n: 10 traininperformance ois [632.6054844941967]validation performance is [611.2724344035613]
 for epochs :100 for bactsize : 1 learningrat e : 0.01noden in middle layer n: 30 traininperformance ois [245.0854709024247]validation performance is [235.7231044871467]
 for epochs :100 for bactsize : 1 learningrat e : 0.01noden in middle layer n: 50 traininperformance ois [394.6114820588026]validation performance is [376.97453395133573]
 for epochs :100 for bactsize : 1 learningrat e : 0.01noden in middle layer n: 100 traininperformance ois [245.1956104827229]validation perfor

 for epochs :100 for bactsize : 50 learningrat e : 0.01noden in middle layer n: 10 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.01noden in middle layer n: 30 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.01noden in middle layer n: 50 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.01noden in middle layer n: 100 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.01noden in middle layer n: 500 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.1noden in middle layer n: 10 traininperformance ois [nan]validation performance is [nan]
 for epochs :100 for bactsize : 50 learningrat e : 0.1noden in middle layer n: 30 traininperformance ois [nan]validation performance is [nan]

In [16]:
p.eval_['train_preform']

[nan]

In [17]:
p.eval_['valid_preform']

[nan]

In [18]:
y_train = y_train.ravel()

In [19]:
y_train

array([  6.02530017, -26.93794051, -26.93794051, ..., -26.93794051,
        -0.72208271,   3.77878821])

# Scikit MLP Regressor using several layers of depth 

In [20]:
n_nodes = [10,30,50,100, 500]
eta = [0.0001, 0.001, 0.01, 0.1, 1.0]
lmd = [0.0001, 0.001, 0.01, 0.1, 1.0, 10]
epochs = [10,50,100]
batch_s = [1,10,50]



#for e in epochs:
for b in batch_s:
    for et in eta:
        for l in lmd:
            mlp = MLPRegressor(hidden_layer_sizes=(100,50), 
                               activation = 'logistic', # this is the sigmoid activation function
                               solver = "adam", 
                               alpha = l, # penalty
                               batch_size =b, 
                               learning_rate_init=et)

            mlp.fit(X_train, y_train)
            y_pred = mlp.predict(X_test)
            #logistic activation uses the sigmoid function 
            mse = mean_squared_error(y_pred, y_test)
            print(" eta : " + str(et) +   " lmd :   "   + str(l) + "    batch size : " +  str(b)   + "   mse is " + str(mse))

 eta : 0.0001 lmd :   0.0001    batch size : 1   mse is 221.28791670137886
 eta : 0.0001 lmd :   0.001    batch size : 1   mse is 221.2936133380667
 eta : 0.0001 lmd :   0.01    batch size : 1   mse is 221.3189646416359
 eta : 0.0001 lmd :   0.1    batch size : 1   mse is 221.35573206317684
 eta : 0.0001 lmd :   1.0    batch size : 1   mse is 221.30547151686272
 eta : 0.0001 lmd :   10    batch size : 1   mse is 221.34754495060636
 eta : 0.001 lmd :   0.0001    batch size : 1   mse is 221.54619772970133
 eta : 0.001 lmd :   0.001    batch size : 1   mse is 221.31741787209108
 eta : 0.001 lmd :   0.01    batch size : 1   mse is 221.33462334729575
 eta : 0.001 lmd :   0.1    batch size : 1   mse is 221.28790047169582
 eta : 0.001 lmd :   1.0    batch size : 1   mse is 221.29611414083394
 eta : 0.001 lmd :   10    batch size : 1   mse is 221.52063614689658
 eta : 0.01 lmd :   0.0001    batch size : 1   mse is 222.0242270396313
 eta : 0.01 lmd :   0.001    batch size : 1   mse is 221.37883

# Adding regularization may result in a beytte preformance despite of the network architecture 