In [84]:
__author__ = '@Tssp'

''' RNN Bidireccional en Keras '''

import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras.layers.convolutional import Conv1D, MaxPooling1D
import pandas as pd  
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats.stats import pearsonr
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import matplotlib.dates as mdates

In [85]:
# Load 222Rn:
mdnRnA = np.loadtxt('../mdnRnA.txt', delimiter=',')
startday = pd.datetime(2013, 7, 1)
dates = pd.date_range(startday, periods=len(mdnRnA), freq='W')

In [86]:
def loadallDF(list_cities):
    output = {}
    for city in list_cities:
        output[city + '_arima'] = pd.read_csv('../AEMET/Data/Daily/{}/{}_arima_weekly.csv'.format(city, city),
                                         usecols=['fecha', 'tmed', 'presmed', 'velmedia'])
        output[city + '_arima']['mdnRnA'] = mdnRnA
        output[city + '_arima']['dates'] = pd.to_datetime(output[city + '_arima']['fecha'])
    return output

In [87]:
list_cities = ['BCN', 'NVR', 'HSC', 'ZGZ']
weekly = loadallDF(list_cities)

In [88]:
weekly['BCN_arima'].head()

Unnamed: 0,fecha,tmed,velmedia,presmed,mdnRnA,dates
0,2013-07-07,25.8,2.8,1020.6,90.0,2013-07-07
1,2013-07-14,25.642857,3.914286,1016.357143,79.0,2013-07-14
2,2013-07-21,26.028571,4.285714,1017.578571,99.0,2013-07-21
3,2013-07-28,27.214286,4.9,1013.457143,117.0,2013-07-28
4,2013-08-04,25.785714,4.528571,1018.435714,99.0,2013-08-04


In [105]:
BCN_arima = weekly['BCN_arima']
PMP_arima = weekly['NVR_arima']
HSC_arima = weekly['HSC_arima']
ZGZ_arima = weekly['ZGZ_arima']
DF_list = [BCN_arima, PMP_arima, ZGZ_arima, HSC_arima]
arr_str = ['BCN', 'PMP', 'ZGZ', 'HSC']

In [106]:
def scaleallDF(DF_arr, arr_str):
    field = ['tmed', 'velmedia', 'presmed', 'mdnRnA']
    output = {}
    for i in range(len(DF_arr)):
        scaled = MinMaxScaler().fit(DF_arr[i][field].values).transform(DF_arr[i][field].values)
        output[arr_str[i]] = pd.DataFrame(scaled, columns=field)
        output[arr_str[i]]['dates'] = DF_arr[i]['dates']
    return output

In [109]:
weekly_scaled = scaleallDF(DF_list, arr_str)
weekly_scaled['BCN'].head()

Unnamed: 0,tmed,velmedia,presmed,mdnRnA,dates
0,0.824465,0.066667,0.641113,0.573333,2013-07-07
1,0.817737,0.259259,0.50559,0.426667,2013-07-14
2,0.834251,0.323457,0.544604,0.693333,2013-07-21
3,0.885015,0.42963,0.412959,0.933333,2013-07-28
4,0.823853,0.365432,0.571983,0.693333,2013-08-04


In [110]:
BCN_scaled = weekly_scaled['BCN']
PMP_scaled = weekly_scaled['PMP']
HSC_scaled = weekly_scaled['HSC']
ZGZ_scaled = weekly_scaled['ZGZ']
DFscaled_list = [BCN_scaled, PMP_scaled, ZGZ_scaled, HSC_scaled]

In [111]:
def data_toCNN_format(DF_list, arr_str, fields, sample_size):
    output = {}
    for i in range(len(DF_list)):
        weekly_to3d = DF_list[i][fields].values
        output[arr_str[i]] = np.array([weekly_to3d[start:start+sample_size] for start in range(0, weekly_to3d.shape[0]-sample_size)])
    return output

In [112]:
sample_size = 4
Xt = data_toCNN_format(DFscaled_list, arr_str, ['tmed', 'mdnRnA'], sample_size)

In [113]:
Y = mdnRnA[sample_size:]

# Only Temperature

In [114]:
Xt_BCN = Xt['BCN']
Xt_PMP = Xt['PMP']
Xt_HSC = Xt['HSC']
Xt_ZGZ = Xt['ZGZ']

In [115]:
def train_test_split(X):
    X_train, X_test = X[:-test_size], X[-test_size:]
    return X_train, X_test

In [116]:
test_size = int(0.3*len(mdnRnA))
Xtrain_BCN, Xtest_BCN = train_test_split(Xt_BCN)
Xtrain_PMP, Xtest_PMP = train_test_split(Xt_PMP)
Xtrain_HSC, Xtest_HSC = train_test_split(Xt_HSC)
Xtrain_ZGZ, Xtest_ZGZ = train_test_split(Xt_ZGZ)
Ytrain, Ytest = Y[:-test_size], Y[-test_size:]

In [117]:
print("X_train.shape = ", Xtrain_BCN.shape, "\nY_train.shape = ", Ytrain.shape)

X_train.shape =  (218, 4, 2) 
Y_train.shape =  (218,)


In [131]:
def NN(neurons, nep, X_train, Y_train, X_test, Y_test, sample_size, v=0, btch_size=10):
    model = Sequential()
    model.add(Conv1D(filters=neurons[0], kernel_size=3, activation='relu', input_shape=(sample_size, 2)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(neurons[0], activation='relu'))
    model.add(Dense(neurons[1], activation='relu'))
    model.add(Dense(1))
    model.compile(loss="mse", optimizer="adam", metrics=["acc"])
    history = model.fit(X_train, Y_train, epochs=nep, batch_size=btch_size, verbose=v, validation_data=(X_test, Y_test))
    pred = model.predict(X_test)
    acc_train = np.average(history.history["acc"])
    acc_test = np.average(history.history["val_acc"])
    #print("Train Accuracy: ", acc_train, "\nTest Accuracy:  ", acc_test)
    return history, pred, acc_train, acc_test

In [132]:
history, pred, acc_train, acc_test = NN([64,32], 35, Xtrain_BCN, Ytrain, Xtest_BCN, Ytest, sample_size)

In [135]:
def show_errors(Xtrainlist, Y_train, Xtest_list, Y_test, arr_str, it, sample_size):
    ECM = []
    EAM = []
    for i in range(len(Xtrainlist)):
        print('\n\n#########\n', arr_str[i], '\n########\n\n')
        for it in range(it):
            print('Iteration ', it)
            history, pred, acc_train, acc_test = NN([64,32], 35, Xtrainlist[i], Y_train,
                                                    Xtest_list[i], Y_test)
            ECM.append(mean_squared_error(Y_test, pred))
            EAM.append(mean_absolute_error(Y_test, pred))
        print(':ECM: %.4f' % (np.mean(ECM)))
        print(':EAM: %.4f' % (np.mean(EAM)))

In [134]:
Xtrainlist = [Xtrain_BCN, Xtrain_PMP, Xtrain_HSC, Xtrain_ZGZ]
Xtestlist = [Xtest_BCN, Xtest_PMP, Xtest_HSC, Xtest_ZGZ]

In [136]:
show_errors(Xtrainlist, Ytrain, Xtestlist, Ytest, arr_str, 1, 4)



#########
 BCN 
########


Iteration  0


TypeError: NN() missing 1 required positional argument: 'sample_size'