In [1]:
#Imports
import pandas as pd
import numpy as np
import os
from sklearn import preprocessing
import sklearn.model_selection as ms
import sklearn.metrics as metrics

import sys
sys.path.append('../lib')
from helpers import *
import os

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


In [21]:
dataPath = "../data/canela.csv"
input_dim = 24
output_dim = 12
timesteps = 10
train_perc = 0.8
batch_size = 168
epochs = 10
preprocess = "minmax"
stateful = False

In [10]:
def create_lstm(input_dim,output_dim,timesteps, nodes,loss='mean_squared_error',optimizer='adam',activation="tanh",recurrent_activation='hard_sigmoid', batch_size = 168,stateful=False):
    model = Sequential()
    model.add(LSTM(nodes, input_shape=(timesteps,input_dim),activation=activation, recurrent_activation=recurrent_activation,stateful=True, batch_size=batch_size))
    model.add(Dense(output_dim))
    model.compile(loss=loss, optimizer=optimizer)
    return model

In [11]:
print("Reading Data")
#Data split parameters
data = pd.Series.from_csv(dataPath)

print("Preparing data")
X,y = create_data_cube(data, input_dim=input_dim,timesteps=timesteps)


trainlen1 = int(train_perc*len(X))
trainlen = int(train_perc*trainlen1)
trainlen = trainlen - (trainlen%batch_size)
vallen = trainlen1 - trainlen
vallen = vallen - (vallen%batch_size)

X_train,X_test = X[:trainlen], X[trainlen:trainlen+vallen]
y_train,y_test = y[:trainlen], y[trainlen:trainlen+vallen]
y_train_orig  = y_train

Reading Data
Preparing data


In [12]:
print("Preprocessing Data")

if preprocess == "minmax":
    minmax_in = preprocessing.MinMaxScaler(feature_range=(0,1))
    minmax_out = preprocessing.MinMaxScaler(feature_range=(0,1))

    minmax_in.fit(X_train[:,0,:])
    minmax_out.fit(y_train)

    preproc_in = minmax_in
    preproc_out = minmax_out

else:
    standarization_in = preprocessing.StandardScaler()
    standarization_out = preprocessing.StandardScaler()

    standarization_in.fit(X_train[:,0,:])
    standarization_out.fit(y_train)

    preproc_in = standarization_in
    preproc_out = standarization_out

for i in range(timesteps):
    X_train[:,i,:] = preproc_in.transform(X_train[:,i,:]) if preproc_in else X_train
    X_test[:,i,:] = preproc_in.transform(X_test[:,i,:]) if preproc_in else X_test
y_train = preproc_out.transform(y_train) if preproc_out else y_train



Preprocessing Data


In [13]:
print("Creating Param List")
lsm_nodes = [32]
loss = ["mean_squared_error"]
activation = ["relu", "tanh", "sigmoid","linear"]
recurrent_activation = ["hard_sigmoid","sigmoid","relu"]
param_grid = {"nodes":lsm_nodes,"loss":loss, "input_dim":[input_dim], "output_dim": [12], "timesteps":[timesteps],
              "activation":activation, "recurrent_activation":recurrent_activation, "batch_size":[batch_size]}
params = ms.ParameterGrid(param_grid)

Creating Param List


In [14]:
print("Evaluating Models")

scores = []
for param in params:
    print(param)
    np.random.seed(42)
    model = create_lstm(**param)

    if stateful:
        for i in range(epochs):
            print("Epoch ",i)
            model.reset_states()
            model.fit(X_train, y_train,shuffle=False,verbose=False, epochs=1, batch_size=batch_size)
    else:
        model.fit(X_train, y_train,shuffle=False,verbose=False, epochs=epochs, batch_size=batch_size)
    try:
        y_approx = preproc_out.inverse_transform(model.predict(X_test,batch_size=batch_size))

        score = metrics.mean_squared_error(y_test,y_approx)

        print("Score Validation: ",score)
        param["score"] = score
        scores.append(param)
    except:
        print("Error")
        continue
scores = pd.DataFrame(scores)

Evaluating Models
{'activation': 'relu', 'batch_size': 168, 'input_dim': 24, 'loss': 'mean_squared_error', 'nodes': 32, 'output_dim': 12, 'recurrent_activation': 'hard_sigmoid', 'timesteps': 10}
Score Validation:  6.39059254723
{'activation': 'relu', 'batch_size': 168, 'input_dim': 24, 'loss': 'mean_squared_error', 'nodes': 32, 'output_dim': 12, 'recurrent_activation': 'sigmoid', 'timesteps': 10}
Score Validation:  6.39596446486
{'activation': 'relu', 'batch_size': 168, 'input_dim': 24, 'loss': 'mean_squared_error', 'nodes': 32, 'output_dim': 12, 'recurrent_activation': 'relu', 'timesteps': 10}
Error
{'activation': 'tanh', 'batch_size': 168, 'input_dim': 24, 'loss': 'mean_squared_error', 'nodes': 32, 'output_dim': 12, 'recurrent_activation': 'hard_sigmoid', 'timesteps': 10}
Score Validation:  6.28224213211
{'activation': 'tanh', 'batch_size': 168, 'input_dim': 24, 'loss': 'mean_squared_error', 'nodes': 32, 'output_dim': 12, 'recurrent_activation': 'sigmoid', 'timesteps': 10}
Score Vali

NameError: name 'filename' is not defined

In [18]:
filename = os.path.basename(dataPath).replace(".csv","")

In [22]:
scores.to_csv("{}_scores_lstm_{}lags_{}outs_{}batch.csv".format(filename,input_dim, output_dim,batch_size))