In [1]:
# imports
import numpy             as np
import matplotlib.pyplot as plt
import pandas            as pd
import math
import keras
from scipy.stats  import norm
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras        import backend as K
from datetime     import datetime
from keras.optimizers      import RMSprop
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics       import mean_squared_error
from matplotlib.pylab      import rcParams
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
%matplotlib inline
# default size (width, height) for every matplotlib figure in the notebook
rcParams['figure.figsize'] = (15, 6)

# Window geometry shared by the cells below.
look_back = 288     # time steps fed to the network as input
look_forward = 24   # time steps the network has to generate
stations = 66       # number of station columns kept from the dataset

Using TensorFlow backend.


In [2]:
# Bucket width used when aggregating rows (the original data is in 5 minute intervals)
time_grouping = '5min'

# Read the first 288*21 rows and index them by their parsed 'time_bucket' timestamp
dataframe = (
    pd.read_csv('ignored_assets/paxout_table.csv', engine='python', nrows=288*21)
      .assign(time_bucket=lambda frame: pd.to_datetime(frame['time_bucket']))
      .set_index('time_bucket')
)

# Sum together every row whose timestamp rounds to the same bucket
dataframe = (
    dataframe
      .assign(day_hour=dataframe.index.round(time_grouping))
      .groupby('day_hour')
      .sum()
)

# Keep only the first `stations` columns, as 32-bit floats
dataset_orig = dataframe.values[:, :stations].astype('float32')

# Rescale every column into the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset_orig)

# convert a (time, feature) array into sliding input/target windows, one row per feature
def create_dataset(dataset, look_back=1, look_forward=2):
    """Build sliding input/target windows from a 2-D array.

    Every window start that leaves room for a full ``look_back`` input
    followed by a full ``look_forward`` target produces one sample.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D array indexed as ``[time_step, feature]``.
    look_back : int
        Number of consecutive time steps in each input window.
    look_forward : int
        Number of time steps, immediately after the input window, in each
        target window (expected to be at least 1).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X`` has shape ``(samples, features, look_back)`` and
        ``Y`` has shape ``(samples, features, look_forward)``. Each window is
        transposed so the feature axis comes before the time axis. Both arrays
        are empty (shape ``(0,)``) when the input is too short for one sample.
    """
    # Number of start positions with a complete input AND target window.
    # A non-positive count simply yields an empty range below.
    n_windows = len(dataset) - look_back - look_forward + 1

    dataX, dataY = [], []
    for start in range(n_windows):
        split = start + look_back
        dataX.append(dataset[start:split, :].T)
        dataY.append(dataset[split:split + look_forward, :].T)
    return np.array(dataX), np.array(dataY)

# Partition the scaled data: the leading rows become `test`, the rows after
# them (minus the final `look_forward` rows) become `train`.
train_size = 10 * 288
test_size = len(dataset) - train_size
test = dataset[:test_size]
train = dataset[test_size:len(dataset) - look_forward]

# Window the training rows into X=[t, t-1, ..., t-look_back] and
# Y=[t+1, t+2, ..., t+look_forward]
trainX, trainY = create_dataset(train, look_back, look_forward)

In [3]:
def top_heavy_loss(y_true, y_pred):
    """Weighted absolute error that emphasises the earliest forecast steps.

    The weights decrease linearly from 1 down to ``1 / look_forward``, are
    squared, and are then normalised to sum to 1, so the first predicted step
    contributes the most. The horizon length comes from the notebook-level
    ``look_forward`` constant, so the loss stays in step with the model output
    width instead of a hard-coded 24.

    Parameters
    ----------
    y_true, y_pred : backend tensors
        Targets and predictions; the last axis must have ``look_forward``
        entries for the dot product with the weight column to be defined.

    Returns
    -------
    backend tensor
        ``|y_pred - y_true|`` contracted with the weight column vector.
    """
    # An integer-stepped range divided afterwards yields exactly `look_forward`
    # weights; a fractional step can gain or lose an element to rounding.
    w = (np.arange(look_forward, 0, -1, dtype='float64') / look_forward) ** 2
    # Normalise and reshape to a (look_forward, 1) column for the dot product.
    w = (w / w.sum())[:, None]
    W = K.variable(value=w)
    return K.dot(K.abs(y_pred - y_true), W)

In [None]:

# builds the network AND compiles it, ready to be fitted
def create_model():
    """Build and compile the forecasting network.

    The stack is: input dropout, one LSTM returning full sequences, then
    ``layers`` repetitions of (dropout, dense), and a final dense layer with
    ``look_forward`` units. The input shape is ``(stations, look_back)``.

    The model is compiled with ``top_heavy_loss`` and RMSprop. Mean absolute
    error is reported as the metric because the targets are continuous values;
    classification accuracy carries no meaning for them.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    dropout_rate = 0.3
    layers = 3
    neurons = 128

    model = Sequential()
    model.add(Dropout(dropout_rate, input_shape=(stations, look_back)))
    model.add(LSTM(neurons, recurrent_dropout=dropout_rate, return_sequences=True))
    for _ in range(layers):
        model.add(Dropout(dropout_rate))
        model.add(Dense(neurons))
    model.add(Dense(look_forward))

    optimizer = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)
    model.compile(loss=top_heavy_loss, optimizer=optimizer, metrics=['mae'])
    return model

# Wrap the model builder so scikit-learn can construct and fit it
model = KerasRegressor(build_fn=create_model, verbose=3)

# The parameters to search through
epochs = [10, 20, 40]
batch = [10, 20, 40]

# Grid of fit() arguments; every combination is tried.
# (Uses the `epochs` list defined above -- the name `epoch` does not exist.)
param_grid = dict(epochs=epochs,
                  batch_size=batch)

# Begin the search for the best combination
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)

grid_result = grid.fit(trainX, trainY)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Drop the backend graph/session built up by the many fitted models
K.clear_session()

Epoch 1/10
Epoch 1/10
Epoch 1/10
Epoch 1/20
Epoch 2/10
Epoch 2/10
Epoch 2/10
Epoch 2/20
Epoch 3/10
Epoch 3/10
Epoch 3/10
Epoch 3/20
Epoch 4/10
Epoch 4/10
Epoch 4/10
Epoch 4/20
Epoch 5/10
Epoch 5/10
Epoch 5/20
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 6/20
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 7/20
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 8/20
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 9/10
Epoch 9/20
Epoch 10/10
Epoch 10/10
Epoch 10/10
Epoch 10/20
Epoch 11/20
Epoch 1/20
Epoch 1/20
Epoch 1/40
Epoch 12/20
Epoch 2/20
Epoch 2/20
Epoch 2/40
Epoch 13/20
Epoch 3/20
Epoch 3/20
Epoch 3/40
Epoch 14/20
Epoch 4/20
Epoch 4/20
Epoch 4/40
Epoch 15/20
Epoch 5/20
Epoch 5/20
Epoch 5/40
Epoch 16/20
Epoch 6/20
Epoch 6/20
Epoch 6/40
Epoch 17/20
Epoch 7/20
Epoch 7/20
Epoch 7/40
Epoch 18/20
Epoch 8/20
Epoch 8/20
Epoch 8/40
Epoch 19/20
Epoch 9/20
Epoch 9/20
Epoch 9/40
Epoch 20/20
Epoch 10/20
Epoch 10/40
Epoch 10/20
Epoch 11/20
Epoch 11/20
Epoch 11/40
Epoch 1/40
Epoch 12/20
Epoch 12/40
Epoch 12/20
Epoch 2/40
Epoch 13/

In [18]:
def func(a=1, b=2, **kwargs):
    """Print ``a + b``; any additional keyword arguments are accepted and ignored."""
    # Function-call form of print works on both Python 2 and Python 3.
    print(a + b)
# Unpack a dict as keyword arguments; 'c' has no named parameter and lands in **kwargs
args = {'a': 3, 'b': 4, 'c': 5}
func(**args)

7
