In [1]:
# imports
import numpy             as np
import matplotlib.pyplot as plt
import pandas            as pd
import math
import keras
from scipy.stats  import norm
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras        import backend as K
from datetime     import datetime
from keras.optimizers      import RMSprop
from sklearn.preprocessing import MinMaxScaler
from matplotlib.pylab      import rcParams
%matplotlib inline
rcParams['figure.figsize'] = 15, 6

# number of past time steps fed to the model for each sample
# (12 * 24 = 288 five-minute buckets; presumably one full day -- TODO confirm)
look_back = 12 * 24
# number of future time steps the model predicts for each sample
look_forward = 24
# number of station columns kept from the raw table
stations = 66

Using TensorFlow backend.


# Prepares the Dataset For the Models

In [2]:
# Bucket width used when re-grouping the rows (the raw file is already in 5 minute intervals)
time_grouping = '5min'

# Read the first 288*21 rows of the table
dataframe = pd.read_csv('ignored_assets/paxout_table.csv', engine='python', nrows=288*21)

# Parse the timestamps and use them as the index
dataframe = dataframe.assign(
    time_bucket=pd.to_datetime(dataframe['time_bucket'])
).set_index('time_bucket')

# Round every timestamp to its bucket and sum all rows that share a bucket
dataframe = dataframe.assign(
    day_hour=dataframe.index.round(time_grouping)
).groupby('day_hour').sum()

# Keep only the first `stations` columns as float32 (drops anything after column 66)
dataset_orig = dataframe.values[:, :stations].astype('float32')

# Rescale every column to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset_orig)

# convert an array of values into a dataset matrix, adjusted to make a dataset that is 66 wide
def create_dataset(dataset, look_back=1, look_forward=2):
    """Slice a 2-D (time, features) array into sliding input/target windows.

    For every start row ``i`` where both windows fit completely inside
    ``dataset``:

      * the input is rows ``i .. i+look_back-1``, transposed to
        ``(features, look_back)``
      * the target is the following ``look_forward`` rows, transposed to
        ``(features, look_forward)``

    Parameters
    ----------
    dataset : 2-D numpy array indexed as ``[time, feature]``.
    look_back : number of consecutive rows in each input window.
    look_forward : number of consecutive rows in each target window.

    Returns
    -------
    (X, Y) : tuple of numpy arrays with shapes
        ``(samples, features, look_back)`` and
        ``(samples, features, look_forward)``. When no complete window fits,
        both are empty arrays of shape ``(0,)``.
    """
    # Count of start positions whose input AND full target window fit.
    # (The previous ``len - look_back - 1`` bound silently dropped the last
    # valid window whenever look_forward == 1.)
    n_windows = max(len(dataset) - look_back - look_forward + 1, 0)

    dataX, dataY = [], []
    for start in range(n_windows):
        split = start + look_back
        dataX.append(dataset[start:split, :].T)
        dataY.append(dataset[split:split + look_forward, :].T)
    return np.array(dataX), np.array(dataY)

# split into train and test sets
train_size = 288*14
test_size = len(dataset) - train_size
# the test slice stops look_forward rows before the end of the data
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset)-look_forward,:]

# reshape into X=[t-look_back+1, ..., t-1, t] and Y=[t+1, t+2, ..., t+look_forward]
trainX, trainY = create_dataset(train, look_back, look_forward)
# the test windows get their own names; previously they were assigned to
# trainX/trainY, which silently replaced the training data
testX, testY = create_dataset(test, look_back, look_forward)

# The Loss Functions

In [3]:
def top_heavy_loss(y_true, y_pred):
    """Weighted absolute error that emphasises the earliest output steps.

    Builds 24 weights ``(1, 23/24, ..., 1/24) ** 2``, normalises them to sum
    to one, and returns the dot product of ``|y_pred - y_true|`` with that
    weight column. The weight count is hard-coded to 24, so the last axis of
    the tensors must have length 24.
    """
    raw_weights = np.arange(1, 0, -1./24) ** 2
    normalised = raw_weights / raw_weights.sum()
    # column vector so the dot product contracts the last axis of the error
    weight_column = K.variable(value=normalised[:, None])
    abs_error = K.abs(y_pred - y_true)
    return K.dot(abs_error, weight_column)

def top_heavy_tail_heavy(y_true, y_pred):
    """Weighted absolute error that emphasises the first AND the last step.

    The first 23 weights decay as ``(1, 23/24, ..., 2/24) ** 2.5``; a final
    weight of 1 is appended so the last step counts as much as the first.
    The weights are normalised to sum to one and applied to
    ``|y_pred - y_true|`` through a dot product with the weight column.
    (A purely decreasing alternative would be ``np.arange(1, 0, -1./24)**1.3``.)
    """
    decaying = np.arange(1, 1./23, -1./24) ** 2.5
    # last step is weighted as heavily as the first one
    raw_weights = np.append(decaying, [1])
    normalised = raw_weights / raw_weights.sum()
    weight_column = K.variable(value=normalised[:, None])
    abs_error = K.abs(y_pred - y_true)
    return K.dot(abs_error, weight_column)


# The Custom Grid Search Function

In [6]:
def custom_grid_search(trainX, trainY, create_model, param_grid, batch_size=25, epochs=10, verbose=3, seed=None):
    '''
      My Custom Grid Search Function:
        * trainX       - The input data for the model
        * trainY       - The target data for the model
        * create_model - The function which returns a compiled model
        * param_grid   - The parameters for the function to grid search through
        * batch_size   - The default batch size if not otherwise specified directly or as a parameter to grid search
        * epochs       - The default number of epochs if not otherwise specified directly or as a parameter to grid search through
        * verbose      - The verbose level when training the individual parameters
        * seed         - The seed to be applied to each model

      Every combination of the values in param_grid is used to build a model
      with create_model, which is then fitted and evaluated on trainX/trainY
      (the score is therefore a training loss, not a validation loss).

      Returns a dictionary with:
        * results         - array with the mean of model.evaluate(...) per combination
        * parameters      - array of the parameter dictionaries, in the same order
        * best_results    - the lowest value in results
        * best_parameters - the parameter dictionary which produced it
                            (the first one, if several tie)
    '''
    # creates an array of the keys (list(...) keeps this working when keys() is a view)
    keys = np.array(list(param_grid.keys()))
    # feeds the keys, the full dictionary, and a fresh dictionary to the helper
    # to get an array of dictionaries of every possible combination of parameters
    indiv_params = combine_parameters(keys, param_grid, dict())
    # An array which will keep track of the loss of each iteration of the parameters
    results = np.zeros(indiv_params.size)
    # Runs each of the possible combinations
    for i in range(indiv_params.size):
        # prevents changing the original set of parameters
        indiv_param = indiv_params[i].copy()
        # batch_size and epochs belong to the fitting of the model, not to its
        # creation/compilation, so they are taken out of the creation arguments
        batch_size = indiv_param.pop('batch_size', batch_size)
        epochs = indiv_param.pop('epochs', epochs)
        # if a seed has been specified, use it
        if seed is not None:
            np.random.seed(seed)
        # Creates the model and trains it
        model = create_model(**indiv_param)
        model.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, verbose=verbose)
        # Stores this iteration's results
        results[i] = np.mean(model.evaluate(trainX, trainY, verbose=3))
        # Clears the backend session in an attempt to save memory with each model
        K.clear_session()
    # index of the lowest loss; argmin returns a single index even when several
    # combinations tie (the previous argwhere/reshape(1) raised in that case)
    best_i = int(np.argmin(results))

    best_results = results[best_i]
    best_params  = indiv_params[best_i]
    # Returns all the results and their respective parameters in order
    return dict(results=results,
                parameters=indiv_params,
                best_results = best_results,
                best_parameters = best_params)

def combine_parameters(keys, full_dict, specific_dict):
    '''
      Given the keys to combine, a dictionary mapping each key to its candidate
      values, and a dictionary of already-fixed entries, it returns an array of
      dictionaries showing all unique combinations of the values

      e.g. given the keys ['a', 'b'] and the dictionary (a=[0, 1], b=[0, 1]),
      it will return the following array
      array([
        dict(a=0, b=0),
        dict(a=0, b=1),
        dict(a=1, b=0),
        dict(a=1, b=1)
      ])

      * keys          - sequence (list, tuple or numpy array) of the keys of
                        full_dict to combine; the first key varies slowest
      * full_dict     - maps every key to an iterable of candidate values
      * specific_dict - entries copied into every resulting dictionary; it is
                        never modified

      Returns a 1-D numpy object array with one new dictionary per
      combination. With no keys at all the only combination is a copy of
      specific_dict.
    '''
    import itertools

    # numpy arrays are turned into plain Python values so the dictionary keys
    # are ordinary strings; any other sequence is simply materialised
    if isinstance(keys, np.ndarray):
        keys = keys.tolist()
    else:
        keys = list(keys)

    # cartesian product of the candidate values, first key outermost
    value_lists = [full_dict[key] for key in keys]
    combinations = []
    for values in itertools.product(*value_lists):
        combination = dict(specific_dict)
        combination.update(zip(keys, values))
        combinations.append(combination)

    # filled element by element so numpy always stores the dictionaries as objects
    result = np.empty(len(combinations), dtype=object)
    for position, combination in enumerate(combinations):
        result[position] = combination
    return result

In [8]:

# builds AND compiles the forecasting network for a given dropout rate
def create_model(dropout_rate = 0.3):
    """Return a compiled Sequential model.

    Architecture, in order: input Dropout over samples shaped
    ``(stations, look_back)``, an LSTM with 256 units that returns the full
    sequence, four (Dropout, Dense(128)) pairs, and a final
    Dense(look_forward) layer. ``dropout_rate`` is used for every Dropout
    layer and for the LSTM's recurrent dropout.

    The model is compiled with ``top_heavy_loss`` as both loss and metric and
    an RMSprop optimizer (lr=0.0001).
    """
    hidden_blocks = 4
    hidden_units  = 128

    net = Sequential()
    net.add(Dropout(dropout_rate, input_shape=(stations, look_back)))
    net.add(LSTM(256, recurrent_dropout=dropout_rate, return_sequences=True))
    for _ in range(hidden_blocks):
        net.add(Dropout(dropout_rate))
        net.add(Dense(hidden_units))
    net.add(Dense(look_forward))

    optimizer = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)
    net.compile(loss=top_heavy_loss, optimizer=optimizer, metrics=[top_heavy_loss])
    return net

# The Parameters to Search Through
dropout_rate = [0.1, 0.2, 0.3, 0.4]

# Defines it as a dictionary
param_grid = dict(dropout_rate=dropout_rate)

# Begins the Search For the Optimal Properties
grid_result = custom_grid_search(trainX, trainY, create_model, epochs=40, batch_size=10, param_grid=param_grid)

# clears the backend session left over from the last model of the search
K.clear_session()

# summarize results
print("Best: %f using %s" % (grid_result['best_results'], str(grid_result['best_parameters'])))

loss = grid_result['results']
params = grid_result['parameters']

# one line per combination: "<loss> <parameters>"
# (formatted print call so the cell runs on both Python 2 and Python 3)
for l, p in zip(loss, params):
    print("%s %s" % (l, p))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 2

In [24]:

# Re-print the stored grid-search results, one "<loss> <parameters>" line each
loss = grid_result['results']
params = grid_result['parameters']

# formatted print call so the cell runs on both Python 2 and Python 3
for l, p in zip(loss, params):
    print("%s %s" % (l, p))

0.0686208955824 {'epochs': 5, 'batch_size': 20}
0.0745190880896 {'epochs': 5, 'batch_size': 40}
0.0822066692752 {'epochs': 5, 'batch_size': 60}
0.0817830799339 {'epochs': 5, 'batch_size': 80}
0.066416942638 {'epochs': 10, 'batch_size': 20}
0.0739703796702 {'epochs': 10, 'batch_size': 40}
0.0696016757825 {'epochs': 10, 'batch_size': 60}
0.0752343996508 {'epochs': 10, 'batch_size': 80}
0.0682121706503 {'epochs': 20, 'batch_size': 20}
0.0690306955101 {'epochs': 20, 'batch_size': 40}
0.0732031173911 {'epochs': 20, 'batch_size': 60}
0.0684854398054 {'epochs': 20, 'batch_size': 80}
0.0652724351692 {'epochs': 30, 'batch_size': 20}
0.067623995967 {'epochs': 30, 'batch_size': 40}
0.0665243551986 {'epochs': 30, 'batch_size': 60}
0.0748547133832 {'epochs': 30, 'batch_size': 80}
0.0652830774526 {'epochs': 40, 'batch_size': 20}
0.0654454992653 {'epochs': 40, 'batch_size': 40}
0.0658704056574 {'epochs': 40, 'batch_size': 60}
0.0800149064402 {'epochs': 40, 'batch_size': 80}


In [7]:
# Clears the Keras backend session (used elsewhere in this notebook to try to free memory between models)
K.clear_session()

In [None]:
'''

Best: 0.064020 using {'epochs': 40, 'batch_size': 20}
0.0669075761485 {'epochs': 10, 'batch_size': 20}
0.0718928324607 {'epochs': 10, 'batch_size': 40}
0.0704617485391 {'epochs': 10, 'batch_size': 60}
0.0653233264552 {'epochs': 20, 'batch_size': 20}
0.06774414633   {'epochs': 20, 'batch_size': 40}
0.0692400951415 {'epochs': 20, 'batch_size': 60}
0.0648723018114 {'epochs': 30, 'batch_size': 20}
0.0720279630803 {'epochs': 30, 'batch_size': 40}
0.0669507312808 {'epochs': 30, 'batch_size': 60}
0.0640200174334 {'epochs': 40, 'batch_size': 20}
0.0727290603898 {'epochs': 40, 'batch_size': 40}
0.0679111944213 {'epochs': 40, 'batch_size': 60}


'''