In [None]:
import os
import time
import warnings
import datetime
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pandas.plotting import autocorrelation_plot
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sqlalchemy import create_engine

%matplotlib inline

# Hide messy TensorFlow warnings.
# NOTE(review): TensorFlow reads this variable when it is first loaded, so
# setting it after the keras imports above may come too late — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Silences every Python warning for the rest of the session (not only NumPy's).
warnings.filterwarnings("ignore")
# Connection string for your db. Set the SQLALCHEMY_CONN_STRING environment
# variable to point at a real database so credentials never have to be typed
# into (and saved with) the notebook; the fallback is the local demo database.
sqlalchemy_conn_string = os.environ.get(
    'SQLALCHEMY_CONN_STRING',
    'postgresql://test:test@localhost:5432/test')

In [None]:
# Database engine built from sqlalchemy_conn_string; load_data() reads its
# query through this module-level `engine`.
engine = create_engine(sqlalchemy_conn_string)

In [None]:
def load_data(sequence_length, normalise_window=True, train_fraction=0.9):
    """Load the per-day row counts and cut them into train/test windows.

    The series is sliced into overlapping windows of ``sequence_length + 1``
    points: the first ``sequence_length`` points of a window are the model
    input and the last point is the target.

    Args:
        sequence_length: number of input time steps per sample.
        normalise_window: when True every window is rescaled with
            ``normalise_windows`` and the per-window base values are returned
            so the data can be mapped back to its original range.
        train_fraction: fraction of the windows (taken from the start of the
            series) used for training; the remainder is the test set.

    Returns:
        ``[x_train, y_train, x_test, y_test, nrm_test, nrm_train]`` (note that
        the test norms come before the train norms). The x arrays have shape
        ``(samples, sequence_length, 1)``. The training windows are shuffled;
        ``nrm_train`` is shuffled with the same permutation so that
        ``nrm_train[i]`` still belongs to ``x_train[i]`` / ``y_train[i]``.
        Both norm entries are ``None`` when ``normalise_window`` is False.

    Raises:
        ValueError: if the series is too short to build a single window.
    """
    # rewrite the query to pull the data you want to predict.
    # it should be an ordered series.
    # Dates without any rows are simply absent from this result; join against
    # a generated date sequence if the gaps must be filled. Be aware that a
    # window starting with 0 cannot be normalised (division by zero).
    query = '''
        select count(*) as numbers
        from mytable
        group by created::date
        order by created::date;
    '''
    numbers = pd.read_sql_query(query, engine)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values works on every
    # pandas version. A flat float array keeps the windows one-dimensional.
    data = numbers['numbers'].values.astype(float)

    window_size = sequence_length + 1
    # "+ 1" so the final complete window (ending on the newest observation)
    # is included as well.
    n_windows = len(data) - window_size + 1
    if n_windows < 1:
        raise ValueError(
            'Need at least {} data points to build one window, got {}'.format(
                window_size, len(data)))
    windows = [data[start:start + window_size] for start in range(n_windows)]
    print('> N Samples = {}'.format(len(windows)))

    if normalise_window:
        windows, norms = normalise_windows(windows)
    else:
        # Nothing to undo later when the windows are left in original units.
        norms = None

    result = np.array(windows)
    n_training_rows = int(round(train_fraction * result.shape[0]))

    # Shuffle through an explicit permutation so exactly the same reordering
    # can be applied to the norms of the training windows.
    order = np.random.permutation(n_training_rows)
    train = result[:n_training_rows][order]
    x_train = train[:, :-1]
    y_train = train[:, -1]

    # The test windows stay in chronological order.
    test = result[n_training_rows:]
    x_test = test[:, :-1]
    y_test = test[:, -1]

    if norms is None:
        nrm_train = None
        nrm_test = None
    else:
        nrm_train = [norms[k] for k in order]
        nrm_test = norms[n_training_rows:]

    # Add the trailing feature axis: (samples, time steps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    print('> X Train shape= {}'.format(x_train.shape))
    print('> Y Train shape= {}'.format(y_train.shape))
    print('> X Test shape = {}'.format(x_test.shape))
    print('> Y Test shape = {}'.format(y_test.shape))

    return [x_train, y_train, x_test, y_test, nrm_test, nrm_train]

def normalise_windows(window_data):
    """Rescale every window relative to its own first value.

    Each point ``p`` of a window becomes ``p / window[0] - 1``, so every
    normalised window starts at 0.

    Args:
        window_data: iterable of windows. A window is a sequence of numbers
            or a numpy array of shape ``(n,)`` or ``(n, 1)``.

    Returns:
        ``(normalised_data, norms)`` where ``normalised_data`` is a list of
        lists of floats (scaled and shifted data) and ``norms`` is the list of
        first values, used to put the data back to its original range.

    Raises:
        ZeroDivisionError: if a window starts with 0.
    """
    # scaled and shifted data.
    normalised_data = []
    # norms are used to put the data back to its original range.
    norms = []
    for window in window_data:
        values = np.asarray(window, dtype=float)
        # Flatten (n, 1) column windows explicitly instead of relying on the
        # implicit conversion of one-element arrays to float.
        values = values.reshape(len(values))
        base = float(values[0])
        if base == 0.0:
            raise ZeroDivisionError(
                'cannot normalise a window whose first value is 0')
        norms.append(base)
        normalised_data.append([float(value) / base - 1 for value in values])
    return normalised_data, norms

def denormalise_windows(normalised_data, norms):
    """Inverse of normalise_windows: map normalised entries back to original units.

    Entries and norms are paired by position (pairing stops at the shorter of
    the two) and each entry ``w`` is turned into ``(w + 1.0) * norm``.

    Returns:
        A list with one restored entry per pair.
    """
    return [(entry + 1.0) * base for entry, base in zip(normalised_data, norms)]

In [None]:
def build_model(layers):
    """Build and compile a stacked three-layer LSTM regression model.

    Args:
        layers: sequence of sizes:
            ``layers[0]`` features per time step,
            ``layers[1]`` time steps per input window (also the number of
            units of the first LSTM),
            ``layers[2]`` units of the second LSTM,
            ``layers[3]`` units of the third LSTM,
            ``layers[4]`` size of the Dense output layer. When only four
            entries are given, ``layers[3]`` is used for the output as well,
            which is what this function did before.

    Returns:
        The compiled ``Sequential`` model (mse loss, rmsprop optimizer).
    """
    # The output layer must match the target width, not the width of the last
    # LSTM layer.
    output_size = layers[4] if len(layers) > 4 else layers[3]

    model = Sequential()

    # Layer sizes are passed positionally, like the other LSTM layers below.
    model.add(LSTM(
        layers[1],
        input_shape=(layers[1], layers[0]),
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        layers[2],
        return_sequences=True))
    model.add(Dropout(0.2))

    # Last recurrent layer: only its final output feeds the Dense layer.
    model.add(LSTM(
        layers[3],
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(output_size))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

In [None]:
def predict_sequences_multiple(model, data, prediction_offset, prediction_length):
    """Roll the model forward from regularly spaced starting windows.

    Starting at every ``prediction_offset``-th window of ``data``, the model is
    asked for one value, the oldest step of the window is dropped, the new
    value is placed at the end, and the process repeats ``prediction_length``
    times.

    Returns:
        A list with one list of ``prediction_length`` predicted values per
        starting window.
    """
    # Where a new prediction goes once the oldest step has been removed:
    # the last position of the frame.
    tail_position = data[0].shape[0] - 1
    n_sequences = int(len(data) / prediction_offset)

    all_sequences = []
    for seq_no in range(n_sequences):
        frame = data[seq_no * prediction_offset]
        forecast = []
        for _ in range(prediction_length):
            # predict() expects a batch axis; take the first output of the
            # single sample.
            next_value = model.predict(frame[np.newaxis, :, :])[0, 0]
            forecast.append(next_value)
            frame = np.insert(frame[1:], [tail_position], next_value, axis=0)
        all_sequences.append(forecast)
    return all_sequences

def plot_results_multiple(predicted_data, true_data, y_norms, window_len):
    """Plot the true series and every predicted sequence in original units.

    Args:
        predicted_data: list of predicted sequences (normalised values); the
            i-th sequence is drawn starting at x position ``i * window_len``.
        true_data: normalised true values, one per entry of ``y_norms``.
        y_norms: per-sample base values used to undo the normalisation; the
            i-th sequence is rescaled with ``y_norms[i * window_len]``.
        window_len: spacing, in samples, between the starts of consecutive
            predicted sequences.
    """
    fig = plt.figure(facecolor='white', figsize=(16,8))
    ax = fig.add_subplot(111)
    # put it back into the original units
    ax.plot(denormalise_windows(true_data, y_norms), label='True Data')
    for i, data in enumerate(predicted_data):
        start = i * window_len
        # Pad the list of predictions to shift it in the graph to its correct start
        padding = [None] * start
        # inline denormalization with the base value of the starting window
        base = y_norms[start]
        restored = [base * (x + 1.0) for x in data]
        # Label only the first sequence so the legend gets a single
        # 'Prediction' entry; labels starting with '_' are left out of it.
        ax.plot(padding + restored, label='Prediction' if i == 0 else '_nolegend_')
    ax.set_xlabel('Sample')
    ax.set_ylabel('Value')
    # The labels above are only visible once a legend is drawn.
    ax.legend()
    plt.show()

In [None]:
global_start_time = time.time()
epochs  = 400
batch_size = 25
# Fraction of the training samples Keras holds back for validation.
validation_split = 0.05

# Number of input time steps per sample.
sequence_length = 28
# Steps rolled forward for each predicted sequence.
prediction_length = 7
# Spacing between the starting windows of consecutive predicted sequences.
prediction_offset = 7

print('> Loading data... ')

X_train, y_train, X_test, y_test, nrm_test, nrm_train = load_data(sequence_length, normalise_window=True)

print('> Data Loaded. Compiling...')

model = build_model([1, sequence_length, int(sequence_length*2), int(sequence_length*4), 1])

# Keep the History object: with verbose=0 it is the only record of the
# training and validation loss per epoch.
# NOTE(review): nb_epoch is the older Keras spelling of this argument; newer
# releases call it epochs — confirm against the installed version.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    nb_epoch=epochs,
    validation_split=validation_split,
    verbose=0)

In [None]:
# plot the predictions on the test set
# Report the elapsed time before predicting, so the figure covers loading,
# compiling and fitting but not the roll-forward predictions below.
print('> Training duration (s) : ', time.time() - global_start_time)
predictions = predict_sequences_multiple(model, X_test, prediction_offset, prediction_length)
plot_results_multiple(predictions, y_test, nrm_test, prediction_offset)

In [None]:
# plot the predictions on the training set
# NOTE(review): load_data shuffles the training windows, so X_train / y_train
# are not in time order here and neighbouring points on this plot are
# unrelated samples — confirm this plot is still wanted.
# This also overwrites the `predictions` computed for the test set.
predictions = predict_sequences_multiple(model, X_train, prediction_offset, prediction_length)
plot_results_multiple(predictions, y_train, nrm_train, prediction_offset)