In [1]:
import time
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import CuDNNLSTM,BatchNormalization
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from matplotlib import pyplot
from matplotlib import pyplot as plt
import pickle
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def loss_plot(history):
    """Draw the training and validation loss curves stored in ``history``.

    ``history`` must expose a ``history`` mapping containing ``'loss'`` and
    ``'val_loss'`` sequences. The validation curve is labelled ``'test'`` in
    the legend.
    """
    recorded = history.history
    for metric_key, legend_label in (('loss', 'train'), ('val_loss', 'test')):
        pyplot.plot(recorded[metric_key], label=legend_label)
    pyplot.legend()
    pyplot.show()
def plot_results_multiple(predicted_data, true_data, x_test_date, prediction_len):
    """Plot the true series and overlay every multi-step prediction run.

    Args:
        predicted_data: sequence of prediction runs (lists or arrays). Run ``i``
            is drawn starting at x-position ``i * prediction_len``.
        true_data: observed values, drawn as a single line.
        x_test_date: accepted for call compatibility; not used by this function.
        prediction_len: horizontal offset, in samples, between consecutive runs.
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad each run with None so it is shifted to its correct start on the x axis.
    for i, data in enumerate(predicted_data):
        padding = [None] * (i * prediction_len)
        # list(data) accepts NumPy arrays as well as lists. Only the first run
        # carries a label so the legend shows a single 'Prediction' entry.
        ax.plot(padding + list(data), label='Prediction' if i == 0 else None)
    # Build the legend once, after all lines exist (also when there are no runs).
    ax.legend()
    plt.show()

In [None]:
def load_data(seq_len, data_dir='../../processed_data/', floor='Level 9',
              train_fraction=0.90):
    """Build sliding-window train/test arrays of hourly occupied-seat counts.

    The four monthly pickles are concatenated, the ``'Space'`` column is summed
    per (floor, seat, hour), a seat-hour is flagged occupied when that sum is
    at least 3, and the flags are summed per (floor, hour). The series for one
    floor is then cut into overlapping windows of ``seq_len + 1`` values: the
    first ``seq_len`` values are the model input, the last one is the target.

    Args:
        seq_len: number of past hourly values in each input window (>= 1).
        data_dir: directory holding the ``<month>.p`` pickle files.
        floor: value of the ``floor`` column to keep.
        train_fraction: share of the windows (taken from the start, in time
            order) used for training; the remainder is the test set.

    Returns:
        ``[x_train, y_train, x_test, y_test, x_test_date]`` where the ``x``
        arrays have shape ``(samples, seq_len, 1)``, the ``y`` arrays have
        shape ``(samples,)`` and ``x_test_date`` is the index of the last
        ``len(x_test)`` hourly rows, i.e. the timestamps of ``y_test``.

    Raises:
        ValueError: if ``seq_len`` < 1, ``train_fraction`` is outside [0, 1],
            or the selected floor has fewer than ``seq_len + 1`` hourly rows.
            pandas also raises ValueError when the hourly index has gaps
            (see the ``freq`` assignment below).
    """
    import os  # local import keeps this block self-contained

    if seq_len < 1:
        raise ValueError('seq_len must be at least 1, got %r' % (seq_len,))
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError('train_fraction must be within [0, 1], got %r' % (train_fraction,))

    data_list = ['Barclays-Noida-3.2-Jan', 'Barclays-Noida-3.2-Feb',
                 'Barclays-Noida-3.2-March', 'Barclays-Noida-3.2-April']

    # Read every month first and concatenate once; the context manager closes
    # each file handle.
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # from a trusted source.
    monthly_frames = []
    for month_name in data_list:
        with open(os.path.join(data_dir, month_name + '.p'), 'rb') as handle:
            monthly_frames.append(pickle.load(handle))
    data = pd.concat(monthly_frames)
    data = data.drop(['Country', 'Department', 'Resource Type'], axis=1)

    # Per seat and hour: total 'Space'; the seat counts as occupied unless the
    # total is below 3.
    seat_hours = (
        data.groupby(['floor', 'seatName', pd.Grouper(key='Datetime', freq='h')])['Space']
        .sum()
        .reset_index()
    )
    seat_hours['isOccupied'] = np.where(seat_hours['Space'] < 3, 0, 1)

    # Per floor and hour: number of occupied seats.
    floor_hours = (
        seat_hours.groupby(['floor', pd.Grouper(key='Datetime', freq='h')])['isOccupied']
        .sum()
        .reset_index()
    )
    one_floor = floor_hours.loc[floor_hours['floor'] == floor].set_index('Datetime')
    # Declaring the frequency makes pandas check that the index really is
    # hourly without gaps (it raises ValueError otherwise).
    one_floor.index.freq = 'h'
    trial_data = np.array(one_floor['isOccupied'])

    # Slice the series into overlapping windows of seq_len inputs + 1 target.
    sequence_length = seq_len + 1
    # "+ 1" keeps the final window, whose target is the last observation; this
    # is what makes y_test line up with x_test_date below.
    n_windows = len(trial_data) - sequence_length + 1
    if n_windows < 1:
        raise ValueError(
            'need at least %d hourly rows for floor %r, found %d'
            % (sequence_length, floor, len(trial_data))
        )
    result = np.array(
        [trial_data[start:start + sequence_length] for start in range(n_windows)]
    )

    # Chronological split: leading windows train, trailing windows test.
    print('len size', result.shape[0])
    row = int(round(train_fraction * result.shape[0]))
    train = result[:row, :]

    # Inputs are every column but the last; the target is the last column.
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, 'before reshape')

    # LSTM layers expect (samples, timesteps, features); there is one feature.
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, 'after reshape')
    x_test_date = one_floor.tail(len(x_test)).index

    return [x_train, y_train, x_test, y_test, x_test_date]

def build_model(layers):
    """Build and compile a single-layer LSTM regressor.

    Args:
        layers: sequence read as ``[n_features, seq_len, <ignored>, n_outputs]``.
            ``(layers[1], layers[0])`` is the LSTM input shape
            (timesteps, features), ``layers[1]`` is also used as the number of
            LSTM units, and ``layers[3]`` is the width of the Dense output.
            ``layers[2]`` is not read.

    Returns:
        The compiled ``Sequential`` model (MSE loss, Adam optimizer).
    """
    model = Sequential()
    model.add(LSTM(input_shape=(layers[1], layers[0]), units=layers[1]))
    model.add(Dense(layers[3]))
    # Linear output: the targets built by load_data are unscaled hourly counts
    # of occupied seats, which a sigmoid (bounded to 0..1) cannot represent.
    model.add(Activation("linear"))

    start = time.time()
    # Mean absolute error is reported instead of accuracy, which is a
    # classification metric and carries no meaning for this regression loss.
    model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mae"])
    print("> Compilation Time : ", time.time() - start)
    return model

def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Roll the model forward ``prediction_len`` steps from evenly spaced windows.

    Every ``prediction_len``-th row of ``data`` seeds one run. Within a run the
    model predicts one value, the oldest value is dropped from the window and
    the prediction is inserted at position ``window_size - 1``, so later steps
    are predicted from earlier predictions.

    Returns a list of runs; each run is a list of ``prediction_len`` values
    taken from element ``[0, 0]`` of ``model.predict``.
    """
    run_count = int(len(data) / prediction_len)
    all_runs = []
    for run_idx in range(run_count):
        # Seed window for this run.
        window = data[run_idx * prediction_len]
        run_values = []
        for _ in range(prediction_len):
            # Add a leading batch axis before handing the window to the model.
            next_value = model.predict(window[newaxis, :, :])[0, 0]
            run_values.append(next_value)
            # Slide forward: drop the oldest step, then put the prediction in.
            window = np.insert(window[1:], [window_size - 1], next_value, axis=0)
        all_runs.append(run_values)
    return all_runs

In [None]:
def make_prediction(epochs=50, seq_len=10, batch_size=50, show_plot=True):
    """Load the data, train the LSTM, predict over the test set and plot.

    Args:
        epochs: number of training epochs passed to ``model.fit``.
        seq_len: input window length; also used as the number of steps
            predicted per run.
        batch_size: batch size passed to ``model.fit``.
        show_plot: when true, draw the loss curves and the prediction overlay.
    """
    lstm_dim2 = 100  # forwarded as layers[2], which build_model does not read
    pred_len = seq_len
    print('> Loading data... ')

    X_train, y_train, X_test, y_test, x_test_date = load_data(seq_len)
    print('> Data Loaded. Compiling...')

    model = build_model([1, seq_len, lstm_dim2, 1])

    # Time only the fit call so the reported figure really is training time
    # (it excludes data loading, compilation and the prediction loop).
    fit_start_time = time.time()
    history = model.fit(X_train, y_train, batch_size=batch_size, shuffle=False,
                        epochs=epochs, validation_split=0.20)
    print('Training duration (s) : ', time.time() - fit_start_time)

    predictions = predict_sequences_multiple(model, X_test, seq_len, pred_len)
    if show_plot:
        loss_plot(history)
        plot_results_multiple(predictions, y_test, x_test_date, pred_len)

In [None]:
# Run the whole pipeline: load the data, train the model, predict and plot.
make_prediction()