### Exploration of LSTMs


In [1]:
from __future__ import print_function
import re
import urllib.request
import zipfile
import lxml.etree
import itertools
import numpy as np
import tensorflow as tf
import time
import pickle
import os
import random
import sys
import h5py
import pickle
import pandas as pd
from utils import *

from keras.models import Sequential, load_model, model_from_json
from keras.layers import Dense, Activation, Dropout, LSTM, GRU
from keras.optimizers import RMSprop, Adam
from keras.utils.data_utils import get_file
from keras.utils import np_utils
from keras.layers.wrappers import TimeDistributed
from keras.callbacks import LearningRateScheduler
%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [29]:
time_percentage = 0.8
explained_variance = 0.9
df = pd.read_csv("data/rescuetime_data-ac-min.csv")
data_pd = Clean_DF(df)
data_pd.clean_data(time_percentage=time_percentage)
data_pd.clean_df = data_pd.clean_df.reset_index()
data_pd.get_pca(explained_variance=explained_variance)
data_pd.get_day_time()

In [30]:
# Saving the objects:
with open('data_pd_80.pickle', 'wb') as f:  # Python 3: open(..., 'wb')
    pickle.dump(data_pd, f)

# # Getting back the objects:
with open('data_pd_80.pickle', 'rb') as f:  # Python 3: open(..., 'rb')
    data_pd = pickle.load(f)


In [31]:
print("Dataset size:", data_pd.clean_df.shape,'\n')
print("Number of apps that consume", time_percentage*100, "% of all users time: ",len(data_pd.popular_apps), '\n')
print("Cleaned dataset columns:",'\n', data_pd.clean_df.columns.values, '\n')
print("Number of components that explain", explained_variance*100,"% of the data: ",data_pd.pca_data.shape[1], '\n')

Dataset size: (16704, 9) 

Number of apps that consume 80.0 % of all users time:  41 

Cleaned dataset columns: 
 ['Date' 'Time Spent (seconds)' 'Activity' 'Category' 'Productivity'
 'Activity Vector' 'Productivity Score' 'Day' 'Time'] 

Number of components that explain 90.0 % of the data:  18 



In [99]:
FLAGS = tf.flags
FLAGS.look_back = 24
FLAGS.batch_size = 16
FLAGS.inputlength = data_pd.activity_vector.shape[1]
np.random.seed(7)
dataset = data_pd.activity_vector

In [100]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]
        dataX.append(a)
        dataY.append(dataset[i + look_back, :])
    return np.array(dataX), np.array(dataY)

In [101]:
# split into train and test sets
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
print(len(train), len(test))

11191 5513


In [102]:
trainX, trainY = create_dataset(train, FLAGS.look_back)
testX, testY = create_dataset(test, FLAGS.look_back)

In [103]:
trainX.shape

(11166, 24, 41)

## Build model

In [104]:
# RNN parameters
N_HIDDEN = 64
LEARNING_RATE = 0.01
EPOCHS = 100

In [110]:
print('Building training model...')
model = Sequential()
model.add(GRU(N_HIDDEN, dropout_U=0.2, dropout_W=0.2, input_shape=(FLAGS.look_back, FLAGS.inputlength)))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(FLAGS.inputlength))  # Add another dense layer with the desired output size.
model.add(Activation('softmax'))
model.compile(loss='mean_squared_error', optimizer = RMSprop(lr=0.002, clipnorm=5))

print(model.summary()) # Convenient function to see details about the network model.

Building training model...
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
gru_12 (GRU)                     (None, 64)            20352       gru_input_12[0][0]               
____________________________________________________________________________________________________
dense_21 (Dense)                 (None, 64)            4160        gru_12[0][0]                     
____________________________________________________________________________________________________
activation_19 (Activation)       (None, 64)            0           dense_21[0][0]                   
____________________________________________________________________________________________________
dense_22 (Dense)                 (None, 41)            2665        activation_19[0][0]              
________________________________________________________________

## Build inference model
Note: the inference model will have only one time step as we will feed each predicted character back into the rnn as a seed for predicting the next character. It will also be stateful so as to 'remember' previous states.

In [111]:
model.fit(x=trainX, y=trainY, nb_epoch=20, batch_size=FLAGS.batch_size, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20

KeyboardInterrupt: 

In [84]:
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [86]:
trainPredict[0]

array([  1.65477803e-03,   1.51121395e-03,   1.12472328e-04,
         8.75886995e-03,   1.69630497e-04,   1.50397781e-03,
         1.27916879e-04,   1.01695303e-04,   3.54351662e-03,
         9.76161682e-04,   7.64763355e-01,   4.19076860e-05,
         6.75960502e-04,   1.52758486e-03,   9.45378910e-04,
         2.96587765e-04,   5.96035784e-03,   3.15765705e-04,
         8.00360576e-05,   3.89985391e-03,   1.69718692e-06,
         8.39300337e-05,   4.60189302e-04,   7.23003119e-04,
         3.88497929e-03,   3.75331321e-04,   1.16563439e-02,
         3.08648916e-03,   5.67283621e-03,   1.88760750e-03,
         4.09416147e-02,   1.17878400e-01,   2.59796926e-03,
         1.69030356e-03,   2.31537339e-03,   3.19881318e-03,
         1.38332311e-03,   1.03186190e-04,   1.42653915e-03,
         2.26945779e-03,   1.39531121e-03], dtype=float32)

In [88]:
trainX[1]

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.78333333,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.21666667,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ]])