<h1 align="center" style="background-color:#616161;color:white">RNN Model</h1>

Adapted from: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/recurrent_network.py

<h3 style="background-color:#616161;color:white">0. Setup</h3>

<div style="background-color:white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Input Parameters</div>

In [6]:
# Root path
#root = "C:/DS/Github/MusicRecommendation"  # BA, Windows
root = "/home/badrul/git/EventPrediction" # BA, Linux

<div style="background-color:white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Common Libraries</div>

In [7]:
# Core
import numpy as np
import pandas as pd
from IPython.core.debugger import Tracer    # Used for debugging
import logging
from random import *

# File and database management
import csv
import os
import sys
import json
import sqlite3
from pathlib import Path

# Date/Time
import datetime
import time
#from datetime import timedelta # Deprecated

# Visualization
import matplotlib.pyplot as plt             # Quick
%matplotlib inline

# Misc
import random
import importlib
import warnings
warnings.filterwarnings('ignore')
import logging
logging.basicConfig(filename='RNN.log',level=logging.DEBUG)

#-------------- Custom Libs -----------------#
os.chdir(root)

# Import the codebase module
fPath = root + "/1_codemodule"
if fPath not in sys.path: sys.path.append(fPath)

# Custom Libs
import coreCode as cc
import lastfmCode as fm

<div style="background-color:white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Page Specific Libraries</div>

In [3]:
# Data science (comment out if not needed)
#from sklearn.manifold import TSNE
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.python.framework import ops
ops.reset_default_graph()
from sklearn import metrics
from sklearn import preprocessing

<div style="background-color:#white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Load settings</div>

In [4]:
settingsDict =  cc.loadSettings()
dbPath = root + settingsDict['mainDbPath_xsml']
fmSimilarDbPath = root + settingsDict['fmSimilarDbPath']
fmTagsDbPath = root + settingsDict['fmTagsDbPath']
trackMetaDbPath = root + settingsDict['trackmetadata']
periodGranularity = int(settingsDict['periodGranularity'])

<div style="background-color:#white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Set parameters</div>

In [13]:
# Model setup
loadFromSave = False
n_steps = 336 # timesteps
n_hidden = 160 # hidden layer num of features
n_classes = 2
batch_size = 336*4
learning_rate = 0.001
cellType = "BasicLSTMCell"  # Choose: TimeFreqLSTMCell BasicLSTMCell

#fieldList="UserID, t, HrsFrom5pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat, t1,t2,t3,t4,t5,t10,t12hrs,t23_5hrs,t24hrs,t24_5hrs,t1wk,t2wks,t3wks,t4wks"
fieldList="UserID, t, HrsFrom5pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat, t10,t12hrs,t24hrs,t1wk,t2wks,t3wks,t4wks"

# Training parameters
training_iterations = 1
sample_iteration = 1
display_step = 5
userSample =1
timeStepSkip =5000

tblName='tblTimeSeriesData'  # 'tblName='tblTimeSeriesDataDummy'
trainModel = 1

# Dummy test
dummyTest = True
if dummyTest: 
    tblName='tblTimeSeriesDataDummy'  # 'tblName='tblTimeSeriesDataDummy'
    fieldList="UserID, t, HrsFrom5pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat,t1wk,t2wks,t3wks,t4wks"
    trainModel = 1  # Which RNN model to use
    n_steps = 5 # has to be 1 for train model 2
    batch_size = 5  # Num of 'time steps' in model 2
    training_iterations = 10

<h3 style="background-color:#616161;color:white">1. Build Model</h3>

In [14]:
def RNN(x, weights, biases,n_steps):
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
    
    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    if trainModel == 1:
        x = tf.unstack(x, n_steps, 1)  # See https://stackoverflow.com/questions/45278276/tensorflow-lstm-dropout-implementation-shape-problems/45279243#45279243
    elif trainModel == 2:
        x = tf.unstack(x, batch_size, 0)
    
    # Define a lstm cell with tensorflow
    #lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    if cellType == "BasicLSTMCell":
        lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
        outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    elif cellType == "TimeFreqLSTMCell":
        lstm_cell =rnn.TimeFreqLSTMCell(n_hidden, use_peepholes=True, feature_size= 22, forget_bias=1.0)
        outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    elif cellType == "GridLSTMCell":
        lstm_cell =rnn.GridLSTMCell(n_hidden, forget_bias=1.0)
        outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)        
    else:
        print("Did not recognize {}".format(cellType))
    # Get lstm cell output
    

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

def buildGraph(n_steps,n_input):
    global x, y, pred, cost, optimizer,accuracy
    
    tf.reset_default_graph()
    # tf Graph input
    
    x = tf.placeholder("float", [None, n_steps, n_input])
    y = tf.placeholder("float", [None, n_classes])

    # Define weights
    weights = {
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    }
    biases = {
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    pred = RNN(x, weights, biases,n_steps)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    
    
    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [15]:
n_input = len(fieldList.split(","))-2 # -2 as we drop UserID and t

# Build graph
buildGraph(n_steps,n_input = n_input)
# Initializing the variables
sess = tf.Session()
init = tf.global_variables_initializer()
saver = tf.train.Saver()
if loadFromSave:
    saver.restore(sess,'./3_Data/saves/model.ckpt')
else:
    sess.run(init)

<h3 style="background-color:#616161;color:white">2. Train Model</h3>

In [16]:
# Launch the graph
def trainModel2(_X, _Y, sess,training_iterations = 5):
    # Training cycle
    l=np.shape(_X)[0]
    predictions=np.zeros([l,n_classes])
    idx=0
    
    for i in range(training_iterations):
        if (training_iterations % 10) == 0: print("Now on iteration {}".format(i))
        #logging.info("Now on iteration {}".format(i))
        # Loop over all rows in order of earliest to latest
        for pos in range(0+batch_size, l,3):
            if (pos % 1000) == 0: 
                print("Now on pos {} of {} ({}%)".format(pos,l,round((pos/l)*100,2)))
                logging.info("Now on pos {} of {} ({}%)".format(pos,l,round((pos/l)*100,2)))
            
            # For each row, collect the previous batch_size num of rows
            batch_x = _X[pos-batch_size:pos].reshape((batch_size, n_steps, n_input)) 
            batch_y = _Y[pos].reshape((-1, n_classes)) 
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            
            # Test at the same time
            p = sess.run(pred, feed_dict={x: batch_x, y: batch_y})
            predictions[idx]=p=p.reshape(-1,n_classes)[-1]
            
    
        # Calculate loss & accuracy
        acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
        
        predictions = np.argmax(predictions,1).reshape(-1,1)
        labels=np.argmax(_Y,1).reshape(-1,1)
        
        print ("Iter {}. Minibatch Loss={:.6f}".format(i, loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
        Tracer()()
        print(metrics.classification_report(labels,predictions))  # Need to feed it yTest not yTest_OneHot here

In [30]:
# Launch the graph
def trainModel1(_X, _Y, sess,_iterations = 5, numOfPeriods = 10, trainPeriods = None):
    # Num of periodss = batch size
    
    # Training cycle
    totalRows=np.shape(_X)[0]
    XCols=np.shape(_X)[1]
    YCols=np.shape(_Y)[1]
    depth = n_steps
    
    # If no trainPeriods were provided generate your own
    if trainPeriods is None:
        # Select periods where we will always get enough history to go with it
        trainPeriods = random.sample(range(batch_size+depth, totalRows), numOfPeriods)
    else:
        trainPeriods = trainPeriods + batch_size+depth-1
        numOfPeriods = len(trainPeriods)
    
    
    for i in range(_iterations):
        if (_iterations % 1) == 0: print("Now on iteration {}".format(i))
        #logging.info("Now on iteration {}".format(i))
        
        # Pre-Initialize batch arrays
        batch_x=np.zeros([numOfPeriods,depth,XCols])
        batch_y=np.zeros([numOfPeriods,YCols])
        
        batch_row =0
        for idx1 in trainPeriods:            
            # Each period will have one batch
            # Logging
            if (idx1 % 1) == 0: 
                timeNow =str(datetime.datetime.now())
                #print("{} Now training on Period {} ({}%)".format(timeNow,idx1,round((batch_row/numOfPeriods)*100,2)))
                logging.info("{} Now training on Period {} ({}%)".format(timeNow, idx1,round((batch_row/numOfPeriods)*100,2)))

            batch_x[batch_row] = _X[idx1-depth:idx1].reshape(1,depth,XCols)
            batch_y[batch_row] = _Y[idx1]
            batch_row +=1
            
         # Train
        batch_y = batch_y.reshape((-1, YCols))
        
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        
        # Calculate loss & accuracy
        acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
        loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
        print ("Iter {}. Minibatch Loss={:.6f}".format(i, loss) + ", Training Accuracy= " + "{:.5f}".format(acc))
        
        predictions = 1*sess.run(pred, feed_dict={x: batch_x, y: batch_y})
        predictions = np.argmax(predictions,1).reshape(-1,1)
        batch_y = np.argmax(batch_y,1).reshape(-1,1)
        print(metrics.classification_report(batch_y,predictions))  # Need to feed it yTest not yTest_OneHot here
        TestHiddenPeriods

In [31]:
## Begin training
numOfPeriods =20

for s in range(sample_iteration):
    timeNow =str(datetime.datetime.now())
    print('{} Now processing sample {}'.format(timeNow,s))
    logging.info('{} Now processing sample {}'.format(timeNow, s))
    
    if dummyTest:
        users = pd.DataFrame(data={'userID': [3]})
    else:
        users=cc.getUsers(dbPath).sample(userSample)
        
    for usr in users.itertuples():
        timeNow =str(datetime.datetime.now())
        print('{} Now processing User {}'.format(timeNow, usr.userID))
        logging.info('{} Now processing User {}'.format(timeNow, usr.userID))
        xTrain, yTrain_onehot, xTest, yTest_onehot = cc.getHiddenPeriodsData(dbPath,tblName,fieldList,oneHot=True,periodGranularity=periodGranularity,userIDs=[usr.userID])
        
        if xTrain is not None:
            if np.shape(yTrain_onehot)[1] !=1:  # Results have to have both 0's and 1's in them
                if trainModel ==1:
                    #trainModel1(xTrain, yTrain_onehot, sess,training_iterations, numOfPeriods=numOfPeriods)
                elif trainModel ==2:
                    trainModel2(xTrain, yTrain_onehot, sess,training_iterations)
        saver.save(sess,"./3_Data/saves/model.ckpt")

TestHiddenPeriods()       
print('Ok')

2017-08-08 00:50:49.210706 Now processing sample 0
2017-08-08 00:50:49.213495 Now processing User 3
Now on iteration 0
Iter 0. Minibatch Loss=1.768659, Training Accuracy= 0.30000
             precision    recall  f1-score   support

          0       0.33      0.40      0.36         5
          1       0.25      0.20      0.22         5

avg / total       0.29      0.30      0.29        10

Now on iteration 1
Iter 1. Minibatch Loss=1.577910, Training Accuracy= 0.30000
             precision    recall  f1-score   support

          0       0.33      0.40      0.36         5
          1       0.25      0.20      0.22         5

avg / total       0.29      0.30      0.29        10

Now on iteration 2
Iter 2. Minibatch Loss=1.491086, Training Accuracy= 0.50000
             precision    recall  f1-score   support

          0       0.50      0.40      0.44         5
          1       0.50      0.60      0.55         5

avg / total       0.50      0.50      0.49        10

Now on iteration 3

<h3 style="background-color:blue;color:white"></h3>

<h3 style="background-color:#616161;color:white">3. Test Model</h3>

In [21]:
def TestPredictions2(_X,_Y,sess):
    
    l=np.shape(_X)[0]
    labels=np.zeros([l,n_classes]) # Calculate the number of lead Y's we will end up with
    predictions=np.zeros([l,n_classes])
    
    # Testing cycle
    print("Now testing {} rows".format(l))
    logging.info("Now testing {} rows".format(l))
    
    # Pad rows at the beginning so we can get a prediction for every entry
    padX=np.zeros([batch_size-1,_X.shape[1]])
    _Y = _Y.reshape(-1,n_classes)
    padY=np.zeros([batch_size-1,n_classes])
    
    _X = np.append(padX, _X, axis=0)
    _Y = np.append(padY, _Y, axis=0)
    l=np.shape(_X)[0]  # Update length
    
    # Pre allocate arrays and regain sanity
    idx =0
    for pos in range(0+batch_size, l):
        
        if (pos % 10000) == 0: 
            print("Now on pos {} of {} ({}%)".format(pos,l,round((pos/l)*100,2)))
            #logging.info("Now on pos {} of {} ({}%)".format(pos,l,round((pos/l)*100,2)))

        # For each row, collect the previous batch_size num of rows
        batch_x = _X[pos-batch_size:pos]
        batch_y = _Y[pos]
        #if np.mod(len(batch_x),batch_size) == 0:batch_x, batch_y, _ = cc.padRows(batch_x, batch_y, batch_size)
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))  # Rehsape into 3d, even though n_steps is 1            
        batch_y = batch_y.reshape((-1, n_classes))  # Rehsape into 3d, even though n_steps is 1            

        # Predict!
        p = 1*sess.run(pred, feed_dict={x: batch_x})
        p=p.reshape(-1,n_classes)
        
        #print(_Y[pos], batch_y)
        predictions[idx] = p[-1]
        labels[idx] = batch_y[-1]
        idx+=1
    
    
    # Remove padding and return predictions
    predictions = np.argmax(predictions,1)
    predictions = predictions.reshape(-1,1)
    labels = np.argmax(labels,1)
    labels = labels.reshape(-1,1)
    
    return predictions, labels

In [25]:
# Launch the graph
def TestPredictions1(_X, _Y, sess, numOfPeriods = 10, testPeriods = None):
    # Training cycle
    totalRows=np.shape(_X)[0]
    XCols=np.shape(_X)[1]
    YCols=np.shape(_Y)[1]
    depth = n_steps
    
    # If no test periods were provided generate your own
    if testPeriods is None:
        # Select periods where we will always get enough history to go with it
        testPeriods = random.sample(range(batch_size+depth, totalRows), numOfPeriods)
    else:
        testPeriods = testPeriods + batch_size+depth-1
        numOfPeriods = len(testPeriods)
     
    # Pre-Initialize batch arrays
    batch_x=np.zeros([numOfPeriods,depth,XCols])
    batch_y=np.zeros([numOfPeriods,YCols])

    batch_row =0
    for idx1 in testPeriods:            
        if (idx1 % 1) == 0: 
            timeNow =str(datetime.datetime.now())
            #print("{} Now testing on period {} ({}%)".format(timeNow,idx1,round((batch_row/numOfPeriods)*100,2)))
            logging.info("{} Now testing period {} ({}%)".format(timeNow, idx1,round((batch_row/numOfPeriods)*100,2)))

        batch_x[batch_row] = _X[idx1-depth:idx1].reshape(1,depth,XCols)
        batch_y[batch_row] = _Y[idx1]
        batch_row +=1

    # Predict for this period
    predictions = 1*sess.run(pred, feed_dict={x: batch_x, y: batch_y})
    predictions = np.argmax(predictions,1).reshape(-1,1)
    batch_y = np.argmax(batch_y,1).reshape(-1,1)
    return predictions, batch_y

<div style="background-color:#white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Test hidden periods</div>

In [26]:
def TestHiddenPeriods(hiddenTestPeriods=50):

    print('{} Hidden Periods\n'.format(hiddenTestPeriods))
    print ("Cell type= {}, learning_rate = {}, Iterations = {}, batch size = {}, Steps = {}, Hidden Layers = {}, Classes = {}\n".format(cellType,learning_rate,training_iterations,batch_size, n_steps ,n_hidden,n_classes))

    if trainModel == 1:
        predictions,labels = TestPredictions1(xTrain,yTrain_onehot,sess,numOfPeriods=hiddenTestPeriods)
    elif trainModel == 2:
        predictions,labels = TestPredictions2(xTrain,yTrain_onehot,sess)

    predictions = predictions.reshape(-1,1)
    labels = labels.reshape(-1,1)

    print(np.shape(labels),np.shape(predictions))    
    print(metrics.classification_report(labels,predictions))  # Need to feed it yTest not yTest_OneHot here

TestHiddenPeriods()

100 Hidden Periods

Cell type= BasicLSTMCell, learning_rate = 0.001, Iterations = 10, batch size = 5, Steps = 5, Hidden Layers = 160, Classes = 2

(100, 1) (100, 1)
             precision    recall  f1-score   support

          0       0.63      0.88      0.73        64
          1       0.27      0.08      0.13        36

avg / total       0.50      0.59      0.51       100



<div style="background-color:#white; color:#008000; font-family: 'Courier New, Monospace;font-weight: bold">Test hidden users</div>

In [None]:
# Get hidden users
users=cc.getUsers(dbPath).sample(2)
u=users.userID.values
_,_,xTest, yTest_onehot = cc.getHiddenPeriodsData(dbPath,tblName,fieldList,oneHot=True,periodGranularity=periodGranularity,userIDs=u)
print ('{} users selected for testing. Total rows {}'.format(len(u), len(xTest)))

xTest2, yTest2_onehot, testDf2 = cc.getHiddenUsersData(dbPath,tblName,fieldList,oneHot= True,firstNPerc=0.5,periodGranularity=periodGranularity)

In [None]:
print('\nHidden Users')
predictions = getTestPredictions(xTest2,yTest2_onehot)
print(metrics.classification_report(yTest2_onehot[:,1],predictions))  # Need to feed it yTest not yTest_OneHot here
print(np.shape(xTest2),np.shape(yTest2_onehot))

<h3 style="background-color:#616161;color:white">Appendices</h3>

<h4 style="background-color:#616161;color:white">Results</h4>

In [None]:
def load_data(seq_len, normalise_window):
    f = open("3_Data/sinwave.csv", 'rb').read()
    data = f.decode().split('\n')

    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])

    result = np.array(result)

    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]

    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))  

    return [x_train, y_train, x_test, y_test]

seq_len = 1
xdTrain, ydTrain, Xd_test, yd_test = load_data(seq_len, True)

In [8]:
# Dummy test
n_steps = 1 # timesteps
n_hidden = 160 # hidden layer num of features
n_classes = 1
batch_size = 20 #1344
training_iterations=100
learning_rate = 0.001
cellType = "BasicLSTMCell"  # Choose: TimeFreqLSTMCell BasicLSTMCell

#fieldList="UserID, t, HrsFrom5pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat, t1,t2,t3,t4,t5,t10,t12hrs,t23_5hrs,t24hrs,t24_5hrs,t1wk,t2wks,t3wks,t4wks"
fieldList="UserID, t, HrsFrom5pm, isSun,isMon,isTue,isWed,isThu,isFri,isSat, t10,t12hrs,t24hrs,t1wk,t2wks,t3wks,t4wks"
n_input = 1

# Build graph
buildGraph(n_steps,n_input = n_input)
# Initializing the variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

trainModel(xdTrain,ydTrain,sess,training_iterations)

xdDummy = xdTrain.reshape(-1,1)
predictions = getTestPredictions(xdTrain,ydTrain)
print(metrics.classification_report(ydTrain[:,1],predictions))  # Need to feed it yTest not yTest_OneHot here

NameError: name 'buildGraph' is not defined

In [11]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
import numpy

# date-time parsing function for loading the dataset
def parser(x):
	return datetime.strptime('190'+x, '%Y-%m')

# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
	df = DataFrame(data)
	columns = [df.shift(i) for i in range(1, lag+1)]
	columns.append(df)
	df = concat(columns, axis=1)
	df.fillna(0, inplace=True)
	return df

# create a differenced series
def difference(dataset, interval=1):
	diff = list()
	for i in range(interval, len(dataset)):
		value = dataset[i] - dataset[i - interval]
		diff.append(value)
	return Series(diff)

# invert differenced value
def inverse_difference(history, yhat, interval=1):
	return yhat + history[-interval]

# scale train and test data to [-1, 1]
def scale(train, test):
	# fit scaler
	scaler = MinMaxScaler(feature_range=(-1, 1))
	scaler = scaler.fit(train)
	# transform train
	train = train.reshape(train.shape[0], train.shape[1])
	train_scaled = scaler.transform(train)
	# transform test
	test = test.reshape(test.shape[0], test.shape[1])
	test_scaled = scaler.transform(test)
	return scaler, train_scaled, test_scaled

# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
	new_row = [x for x in X] + [value]
	array = numpy.array(new_row)
	array = array.reshape(1, len(array))
	inverted = scaler.inverse_transform(array)
	return inverted[0, -1]

# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
	X, y = train[:, 0:-1], train[:, -1]
	X = X.reshape(X.shape[0], 1, X.shape[1])
	model = Sequential()
	model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]), stateful=True))
	model.add(Dense(1))
	model.compile(loss='mean_squared_error', optimizer='adam')
	for i in range(nb_epoch):
		model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
		model.reset_states()
	return model

# make a one-step forecast
def forecast_lstm(model, batch_size, X):
	X = X.reshape(1, 1, len(X))
	yhat = model.predict(X, batch_size=batch_size)
	return yhat[0,0]

# load dataset
series = read_csv('3_Data/shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)

# transform data to be stationary
raw_values = series.values
diff_values = difference(raw_values, 1)

# transform data to be supervised learning
supervised = timeseries_to_supervised(diff_values, 1)
supervised_values = supervised.values

# split data into train and test-sets
train, test = supervised_values[0:-12], supervised_values[-12:]

# transform the scale of the data
scaler, train_scaled, test_scaled = scale(train, test)

# repeat experiment
repeats = 30
error_scores = list()
for r in range(repeats):
	# fit the model
	lstm_model = fit_lstm(train_scaled, 1, 3000, 4)
	# forecast the entire training dataset to build up state for forecasting
	train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1, 1)
	lstm_model.predict(train_reshaped, batch_size=1)
	# walk-forward validation on the test data
	predictions = list()
	for i in range(len(test_scaled)):
		# make one-step forecast
		X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
		yhat = forecast_lstm(lstm_model, 1, X)
		# invert scaling
		yhat = invert_scale(scaler, X, yhat)
		# invert differencing
		yhat = inverse_difference(raw_values, yhat, len(test_scaled)+1-i)
		# store forecast
		predictions.append(yhat)
	# report performance
	rmse = sqrt(mean_squared_error(raw_values[-12:], predictions))
	print('%d) Test RMSE: %.3f' % (r+1, rmse))
	error_scores.append(rmse)

# summarize results
results = DataFrame()
results['rmse'] = error_scores
print(results.describe())
results.boxplot()
pyplot.show()

ValueError: time data '190Sales of shampoo over a three year period' does not match format '%Y-%m'