In [2]:
import pandas_datareader.data as web
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy import hstack
import warnings
import os
from fredapi import Fred
warnings.filterwarnings('ignore')

import itertools
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
import pmdarima as pm
from pmdarima import model_selection

from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

%matplotlib inline

Using TensorFlow backend.


In [3]:
# Date range for the price download: a fixed start date up to the moment this cell runs.
# Because `end` is "now", the number of downloaded rows changes from run to run.
start = datetime(year=2000, month=1, day=31)
end = datetime.now()

In [4]:
# Download daily AMT prices from Alpha Vantage through pandas-datareader.
# The API key is read from the ALPHAVANTAGE_API_KEY environment variable instead of
# being committed in the notebook; set it before running this cell.
# NOTE(review): the key that used to be hard-coded here should be treated as leaked and rotated.
AMT = web.DataReader('AMT', 'av-daily', start=start, end=end,
                     api_key=os.getenv('ALPHAVANTAGE_API_KEY'))

In [5]:
# Remove the last five rows of the frame (mutates AMT in place, so re-running
# this cell removes five more).
AMT.drop(AMT.index[-5:], inplace=True)

In [6]:
# Mid price: average of the 'high' and 'low' columns, rounded to two decimals.
AMT['mid'] = ((AMT['high'] + AMT['low']) / 2).round(2)

In [7]:
# Flip the row order of the frame.
# NOTE(review): confirm the order DataReader returns rows in; if it is already
# oldest-first, this makes the frame newest-first and the positional train/test
# split below would then train on the most recent rows.
AMT = AMT.iloc[::-1]

In [8]:
# Drop every row that contains at least one missing value (in place).
AMT.dropna(axis=0, how='any', inplace=True)

In [9]:
# Positional split of the mid-price series: the first 3500 rows become the
# training array, everything after that the test array.
# `.values` replaces `Series.as_matrix()`, which was deprecated in pandas 0.23
# and removed in pandas 1.0.
# NOTE(review): `.values` may share memory with AMT; the in-place scaling done
# on `train` later could write through to AMT['mid'] — confirm whether a copy is wanted.
train = AMT['mid'].iloc[:3500].values
test = AMT['mid'].iloc[3500:].values

## Normalize Data

In [10]:
# Create the scaler and turn both splits into single-column 2-D arrays.
scaler = MinMaxScaler()
train, test = (split.reshape(-1, 1) for split in (train, test))

In [29]:
# Half the number of rows in the frame.
AMT.shape[0] / 2

2514.5

In [30]:

# Normalise the training data window by window: each consecutive block of
# `smoothing_window_size` rows is min-max scaled using only its own values.
# The loop is bounded by len(train) instead of a hard-coded row count, so every
# row (including a shorter final window) is scaled exactly once and no empty
# slice is ever passed to the scaler.
# After the loop `scaler` is left fitted on the final window.
# This rewrites `train` in place, so re-running the cell rescales already-scaled data.
smoothing_window_size = 875

for di in range(0, len(train), smoothing_window_size):
    window = train[di:di + smoothing_window_size, :]
    scaler.fit(window)
    train[di:di + smoothing_window_size, :] = scaler.transform(window)

In [31]:
# Flatten the training array back to 1-D, then scale the test split with the
# scaler in its current fitted state and flatten that as well.
train = train.ravel()
test = scaler.transform(test).ravel()

In [34]:
# Smooth the training series in place with an exponential moving average:
# EMA_t = gamma * x_t + (1 - gamma) * EMA_{t-1}, starting from EMA = 0.
# The loop runs over len(train) rather than a hard-coded row count, so it stays
# correct if the size of the training split changes.
EMA = 0.0
gamma = 0.1
for ti in range(len(train)):
    EMA = gamma * train[ti] + (1 - gamma) * EMA
    train[ti] = EMA

# Smoothed training values followed by the (unsmoothed) test values.
all_mid_data = np.concatenate([train, test], axis=0)

In [39]:
# Data generator used to feed the model during training.
# It produces a sequence of `num_unroll` consecutive batches (one per unroll step);
# each input batch comes with a matching batch of labels.

class DataGeneratorSeq(object):
    """Produce unrolled (input, label) batches from a 1-D price sequence.

    The usable part of the sequence (its length minus ``num_unroll``) is divided
    into ``batch_size`` equally long segments, and one cursor is started at the
    beginning of each segment. Every call to :meth:`next_batch` reads one value
    per cursor and then advances each cursor by one position.

    Args:
        prices: indexable 1-D sequence of values.
        batch_size: number of parallel cursors, i.e. values per batch.
        num_unroll: number of consecutive batches returned by
            :meth:`unroll_batches`.
    """

    # Each label is read from the input position plus a random offset drawn
    # from [0, _MAX_LABEL_OFFSET).
    _MAX_LABEL_OFFSET = 5

    def __init__(self, prices, batch_size, num_unroll):
        self._prices = prices
        self._prices_length = len(self._prices) - num_unroll
        self._batch_size = batch_size
        self._num_unroll = num_unroll
        self._segments = self._prices_length // self._batch_size
        self._cursor = [offset * self._segments for offset in range(self._batch_size)]

    def next_batch(self):
        """Return one batch of inputs and labels and advance every cursor.

        Returns:
            (batch_data, batch_labels): two float32 arrays of shape
            ``(batch_size,)``. ``batch_data[b]`` is the value at cursor ``b``;
            ``batch_labels[b]`` is the value 0 to ``_MAX_LABEL_OFFSET - 1``
            positions further on, chosen at random.
        """
        batch_data = np.zeros((self._batch_size), dtype=np.float32)
        batch_labels = np.zeros((self._batch_size), dtype=np.float32)
        last_index = len(self._prices) - 1

        for b in range(self._batch_size):
            # A cursor that has reached the end of the usable range is moved to
            # a random earlier position.
            if self._cursor[b] + 1 >= self._prices_length:
                self._cursor[b] = np.random.randint(0, (b + 1) * self._segments)

            batch_data[b] = self._prices[self._cursor[b]]

            # Clamp the look-ahead to the last valid index: when num_unroll is
            # small, cursor + offset could otherwise run past the end of
            # `prices` and raise IndexError.
            label_index = self._cursor[b] + np.random.randint(0, self._MAX_LABEL_OFFSET)
            batch_labels[b] = self._prices[min(label_index, last_index)]

            self._cursor[b] = (self._cursor[b] + 1) % self._prices_length

        return batch_data, batch_labels

    def unroll_batches(self):
        """Return ``num_unroll`` consecutive batches.

        Returns:
            (unroll_data, unroll_labels): two lists of length ``num_unroll``
            holding the arrays produced by successive :meth:`next_batch` calls.
        """
        unroll_data, unroll_labels = [], []
        for _ in range(self._num_unroll):
            data, labels = self.next_batch()
            unroll_data.append(data)
            unroll_labels.append(labels)

        return unroll_data, unroll_labels

    def reset_indices(self):
        """Move every cursor to a random position.

        Cursor ``b`` is drawn from ``[0, min((b + 1) * segments, prices_length - 1))``.
        """
        for b in range(self._batch_size):
            self._cursor[b] = np.random.randint(0, min((b + 1) * self._segments, self._prices_length - 1))



# Quick check of the generator: 5 values per batch, 5 unroll steps over the
# training series; print the inputs and labels of every step.
# The generator draws from NumPy's global RNG, so seed it to make the printed
# batches reproducible between runs.
np.random.seed(0)

dg = DataGeneratorSeq(train, 5, 5)
u_data, u_labels = dg.unroll_batches()

for ui, (dat, lbl) in enumerate(zip(u_data, u_labels)):
    print('\n\nUnrolled index %d'%ui)
    print('\tInputs: ',dat )
    print('\n\tOutput:',lbl)



Unrolled index 0
	Inputs:  [0.09116118 0.09795801 0.18099165 0.17986889 0.2645283 ]

	Output: [0.31437966 0.26289052 0.18099165 0.16823266 0.25316912]


Unrolled index 1
	Inputs:  [0.17736569 0.18537089 0.16289249 0.16188201 0.24806233]

	Output: [0.25029925 0.18537089 0.24056786 0.16823266 0.24429448]


Unrolled index 2
	Inputs:  [0.25029925 0.26289052 0.24056786 0.16823266 0.242967  ]

	Output: [0.31437966 0.33660147 0.37574446 0.2514094  0.2688351 ]


Unrolled index 3
	Inputs:  [0.31437966 0.33660147 0.3119325  0.2514094  0.24429448]

	Output: [0.31437966 0.4972903  0.3119325  0.2514094  0.2688351 ]


Unrolled index 4
	Inputs:  [0.36988565 0.39713717 0.37574446 0.31455562 0.25316912]

	Output: [0.36988565 0.4972903  0.5529047  0.37000182 0.28470916]
