In [1]:
import numpy as np

from keras.models import Model, Sequential
from keras.layers import Dense, Activation, Masking, Input, Dropout
from keras.layers import LSTM
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from keras import initializers
import keras.backend as K

import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 16})

import time
from datetime import datetime

Using TensorFlow backend.


In [2]:
# Load the minute-bar table as a 2-D float array. Later cells compare column 0
# against Unix timestamps and assign columns 1 and 2 to `volume` and `price`.
stock = np.loadtxt(fname='../data/AAPL_170801-180823_minute.txt')
fs = 1.0  # presumably the sampling frequency, in 1/min
stock.shape

(104550, 3)

In [3]:
def _first_index_after(timestamps, threshold):
    """Return the index of the first entry strictly greater than `threshold`.

    `np.argmax` on an all-False mask returns 0, which would silently turn an
    open-ended upper bound into an empty selection. When no entry exceeds
    `threshold` this helper returns ``len(timestamps)`` instead, so using the
    result as a slice end keeps every row.
    """
    later = timestamps > threshold
    if not later.any():
        return timestamps.shape[0]
    return int(np.argmax(later))


# Unix timestamps (seconds) bounding the rows to keep.
st_time = 1500000000
end_time = 1535000000

# NOTE: datetime.fromtimestamp renders in the machine's local time zone.
print('Getting data from {} to {}'.format(datetime.fromtimestamp(st_time),
                                          datetime.fromtimestamp(end_time)))

# Rows are selected as the half-open index range [i_st, i_end).
i_st = _first_index_after(stock[:, 0], st_time)
print(i_st)
i_end = _first_index_after(stock[:, 0], end_time)
print(i_end)

# Column layout of `stock`: 0 -> timestamp, 1 -> volume, 2 -> price.
price = stock[i_st:i_end, 2]
volume = stock[i_st:i_end, 1]
unxtime = stock[i_st:i_end, 0]

print('Total {} points'.format(unxtime.shape[0]))

Getting data from 2017-07-13 19:40:00 to 2018-08-22 21:53:20
0
104160
Total 104160 points


## Create the dataset

In [4]:
print(unxtime.shape)

# Calendar features for every sample, one row per timestamp:
# [day of month, month, ISO weekday (Monday=1 ... Sunday=7)].
# datetime.fromtimestamp converts using the machine's local time zone.
time_info = np.array([
    [stamp.day,
     stamp.month,
     stamp.isoweekday(),
     # stamp.second + 60*stamp.minute + stamp.hour*60*60 - 23460   # sec from market opening
     ]
    for stamp in (datetime.fromtimestamp(t) for t in unxtime)
])
print(time_info.shape)

# Feature matrix handed to the windowing step. Only the price column is
# enabled; `volume` and `time_info` are kept as commented-out options.
raw = np.column_stack((price,
                       # volume,
                       # time_info
                       ))
print(raw.shape)

(104160,)
(104160, 3)
(104160, 1)


In [5]:
# Windowing configuration. Values are counted in rows of `raw`; the original
# comments give them in minutes (assumes one row per minute -- TODO confirm).
observation_time = 60*18  # length of each input window
training_jumps = 30       # stride between the ends of consecutive windows
pred_delay = 100          # offset from the end of the input window to the target centre
pred_acc = 10             # half-width of the target window around the target centre

dataset = []
y_true = []

# The target slice ends at i + pred_delay + pred_acc, so the last admissible
# window end must leave room for it. Without the `pred_acc` term the final
# target windows could run past the end of `raw` and be silently truncated.
i_last = raw.shape[0] - pred_delay - pred_acc
for i in range(observation_time, i_last + 1, training_jumps):
    i_st_eval = i - observation_time
    i_end_eval = i
    i_pred = i + pred_delay

    # Input: observation window with column 0 (price) centred on its own mean;
    # any further columns are passed through unchanged.
    window = raw[i_st_eval:i_end_eval]
    price_mean = np.mean(window[:, 0])
    aux = np.column_stack((window[:, 0] - price_mean, window[:, 1:]))
    dataset.append(aux)

    # Target: mean and spread of the price around `i_pred`, expressed relative
    # to the mean of the observation window.
    future_price = raw[i_pred - pred_acc:i_pred + pred_acc, 0]
    price_avg = np.mean(future_price) - price_mean
    price_rms = np.std(future_price - price_mean)

    y_true.append([price_avg, price_rms])

dataset = np.array(dataset)
y_true = np.array(y_true)

print(dataset.shape)

(3433, 1080, 1)


In [6]:
# Split the windows in the order they were generated: the first 80% are used
# for training and the remaining 20% for validation (no shuffling here).
i_stop_train = int(0.8*dataset.shape[0])

x_train, x_val = dataset[:i_stop_train], dataset[i_stop_train:]
y_train, y_val = y_true[:i_stop_train], y_true[i_stop_train:]

In [7]:
# Largest target spread (second target column) in the training set.
y_train[:, 1].max()

1.2568188652307837

## LSTM declaration

In [8]:
# Sequence regressor: two stacked LSTMs followed by a single output unit.
# The input shape is (timesteps, features), taken from the training windows.
inputs = Input((x_train.shape[1], x_train.shape[2]))

# The first LSTM returns the full sequence so the second one can consume it;
# the second keeps only its final state.
mdl = LSTM(100, activation='tanh', return_sequences=True)(inputs)
mdl = LSTM(50, activation='tanh')(mdl)

# The regression target is the future mean price minus the mean of the
# observation window, so it can be negative. A ReLU output can never produce
# negative values and, with a single bias-free unit, stops receiving gradient
# as soon as its pre-activation is negative. The output is therefore linear.
out = Dense(1,
            activation='linear',
            kernel_initializer='glorot_normal',
            use_bias=False
           )(mdl)

model = Model(inputs=inputs, outputs=out)

In [9]:
def AssistedGaussNLL(y_train, NETout):
    """Mean squared residual of the prediction, in units of the target spread.

    Parameters
    ----------
    y_train : tensor of shape (batch, 2)
        Column 0 is the target value, column 1 its spread (sigma).
    NETout : tensor of shape (batch, 1)
        Network prediction of the target value.

    Returns
    -------
    Scalar tensor: mean over the batch of ((target - prediction) / sigma)**2.
    """
    target = y_train[:, 0]
    # Guard against a perfectly flat target window (sigma == 0), which would
    # otherwise yield inf/NaN and abort training.
    sigma = K.maximum(y_train[:, 1], K.epsilon())
    # Drop the trailing unit axis of the prediction. Subtracting a (batch, 1)
    # tensor from a (batch,) tensor would broadcast to (batch, batch) and
    # compare every prediction with every target.
    prediction = NETout[:, 0]
    aux = (target - prediction) / sigma
    return K.mean(K.square(aux))

In [10]:
# Train with RMSprop against the custom sigma-normalised residual loss, then
# print the layer/parameter table.
model.compile(loss=AssistedGaussNLL, optimizer='rmsprop')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1080, 1)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1080, 100)         40800     
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                30200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 50        
Total params: 71,050
Trainable params: 71,050
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Predictions of the not-yet-fitted model on the training windows.
# Note: this rebinds `out`, which previously named the model's output tensor.
out = model.predict(x=x_train, batch_size=300)

In [12]:
# NumPy cross-check of the loss on the predictions above: residual between
# prediction and target, scaled by the target spread, then mean of squares.
aux = (out[:, 0] - y_train[:, 0]) / y_train[:, 1]
np.mean(aux ** 2)

724.6399469588296

In [13]:
# Fit for at most 20 epochs on shuffled mini-batches of 32 windows, tracking
# the loss on the held-out validation windows after every epoch.
fit_report = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    shuffle=True,
    validation_data=(x_val, y_val),
    callbacks=[
        # Stop once val_loss has not improved by at least 0.005 for 3 epochs.
        EarlyStopping(monitor='val_loss', min_delta=0.005, patience=3, verbose=1),
        # Cut the learning rate tenfold after 2 epochs without a 0.01 improvement.
        ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_delta=0.01, patience=2, verbose=1),
        # Abort immediately if the loss becomes NaN.
        TerminateOnNaN()
    ])

Train on 2746 samples, validate on 687 samples
Epoch 1/20

KeyboardInterrupt: 