In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.utils import normalize
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import Orthogonal

from sklearn.preprocessing import MinMaxScaler, scale
import matplotlib.pyplot as plt

In [2]:
AAPL = pd.read_csv('../data/transactions/Tech/AAPL.csv')  
stockdata = AAPL


In [3]:
def getStock(stock):
    date = pd.to_datetime(stock["Date"])
    stock = stock.drop(['Date'],axis=1)
    stock= stock.astype('float64')

    return scale(stock), scale(stock['Close'])


#Samples. One sequence is one sample. A batch is comprised of one or more samples.
#Time Steps. One time step is one point of observation in the sample.
#Features. One feature is one observation at a time step.
def processData(stock, input_unit,output_unit):
    data,result = getStock(stock)
    X,Y = [],[]
    for i in range(len(data)-input_unit-output_unit):
        X.append(data[i:(i+input_unit)])
        temp = [result[j+i+input_unit] for j in range(output_unit)]
        Y.append(temp)
    return np.array(X),np.array(Y)

def category(data):
    data[data>=0] = 1
    data[data<0] = -1
    return data
    
def precentage(prediction, real):
    pred = prediction -real
    pred = np.squeeze(prediction, axis=1)
    pred = category(pred)
   
    return pred
def realPrecentage(stock):
    value = pd.Series(stock['Close']-stock['Close'].shift(1), index=stock.index) 
    value = value.bfill() 
    value = np.array(value) 
    val = category(value)
   
    return val
def compare(pred, val):
    t = len(pred)
    countTrue = 0
    for i in range(t):
        if pred[i] == val[i]:
            countTrue +=1
    return countTrue/t




In [4]:
time_size = 5
X,y = processData(stockdata,time_size,1)
train_size = int(X.shape[0]*0.80)
X_train,X_test = X[:int(X.shape[0]*0.80)],X[int(X.shape[0]*0.80):]
y_train,y_test = y[:int(y.shape[0]*0.80)],y[int(y.shape[0]*0.80):]
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)


(1001, 5, 6)
(1001, 1)
(251, 5, 6)
(251, 1)


In [9]:
model = tf.keras.models.load_model('LSTM')
if model == None:
    print('train from begining')
    model = Sequential() # most common
    model.add(LSTM(64, input_shape = (time_size,6), return_sequences=True)) 
    model.add(Dropout(0.2))
    model.add(LSTM(32, input_shape = (time_size,6), return_sequences=False)) 
    model.add(Dropout(0.2))
    model.add(Dense(16, kernel_initializer='uniform', activation = 'relu'))
    model.add(Dense(1, activation = 'linear'))

    model.compile(optimizer='adam', loss='mean_squared_error') 
callback = EarlyStopping(monitor='val_loss', patience=10,verbose=1,mode='auto')
history = model.fit(X_train,y_train,epochs=300,validation_data=(X_test,y_test),shuffle=False,callbacks=[callback])

Train on 1001 samples, validate on 251 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20

KeyboardInterrupt: 

In [6]:
#plt.plot(history.history['loss'],color="blue")
#plt.plot(history.history['val_loss'],color="green")
#plt.show()

In [7]:
prediction = model.predict(X_test)

real = y_test
#plt.plot(real,color="blue")
#plt.plot(prediction,color="orange")
#plt.show()

inputg,outputg = processData(stockdata,time_size,1)
prediction = model.predict(inputg)
pred = precentage(prediction, outputg)
val = realPrecentage(stockdata)
print(compare(pred, val))
#print(prediction)
#print(real)

0.5039936102236422


In [8]:
tf.keras.models.save_model(model,'LSTM')
