In [None]:
# Ticker symbol of the stock to model; it is passed to create_df() further down.
ticker = 'AAPL'

## Library

In [None]:
# Uncomment the lines below to install the third-party packages if they are missing.
# !pip install yfinance
# !pip install fredapi

In [None]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

import yfinance as yf

import tensorflow as tf

from sklearn.metrics import precision_recall_fscore_support, accuracy_score

## reference: https://pyimagesearch.com/2020/03/23/using-tensorflow-and-gradienttape-to-train-a-keras-model/
## reference: https://regenerativetoday.com/implementation-of-simplernn-gru-and-lstm-models-in-keras-and-tensorflow-for-an-nlp-project/

## Stock data

In [None]:
def label_up_down(row):
    """Return 1 when the row's 'Close' is strictly above its 'Lag1', else 0.

    `row` is anything indexable by the keys 'Close' and 'Lag1' (for example a
    DataFrame row). A comparison that evaluates falsy (equal values, or a NaN
    on either side) yields 0.
    """
    went_up = row['Close'] > row['Lag1']
    return 1 if went_up else 0

def create_df(ticker, lag=10, start_date='2012-01-01', end_date='2022-03-09'):
    """Build a lagged-close feature table with an up/down label for ``ticker``.

    Price history for ``ticker`` and for the Dow Jones index (``^DJI``) is
    fetched through yfinance. Each returned row holds the previous ``lag``
    closing prices of ``ticker``, the previous close of ``^DJI`` and a binary
    label describing the row's own close.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        lag: Number of lagged close columns to create (``Lag1`` .. ``Lag<lag>``).
            Must be at least 1 because the label compares against ``Lag1``.
        start_date: Start of the requested history, forwarded to ``history``.
        end_date: End of the requested history, forwarded to ``history``.

    Returns:
        DataFrame with columns ``Lag1`` .. ``Lag<lag>``, ``DJI_Lag1``, ``Label``
        (in that order). ``Label`` is 1 when the close is strictly above the
        previous close and 0 otherwise. Only dates present in both histories
        are kept, and rows containing any missing value are dropped.

    Raises:
        ValueError: If ``lag`` is smaller than 1.
    """
    if lag < 1:
        raise ValueError('lag must be >= 1, got ' + str(lag))

    # The date range is fixed by start/end, so request daily bars through
    # `interval` instead of also passing a (conflicting) `period`.
    prices = yf.Ticker(ticker).history(interval='1d', start=start_date, end=end_date)

    ## add lag columns
    lag_cols = ['Lag' + str(k) for k in range(1, lag + 1)]
    # Work on an explicit copy of the close column so the assignments below
    # never write into a slice of the downloaded frame.
    lagged = prices[['Close']].copy()
    for k, col_name in enumerate(lag_cols, start=1):
        lagged[col_name] = lagged['Close'].shift(k)

    ## add label
    # Vectorised comparison; a missing Lag1 compares as False and therefore
    # yields 0 (those rows are removed by dropna() below anyway).
    lagged['Label'] = (lagged['Close'] > lagged['Lag1']).astype(int)

    ## add control variables (^DJI)
    dji = yf.Ticker('^DJI').history(interval='1d', start=start_date, end=end_date)
    dji_lag1 = dji['Close'].shift(1).rename('DJI_Lag1')
    # Inner join on the date index keeps only dates present in both series.
    res = pd.merge(lagged, dji_lag1, left_index=True, right_index=True)

    res = res.dropna()
    # Drop the same-day close (it defines the label) and fix the column order.
    res = res[lag_cols + ['DJI_Lag1', 'Label']]

    return res

In [None]:
# Build the feature/label table for the configured ticker using create_df's
# default lag count and date range.
df = create_df(ticker)

In [None]:
# Positional 80% / 10% / 10% split; rows keep the order they have in `df`
# (no shuffling), so validation and test rows come after the training rows.
n_rows = df.shape[0]
train_end = int(n_rows * 0.8)
val_end = int(n_rows * 0.9)

train, val, test = df.iloc[:train_end], df.iloc[train_end:val_end], df.iloc[val_end:]

# The last column is the target; every column before it is a feature.
X_train, y_train = train.iloc[:, :-1], train.iloc[:, -1]
X_val, y_val = val.iloc[:, :-1], val.iloc[:, -1]
X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]

In [None]:
# Display the training split (the rendered table is truncated to its first and last rows).
train

Unnamed: 0_level_0,Lag1,Lag2,Lag3,Lag4,Lag5,Lag6,Lag7,Lag8,Lag9,Lag10,DJI_Lag1,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012-01-18 00:00:00-05:00,12.909642,12.760997,12.809030,12.844289,12.865261,12.819362,12.839729,12.706892,12.567368,12.500194,12482.070312,1
2012-01-19 00:00:00-05:00,13.043694,12.909642,12.760997,12.809030,12.844289,12.865261,12.819362,12.839729,12.706892,12.567368,12578.950195,0
2012-01-20 00:00:00-05:00,13.002351,13.043694,12.909642,12.760997,12.809030,12.844289,12.865261,12.819362,12.839729,12.706892,12623.980469,0
2012-01-23 00:00:00-05:00,12.775889,13.002351,13.043694,12.909642,12.760997,12.809030,12.844289,12.865261,12.819362,12.839729,12720.480469,1
2012-01-24 00:00:00-05:00,12.992019,12.775889,13.002351,13.043694,12.909642,12.760997,12.809030,12.844289,12.865261,12.819362,12708.820312,0
...,...,...,...,...,...,...,...,...,...,...,...,...
2020-02-21 00:00:00-05:00,78.513977,79.327789,78.195312,79.653824,79.634193,80.205345,78.344849,78.820381,78.447792,79.528801,29219.980469,0
2020-02-24 00:00:00-05:00,76.736801,78.513977,79.327789,78.195312,79.653824,79.634193,80.205345,78.344849,78.820381,78.447792,28992.410156,0
2020-02-25 00:00:00-05:00,73.091766,76.736801,78.513977,79.327789,78.195312,79.653824,79.634193,80.205345,78.344849,78.820381,27960.800781,0
2020-02-26 00:00:00-05:00,70.616020,73.091766,76.736801,78.513977,79.327789,78.195312,79.653824,79.634193,80.205345,78.344849,27081.359375,1


## FNN

In [None]:
## reference: https://www.tensorflow.org/guide/keras/train_and_evaluate

# Feed-forward binary classifier on the lagged-price features.
# Derive the feature count from the data instead of hard-coding it, so the
# model keeps working when the number of feature columns changes.
input_shape = (X_train.shape[1],)
output_shape = 1

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=input_shape),             # numpy array to tensor
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(0.1),                         # regularization
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.1),                         # regularization
    tf.keras.layers.Dense(units=32, activation='relu'),   # FeedForward layer
    tf.keras.layers.Dropout(0.1),                         # regularization
    tf.keras.layers.Dense(units=8, activation='relu'),    # FeedForward layer
    # A single output unit needs a sigmoid: softmax over one unit is always
    # exactly 1.0, which makes the model predict class 1 for every sample.
    tf.keras.layers.Dense(units=output_shape, activation='sigmoid')
])

model.compile(optimizer=tf.keras.optimizers.Adam(),
              # 0/1 labels with one sigmoid output -> binary cross entropy
              # (categorical cross entropy expects one unit per class).
              loss='binary_crossentropy',
              metrics=['accuracy'])

train_batch = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_batch = train_batch.shuffle(10000).batch(32, drop_remainder=True)

history = model.fit( # train model
    # The dataset is already batched (32 per batch), so no `batch_size`
    # argument is passed here; Keras does not accept one for Dataset inputs.
    train_batch,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score the test split: threshold the predicted probabilities at 0.5 and
# report per-class precision / recall / f-score followed by overall accuracy.
pred_probs = model.predict(X_test) #  predict
y_pred = np.where(pred_probs > 0.5, 1, 0)
perf = precision_recall_fscore_support(y_test, y_pred, average=None)
precision, recall, fscore = perf[:3]

# Class 1 ("up") is reported first, then class 0 ("down").
for cls in (1, 0):
    print('precision: ' + str(precision[cls]))
    print('recall: ' + str(recall[cls]))
    print('f-score: ' + str(fscore[cls]))
    print('----------')
print('accuracy: ' + str(accuracy_score(y_test, y_pred)))

precision: 0.51953125
recall: 1.0
f-score: 0.6838046272493573
----------
precision: 0.0
recall: 0.0
f-score: 0.0
----------
accuracy: 0.51953125


## LSTM

In [None]:
# The LSTM only uses the previous close of the stock and of the index.
lstm_features = ['Lag1', 'DJI_Lag1']
X_train_lstm, X_val_lstm, X_test_lstm = (
    split[lstm_features] for split in (X_train, X_val, X_test)
)

In [None]:
# Append a trailing axis of length 1 so each split has shape (rows, columns, 1),
# the 3-D layout passed to the LSTM model.
# NOTE(review): this makes the two selected columns the sequence axis, i.e. the
# LSTM steps over [Lag1, DJI_Lag1] rather than over time - confirm this is intended.
X_train_lstm = np.expand_dims(X_train_lstm.values, axis=-1)
X_val_lstm = np.expand_dims(X_val_lstm.values, axis=-1)
X_test_lstm = np.expand_dims(X_test_lstm.values, axis=-1)

In [None]:
## reference: https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM

# LSTM binary classifier; the per-sample input shape is taken from the
# prepared 3-D training array (everything after the batch axis).
input_shape = X_train_lstm.shape[1:]
output_shape = 1

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=input_shape),       # numpy array to tensor
    tf.keras.layers.LSTM(5),                        # LSTM layer
    tf.keras.layers.Dense(10, activation='relu'),   # FeedForward
    tf.keras.layers.Dense(output_shape, activation='sigmoid')  # single probability of class 1 (up)
])

model.compile(optimizer='adam',
              # One sigmoid output with 0/1 labels needs binary cross entropy;
              # categorical cross entropy over a single unit gives no usable
              # training signal.
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(                                # train model
    X_train_lstm,
    y_train,
    batch_size=256,
    epochs=10,
    validation_data=(X_val_lstm, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Score the LSTM on the test split: threshold the predicted probabilities at
# 0.5 and report per-class precision / recall / f-score, then overall accuracy.
pred_probs = model.predict(X_test_lstm) #  predict
y_pred = np.where(pred_probs > 0.5, 1, 0)
perf = precision_recall_fscore_support(y_test, y_pred, average=None)
precision, recall, fscore = perf[:3]

# Class 1 ("up") is reported first, then class 0 ("down").
for cls in (1, 0):
    print('precision: ' + str(precision[cls]))
    print('recall: ' + str(recall[cls]))
    print('f-score: ' + str(fscore[cls]))
    print('----------')
print('accuracy: ' + str(accuracy_score(y_test, y_pred)))

precision: 0.0
recall: 0.0
f-score: 0.0
precision: 0.48046875
recall: 1.0
f-score: 0.6490765171503958
accuracy: 0.48046875
