In [1]:
import yfinance as yf
import numpy as np
from sklearn import preprocessing
from ta.momentum import RSIIndicator
from collections import deque
import random
import tensorflow as tf

## Setting up


### Defining constants


In [2]:
# Length of sequences to feed the RNN
SEQUENCE_LEN = 60
# Number of periods (days if data is daily) in the future to predict
PREDICTION_PERIOD_OFFSET = 20

RELEVANT_COLS = ["Close"]
INDICATOR_TICKERS = ["QQQ", "^TNX", "^VIX", "CL=F"]

### Defining functions for testing


In [3]:
def classify(current, future):
    return int(float(future) > float(current))


def preprocess_df(df, balance=True):
    TICKER = df.columns[0].split("_")[0]
    CLOSE_NAME = TICKER + "_Close"
    df = df.drop(columns=[f"{CLOSE_NAME}_Future"])

    # Scale the data
    for col in df.columns:
        if col != "Target":
            df[col] = df[col].pct_change()
            df.replace([np.inf, -np.inf], np.nan, inplace=True)
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    # Create the sequential data
    sequential_data = []
    data_queue = deque(maxlen=SEQUENCE_LEN)

    for datapoint in df.values:
        # The last column in the df will be Target. Don't include this in the list of independent features
        data_queue.append(datapoint[:-1])
        if len(data_queue) == SEQUENCE_LEN:
            sequential_data.append([np.array(data_queue), datapoint[-1]])

    random.shuffle(sequential_data)

    if balance:
        # Balance the dataset
        buys = []
        sells = []

        for seq, target in sequential_data:
            if target:
                buys.append([seq, target])
            else:
                sells.append([seq, target])

        minimum = min(len(buys), len(sells))

        random.shuffle(buys)
        random.shuffle(sells)

        buys = buys[:minimum]
        sells = sells[:minimum]

        sequential_data = buys + sells
        random.shuffle(sequential_data)

    X = []
    y = []

    for seq, target in sequential_data:
        X.append(seq)
        y.append(target)

    return np.array(X), np.array(y)


def create_df(TICKER):
    df = yf.download(TICKER, progress=False)
    df.drop(columns=["Adj Close"], inplace=True)
    df.rename(columns={col: f"{TICKER}_{col}" for col in df.columns}, inplace=True)

    df[f"{TICKER}_Close_RSI"] = RSIIndicator(df[f"{TICKER}_Close"], window=14).rsi()
    df.dropna(inplace=True)

    for ticker in INDICATOR_TICKERS:
        ticker_data = yf.download(ticker, progress=False)
        relevant_data = ticker_data[RELEVANT_COLS]
        relevant_data = relevant_data.rename(
            columns={col: f"{ticker}_{col}" for col in RELEVANT_COLS}
        )
        # Only join if the columns aren't already present
        if len(set(df.columns).intersection(set(relevant_data.columns))) == 0:
            df = df.join(relevant_data)

    df.dropna(inplace=True)

    df[f"{TICKER}_Close_Future"] = df[f"{TICKER}_Close"].shift(
        -1 * PREDICTION_PERIOD_OFFSET
    )
    df.dropna(inplace=True)
    df["Target"] = list(
        map(classify, df[f"{TICKER}_Close"], df[f"{TICKER}_Close_Future"])
    )

    return df


def test_ticker(model, TICKER):
    df = create_df(TICKER)

    X, y = preprocess_df(df, balance=False)

    logits = model(X, training=False)
    predictions = tf.math.argmax(logits, axis=1, output_type=tf.int64)
    test_accuracy = tf.keras.metrics.Accuracy()

    return test_accuracy(predictions, y)

### Loading in the model


In [4]:
model = tf.keras.models.load_model("./models/SPY")

In [5]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 20)                2480      
                                                                 
 dropout_12 (Dropout)        (None, 20)                0         
                                                                 
 batch_normalization_4 (Batc  (None, 20)               80        
 hNormalization)                                                 
                                                                 
 dense_12 (Dense)            (None, 16)                336       
                                                                 
 dropout_13 (Dropout)        (None, 16)                0         
                                                                 
 dense_13 (Dense)            (None, 16)                272       
                                                      

## Backtesting accuracies


### Testing the model on various tickers


In [6]:
test_ticker(model, "SHEL")

<tf.Tensor: shape=(), dtype=float32, numpy=0.52080727>

In [7]:
test_ticker(model, "HDB")

<tf.Tensor: shape=(), dtype=float32, numpy=0.52999437>

In [8]:
test_ticker(model, "SAP")

<tf.Tensor: shape=(), dtype=float32, numpy=0.5092857>

In [9]:
test_ticker(model, "RY")

<tf.Tensor: shape=(), dtype=float32, numpy=0.5355357>

## Backtesting trading performance


In [10]:
test_ticker(model, "JPM")

<tf.Tensor: shape=(), dtype=float32, numpy=0.5292857>

In [11]:
create_df("RY")

Unnamed: 0_level_0,RY_Open,RY_High,RY_Low,RY_Close,RY_Volume,RY_Close_RSI,QQQ_Close,^TNX_Close,^VIX_Close,CL=F_Close,RY_Close_Future,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2000-08-23,14.468750,14.625000,14.453125,14.562500,23600,65.831297,97.062500,5.725,17.379999,32.049999,15.625000,1
2000-08-24,14.609375,14.609375,14.421875,14.468750,34800,62.465621,98.562500,5.716,17.040001,31.629999,15.593750,1
2000-08-25,14.406250,14.406250,14.265625,14.296875,16400,56.738409,98.031250,5.721,16.530001,32.049999,15.812500,1
2000-08-28,14.312500,14.375000,14.281250,14.375000,32000,58.596640,98.500000,5.766,16.540001,32.869999,15.515625,1
2000-08-29,14.359375,14.437500,14.312500,14.359375,15600,58.059503,99.000000,5.808,16.889999,32.720001,15.312500,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-29,94.110001,94.629997,93.830002,94.459999,615500,42.217710,312.720001,3.566,19.120001,72.970001,98.440002,1
2023-03-30,95.419998,95.660004,94.760002,94.949997,398900,44.402930,315.679993,3.551,19.020000,74.370003,99.309998,1
2023-03-31,95.139999,95.610001,95.010002,95.580002,591700,47.169366,320.929993,3.494,18.700001,75.669998,99.190002,1
2023-04-03,96.750000,97.610001,96.599998,97.580002,791900,54.850002,320.149994,3.430,18.549999,80.419998,96.489998,0
