In [1]:
import yfinance as yf
import pandas as pd
from matplotlib import pyplot as plt

import tensorflow as tf
from tensorflow import keras
import numpy as np
import datetime, pickle
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

print(tf.__version__)
# Ask TensorFlow to grow GPU memory on demand rather than reserving it all
# up front. This must run before any GPU has been initialized.
tf.autograph.set_verbosity(0)
physical_devices = tf.config.list_physical_devices("GPU")
# Memory growth has to be configured the same way on every visible GPU, so
# apply it to all of them. With no GPU the loop is simply skipped.
for gpu in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except (RuntimeError, ValueError) as err:
        # RuntimeError: the runtime was already initialized.
        # ValueError: the device is not a valid physical device.
        # Keep going (best effort) but say why the setting was not applied.
        print("Could not enable GPU memory growth:", err)

print(physical_devices)

2.10.1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
def sampleDataset(data, num_samples, interval_len, feature_size, offset=0):
    """Cut evenly spaced, fixed-length windows out of a row-ordered dataset.

    Window start rows are spread evenly over the rows from ``offset`` onward.
    Each window is paired with the single row that immediately follows it.

    Parameters
    ----------
    data : array-like
        Dataset indexed by row along axis 0. Each row must be assignable to
        an array of length ``feature_size``.
    num_samples : int
        Number of windows to extract.
    interval_len : int
        Number of consecutive rows in each window.
    feature_size : int
        Number of values stored per row in the outputs.
    offset : int, optional
        Index of the first row that may be used as a window start.

    Returns
    -------
    samples : numpy.ndarray
        Shape ``(num_samples, interval_len, feature_size)``.
    target_prices : numpy.ndarray
        Shape ``(num_samples, feature_size)``; row ``i`` is the data row
        directly after window ``i``.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``offset + interval_len + 1`` rows and at
        least one sample is requested.
    """
    data = np.asarray(data)

    # Largest distance a window start may sit past `offset` while still
    # leaving room for the window itself plus its target row.
    span = data.shape[0] - interval_len - 1 - offset
    if num_samples > 0 and span < 0:
        raise ValueError(
            "data has " + str(data.shape[0]) + " rows, but offset=" + str(offset)
            + " and interval_len=" + str(interval_len) + " need at least "
            + str(offset + interval_len + 1)
        )

    # Every element is overwritten in the loop, so uninitialized storage is fine.
    samples = np.empty((num_samples, interval_len, feature_size))
    target_prices = np.empty((num_samples, feature_size))
    for x in range(num_samples):
        start_index = round(x * span / num_samples) + offset
        stop_index = start_index + interval_len

        samples[x] = data[start_index:stop_index]
        target_prices[x] = data[stop_index]

    return samples, target_prices

def normalize_data(data, method="one"):
    """Normalize each column of ``data`` independently (statistics along axis 0).

    Parameters
    ----------
    data : array-like
        Values to normalize; statistics are computed along axis 0.
    method : str, optional
        ``"one"`` rescales each column to the range [0, 1] using its min and
        max. ``"zscore"`` subtracts the column mean and divides by the column
        (population) standard deviation.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``data``. A column with no spread (max
        equals min, or standard deviation of 0) is returned as all zeros
        instead of NaN.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"one"`` nor ``"zscore"``.
    """
    data = np.asarray(data)

    if method == "one":
        mins = np.min(data, axis=0)
        span = np.max(data, axis=0) - mins
        # A constant column would give 0/0; divide by 1 so it maps to 0.
        return (data - mins) / np.where(span == 0, 1, span)

    if method == "zscore":
        mean = np.mean(data, axis=0)
        stdev = np.std(data, axis=0)
        # Same guard as above for columns with zero standard deviation.
        return (data - mean) / np.where(stdev == 0, 1, stdev)

    raise ValueError(
        "Unknown normalization method " + repr(method) + "; expected 'one' or 'zscore'"
    )

In [None]:
# Ticker symbols whose daily histories are downloaded when no cached
# histories can be loaded.
TICKER = ["^GSPC"]

DATASET_SIZE = 10092 # Number of samples PER TICKER
INTERVAL = 7 # Rows per sample window (passed to sampleDataset as interval_len)
FEATURE_SIZE = 5 # Values per row; presumably Open, High, Low, Close, Volume -- TODO confirm against hist.columns
DATA_OFFSET = 14000 # First row index usable as a window start (passed to sampleDataset as offset)

# fetch hists
# Prefer the local pickle cache; download from yfinance when it cannot be used.
# NOTE(security): pickle.load can execute arbitrary code stored in the file.
# Only keep a hists.dat that this project produced itself.
# NOTE(review): nothing in this cell writes hists.dat -- confirm where the
# cache is created.
hists = None
try:
    with open("hists.dat", "rb") as f:
        hists = pickle.load(f)
except Exception as err:
    # Best effort: a missing, unreadable or incompatible cache is not fatal,
    # but the reason is reported instead of being silently discarded.
    # KeyboardInterrupt/SystemExit are deliberately not caught.
    print("Could not load hists.dat (" + repr(err) + "); downloading instead.")

if hists is None:
    hists = []
    for x, symbol in enumerate(TICKER, start=1):
        # 1-based counter so the last ticker reads "N/N".
        print("LOADING TICKER " + str(x) + "/" + str(len(TICKER)), end="\r")
        hist = yf.Ticker(symbol).history(interval="1d", period="max")
        # Skip tickers that returned no rows or have NaNs in the Open column.
        if not hist.empty and np.sum(np.isnan(hist.loc[:, "Open"].to_numpy())) == 0:
            # Non-mutating drop: keep only the remaining columns as features.
            hists.append(hist.drop(columns=["Dividends", "Stock Splits"]))

# Define
# Pre-allocate one slot per loaded history. np.empty leaves the storage
# uninitialized, which is fine because the loop below fills every slot.
price_intervals = np.empty((len(hists), DATASET_SIZE, INTERVAL, FEATURE_SIZE))
target_prices = np.empty((len(hists), DATASET_SIZE, FEATURE_SIZE))
max_prices = np.empty((len(hists),))
min_prices = np.empty((len(hists),))
means = np.empty((len(hists),))
stdevs = np.empty((len(hists),))


# Process features/data
for x in range(len(hists)):

    hist = hists[x]
    display(hist)
    # One 1-D array per remaining DataFrame column, in column order.
    columns = [hist.loc[:, col].to_numpy() for col in hist.columns]

    # Summary statistics of the FIRST column only (presumably Open -- TODO
    # confirm), taken over the full history before normalization.
    max_prices[x] = np.max(columns[0])
    min_prices[x] = np.min(columns[0])
    means[x] = np.mean(columns[0])
    stdevs[x] = np.std(columns[0])

    # Rebuild a (rows, columns) matrix and z-score each column independently.
    data = np.stack(columns, axis=1)
    data = normalize_data(data, method="zscore")

    # Windows of INTERVAL rows starting at or after DATA_OFFSET, each paired
    # with the row that follows it.
    price_intervals[x], target_prices[x] = sampleDataset(data, DATASET_SIZE, INTERVAL, FEATURE_SIZE, offset=DATA_OFFSET)

In [3]:
# Load a previously saved Keras model from the relative path "model".
model = keras.models.load_model("model")

# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 7, 64)             17920     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 64)                2112      
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dense_2 (Dense)             (None, 5)                 325       
                                                                 
Total params: 36,933
Trainable params: 36,933
Non-trainable params: 0
_________________________________________________________________
