<a href="https://colab.research.google.com/github/ajbrittle1975/fictional-memory/blob/main/Stock_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Necessary Libraries
**TA-Lib Libraries**

In [1]:
url = 'https://launchpad.net/~mario-mariomedina/+archive/ubuntu/talib/+files'
ext = '0.4.0-oneiric1_amd64.deb -qO'
!wget $url/libta-lib0_$ext libta.deb
!wget $url/ta-lib0-dev_$ext ta.deb
!dpkg -i libta.deb ta.deb
!pip install ta-lib


Selecting previously unselected package libta-lib0.
(Reading database ... 124016 files and directories currently installed.)
Preparing to unpack libta.deb ...
Unpacking libta-lib0 (0.4.0-oneiric1) ...
Selecting previously unselected package ta-lib0-dev.
Preparing to unpack ta.deb ...
Unpacking ta-lib0-dev (0.4.0-oneiric1) ...
Setting up libta-lib0 (0.4.0-oneiric1) ...
Setting up ta-lib0-dev (0.4.0-oneiric1) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
Processing triggers for libc-bin (2.27-3ubuntu1.6) ...
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ta-lib
  Downloading TA-Lib-0.4.25.tar.gz (271 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m272.0/272.0 KB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata 

**Alphavantage Libraries**

In [2]:
!pip install alpha-vantage

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting alpha-vantage
  Downloading alpha_vantage-2.3.1-py3-none-any.whl (31 kB)
Installing collected packages: alpha-vantage
Successfully installed alpha-vantage-2.3.1


**Pytorch Libraries**

In [3]:
!pip install torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Run Imports

In [4]:
import numpy as np
from talib import abstract
import math

# multivariate data preparation
from numpy import array
from numpy import hstack

from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.techindicators import TechIndicators

print("All libraries loaded")
plot_on = False

All libraries loaded


# Set Stock Configuration

In [5]:
config = {
    "alpha_vantage": {
        # Claim your free API key here: https://www.alphavantage.co/support/#api-key
        "key": "T7GH3GWEIV1DKHB0",
        "symbol": "TQQQ",
        "outputsize": "full",
        "key_adjusted_close": "5. adjusted close",
        "key_high": "2. high",
        "key_low": "3. low",
    },
    "data": {
        "window_size": 10,
        "train_split_size": 0.80,
    },
    "plots": {
        "xticks_interval": 90,  # show a date every 90 days
        "color_actual": "#001f3f",
        "color_train": "#3D9970",
        "color_val": "#0074D9",
        "color_pred_train": "#3D9970",
        "color_pred_val": "#0074D9",
        "color_pred_test": "#FF4136",
    },
    "model": {
        "input_size": 8,  # since we are only using 1 feature, close price
        "num_lstm_layers": 2,
        "lstm_size": 120,
        "dropout": 0.20,
    },
    "training": {
        "device": "cuda",  # "cuda" or "cpu"
        "batch_size": 64,
        "num_epoch": 300,
        "learning_rate": 0.01,
        "scheduler_step_size": 40,
    },
}



# Download Stock Data & Run Indicators 

In [None]:
def remove_nan_columns(data_array, data_date):
    # return data_array[~np.isnan(data_array).any(axis=1), :]
    nan_rows = np.argwhere(np.isnan(data_array).any(axis=1))
    print("Removing {0} nan rows from data stack...".format(len(nan_rows)))
    data_array = np.delete(data_array, nan_rows, 0)

    # given index of elements
    # remove largest indices first to not change length
    nan_row_list = list(nan_rows.flatten())
    data_date = list(data_date)
    for ele in sorted(nan_row_list, reverse=True):
        data_date.remove(data_date[ele])

    return data_array, data_date


class Normalizer:
    def __init__(self):
        self.mu = None
        self.sd = None

    def fit_transform(self, x):
        self.mu = np.nanmean(x, axis=(0), keepdims=True)
        self.sd = np.nanstd(x, axis=(0), keepdims=True)
        normalized_x = (x - self.mu) / self.sd
        return normalized_x

    def inverse_transform(self, x):
        return (x * self.sd) + self.mu


def download_data(config):
    ts = TimeSeries(key=config["alpha_vantage"]["key"])
    ti = TechIndicators(key=config["alpha_vantage"]["key"])

    print("Obtaining data for symbol: {0}".format(config["alpha_vantage"]["symbol"]))
    data, data_meta_data = ts.get_daily_adjusted(
        config["alpha_vantage"]["symbol"],
        outputsize=config["alpha_vantage"]["outputsize"],
    )

    print("Sorting data...")

    data_date = [date for date in data.keys()]
    data_date.reverse()

    print("Sorting close price...")
    data_close_price = [
        float(data[date][config["alpha_vantage"]["key_adjusted_close"]])
        for date in data.keys()
    ]
    data_close_price.reverse()
    data_close_price = np.array(data_close_price)
    data_price_pct_change = np.diff(data_close_price) / data_close_price[:-1] * 100
    data_price_pct_change = np.pad(data_price_pct_change, (1, 0), "constant")
    data_price_pct_change = data_price_pct_change.reshape(-1, 1)
    ## Normalize price percent change
    print("Normalizing price percent change data...")
    price_pct_change_scaler = MinMaxScaler()
    data_price_pct_change_norm = price_pct_change_scaler.fit_transform(
        data_price_pct_change
    )

    print("Sorting high prices...")
    data_high_price = [
        float(data[date][config["alpha_vantage"]["key_high"]]) for date in data.keys()
    ]
    data_high_price.reverse()
    data_high_price = np.array(data_high_price)

    print("Sorting low prices...")
    data_low_price = [
        float(data[date][config["alpha_vantage"]["key_low"]]) for date in data.keys()
    ]
    data_low_price.reverse()
    data_low_price = np.array(data_high_price)

    print("Calculating HT_TRENDLINE...")
    ## Get HT Trend data
    ht_trend = abstract.HT_TRENDLINE(data_close_price)
    ht_trend = ht_trend.reshape(-1, 1)
    ## Normalize MESA
    print("Normalizing HT_TRENDLINE data...")
    ht_trendline_scaler = MinMaxScaler()
    ht_trendline_norm = ht_trendline_scaler.fit_transform(ht_trend)

    print("Calculating MESA average price...")
    ## Get MESA average price
    mama, fama = abstract.MAMA(data_close_price, fastlimit=0.5, slowlimit=0.05)
    mesa_hist = mama - fama
    mesa_hist = mesa_hist.reshape(-1, 1)
    ## Normalize MESA
    print("Normalizing dynamic MACD histogram data...")
    mesa_hist_scaler = Normalizer()
    mesa_hist_norm = mesa_hist_scaler.fit_transform(mesa_hist)

    print("Calculating HT_TREND sine signals...")
    ## Get HT_SINE signals
    sine, leadsine = abstract.HT_SINE(data_close_price)
    trend = abstract.HT_TRENDMODE(data_close_price)
    ht_trend_sine = (sine - leadsine) * trend
    ht_trend_sine = ht_trend_sine.reshape(-1, 1)
    ## Normalize HT_TREND
    print("Normalizing dynamic MACD histogram data...")
    ht_trend_sine_scaler = Normalizer()
    ht_trend_sine_norm = ht_trend_sine_scaler.fit_transform(ht_trend_sine)

    print("Calculating HT_PERIODS for dynamic indicators...")
    # Get period of data close price
    ht_period = abstract.HT_DCPERIOD(data_close_price)
    ht_period = np.around(ht_period)  # the instant period is optimal for oscillators

    print("Provisioning arrays for dynamic indicators...")
    # Provision arrays for additional indicators
    num_data_points = len(data_date)
    adx_ht = np.zeros(num_data_points, dtype=float)
    aroon_osc_ht = np.zeros(num_data_points, dtype=float)
    rsi_ht = np.zeros(num_data_points, dtype=float)
    macd_ht = np.zeros(num_data_points, dtype=float)
    macdsignal_ht = np.zeros(num_data_points, dtype=float)
    macdhist_ht = np.zeros(num_data_points, dtype=float)
    slowk_ht = np.zeros(num_data_points, dtype=float)
    slowd_ht = np.zeros(num_data_points, dtype=float)
    bb_lowerband_ht = np.zeros(num_data_points, dtype=float)
    bb_upperband_ht = np.zeros(num_data_points, dtype=float)
    bb_pct_ht = np.zeros(num_data_points, dtype=float)
    ult_osc_ht = np.zeros(num_data_points, dtype=float)

    # Get frequency-adjusted ADX
    print("Calculating dynamic ADX...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = int(period)
            slowperiod = (
                period if period % 2 == 0 else period + 1
            )  # Ensure slow period is even
            adx = abstract.ADX(
                data_high_price, data_low_price, data_close_price, timeperiod=slowperiod
            )
            adx_ht[i] = adx[i]

        i += 1

    adx_ht = adx_ht.reshape(-1, 1)
    ## Normalize ADX
    print("Normalizing dynamic ADX data...")
    adx_norm = adx_ht / 100

    # Get frequency-adjusted Aroon Oscillator
    print("Calculating dynamic AROONOSC...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = int(period)
            slowperiod = (
                period if period % 2 == 0 else period + 1
            )  # Ensure slow period is even
            aroon_osc = abstract.AROONOSC(
                data_high_price, data_low_price, timeperiod=int(slowperiod/2)
            )
            aroon_osc_ht[i] = aroon_osc[i]

        i += 1

    aroon_osc_ht = aroon_osc_ht.reshape(-1, 1)
    ## Normalize Aroon Oscillator
    print("Normalizing dynamic AROONOSC data...")
    aroon_osc_norm = (aroon_osc_ht + 100) / 200  # oscillates between +/- 100

    # Get frequency-adjusted MACD
    print("Calculating dynamic MACD...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = int(period)
            slowperiod = (
                period if period % 2 == 0 else period + 1
            )  # Ensure slow period is even
            fastperiod = int((slowperiod / 2) - 1)
            signalperiod = math.floor(0.75 * fastperiod)
            macd, macdsignal, macdhist = abstract.MACD(
                data_close_price,
                fastperiod=fastperiod,
                slowperiod=slowperiod,
                signalperiod=signalperiod,
            )
            macd_ht[i] = macd[i]
            macdsignal_ht[i] = macdsignal[i]
            macdhist_ht[i] = macdhist[i]

        i += 1

    macdhist_ht = macdhist_ht.reshape(-1, 1)
    ## Normalize MACD
    print("Normalizing dynamic MACD histogram data...")
    macd_scaler = Normalizer()
    macd_norm = macd_scaler.fit_transform(macdhist_ht)

    # Get frequency-adjusted Bollinger Bands
    print("Calculating dynamic BBANDS...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = int(period)
            slowperiod = (
                period if period % 2 == 0 else period + 1
            )  # Ensure slow period is even
            bb_upperband, bb_middleband, bb_lowerband = abstract.BBANDS(
                data_close_price,
                timeperiod=slowperiod,
                nbdevup=float(2),
                nbdevdn=float(2),
                matype=1,
            )
            bb_upperband_ht[i] = bb_upperband[i]
            bb_lowerband_ht[i] = bb_lowerband[i]
            bb_pct_ht[i] = (data_close_price[i] - bb_lowerband[i]) / (
                bb_upperband[i] - bb_lowerband[i]
            )

        i += 1

    bb_pct_ht = bb_pct_ht.reshape(-1, 1)
    ## Normalize MACD
    print("Normalizing dynamic BBANDS Percentage data...")
    bb_pct_scaler = Normalizer()
    bb_pct_norm = bb_pct_scaler.fit_transform(bb_pct_ht)

    # Get frequency-adjusted RSI
    print("Calculating dynamic RSI...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = (
                period / 2 if period % 2 == 0 else (period + 1) / 2
            )  # Ensure slow period is even and divide by 2
            period = int(period)
            rsi = abstract.RSI(
                data_close_price,
                timeperiod=period,
            )
            rsi_ht[i] = rsi[i]

        i += 1

    rsi_ht = rsi_ht.reshape(-1, 1)
    ## Normalize RSI
    print("Normalizing dynamic RSI data...")
    rsi_norm = rsi_ht / 100

    # Get frequency-adjusted Stochastic
    print("Calculating dynamic Stochastic...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = (
                period / 2 if period % 2 == 0 else (period + 1) / 2
            )  # Ensure slow period is even and divide by 2
            period = int(period)
            slowk, slowd = abstract.STOCH(
                data_high_price,
                data_low_price,
                data_close_price,
                fastk_period=period,
                slowk_period=3,
                slowk_matype=1,
                slowd_period=3,
                slowd_matype=1,
            )
            slowk_ht[i] = slowk[i]
            slowd_ht[i] = slowd[i]

        i += 1

    stochastic_hist_ht = slowk_ht - slowd_ht
    stochastic_hist_ht = stochastic_hist_ht.reshape(-1, 1)
    ## Normalize dynamic stochastic histogram
    print("Normalizing dynamic stochastic histogram data...")
    stochastic_hist_scaler = Normalizer()
    stochastic_hist_norm = stochastic_hist_scaler.fit_transform(stochastic_hist_ht)
    
    # Get frequency-adjusted Ultimate Oscillator
    print("Calculating dynamic ULTOSC...")
    i = 0
    for period in ht_period:
        if not math.isnan(period):
            period = (
                period / 2 if period % 2 == 0 else (period + 1) / 2
            )  # Ensure slow period is even and divide by 2
            period = int(period)
            ult_osc = abstract.ULTOSC(
                data_high_price,
                data_low_price,
                data_close_price,
                timeperiod1=period,
                timeperiod2=period*2,
                timeperiod3=period*4,
            )
            ult_osc_ht[i] = slowk[i]

        i += 1

    ult_osc_ht = ult_osc_ht.reshape(-1, 1)
    ## Normalize dynamic stochastic histogram
    print("Normalizing dynamic ULTOSC data...")
    ult_osc_ht = ult_osc_ht / 100 # Naturally ranges between 0-100

    data_stack = hstack(
        (
            # mesa_hist,
            ht_trend_sine,
            macd_norm,
            adx_norm,
            aroon_osc_norm,
            bb_pct_norm,
            rsi_norm,
            stochastic_hist_norm,
            ult_osc_ht,
            data_price_pct_change_norm,
        )
    )

    # print(data_stack)
    # print(data_stack.size)
    data_stack, data_date = remove_nan_columns(data_stack, data_date)
    num_data_points = data_stack.shape[0]
    display_date_range = (
        "from " + data_date[0] + " to " + data_date[num_data_points - 1]
    )
    print("Number data points:", num_data_points, display_date_range)

    # print(data_stack)
    # print(data_stack.size)

    return data_date, data_stack, num_data_points, display_date_range, price_pct_change_scaler

# Run download_data() Routine

In [6]:
data_date, data_stack, num_data_points, display_date_range, price_pct_change_scaler = download_data(config)

if plot_on:
    # # First plot

    fig = figure(figsize=(25, 5), dpi=160)
    fig.patch.set_facecolor((1.0, 1.0, 1.0))
    plt.plot(data_date, data_stack[:, config["model"]["input_size"]], color=config["plots"]["color_actual"])
    xticks = [
        data_date[i]
        if (
            (
                i % config["plots"]["xticks_interval"] == 0
                and (num_data_points - i) > config["plots"]["xticks_interval"]
            )
            or i == num_data_points - 1
        )
        else None
        for i in range(num_data_points)
    ]  # make x ticks nice
    x = np.arange(0, len(xticks))
    plt.xticks(x, xticks, rotation="vertical")
    plt.title(
        "Daily close price for "
        + config["alpha_vantage"]["symbol"]
        + ", "
        + display_date_range
    )
    plt.grid(b=None, which="major", axis="y", linestyle="--")
    plt.show()


def prepare_data_x(x, window_size):
    # perform windowing
    n_row = x.shape[0] - window_size + 1
    output = np.lib.stride_tricks.as_strided(
        x, shape=(n_row, window_size), strides=(x.strides[0], x.strides[0])
    )
    return output[:-1], output[-1]


def prepare_data_y(x, window_size):
    # # perform simple moving average
    # output = np.convolve(x, np.ones(window_size), 'valid') / window_size

    # use the next day as label
    output = x[window_size:]
    return output

data_x, data_x_unseen = prepare_data_x(
    data_stack, window_size=config["data"]["window_size"]
)
# data_unseen = data_unseen.reshape(1,-1)
data_y = prepare_data_y(data_stack, window_size=config["data"]["window_size"])

# split dataset

split_index = int(data_y.shape[0] * config["data"]["train_split_size"])
data_x_train = data_x[:split_index]
data_x_val = data_x[split_index:]
data_y_train = data_y[:split_index]
data_y_val = data_y[split_index:]

if plot_on:
    # # prepare data for plotting

    to_plot_data_y_train = np.zeros(num_data_points)
    to_plot_data_y_val = np.zeros(num_data_points)

    to_plot_data_y_train[
        config["data"]["window_size"] : split_index + config["data"]["window_size"]
    ] = price_pct_change_scaler.inverse_transform(data_y_train[:, -1])
    to_plot_data_y_val[
        split_index + config["data"]["window_size"] :
    ] = price_pct_change_scaler.inverse_transform(data_y_val[:, -1])

    to_plot_data_y_train = np.where(to_plot_data_y_train == 0, None, to_plot_data_y_train)
    to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)

    # # plots

    fig = figure(figsize=(25, 5), dpi=160)
    fig.patch.set_facecolor((1.0, 1.0, 1.0))
    plt.plot(
        data_date,
        to_plot_data_y_train,
        label="Prices (train)",
        color=config["plots"]["color_train"],
    )
    plt.plot(
        data_date,
        to_plot_data_y_val,
        label="Prices (validation)",
        color=config["plots"]["color_val"],
    )
    xticks = [
        data_date[i]
        if (
            (
                i % config["plots"]["xticks_interval"] == 0
                and (num_data_points - i) > config["plots"]["xticks_interval"]
            )
            or i == num_data_points - 1
        )
        else None
        for i in range(num_data_points)
    ]  # make x ticks nice
    x = np.arange(0, len(xticks))
    plt.xticks(x, xticks, rotation="vertical")
    plt.title(
        "Daily close prices for "
        + config["alpha_vantage"]["symbol"]
        + " - showing training and validation data"
    )
    plt.grid(b=None, which="major", axis="y", linestyle="--")
    plt.legend()
    plt.show()


class TimeSeriesDataset(Dataset):
    def __init__(self, x, target, features, sequence_length=5):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        if x.ndim > 1:
            self.y = torch.tensor(x[:, -1]).float()
            self.x = torch.tensor(x[:,:features]).float()
        else:
            self.y = torch.tensor(x[-1]).float()
            self.x = torch.tensor(x[:features]).float()

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, i): 
        if i >= self.sequence_length - 1:
            i_start = i - self.sequence_length + 1
            x = self.x[i_start:(i + 1), :]
        else:
            padding = self.x[0].repeat(self.sequence_length - i - 1, 1)
            x = self.x[0:(i + 1), :]
            x = torch.cat((padding, x), 0)

        return x, self.y[i]

features = config["model"]["input_size"]
sequence_length = config["data"]["window_size"]
target = features
dataset_train = TimeSeriesDataset(data_x_train, target, features, sequence_length )
dataset_val = TimeSeriesDataset(data_x_val, target, features, sequence_length )
data_unseen = TimeSeriesDataset(data_x_unseen, target, features, sequence_length )
# print("Train data shape", dataset_train.x.size(), dataset_train.y)
# print("Validation data shape", dataset_val.x.size(), dataset_val.y)

train_dataloader = DataLoader(
    dataset_train, batch_size=config["training"]["batch_size"], shuffle=True
)
val_dataloader = DataLoader(
    dataset_val, batch_size=config["training"]["batch_size"], shuffle=True
)


class LSTMModel(nn.Module):
    def __init__(
        self,
        input_size=8,
        hidden_layer_size=32,
        num_layers=2,
        output_size=1,
        dropout=0.2,
    ):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.linear_1 = nn.Linear(input_size, hidden_layer_size)
        self.relu = nn.ReLU()
        self.lstm = nn.LSTM(
            hidden_layer_size,
            hidden_size=self.hidden_layer_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(num_layers * hidden_layer_size, output_size)

        self.init_weights()

    def init_weights(self):
        for name, param in self.lstm.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0.0)
            elif "weight_ih" in name:
                nn.init.kaiming_normal_(param)
            elif "weight_hh" in name:
                nn.init.orthogonal_(param)

    def forward(self, x):
        batchsize = x.shape[0]

        # layer 1
        x = self.linear_1(x)
        x = self.relu(x)

        # LSTM layer
        lstm_out, (h_n, c_n) = self.lstm(x)

        # reshape output from hidden cell into [batch, features] for `linear_2`
        x = h_n.permute(1, 0, 2).reshape(batchsize, -1)

        # layer 2
        x = self.dropout(x)
        predictions = self.linear_2(x)
        return predictions[:, -1]


def run_epoch(dataloader, is_training=False):
    epoch_loss = 0

    if is_training:
        model.train()
    else:
        model.eval()

    for idx, (x, y) in enumerate(dataloader):
        if is_training:
            optimizer.zero_grad()

        batchsize = x.shape[0]

        x = x.to(config["training"]["device"])
        y = y.to(config["training"]["device"])

        out = model(x)
        loss = criterion(out.contiguous(), y.contiguous())

        if is_training:
            loss.backward()
            optimizer.step()

        epoch_loss += loss.detach().item() / batchsize

    lr = scheduler.get_last_lr()[0]

    return epoch_loss, lr


train_dataloader = DataLoader(
    dataset_train, batch_size=config["training"]["batch_size"], shuffle=True
)
val_dataloader = DataLoader(
    dataset_val, batch_size=config["training"]["batch_size"], shuffle=True
)

model = LSTMModel(
    input_size=config["model"]["input_size"],
    hidden_layer_size=config["model"]["lstm_size"],
    num_layers=config["model"]["num_lstm_layers"],
    output_size=1,
    dropout=config["model"]["dropout"],
)
model = model.to(config["training"]["device"])

criterion = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=config["training"]["learning_rate"],
    betas=(0.9, 0.98),
    eps=1e-9,
)
scheduler = optim.lr_scheduler.StepLR(
    optimizer, step_size=config["training"]["scheduler_step_size"], gamma=0.1
)

for epoch in range(config["training"]["num_epoch"]):
    loss_train, lr_train = run_epoch(train_dataloader, is_training=True)
    loss_val, lr_val = run_epoch(val_dataloader)
    scheduler.step()

    print(
        "Epoch[{}/{}] | loss train:{:.6f}, test:{:.6f} | lr:{:.6f}".format(
            epoch + 1, config["training"]["num_epoch"], loss_train, loss_val, lr_train
        )
    )


# here we re-initialize dataloader so the data doesn't shuffled, so we can plot the values by date

train_dataloader = DataLoader(
    dataset_train, batch_size=config["training"]["batch_size"], shuffle=False
)
val_dataloader = DataLoader(
    dataset_val, batch_size=config["training"]["batch_size"], shuffle=False
)

model.eval()

# predict on the training data, to see how well the model managed to learn and memorize

predicted_train = np.array([])

for idx, (x, y) in enumerate(train_dataloader):
    x = x.to(config["training"]["device"])
    out = model(x)
    out = out.cpu().detach().numpy()
    predicted_train = np.concatenate((predicted_train, out))

# predict on the validation data, to see how the model does

predicted_val = np.array([])

for idx, (x, y) in enumerate(val_dataloader):
    x = x.to(config["training"]["device"])
    out = model(x)
    out = out.cpu().detach().numpy()
    predicted_val = np.concatenate((predicted_val, out))

# prepare data for plotting

to_plot_data_y_train_pred = np.zeros(num_data_points)
to_plot_data_y_val_pred = np.zeros(num_data_points)

predicted_train = predicted_train.reshape(1, -1)
predicted_val = predicted_val.reshape(1, -1)

to_plot_data_y_train_pred[
    config["data"]["window_size"] : split_index + config["data"]["window_size"]
] = price_pct_change_scaler.inverse_transform(predicted_train)
to_plot_data_y_val_pred[
    split_index + config["data"]["window_size"] :
] = price_pct_change_scaler.inverse_transform(predicted_val)

to_plot_data_y_train_pred = np.where(
    to_plot_data_y_train_pred == 0, None, to_plot_data_y_train_pred
)
to_plot_data_y_val_pred = np.where(
    to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred
)


if plot_on:
    # # plots

    fig = figure(figsize=(25, 5), dpi=80)
    fig.patch.set_facecolor((1.0, 1.0, 1.0))
    plt.plot(
        data_date,
        data_stack[:, config["model"]["input_size"]], #Actual data
        label="Actual prices",
        color=config["plots"]["color_actual"],
    )
    plt.plot(
        data_date,
        to_plot_data_y_train_pred,
        label="Predicted prices (train)",
        color=config["plots"]["color_pred_train"],
    )
    plt.plot(
        data_date,
        to_plot_data_y_val_pred,
        label="Predicted prices (validation)",
        color=config["plots"]["color_pred_val"],
    )
    plt.title("Compare predicted prices to actual prices")
    xticks = [
        data_date[i]
        if (
            (
                i % config["plots"]["xticks_interval"] == 0
                and (num_data_points - i) > config["plots"]["xticks_interval"]
            )
            or i == num_data_points - 1
        )
        else None
        for i in range(num_data_points)
    ]  # make x ticks nice
    x = np.arange(0, len(xticks))
    plt.xticks(x, xticks, rotation="vertical")
    plt.grid(b=None, which="major", axis="y", linestyle="--")
    plt.legend()
    plt.show()

    # prepare data for plotting the zoomed in view of the predicted prices vs. actual prices

    to_plot_data_y_val_subset = price_pct_change_scaler.inverse_transform(data_y_val)
    to_plot_predicted_val = price_pct_change_scaler.inverse_transform(predicted_val)
    to_plot_data_date = data_date[split_index + config["data"]["window_size"] :]


    # plots

    fig = figure(figsize=(25, 5), dpi=80)
    fig.patch.set_facecolor((1.0, 1.0, 1.0))
    plt.plot(
        to_plot_data_date,
        to_plot_data_y_val_subset,
        label="Actual prices",
        color=config["plots"]["color_actual"],
    )
    plt.plot(
        to_plot_data_date,
        to_plot_predicted_val[:, -1],
        label="Predicted prices (validation)",
        color=config["plots"]["color_pred_val"],
    )
    plt.title("Zoom in to examine predicted price on validation data portion")
    xticks = [
        to_plot_data_date[i]
        if (
            (
                i % int(config["plots"]["xticks_interval"] / 5) == 0
                and (len(to_plot_data_date) - i) > config["plots"]["xticks_interval"] / 6
            )
            or i == len(to_plot_data_date) - 1
        )
        else None
        for i in range(len(to_plot_data_date))
    ]  # make x ticks nice
    xs = np.arange(0, len(xticks))
    plt.xticks(xs, xticks, rotation="vertical")
    plt.grid(b=None, which="major", axis="y", linestyle="--")
    plt.legend()
    plt.show()

# predict the closing price of the next trading day

model.eval()

x = (
    torch.tensor(data_x_unseen)
    .float()
    .to(config["training"]["device"])
    .unsqueeze(0)
    .unsqueeze(2)
)  # this is the data type and shape required, [batch, sequence, feature]
prediction = model(x)
prediction = prediction.cpu().detach().numpy()

# prepare plots

plot_range = 10
# to_plot_data_y_val = np.zeros((plot_range, plot_range-1))
# to_plot_data_y_val_pred = np.zeros((plot_range, plot_range-1))
# to_plot_data_y_test_pred = np.zeros((plot_range, plot_range-1))

to_plot_data_y_val = price_pct_change_scaler.inverse_transform(data_y_val)[
    -plot_range + 1 :
]
to_plot_data_y_val_pred = price_pct_change_scaler.inverse_transform(predicted_val)[
    -plot_range + 1 :
]

to_plot_data_y_test_pred = price_pct_change_scaler.inverse_transform(prediction)

to_plot_data_y_val = np.where(to_plot_data_y_val == 0, None, to_plot_data_y_val)
to_plot_data_y_val_pred = np.where(
    to_plot_data_y_val_pred == 0, None, to_plot_data_y_val_pred
)
to_plot_data_y_test_pred = np.where(
    to_plot_data_y_test_pred == 0, None, to_plot_data_y_test_pred
)

# plot
if plot_on:
    plot_date_test = data_date[-plot_range + 1 :]
    plot_date_test.append("tomorrow")

    fig = figure(figsize=(25, 5), dpi=80)
    fig.patch.set_facecolor((1.0, 1.0, 1.0))
    plt.plot(
        plot_date_test,
        to_plot_data_y_val,
        label="Actual prices",
        marker=".",
        markersize=10,
        color=config["plots"]["color_actual"],
    )
    plt.plot(
        plot_date_test,
        to_plot_data_y_val_pred,
        label="Past predicted prices",
        marker=".",
        markersize=10,
        color=config["plots"]["color_pred_val"],
    )
    plt.plot(
        plot_date_test,
        to_plot_data_y_test_pred,
        label="Predicted price for next day",
        marker=".",
        markersize=20,
        color=config["plots"]["color_pred_test"],
    )
    plt.title("Predicting the close price of the next trading day")
    plt.grid(b=None, which="major", axis="y", linestyle="--")
    plt.legend()
    plt.show()

print(
    "Predicted close price of the next trading day:",
    to_plot_data_y_test_pred
)

Obtaining data for symbol: TQQQ
Sorting data...
Sorting close price...
Normalizing price percent change data...
Sorting high prices...
Sorting low prices...
Calculating HT_TRENDLINE...
Normalizing HT_TRENDLINE data...
Calculating MESA average price...
Normalizing dynamic MACD histogram data...
Calculating HT_TREND sine signals...
Normalizing dynamic MACD histogram data...
Calculating HT_PERIODS for dynamic indicators...
Provisioning arrays for dynamic indicators...
Calculating dynamic ADX...
Normalizing dynamic ADX data...
Calculating dynamic AROONOSC...
Normalizing dynamic AROONOSC data...
Calculating dynamic MACD...
Normalizing dynamic MACD histogram data...
Calculating dynamic BBANDS...
Normalizing dynamic BBANDS Percentage data...
Calculating dynamic RSI...
Normalizing dynamic RSI data...
Calculating dynamic Stochastic...
Normalizing dynamic stochastic histogram data...
Calculating dynamic ULTOSC...
Normalizing dynamic ULTOSC data...
Removing 67 nan rows from data stack...
Number d

RuntimeError: ignored