# Toy Models

In this notebook, we will explore some Toy Models to perform regression on a few data sets of stock data. We will preprocess the stock data to contain the Times in the form of sines and cosines.

### Library Import

In [12]:
import os
import sys
import numpy as np
import pandas as pd
import pandas_ta as ta
import tensorflow as tf
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (20, 10)
%matplotlib inline

### Local Imports

In [2]:
from window_generator import WindowGenerator

In [3]:
# for python scripts use: "os.path.dirname(__file__)" instead of "os.path.abspath('')"
sys.path.append(
    os.path.abspath(os.path.join(os.path.abspath(''), os.path.pardir)))

from data_clean import get_trading_times

### Get the Data

In [4]:
data_path = r'..\data\raw\AAPL_15min.csv'
df = pd.read_csv(data_path, index_col=0, 
                 parse_dates=True, infer_datetime_format=True)

# df = get_trading_times(df)
df = df.dropna()

# add days, hours, and minutes to the dataset
dayofweek = df.index.dayofweek
hour = df.index.hour
minute = df.index.minute

# encode the days, hours, and minutes with sin and cos functions
eps = 1e-4 # ensure that encodings don't have NaNs
# df['sin_day'] = np.sin(2*np.pi/(dayofweek + eps))
# df['cos_day'] = np.cos(2*np.pi/(dayofweek + eps))
# df['sin_hour'] = np.sin(2*np.pi/(hour + eps))
# df['cos_hour'] = np.cos(2*np.pi/(hour + eps))
# df['sin_minute'] = np.sin(2*np.pi/(minute + eps))
# df['cos_minute'] = np.cos(2*np.pi/(minute + eps))


days_in_week = 7
hours_in_day = 24
minutes_in_hour = 60

df['sin_day'] = np.sin(2*np.pi*dayofweek/days_in_week)
df['cos_day'] = np.cos(2*np.pi*dayofweek/days_in_week)
df['sin_hour'] = np.sin(2*np.pi*hour/hours_in_day)
df['cos_hour'] = np.cos(2*np.pi*hour/hours_in_day)
df['sin_minute'] = np.sin(2*np.pi*minute/minutes_in_hour)
df['cos_minute'] = np.cos(2*np.pi*minute/minutes_in_hour)


### Add target columns
We will add a column for price change at each interval, this will be our regression target variable. We will also add another column that quantifys the magnitude of the price change, this will be out target variable for classification.

In [5]:
df['price_diff'] = df['close'].diff()

# thresh = 0.1 # dollars
# df['price_change'] = 1 # price stays the same
# df['price_change'][df['price_diff'] < -thresh] = 0 # downward price movement
# df['price_change'][df['price_diff'] > thresh] = 2 # upward prive movement

In [6]:
df = df.dropna()
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,sin_day,cos_day,sin_hour,cos_hour,sin_minute,cos_minute,price_diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-10-01 04:30:00,115.634512,115.792604,115.407254,115.407254,13550.0,0.433884,-0.900969,0.866025,0.5,5.665539e-16,-1.0,-0.207496
2020-10-01 04:45:00,115.367731,115.367731,115.120712,115.308447,12857.0,0.433884,-0.900969,0.866025,0.5,-1.0,-1.83697e-16,-0.098808
2020-10-01 05:00:00,115.308447,115.397374,115.298566,115.318327,10079.0,0.433884,-0.900969,0.965926,0.258819,0.0,1.0,0.009881
2020-10-01 05:15:00,115.417135,115.604869,115.377612,115.604869,3534.0,0.433884,-0.900969,0.965926,0.258819,1.0,2.832769e-16,0.286542
2020-10-01 05:30:00,115.604869,115.703677,115.555466,115.703677,7688.0,0.433884,-0.900969,0.965926,0.258819,5.665539e-16,-1.0,0.098808


### Get Standardized train, valid, and test sets

Split into train, valid, and test sets. And then standardize with training mean and standard deviation

In [7]:
train_df = df.loc['2020-10-01':'2021-10-01']
valid_df = df.loc['2021-10-02':'2022-05-01']
test_df = df.loc['2022-05-02':]


train_mean = train_df.mean()
train_std = train_df.std()

train_df = (train_df - train_mean) / train_std
valid_df = (valid_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std


print(train_df.shape)
print(valid_df.shape)
print(test_df.shape)

(16112, 12)
(9243, 12)
(6267, 12)


### Get Data Generator for each time step

In [9]:
data_gen = WindowGenerator(
                input_width=10, label_width=1, shift=1, 
                train_df=train_df, valid_df=valid_df, test_df=test_df,
                label_columns=['price_diff'])

In [10]:
for inputs, targets in data_gen.train.take(1):
    print(f'Inputs shape (batch, time, features): {inputs.shape}')
    print(f'Targets shape (batch, time, features): {targets.shape}')

Inputs shape (batch, time, features): (32, 10, 12)
Targets shape (batch, time, features): (32, 1, 1)


## **Start Training Models**

First we will need a baseline model to compare our results to. The most simple baseline model will just predict the next value by using the previous value.

In [13]:
class Baseline(tf.keras.Model):
    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is None:
            return inputs
        result = inputs[:, :, self.label_index]
        return result[:, :, tf.newaxis]

In [15]:
single_step_window = WindowGenerator(
        input_width=1, label_width=1, shift=1,
        train_df=train_df, valid_df=valid_df, test_df=test_df,
        label_columns=['price_diff'])

In [17]:
baseline = Baseline(label_index=single_step_window.column_indices['price_diff'])

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.valid)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0)



### Now let's train some more in depth models

First we will define a helper function to streamline this process

In [18]:
def compile_and_fit(model, window, lr=1e-4, max_epochs=100, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')

    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  metrics=[tf.keras.metrics.MeanAbsoluteError()])

    history = model.fit(window.train, epochs=max_epochs,
                        validation_data=window.valid,
                        callbacks=[early_stopping])
    return history

In [20]:
dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

In [21]:
history = compile_and_fit(dense, single_step_window)

val_performance['Dense'] = dense.evaluate(single_step_window.valid)
performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100


Now let's train a dense NN model with a few time steps. We can use the Flatten() command to flatten out inputs as they are fed into the network.

In [23]:
multi_step_dense = tf.keras.Sequential([
    # Shape: (time, features) => (time*features)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
    # Add back the time dimension.
    # Shape: (outputs) => (1, outputs)
    tf.keras.layers.Reshape([1, -1]),
])


In [24]:
conv_window = WindowGenerator(
        input_width=3, label_width=1, shift=1,
        train_df=train_df, valid_df=valid_df, test_df=test_df,
        label_columns=['price_diff'])

In [26]:
history = compile_and_fit(multi_step_dense, conv_window)

val_performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.valid)
performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.test, verbose=0)

Epoch 1/100
Epoch 2/100
Epoch 3/100


Now let's try an RNN

In [27]:
lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, return_sequences=True),
    # Shape => [batch, time, features]
    tf.keras.layers.Dense(units=1)
])


In [29]:
history = compile_and_fit(lstm_model, data_gen, patience=5)

val_performance['LSTM'] = lstm_model.evaluate(data_gen.valid)
performance['LSTM'] = lstm_model.evaluate(data_gen.test, verbose=0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


Now let's try a Transformer Encoder

In [30]:
from tensorflow import keras
from tensorflow.keras import layers

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res


def build_model(
            input_shape,
            head_size,
            num_heads,
            ff_dim,
            num_transformer_blocks,
            mlp_units,
            dropout=0,
            mlp_dropout=0,
        ):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(1)(x)
    return keras.Model(inputs, outputs)

In [31]:
input_shape = inputs.shape[1:]

xformer_model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=256,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

In [32]:
compile_and_fit(xformer_model, data_gen, lr=1e-4, max_epochs=100, patience=5)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.callbacks.History at 0x1b291db4520>

In [33]:
val_performance['xformer'] = xformer_model.evaluate(data_gen.valid)
performance['xfomrer'] = xformer_model.evaluate(data_gen.test, verbose=0)



In [34]:
val_performance

{'Baseline': [4.045819282531738, 1.2536100149154663],
 'Dense': [2.035062313079834, 0.8688646554946899],
 'Multi step dense': [2.0267698764801025, 0.867950439453125],
 'LSTM': [2.034999132156372, 0.8721564412117004],
 'xformer': [2.0155041217803955, 0.8600403666496277]}

In [40]:
targets

<tf.Tensor: shape=(32, 1, 1), dtype=float32, numpy=
array([[[ 0.09696072]],

       [[ 0.33631903]],

       [[-0.48091865]],

       [[-0.24839914]],

       [[ 0.5414833 ]],

       [[-0.3766268 ]],

       [[ 0.7757125 ]],

       [[ 1.3279463 ]],

       [[-1.202413  ]],

       [[-0.79208446]],

       [[ 0.7808416 ]],

       [[-2.5530777 ]],

       [[-1.8350028 ]],

       [[ 0.81503564]],

       [[-0.03947352]],

       [[ 0.54114133]],

       [[-0.1601785 ]],

       [[-1.2793496 ]],

       [[ 0.8755591 ]],

       [[ 0.07234101]],

       [[-1.2290844 ]],

       [[-0.6724053 ]],

       [[ 0.44163668]],

       [[-0.24805719]],

       [[ 0.5076312 ]],

       [[ 0.37051308]],

       [[-0.20565657]],

       [[ 0.9589926 ]],

       [[-0.8043943 ]],

       [[-0.7917425 ]],

       [[ 0.74391204]],

       [[ 1.0738846 ]]], dtype=float32)>

In [41]:
xformer_model(inputs) 

<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[ 0.07047774],
       [ 0.08038411],
       [ 0.05739209],
       [ 0.05441375],
       [ 0.06962143],
       [ 0.06844871],
       [ 0.07363428],
       [ 0.03191207],
       [ 0.05784791],
       [ 0.11115447],
       [ 0.05245746],
       [ 0.0112911 ],
       [ 0.11171508],
       [ 0.10598822],
       [ 0.01331589],
       [ 0.03273129],
       [ 0.05926988],
       [ 0.07002015],
       [ 0.06132753],
       [-0.00444111],
       [ 0.00827672],
       [ 0.06400785],
       [ 0.04987158],
       [-0.03143305],
       [ 0.00879576],
       [ 0.07991422],
       [ 0.04328666],
       [-0.0254989 ],
       [-0.01554505],
       [ 0.06986066],
       [ 0.04208784],
       [-0.04300736]], dtype=float32)>

Function to determine how well the model predicts the upcoming price movements

In [71]:
yhat = tf.squeeze(xformer_model(inputs))
y = tf.squeeze(targets)

true_moves = np.ones_like(y.numpy())
true_moves[y > 0.1] = 2
true_moves[y < -0.1] = 0

pred_moves = np.ones_like(yhat.numpy())
pred_moves[yhat > 0.1] = 2
pred_moves[yhat < -0.1] = 0


In [72]:
true_moves = tf.convert_to_tensor(true_moves)
pred_moves = tf.convert_to_tensor(pred_moves)

In [73]:
pred_moves

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 2., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
      dtype=float32)>

In [61]:
tf.boolean_mask(tmp, tf.greater(y, 0.1))

<tf.Tensor: shape=(14,), dtype=float32, numpy=
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
      dtype=float32)>

In [None]:
def price_movement_loss(y, yhat, thresh=0.1):
   