# Toy Models

In this notebook, we will explore some Toy Models to perform classification of price movement on a few data sets of stock data. We will preprocess the stock data to contain the Times in the form of sines and cosines.

### Library Import

In [201]:
import os
import sys
import numpy as np
import pandas as pd
import pandas_ta as ta
import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (20, 10)
%matplotlib inline

### Local Imports

In [2]:
from window_generator import WindowGenerator

In [3]:
# for python scripts use: "os.path.dirname(__file__)" instead of "os.path.abspath('')"
sys.path.append(
    os.path.abspath(os.path.join(os.path.abspath(''), os.path.pardir)))

from data_clean import get_trading_times

#### Ensure that GPU is available

In [84]:
tf.config.list_physical_devices('GPU')  

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [87]:
true = [0.0, 1.0]
pred = [[0.1,0.9],[0.0,1.0]]

tt = tf.convert_to_tensor(true)
tp = tf.convert_to_tensor(pred)

l = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
ret = l(tt,tp)

print(ret)  

tf.Tensor(0.7421812, shape=(), dtype=float32)


### Get the Data

In [4]:
data_path = r'..\data\raw\AAPL_15min.csv'
df = pd.read_csv(data_path, index_col=0, 
                 parse_dates=True, infer_datetime_format=True)

# df = get_trading_times(df)
df = df.dropna()

# add days, hours, and minutes to the dataset
dayofweek = df.index.dayofweek
hour = df.index.hour
minute = df.index.minute

# encode the days, hours, and minutes with sin and cos functions
eps = 1e-4 # ensure that encodings don't have NaNs
# df['sin_day'] = np.sin(2*np.pi/(dayofweek + eps))
# df['cos_day'] = np.cos(2*np.pi/(dayofweek + eps))
# df['sin_hour'] = np.sin(2*np.pi/(hour + eps))
# df['cos_hour'] = np.cos(2*np.pi/(hour + eps))
# df['sin_minute'] = np.sin(2*np.pi/(minute + eps))
# df['cos_minute'] = np.cos(2*np.pi/(minute + eps))


days_in_week = 7
hours_in_day = 24
minutes_in_hour = 60

df['sin_day'] = np.sin(2*np.pi*dayofweek/days_in_week)
df['cos_day'] = np.cos(2*np.pi*dayofweek/days_in_week)
df['sin_hour'] = np.sin(2*np.pi*hour/hours_in_day)
df['cos_hour'] = np.cos(2*np.pi*hour/hours_in_day)
df['sin_minute'] = np.sin(2*np.pi*minute/minutes_in_hour)
df['cos_minute'] = np.cos(2*np.pi*minute/minutes_in_hour)


### Add target columns
We will add a column for price change at each interval, this will be our regression target variable. We will also add another column that quantifys the magnitude of the price change, this will be out target variable for classification.

In [5]:
df['price_diff'] = df['close'].diff()

thresh = 0.1 # dollars
df['price_change'] = 1 # price stays the same
df['price_change'][df['price_diff'] < -thresh] = 0 # downward price movement
df['price_change'][df['price_diff'] > thresh] = 2 # upward prive movement

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price_change'][df['price_diff'] < -thresh] = 0 # downward price movement
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price_change'][df['price_diff'] > thresh] = 2 # upward prive movement


In [6]:
df = df.dropna()
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,sin_day,cos_day,sin_hour,cos_hour,sin_minute,cos_minute,price_diff,price_change
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-10-01 04:30:00,115.634512,115.792604,115.407254,115.407254,13550.0,0.433884,-0.900969,0.866025,0.5,5.665539e-16,-1.0,-0.207496,0
2020-10-01 04:45:00,115.367731,115.367731,115.120712,115.308447,12857.0,0.433884,-0.900969,0.866025,0.5,-1.0,-1.83697e-16,-0.098808,1
2020-10-01 05:00:00,115.308447,115.397374,115.298566,115.318327,10079.0,0.433884,-0.900969,0.965926,0.258819,0.0,1.0,0.009881,1
2020-10-01 05:15:00,115.417135,115.604869,115.377612,115.604869,3534.0,0.433884,-0.900969,0.965926,0.258819,1.0,2.832769e-16,0.286542,2
2020-10-01 05:30:00,115.604869,115.703677,115.555466,115.703677,7688.0,0.433884,-0.900969,0.965926,0.258819,5.665539e-16,-1.0,0.098808,1


### Get Standardized train, valid, and test sets

Split into train, valid, and test sets. And then standardize with training mean and standard deviation

In [75]:
train_df = df.loc['2020-10-01':'2021-10-01']
valid_df = df.loc['2021-10-02':'2022-05-01']
test_df = df.loc['2022-05-02':]

train_mean = train_df.mean()
train_std = train_df.std()

# ensure that target column is not standardized
train_mean.price_change = 0
train_std.price_change = 1

train_df = (train_df - train_mean) / train_std
valid_df = (valid_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std


print(train_df.shape)
print(valid_df.shape)
print(test_df.shape)

(16112, 13)
(9243, 13)
(6267, 13)


### Get Data Generator for each time step

In [78]:
data_gen = WindowGenerator(
                input_width=12, label_width=1, shift=1, 
                train_df=train_df, valid_df=valid_df, test_df=test_df,
                label_columns=['price_change'])

In [79]:
for inputs, targets in data_gen.train.take(1):
    print(f'Inputs shape (batch, time, features): {inputs.shape}')
    print(f'Targets shape (batch, time, features): {targets.shape}')

Inputs shape (batch, time, features): (32, 12, 13)
Targets shape (batch, time, features): (32, 1, 1)


## **Start Training Models**

First we will need a baseline model to compare our results to. The most simple baseline model will just predict the next value by using the previous value.

In [80]:
class Baseline(tf.keras.Model):
    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is None:
            return inputs
        result = inputs[:, :, self.label_index]
        return result[:, :, tf.newaxis]

In [81]:
single_step_window = WindowGenerator(
        input_width=1, label_width=1, shift=1,
        train_df=train_df, valid_df=valid_df, test_df=test_df,
        label_columns=['price_change'])

In [82]:
baseline = Baseline(label_index=single_step_window.column_indices['price_change'])

baseline.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                 metrics=['accuracy'])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.valid)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0)



### Now let's train some more in depth models

First we will define a helper function to streamline this process

In [163]:
def compile_and_fit(model, window, lr=1e-4, max_epochs=100, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience,
                                                      mode='min')

    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  metrics=['accuracy'])

    history = model.fit(window.train, epochs=max_epochs,
                        validation_data=window.valid,
                        callbacks=[early_stopping])
    return history

In [202]:
dense = tf.keras.Sequential([
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=3, activation='softmax')
])

In [165]:
history = compile_and_fit(dense, single_step_window, max_epochs=10)

val_performance['Dense'] = dense.evaluate(single_step_window.valid)
performance['Dense'] = dense.evaluate(single_step_window.test, verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [166]:
next(iter(single_step_window.train.take(1)))[0].shape

TensorShape([32, 1, 13])

In [167]:
dense(next(iter(single_step_window.train.take(1)))[0]).shape

TensorShape([32, 1, 3])

Now let's train a dense NN model with a few time steps. We can use the Flatten() command to flatten out inputs as they are fed into the network.

In [203]:
multi_step_dense = tf.keras.Sequential([
    # Shape: (time, features) => (time*features)
    layers.Flatten(),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=3, activation='softmax'),
    # Add back the time dimension.
    # Shape: (outputs) => (1, outputs)
    layers.Reshape([1, -1]),
])


In [204]:
history = compile_and_fit(multi_step_dense, data_gen, max_epochs=10)

val_performance['Multi step dense'] = multi_step_dense.evaluate(data_gen.valid)
performance['Multi step dense'] = multi_step_dense.evaluate(data_gen.test, verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Now let's try an RNN

In [248]:
lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    layers.LSTM(32, return_sequences=True),
    layers.LSTM(32),
    # Shape => [batch, time, features]
    # layers.TimeDistributed(layers.Dense(units=3, activation='softmax'))
    layers.Dense(units=3, activation='softmax'),
    layers.Reshape([1, -1])
])

In [249]:
history = compile_and_fit(lstm_model, data_gen, patience=5, max_epochs=10)

val_performance['LSTM'] = lstm_model.evaluate(data_gen.valid)
performance['LSTM'] = lstm_model.evaluate(data_gen.test, verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Now let's try a Transformer Encoder

In [250]:
from tensorflow import keras
from tensorflow.keras import layers


def transformer_encoder(inputs, n_heads, d_k, d_v, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        num_heads=n_heads, key_dim=d_k, value_dim=d_v, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res


def build_model(
            input_shape,
            n_heads,
            d_k,
            d_v,
            ff_dim,
            num_transformer_blocks,
            mlp_units,
            n_outputs=1,
            dropout=0.1,
            mlp_dropout=0.1,
        ):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, n_heads, d_k, d_v, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(n_outputs, activation='softmax')(x)
    return keras.Model(inputs, outputs)

In [251]:
input_shape = inputs.shape[1:]

xformer_model = build_model(
    input_shape,
    n_heads=4,
    d_k=512,
    d_v=512,
    ff_dim=256,
    num_transformer_blocks=2,
    mlp_units=[256],
    n_outputs=3,
    dropout=0.1,
    mlp_dropout=0.1,
)

In [252]:
compile_and_fit(xformer_model, data_gen, lr=1e-3, patience=5, max_epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


<keras.callbacks.History at 0x19c56807490>

In [246]:
val_performance['xformer'] = xformer_model.evaluate(data_gen.valid)
performance['xfomrer'] = xformer_model.evaluate(data_gen.test, verbose=0)



In [253]:
val_performance

{'Baseline': [nan, 0.40911057591438293],
 'Multi step dense': [1.0593593120574951, 0.46257176995277405],
 'Dense': [1.0622918605804443, 0.4489288032054901],
 'LSTM': [1.0376485586166382, 0.45542195439338684],
 'xformer': [1.0005801916122437, 0.4757881164550781]}

Function to determine how well the model predicts the upcoming price movements

In [71]:
yhat = tf.squeeze(xformer_model(inputs))
y = tf.squeeze(targets)

true_moves = np.ones_like(y.numpy())
true_moves[y > 0.1] = 2
true_moves[y < -0.1] = 0

pred_moves = np.ones_like(yhat.numpy())
pred_moves[yhat > 0.1] = 2
pred_moves[yhat < -0.1] = 0


In [72]:
true_moves = tf.convert_to_tensor(true_moves)
pred_moves = tf.convert_to_tensor(pred_moves)

In [73]:
pred_moves

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 2., 2., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
      dtype=float32)>

In [61]:
tf.boolean_mask(tmp, tf.greater(y, 0.1))

<tf.Tensor: shape=(14,), dtype=float32, numpy=
array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
      dtype=float32)>

In [None]:
def price_movement_loss(y, yhat, thresh=0.1):
   