In [18]:
!pip install yfinance



In [19]:
import math
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, Add, GlobalAveragePooling1D

import yfinance as yf

In [20]:
def calculate_bollinger_bands(data, window=10, num_of_std=2):
    """Return the upper and lower Bollinger Bands of a series.

    Args:
        data: pandas object exposing ``.rolling`` (e.g. a price Series).
        window: number of observations in the rolling window.
        num_of_std: how many rolling standard deviations the bands sit
            away from the rolling mean.

    Returns:
        Tuple ``(upper_band, lower_band)``; the first ``window - 1`` entries
        are NaN because the rolling window is not yet full.
    """
    roller = data.rolling(window=window)
    centre = roller.mean()
    offset = roller.std() * num_of_std
    return centre + offset, centre - offset

def calculate_rsi(data, window=10):
    """Return the Relative Strength Index based on simple rolling averages.

    Args:
        data: pandas Series of prices.
        window: rolling window length used to average gains and losses.

    Returns:
        Series of RSI values. The first entry is NaN (no previous value to
        difference against). A window with zero average loss and positive
        average gain evaluates to 100; zero gain and zero loss gives NaN.
    """
    def _rolling_average(series):
        # min_periods=1 lets the average start as soon as one value exists
        return series.rolling(window=window, min_periods=1).mean()

    change = data.diff()
    avg_gain = _rolling_average(change.clip(lower=0))
    avg_loss = _rolling_average(-change.clip(upper=0))

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def calculate_roc(data, periods=10):
    """Return the Rate of Change in percent over ``periods`` observations.

    Args:
        data: pandas Series (or other object supporting ``.shift``).
        periods: how many rows back the reference value lies.

    Returns:
        ``(data - reference) / reference * 100``; the first ``periods``
        entries are NaN because no reference value exists yet.
    """
    reference = data.shift(periods)
    return ((data - reference) / reference) * 100

In [21]:
# Symbols to download; every later per-ticker loop iterates over this list, and
# the ticker string is used as the prefix of that ticker's feature column names.
tickers = ['META', 'AAPL', 'MSFT', 'AMZN', 'GOOG']

In [22]:
ticker_data_frames = []
stats = {}

for ticker in tickers:
    # Fetch the last month of 5-minute bars for this ticker
    data = yf.download(ticker, period="1mo", interval="5m")

    # Indicators derived from the close price (all with a 14-bar look-back)
    close = data['Close']
    upper, lower = calculate_bollinger_bands(close, window=14, num_of_std=2)

    # One column per feature, each prefixed with the ticker symbol
    ticker_df = pd.DataFrame({
        f"{ticker}_close": close,
        f"{ticker}_width": upper - lower,
        f"{ticker}_rsi": calculate_rsi(close, window=14),
        f"{ticker}_roc": calculate_roc(close, periods=14),
        f"{ticker}_volume": data['Volume'],
        f"{ticker}_diff": close.diff(1),
        f"{ticker}_percent_change_close": close.pct_change() * 100,
    })

    # Per-column statistics are stored so predictions can be de-normalised later.
    # NOTE(review): they are computed over the whole download, i.e. before any
    # train/validation/test split is made.
    column_mean = ticker_df.mean()
    column_std = ticker_df.std()
    for column in column_mean.index:
        stats[f"{column}_mean"] = column_mean[column]
        stats[f"{column}_std"] = column_std[column]

    # Z-score every feature column and keep the normalised frame
    ticker_data_frames.append((ticker_df - column_mean) / column_std)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [23]:
# Convert stats from dict to df: a single row (index 0) with one column per
# "<feature>_mean" / "<feature>_std" key collected in the loop above.
# NOTE: this rebinds `stats` from a dict to a DataFrame, so the cell is not
# idempotent; re-run the feature loop before running it a second time.
stats = pd.DataFrame([stats], index=[0])
stats.head()

Unnamed: 0,META_close_mean,META_close_std,META_width_mean,META_width_std,META_rsi_mean,META_rsi_std,META_roc_mean,META_roc_std,META_volume_mean,META_volume_std,...,GOOG_rsi_mean,GOOG_rsi_std,GOOG_roc_mean,GOOG_roc_std,GOOG_volume_mean,GOOG_volume_std,GOOG_diff_mean,GOOG_diff_std,GOOG_percent_change_close_mean,GOOG_percent_change_close_std
0,484.822863,15.746056,4.169157,3.165505,50.893476,17.682784,0.051772,0.652936,117441.459096,119394.564532,...,49.778738,17.452753,-0.014317,0.487646,151418.393162,180722.975073,-0.001228,0.262529,-0.000582,0.148248


In [24]:
# Join the per-ticker frames side by side (aligned on the index), turn +/-inf
# into NaN, and discard every row that has a missing value in any column
df = (
    pd.concat(ticker_data_frames, axis=1)
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
df.head()

Unnamed: 0_level_0,META_close,META_width,META_rsi,META_roc,META_volume,META_diff,META_percent_change_close,AAPL_close,AAPL_width,AAPL_rsi,...,AMZN_volume,AMZN_diff,AMZN_percent_change_close,GOOG_close,GOOG_width,GOOG_rsi,GOOG_roc,GOOG_volume,GOOG_diff,GOOG_percent_change_close
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-05-20 10:40:00-04:00,-0.827056,-0.243538,-0.185691,-0.264102,0.500186,0.575418,0.590837,-0.708001,0.558623,1.850961,...,-0.37821,-0.155313,-0.153601,1.296152,-0.293427,0.706943,0.787445,0.161145,0.358874,0.354088
2024-05-20 10:45:00-04:00,-0.851823,-0.250691,-0.075215,-0.111774,-0.242469,-0.505126,-0.518513,-0.702106,0.409069,1.737899,...,-0.498962,0.984683,0.968969,1.206814,-0.348898,0.127098,0.143962,0.242269,-0.643211,-0.636254
2024-05-20 10:50:00-04:00,-0.856149,-0.236992,0.061301,0.061201,-0.362273,-0.105347,-0.108379,-0.716737,0.167665,1.48125,...,-0.429798,0.093272,0.091041,1.160594,-0.233238,-0.448752,-0.379921,0.409652,-0.330514,-0.327592
2024-05-20 10:55:00-04:00,-0.88199,-0.357632,0.358524,0.388816,-0.416815,-0.526125,-0.540564,-0.710574,-0.146582,1.460356,...,-0.041516,-1.23888,-1.218858,1.025605,-0.012514,-0.51921,-0.45718,-0.229326,-0.974276,-0.964773
2024-05-20 11:00:00-04:00,-0.900084,-0.364608,-0.298463,-0.316446,-0.339224,-0.37462,-0.385271,-0.703382,-0.385915,1.25149,...,-0.452868,0.714766,0.703349,0.996713,0.178696,-1.252204,-0.958768,-0.119417,-0.204854,-0.203709


In [25]:
# Labels are the feature rows moved up by one position: labels row t holds df row t+1.
# The final row has no successor (its shifted values are NaN), so it is removed
# from both the labels and the features.
labels = df.shift(-1).iloc[:-1]
df = df.iloc[:-1]

In [26]:
# Sequence len = 24 means that we have 2 hours of 5 min data
SEQUENCE_LEN = 24

def create_sequences(data, labels, mean, std, sequence_length=SEQUENCE_LEN, horizon=12):
    """Cut a feature matrix into overlapping windows and build their label rows.

    Window ``i`` is ``data[i:i + sequence_length]``. Its label row is
    ``[labels[i - 1], labels[i + horizon], mean[0], std[0]]``.

    ``labels`` must be aligned by the caller so that ``labels[i - 1]`` equals
    column 0 of the last row of window ``i``; any mismatch is reported with a
    printed ``ERROR`` line (nothing is raised).

    NOTE(review): with that alignment, ``labels[i + horizon]`` lies
    ``horizon + 1`` rows after the last row of the window, so the default
    ``horizon=12`` targets 13 rows ahead. Confirm this matches the intended
    "next hour" forecast.

    Args:
        data: sliceable sequence of feature rows (e.g. a 2-D numpy array).
        labels: 1-D indexable sequence of target values, aligned as described.
        mean: indexable; element 0 is copied into every label row.
        std: indexable; element 0 is copied into every label row.
        sequence_length: number of rows per window.
        horizon: offset added to ``i`` when picking the target label. The
            default of 12 reproduces the previously hard-coded behaviour.

    Returns:
        Tuple ``(sequences, labels)`` of numpy arrays built from the collected
        windows and label rows.
    """
    sequences = []
    lab = []
    data_size = len(data)

    # Start at 1 (not 0) so that labels[i - 1] never wraps around to labels[-1].
    # The upper bound leaves room for the window plus the look-ahead.
    for i in range(1, data_size - (sequence_length + horizon + 1)):
        sequences.append(data[i:i + sequence_length])
        lab.append([labels[i - 1], labels[i + horizon], mean[0], std[0]])

    # Sanity check: the first label must be the last column-0 value of the window.
    # The two local names below appear verbatim in the printed message.
    for window, label_row in zip(sequences, lab):
        last_price_data = window[-1][0]
        last_price_label = label_row[0]

        if not last_price_data == last_price_label:
            print(f"ERROR : {last_price_data=} and {last_price_label=} are not equal")

    return np.array(sequences), np.array(lab)

In [27]:
sequences_dict = {}
sequence_labels = {}

# Column suffixes in the order they are stacked; '_close' must stay first because
# create_sequences compares column 0 of each window against the close labels
feature_suffixes = ('_close', '_width', '_rsi', '_roc', '_volume', '_diff', '_percent_change_close')

for ticker in tickers:
    # Stack this ticker's seven normalised feature columns into one (rows, 7) matrix
    ticker_data = np.column_stack([df[ticker + suffix].values for suffix in feature_suffixes])
    print(ticker_data.shape)

    # Close-price labels, offset so that entry i-1 lines up with the last row of window i
    attribute = ticker+"_close"
    close_labels = labels[attribute].values[SEQUENCE_LEN-1:]
    print(close_labels)
    print(close_labels.shape)

    # Build the windows and their [previous, target, mean, std] label rows
    ticker_sequences, lab = create_sequences(
        ticker_data,
        close_labels,
        stats[attribute+"_mean"].values,
        stats[attribute+"_std"].values,
    )

    sequences_dict[ticker] = ticker_sequences
    print(sequences_dict[ticker].shape)
    sequence_labels[ticker] = lab
    print(sequence_labels[ticker].shape)

(1623, 7)
[-0.81054367 -0.86770046 -0.83594647 ...  0.887025    0.87019445
  0.94545124]
(1600,)
(1585, 24, 7)
(1585, 4)
(1623, 7)
[-0.72086281 -0.71777613 -0.71022427 ...  1.63332112  1.65597825
  1.64568983]
(1600,)
(1585, 24, 7)
(1585, 4)
(1623, 7)
[-0.24801027 -0.27038381 -0.2556279  ...  1.70930456  1.76451936
  1.73120164]
(1600,)
(1585, 24, 7)
(1585, 4)
(1623, 7)
[0.7031386  0.66965901 0.71979673 ... 0.00406851 0.14287099 0.17130312]
(1600,)
(1585, 24, 7)
(1585, 4)
(1623, 7)
[ 0.6683335   0.6605594   0.72359372 ... -0.42668445 -0.10628737
 -0.10628737]
(1600,)
(1585, 24, 7)
(1585, 4)


In [28]:
# Pool the windows and label rows of every ticker, in `tickers` order, into two arrays
all_sequences = np.array([window for ticker in tickers for window in sequences_dict[ticker]])
all_labels = np.array([label_row for ticker in tickers for label_row in sequence_labels[ticker]])
print(all_sequences.shape)

(7925, 24, 7)


In [29]:
np.random.seed(42)

# Shuffle sequences and labels with one shared permutation so pairs stay aligned.
# NOTE(review): the windows overlap (stride 1), so a random split places
# near-identical windows in train, validation and test; consider a chronological split.
shuffled_indices = np.random.permutation(len(all_sequences))
all_sequences, all_labels = all_sequences[shuffled_indices], all_labels[shuffled_indices]

# First 90% -> training set, remaining 10% -> hold-out pool
train_size = int(len(all_sequences) * 0.9)
train_sequences, other_sequences = all_sequences[:train_size], all_sequences[train_size:]
train_labels, other_labels = all_labels[:train_size], all_labels[train_size:]

# Shuffle the hold-out pool once more before halving it
shuffled_indices = np.random.permutation(len(other_sequences))
other_sequences, other_labels = other_sequences[shuffled_indices], other_labels[shuffled_indices]

# First half of the pool -> validation, second half -> test
val_size = int(len(other_sequences) * 0.5)
validation_sequences, test_sequences = other_sequences[:val_size], other_sequences[val_size:]
validation_labels, test_labels = other_labels[:val_size], other_labels[val_size:]

In [None]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to ``inputs``.

    Layer normalisation is applied before each sub-layer, and each sub-layer's
    output is added back to its un-normalised input (residual connection).

    Args:
        inputs: Keras tensor; its last dimension sets the block's output width.
        head_size: ``key_dim`` of each attention head.
        num_heads: number of attention heads.
        ff_dim: width of the hidden feed-forward layer.
        dropout: rate used inside the attention layer and after the hidden
            feed-forward layer.

    Returns:
        Keras tensor with the same last dimension as ``inputs``.
    """
    # Self-attention sub-layer (query and value are the same normalised tensor)
    normed_inputs = LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)
    attended = self_attention(normed_inputs, normed_inputs)
    attention_out = Add()([attended, inputs])

    # Position-wise feed-forward sub-layer, projected back to the input width
    hidden = LayerNormalization(epsilon=1e-6)(attention_out)
    hidden = Dense(ff_dim, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    projected = Dense(inputs.shape[-1])(hidden)
    return Add()([projected, attention_out])

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_layers, dropout=0):
    """Assemble the (uncompiled) Transformer regression model.

    Args:
        input_shape: shape of one sample, without the batch dimension.
        head_size: ``key_dim`` of each attention head.
        num_heads: number of attention heads per encoder block.
        ff_dim: hidden width of each block's feed-forward part.
        num_layers: how many encoder blocks are stacked.
        dropout: dropout rate forwarded to every encoder block.

    Returns:
        A Keras ``Model`` mapping a sequence to a single linear output.
        The model is not compiled here.
    """
    inputs = Input(shape=input_shape)

    # Stack of identical encoder blocks
    encoded = inputs
    for _ in range(num_layers):
        encoded = transformer_encoder(encoded, head_size, num_heads, ff_dim, dropout)

    # Regression head: average over the sequence axis, normalise, project to one value
    pooled = GlobalAveragePooling1D()(encoded)
    pooled = LayerNormalization(epsilon=1e-6)(pooled)
    prediction = Dense(1, activation="linear")(pooled)

    return Model(inputs=inputs, outputs=prediction)

# Hyper-parameters of the Transformer
input_shape = train_sequences.shape[1:]  # one sample: (sequence length, number of features)
head_size, num_heads = 256, 16
ff_dim = 1024
num_layers = 12
dropout = 0.20

# Instantiate the network and print its layer summary
model = build_transformer_model(
    input_shape=input_shape,
    head_size=head_size,
    num_heads=num_heads,
    ff_dim=ff_dim,
    num_layers=num_layers,
    dropout=dropout,
)
model.summary()

In [31]:
def custom_mae_loss(y_true, y_pred):
    """Mean absolute error between the target column and the prediction.

    Only column 1 of ``y_true`` (the future value) is compared with column 0
    of ``y_pred``; the other ``y_true`` columns are ignored by the loss.
    Both are cast to float64 before the difference is taken.
    """
    target = tf.cast(y_true[:, 1], tf.float64)
    estimate = tf.cast(y_pred[:, 0], tf.float64)
    return tf.reduce_mean(tf.abs(target - estimate))

def dir_acc(y_true, y_pred):
    """Fraction of samples whose predicted move has the same sign as the true move.

    ``y_true`` columns are read as ``[previous value, future value, mean, std]``;
    all values are de-normalised with ``value * std + mean`` before the moves
    relative to the previous value are compared. Signs are compared with
    ``tf.sign``, so a zero move only matches another zero move.
    """
    mean = tf.cast(y_true[:, 2], tf.float64)
    std = tf.cast(y_true[:, 3], tf.float64)

    def _denormalise(values):
        # Undo the z-score normalisation applied to the features
        return (tf.cast(values, tf.float64) * std) + mean

    previous_price = _denormalise(y_true[:, 0])
    actual_price = _denormalise(y_true[:, 1])
    predicted_price = _denormalise(y_pred[:, 0])

    actual_direction = tf.sign(actual_price - previous_price)
    predicted_direction = tf.sign(predicted_price - previous_price)
    hits = tf.equal(actual_direction, predicted_direction)

    return tf.reduce_mean(tf.cast(hits, tf.float64))

In [32]:
# Attach optimizer, loss and metric: Adam with its default settings, MAE on the
# future-value column as the loss, directional accuracy as the reported metric
optimizer = tf.keras.optimizers.Adam()
model.compile(
    loss=custom_mae_loss,
    metrics=[dir_acc],
    optimizer=optimizer,
)

In [33]:
# Save the model whenever the *training* directional accuracy reaches a new best
checkpoint_callback_train = ModelCheckpoint(
    filepath="transformer_train_model.keras",
    monitor="dir_acc",      # alternative: "loss" (would need mode="min")
    mode="max",             # higher directional accuracy is better
    save_best_only=True,    # overwrite only on improvement
    verbose=1,              # log every save / non-improvement
)

# Save the model whenever the *validation* directional accuracy reaches a new best
checkpoint_callback_val = ModelCheckpoint(
    filepath="transformer_val_model.keras",
    monitor="val_dir_acc",  # alternative: "val_loss" (would need mode="min")
    mode="max",             # higher directional accuracy is better
    save_best_only=True,    # overwrite only on improvement
    verbose=1,              # log every save / non-improvement
)

def get_lr_callback(batch_size=16, mode='cos', epochs=500, plot=False):
    """Build a Keras ``LearningRateScheduler`` with linear warm-up and cosine decay.

    The schedule ramps linearly from ``lr_start`` to ``lr_max`` over the first
    30% of ``epochs``, then (for ``mode='cos'``) follows half a cosine wave
    down to ``lr_min``. Any other ``mode`` uses ``lr_min`` after the warm-up.

    Args:
        batch_size: accepted for call-site compatibility; not used by the schedule.
        mode: ``'cos'`` for cosine decay after warm-up.
        epochs: total number of epochs the schedule is laid out for.
        plot: if True, draw the learning-rate curve with matplotlib.

    Returns:
        ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.
    """
    lr_start, lr_max, lr_min = 0.0001, 0.005, 0.00001  # Learning rate boundaries
    lr_ramp_ep = int(0.30 * epochs)  # 30% of epochs for warm-up
    # Optional sustain phase at lr_max. With the 10% / 30% fractions used here the
    # difference is never positive, so this evaluates to 0 (no sustain phase).
    lr_sus_ep = max(0, int(0.10 * epochs) - lr_ramp_ep)

    def lrfn(epoch):
        """Return the learning rate for the given zero-based epoch index."""
        if epoch < lr_ramp_ep:  # Warm-up phase: linear ramp lr_start -> lr_max
            lr = (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        elif epoch < lr_ramp_ep + lr_sus_ep:  # Sustain phase at max learning rate
            lr = lr_max
        elif mode == 'cos':
            # Cosine decay lr_max -> lr_min over the epochs left after warm-up/sustain
            decay_total_epochs, decay_epoch_index = epochs - lr_ramp_ep - lr_sus_ep, epoch - lr_ramp_ep - lr_sus_ep
            phase = math.pi * decay_epoch_index / decay_total_epochs
            lr = (lr_max - lr_min) * 0.5 * (1 + math.cos(phase)) + lr_min
        else:
            lr = lr_min  # Default to minimum learning rate if mode is not recognized

        return lr

    if plot:  # Plot learning rate curve if plot is True
        # Imported here because the notebook's import cell does not bind `plt`;
        # without this the plot branch fails with NameError.
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 5))
        plt.plot(np.arange(epochs), [lrfn(epoch) for epoch in np.arange(epochs)], marker='o')
        plt.xlabel('Epoch')
        plt.ylabel('Learning Rate')
        plt.title('Learning Rate Scheduler')
        plt.show()

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=True)

In [34]:
BATCH_SIZE = 64
EPOCHS = 100

# Both checkpoint savers plus the warm-up / cosine learning-rate schedule,
# laid out over the same number of epochs as the training run
training_callbacks = [
    checkpoint_callback_train,
    checkpoint_callback_val,
    get_lr_callback(batch_size=BATCH_SIZE, epochs=EPOCHS),
]

model.fit(
    x=train_sequences,
    y=train_labels,
    validation_data=(validation_sequences, validation_labels),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    shuffle=True,
    callbacks=training_callbacks,
)


Epoch 1: LearningRateScheduler setting learning rate to 0.0001.
Epoch 1/100


I0000 00:00:1718765623.837030     164 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1718765623.903553     164 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 622ms/step - dir_acc: 0.5120 - loss: 0.4427

W0000 00:00:1718765692.942510     162 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1718765698.844594     163 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update



Epoch 1: dir_acc improved from -inf to 0.51401, saving model to transformer_train_model.keras

Epoch 1: val_dir_acc improved from -inf to 0.49330, saving model to transformer_val_model.keras
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 724ms/step - dir_acc: 0.5120 - loss: 0.4417 - val_dir_acc: 0.4933 - val_loss: 0.2891 - learning_rate: 1.0000e-04

Epoch 2: LearningRateScheduler setting learning rate to 0.00026333333333333336.
Epoch 2/100
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 143ms/step - dir_acc: 0.5280 - loss: 0.3035
Epoch 2: dir_acc improved from 0.51401 to 0.52386, saving model to transformer_train_model.keras

Epoch 2: val_dir_acc did not improve from 0.49330
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 150ms/step - dir_acc: 0.5280 - loss: 0.3035 - val_dir_acc: 0.4598 - val_loss: 0.2965 - learning_rate: 2.6333e-04

Epoch 3: LearningRateScheduler setting learning rate to 0.00042666666666666667.
Epoch 3/100
[1

<keras.src.callbacks.history.History at 0x7ea6c1f51360>

In [39]:
from sklearn.metrics import r2_score

# Restore the weights saved by the validation checkpoint callback
model.load_weights("transformer_val_model.keras")

# evaluate() returns [loss, dir_acc]; index 1 is the directional accuracy on the test set
accuracy = model.evaluate(test_sequences, test_labels)[1]
print(accuracy)

# R-squared between the normalised target (label column 1) and the model output
predictions = model.predict(test_sequences)
print(test_labels.shape)
r2 = r2_score(test_labels[:, 1], predictions[:, 0])
print(f"R-squared: {r2}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - dir_acc: 0.8028 - loss: 0.1009
0.8065828084945679
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
(397, 4)
R-squared: 0.9806653884139327
