In [1]:
# imports
import pandas as pd
import numpy as np
import ta

# transformer imports
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization
from tensorflow.keras.layers import MultiHeadAttention, Add, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# custom imports
from metrics_calculation import *

## 1. Data preparation

In [2]:
# Load and prepare the dataset
df = pd.read_csv('data/crude_oil_daily.csv', index_col = 0)
df.index = pd.to_datetime(df.index)

In [3]:
# Train-test split creation
df_test = df[(df.index >= '2018-01-01') & (df.index < '2024-01-01')]
df_train = df[df.index < '2018-01-01']

In [4]:
# Set the sequence lengths for input and output
input_sequence_length = 10  # Number of past days to consider for predicting
output_sequence_length = 5  # Number of future days to predict

In [ ]:
# Normalize the training and testing data
scaler = MinMaxScaler()
df_train_scaled = scaler.fit_transform(df_train[['Open', 'High', 'Low', 'Close']])
df_test_scaled = scaler.transform(df_test[['Open', 'High', 'Low', 'Close']])

# Create a separate scaler for inverse transforming the 'Close' price predictions later
scaler_currency = MinMaxScaler()
currency_values = df_train['Close'].values.reshape(-1, 1)
scaled_currency = scaler_currency.fit(currency_values)

# Retrieve date indexes for both training and testing data
# This is used to organize the predictions later on
df_train_indexes = df_train.index
df_test_indexes = df_test.index

# Helper function to create input-output sequences, preprocessing for transformer
def create_sequences(input_sequence_length, output_sequence_length, data, dates):
    x, y = [], []
    x_dates, y_dates = [], []
    data_len = len(data)
    for i in range(input_sequence_length, data_len - output_sequence_length + 1):
        x.append(data[i - input_sequence_length:i])
        y.append(data[i:i + output_sequence_length, 3])  # Assuming 'Close' is at index 3
        x_dates.append(dates[i - input_sequence_length:i])
        y_dates.append(dates[i:i + output_sequence_length])
    return np.array(x), np.array(y), x_dates, y_dates

# Generate training and testing data using the helper function
x_train, y_train, x_train_indexes, y_train_indexes = create_sequences(input_sequence_length, output_sequence_length, df_train_scaled, df_train_indexes)
x_test, y_test, x_test_indexes, y_test_indexes = create_sequences(input_sequence_length, output_sequence_length, df_test_scaled, df_test_indexes)

# Print the shapes: the result is: (rows, training_sequence, features) (prediction value, )
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

## 2. Model creation

In [5]:
# Define a Transformer block as a reusable component
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention and Normalization
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads)(x, x)
    x = Dropout(dropout)(x)
    res = Add()([x, inputs])

    # Feed Forward Part
    x = LayerNormalization(epsilon=1e-6)(res)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return Add()([x, res])

# Build a simple Transformer model
def build_model(input_shape, head_size=32, num_heads=2, ff_dim=32, dropout=0.1):
    inputs = Input(shape=input_shape)
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads)(x, x)
    x = Dropout(dropout)(x)
    x = Add()([x, inputs])

    x = LayerNormalization(epsilon=1e-6)(x)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    x = Add()([x, inputs])

    x = Flatten()(x)
    outputs = Dense(output_sequence_length)(x)  # predicting all 4 features at the next time step
    model = Model(inputs, outputs)
    model.compile(optimizer=Adam(), loss='mse')
    return model

# Configure early stopping to avoid overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=3, verbose=2)

# Instantiate and summarize the model
model = build_model(input_shape=(10, 4))
model.summary()

## 3. Model training

In [8]:
# Train the model
history = model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1, callbacks=[early_stop])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## 4. Model prediction

In [9]:
# Predict future values using the trained model
predictions_scaled = model.predict(x_test)

# Inverse transform the scaled predictions to get actual price predictions
predictions = scaled_currency.inverse_transform(predictions_scaled)



In [48]:
# Prepare final test DataFrame by selecting the relevant subset
df_test_final = df_test[df_test.index >= x_test_indexes[0][-1]]
df_test_final = df_test_final.iloc[:-5, :]

df_test_final['prediction'] = predictions
df_test_final['signal'] = 0

In [53]:
df_test_final

Unnamed: 0_level_0,Open,High,Low,Close,prediction,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-11,63.5000,64.7700,63.4300,63.8000,63.018944,0
2018-01-12,63.5700,64.5000,63.0600,64.3000,63.768158,0
2018-01-14,64.3554,64.4617,64.2424,64.4390,63.289131,0
2018-01-15,64.3776,64.9931,63.9680,64.7638,63.237473,0
2018-01-16,64.4300,64.8900,63.3900,63.7300,62.181446,0
...,...,...,...,...,...,...
2023-12-15,71.9100,72.5600,70.6400,71.7800,72.052574,0
2023-12-18,72.0500,74.6100,70.9900,72.8200,73.281517,0
2023-12-19,72.9300,74.4500,72.1400,73.9400,71.271759,0
2023-12-20,74.0600,75.3700,73.6000,74.2200,73.636803,0
