In [1]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy, read_file, lag_features
from lstm import create_model
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from datetime import date
from pathlib import Path

In [2]:
# Year range that selects which pre-processed CSV is loaded.
first_year = 2019
last_year = 2021
file = f"./data/processed_data/{first_year}-{last_year}_underlying-strike_only-price.csv"

# read_file is a project helper from `preprocessing`; the calls below use its
# result as a pandas-style frame (.info(), column indexing).
df_read = read_file(file)

# Show the (truncated) frame once, then its schema/memory summary.
# The frame used to be printed a second time after .info(), which only
# duplicated the same output.
print(df_read)
df_read.info()

# Largest value in the "Ttl" column.
print(df_read["Ttl"].max())

         Unnamed: 0  Quote_date Expire_date     Price  Underlying_last  \
0           1354913  2019-01-02  2019-01-04  1707.050          2509.98   
1           1354914  2019-01-02  2019-01-04  1607.495          2509.98   
2           1354915  2019-01-02  2019-01-04  1507.500          2509.98   
3           1354916  2019-01-02  2019-01-04  1458.295          2509.98   
4           1354917  2019-01-02  2019-01-04  1408.300          2509.98   
...             ...         ...         ...       ...              ...   
5123793     6521988  2021-12-31  2024-12-20   150.000          4766.39   
5123794     6521989  2021-12-31  2024-12-20   150.000          4766.39   
5123795     6521990  2021-12-31  2024-12-20   150.900          4766.39   
5123796     6521991  2021-12-31  2024-12-20   150.000          4766.39   
5123797     6521992  2021-12-31  2024-12-20   150.000          4766.39   

         Strike   Ttl  Volatility     R  
0         800.0     2    0.202726  2.40  
1         900.0     2    0.

In [3]:
# Columns used as model inputs, plus the window geometry used when reshaping below.
features = ["Underlying_last", "Strike", "Ttl", "Volatility", "R"]
num_features = len(features)
seq_length = 5
num_outputs = 1

# Project helpers: presumably lag_features adds lagged copies of `features`
# and create_train_test splits on the given date -- confirm in preprocessing.py.
df_read_lags = lag_features(df_read, features, seq_length)
df_train_orginal, df_test_orginal = create_train_test(df_read_lags, "2021-01-01")

# Convert each frame into an (x, y) pair.
train_x_org, train_y_org = df_to_xy(
    df_train_orginal, num_features, seq_length, num_outputs
)
test_x_org, test_y_org = df_to_xy(
    df_test_orginal, num_features, seq_length, num_outputs
)

# Only the inputs go through min_max_scale; the targets are used as returned by df_to_xy.
train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)

# Reshape the scaled inputs to (samples, seq_length, num_features).
train_x_scaled = np.reshape(
    train_x_scaled, (len(train_x_scaled), seq_length, num_features)
)
test_x_scaled = np.reshape(
    test_x_scaled, (len(test_x_scaled), seq_length, num_features)
)

# Sanity check: report the final array shapes.
print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_org.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_org.shape}")

Train_x shape: (2785226, 5, 5), train_y shape: (2785226, 1)
Test_x shape: (1845482, 5, 5), test_y shape: (1845482, 1)


In [4]:
from keras.callbacks import EarlyStopping
# Hyper-parameters handed to create_model (project helper from `lstm`).
# seq_length / num_features are taken from the data-preparation cell so the
# model configuration stays in sync with the reshaped inputs.
config = dict(
    units=32,
    learning_rate=0.0015,
    layers=4,
    seq_length=seq_length,
    num_features=num_features,
    bn_momentum=0.4,
    clip_norm=0.7,
)

def trainer(train_x, train_y, model, epochs=100, minibatch_size=1024, validation_split=0.3):
    """Fit ``model`` with early stopping on the validation loss.

    Args:
        train_x: Training inputs, passed unchanged to ``model.fit``.
        train_y: Training targets, passed unchanged to ``model.fit``.
        model: Object exposing a Keras-style ``fit`` method.
        epochs: Maximum number of epochs to run (default 100).
        minibatch_size: Value passed as ``batch_size`` to ``fit`` (default 1024).
        validation_split: Fraction of the training data that ``fit`` holds out
            for validation (default 0.3).

    Returns:
        Whatever ``model.fit`` returns (for Keras models, the ``History``
        object holding per-epoch metrics). Previously this was discarded.
    """
    early_stopping = EarlyStopping(
        monitor='val_loss',
        mode='min',
        # A drop in val_loss smaller than this does not count as an improvement.
        min_delta=10,
        # Stop after this many consecutive epochs without improvement.
        patience=5,
    )

    return model.fit(
        train_x,
        train_y,
        batch_size=minibatch_size,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=[early_stopping],
    )

# Build the network from the hyper-parameters in `config` and print its layer table.
model = create_model(config)
model.summary()

# Train on the scaled, reshaped inputs against the targets from df_to_xy.
trainer(train_x=train_x_scaled, train_y=train_y_org, model=model)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 32)             4864      
                                                                 
 batch_normalization (BatchN  (None, 5, 32)            128       
 ormalization)                                                   
                                                                 
 lstm_1 (LSTM)               (None, 5, 32)             8320      
                                                                 
 batch_normalization_1 (Batc  (None, 5, 32)            128       
 hNormalization)                                                 
                                                                 
 lstm_2 (LSTM)               (None, 5, 32)             8320      
                                                                 
 batch_normalization_2 (Batc  (None, 5, 32)            1

'path = f"./runs/model_w_validation/{first_year}-{last_year}-{date.today()}"\nmodel.save(path)'

In [4]:
def shapley_predict(x_2D):
    """Run the notebook-level ``model`` on flattened samples.

    ``x_2D`` is reshaped to ``(len(x_2D), seq_length, num_features)`` before
    ``model`` is called on it, and the model output is returned as a NumPy array.
    """
    windows = np.reshape(x_2D, (len(x_2D), seq_length, num_features))
    predictions = model(windows)
    return np.array(predictions)

def create_shapley_values():
    w