In [1]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy, read_file, lag_features
from lstm import create_model
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from datetime import date

In [2]:
# Year range encoded in the processed CSV's file name.
first_year = 2019
last_year = 2021
file = f"./data/processed_data/{first_year}-{last_year}.csv"

df_read = read_file(file)

# Show the frame once (pandas truncates the repr), then its schema / memory use.
# The frame was previously printed a second time after .info(); that duplicate
# dump added no information and has been dropped.
print(df_read)
df_read.info()

# Largest value found in the "Ttl" column (one of the model features below).
print(df_read["Ttl"].max())

         Unnamed: 0  Quote_date Expire_date  Underlying_last  Strike      Ask  \
0           1354913  2019-01-02  2019-01-04          2509.98   800.0  1711.10   
1           1354914  2019-01-02  2019-01-04          2509.98   900.0  1611.40   
2           1354915  2019-01-02  2019-01-04          2509.98  1000.0  1511.40   
3           1354916  2019-01-02  2019-01-04          2509.98  1050.0  1462.19   
4           1354917  2019-01-02  2019-01-04          2509.98  1100.0  1412.20   
...             ...         ...         ...              ...     ...      ...   
5123793     6521988  2021-12-31  2024-12-20          4766.39  8400.0   300.00   
5123794     6521989  2021-12-31  2024-12-20          4766.39  8600.0   300.00   
5123795     6521990  2021-12-31  2024-12-20          4766.39  8800.0   300.90   
5123796     6521991  2021-12-31  2024-12-20          4766.39  9000.0   300.00   
5123797     6521992  2021-12-31  2024-12-20          4766.39  9200.0   300.00   

             Bid  Bid_strik

In [3]:
# Columns fed to the model; each sample is reshaped below into a
# (seq_length, num_features) window.
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
num_features = len(features)

df_read_lags = lag_features(df_read, features, seq_length)

# Split at 2021-01-01 (exact boundary handling is defined by create_train_test).
df_train_orginal, df_test_orginal = create_train_test(df_read_lags, "2021-01-01")

train_x_org, train_y_org = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

# Inputs and targets are scaled independently; the unscaled targets are kept
# because the prediction cell needs them to invert the target scaling.
train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

# Row order is preserved here: a former np.random.permutation-based shuffle of
# the training arrays was disabled and has been removed as dead code.
# NOTE(review): the training cell uses Keras' validation_split, which takes the
# validation rows from the end of the arrays — confirm that unshuffled order is
# what is wanted before re-introducing a shuffle.

# Reshape the flat lagged inputs into (samples, seq_length, num_features).
train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

Train_x shape: (2785226, 5, 4), train_y shape: (2785226, 2)
Test_x shape: (1845482, 5, 4), test_y shape: (1845482, 2)


In [5]:
from keras.callbacks import EarlyStopping
# Hyper-parameter bundle passed to create_model(); the sequence shape entries
# are taken from the values set in the preprocessing cell.
config = dict(
    units=32,
    learning_rate=0.0015,
    layers=4,
    seq_length=seq_length,
    num_features=num_features,
    bn_momentum=0.4,
    clip_norm=0.7,
)

def trainer(train_x, train_y, model, epochs=1000, minibatch_size=1024,
            validation_split=0.3, patience=7, min_delta=1e-6):
    """Fit ``model`` with early stopping on validation loss and return the fit result.

    Parameters
    ----------
    train_x, train_y
        Inputs and targets, forwarded unchanged to ``model.fit``.
    model
        Object exposing a Keras-style ``fit`` method.
    epochs : int
        Upper bound on the number of training epochs.
    minibatch_size : int
        Passed to ``model.fit`` as ``batch_size``.
    validation_split : float
        Fraction of the training data held out by ``model.fit`` for validation.
    patience : int
        Number of epochs without a ``val_loss`` improvement before stopping.
    min_delta : float
        Smallest ``val_loss`` decrease that counts as an improvement.

    Returns
    -------
    Whatever ``model.fit`` returns (a ``History`` object for Keras models), so
    the caller can inspect the loss curves. Earlier versions discarded it.
    """
    early_stopping = EarlyStopping(
        monitor='val_loss',
        mode='min',
        min_delta=min_delta,
        patience=patience,
        # Without this the model is left with the weights of the last epoch,
        # i.e. `patience` epochs past the best validation loss.
        restore_best_weights=True,
    )

    return model.fit(
        train_x,
        train_y,
        batch_size=minibatch_size,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=[early_stopping],
    )

# Build the network from the hyper-parameter dict and print its layer table.
model = create_model(config)
model.summary()

# Fit on the scaled training tensors (early stopping is configured in trainer).
trainer(train_x=train_x_scaled, train_y=train_y_scaled, model=model)

# Persist the fitted model under a name carrying the year range and today's date.
path = f"/runs/model_w_validation/{first_year}-{last_year}-{date.today()}"
model.save(path)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_4 (LSTM)               (None, 5, 32)             4736      
                                                                 
 batch_normalization_4 (Batc  (None, 5, 32)            128       
 hNormalization)                                                 
                                                                 
 lstm_5 (LSTM)               (None, 5, 32)             8320      
                                                                 
 batch_normalization_5 (Batc  (None, 5, 32)            128       
 hNormalization)                                                 
                                                                 
 lstm_6 (LSTM)               (None, 5, 32)             8320      
                                                                 
 batch_normalization_6 (Batc  (None, 5, 32)           

In [None]:
def prediction(df_test, test_x, model, train_y_org, train_y_scaled):
    """Run ``model`` on ``test_x`` and attach predictions and errors to a copy of ``df_test``.

    Parameters
    ----------
    df_test : pandas.DataFrame
        Rows being evaluated; must contain "Strike", "Bid" and "Ask" and be
        row-aligned with ``test_x`` and ``train_y_scaled``.
    test_x
        Model input, passed as-is to ``model(test_x)``.
    model
        Callable returning a two-column array-like (column 0 = bid, column 1 = ask).
    train_y_org
        Unscaled targets used to fit the ``MinMaxScaler`` that inverts the
        target scaling.
        NOTE(review): assumes this reproduces the scaling applied by
        ``min_max_scale`` — confirm against that helper.
    train_y_scaled
        Scaled targets for the rows in ``df_test``; column 0 is used as the
        ground truth for the scaled-space bid error. Despite the name it must
        have one row per row of ``df_test``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_test`` with the added columns "Raw_pred_bid",
        "Scaled_bid", "Raw_mae_bid", "Prediction_bid_strike",
        "Prediction_ask_strike", "Prediction_bid", "Prediction_ask",
        "MAE_bid" and "MAE_ask". The input frame is left untouched.

    Raises
    ------
    ValueError
        If the predictions or ``train_y_scaled`` do not have one row per row
        of ``df_test``.
    """
    predictions = np.array(model(test_x))

    n_rows = len(df_test)
    if len(predictions) != n_rows or len(train_y_scaled) != n_rows:
        raise ValueError(
            "Row count mismatch: "
            f"df_test has {n_rows} rows, predictions {len(predictions)}, "
            f"train_y_scaled {len(train_y_scaled)}"
        )

    # Work on a copy so the caller's frame (possibly a slice of a larger one)
    # is not silently modified.
    result = df_test.copy()

    # Error in scaled space, bid column only (1-D column slices, not (n, 1)).
    result["Raw_pred_bid"] = predictions[:, 0]
    result["Scaled_bid"] = train_y_scaled[:, 0]
    result["Raw_mae_bid"] = (result["Raw_pred_bid"] - result["Scaled_bid"]).abs()

    # Undo the target scaling to get bid/strike and ask/strike ratios.
    scaler = MinMaxScaler().fit(train_y_org)
    unscaled = scaler.inverse_transform(predictions)
    result["Prediction_bid_strike"] = unscaled[:, 0]
    result["Prediction_ask_strike"] = unscaled[:, 1]

    # Ratios are relative to the strike; multiply back to get prices.
    result["Prediction_bid"] = result["Prediction_bid_strike"] * result["Strike"]
    result["Prediction_ask"] = result["Prediction_ask_strike"] * result["Strike"]

    # Per-row absolute errors in price units.
    result["MAE_bid"] = (result["Bid"] - result["Prediction_bid"]).abs()
    result["MAE_ask"] = (result["Ask"] - result["Prediction_ask"]).abs()
    return result

# NOTE: the frame and tensors passed here are the *training* split, so despite
# its name `df_test` holds predictions for the training rows.
df_test = prediction(
    df_test=df_train_orginal,
    test_x=train_x_scaled,
    model=model,
    train_y_org=train_y_org,
    train_y_scaled=train_y_scaled,
)

# Export the annotated frame, named after the year range and today's date.
path = "/runs/data_w_validation/"
df_test.to_csv(f"{path}/{first_year}-{last_year}-{date.today()}")

# Quick sanity check of the new columns.
df_test.info()
print(df_test.head())

In [None]:
# Quoted vs. predicted prices and their absolute errors for the first 20 rows.
preview_columns = ["Bid", "Ask", "Prediction_bid", "Prediction_ask", "MAE_bid", "MAE_ask"]
print(df_test.head(20)[preview_columns])

In [None]:
# Mean absolute bid error in price units.
print(f"MAE: {df_test['MAE_bid'].mean()}")

# Per-row absolute bid error against the quoted bid; explicit figure/axes and
# labels so the plot can be read on its own.
fig, ax = plt.subplots()
ax.scatter(df_test["Bid"], df_test["MAE_bid"])
ax.set(xlabel="Bid", ylabel="MAE_bid", title="Absolute bid error vs. quoted bid")
plt.show()

In [None]:
# Mean absolute bid error in scaled (model-output) units.
print(f"MAE: {df_test['Raw_mae_bid'].mean()}")

# Scaled-space bid error against the scaled bid target (column 0 of
# train_y_scaled); explicit figure/axes and labels so the plot stands alone.
fig, ax = plt.subplots()
ax.scatter(train_y_scaled[:, :1], df_test["Raw_mae_bid"])
ax.set(
    xlabel="Scaled bid target",
    ylabel="Raw_mae_bid",
    title="Absolute bid error vs. target (scaled units)",
)
plt.show()

In [None]:
# Mean absolute ask error in price units.
print(f"MAE: {df_test['MAE_ask'].mean()}")

# Per-row absolute ask error against the quoted ask; explicit figure/axes and
# labels so the plot can be read on its own.
fig, ax = plt.subplots()
ax.scatter(df_test["Ask"], df_test["MAE_ask"])
ax.set(xlabel="Ask", ylabel="MAE_ask", title="Absolute ask error vs. quoted ask")
plt.show()

In [None]:
# Relationship between strike and moneyness in the training split; explicit
# figure/axes and labels so the plot can be read on its own.
fig, ax = plt.subplots()
ax.scatter(df_train_orginal["Strike"], df_train_orginal["Moneyness"])
ax.set(xlabel="Strike", ylabel="Moneyness", title="Moneyness vs. strike (training split)")
plt.show()