In [1]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy
from trainer import trainer
from lstm import create_model
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Folder holding the monthly option-quote files, named spx_eod_YYYYMM.txt.
path_opt = "./data/options/"
# Only January-September 2022 are loaded; `{month:02d}` zero-pads the month.
# To load further whole years, concatenate another comprehension over those years.
filenames_opt = [f"spx_eod_2022{month:02d}.txt" for month in range(1, 10)]
# Folder and file names of the yield-curve rate tables.
path_r = "./data/rates/"
filenames_r = ["yield-curve-rates-2022.csv", "yield-curve-rates-1990-2021.csv"]

# Build the modelling frame from the option-quote and rate files using the
# project's get_model_dataset helper.
# NOTE(review): the meaning of the trailing positional `True` is not visible in this
# notebook; confirm it against preprocessing.get_model_dataset.
df_read = get_model_dataset(path_opt, filenames_opt, path_r, filenames_r, True)
# print() gives pandas' truncated text view (first/last rows); info() lists each
# column's dtype and non-null count.
print(df_read)
df_read.info()

        Quote_date Expire_date  Underlying_last   Strike  Moneyness    Ask  \
725110  2022-05-09  2022-05-10          3993.26   3100.0   1.288148  893.3   
725111  2022-05-09  2022-05-10          3993.26   3200.0   1.247894  792.9   
725112  2022-05-09  2022-05-10          3993.26   3300.0   1.210079  692.2   
725113  2022-05-09  2022-05-10          3993.26   3400.0   1.174488  592.2   
725114  2022-05-09  2022-05-10          3993.26   3500.0   1.140931  493.0   
...            ...         ...              ...      ...        ...    ...   
1533707 2022-09-30  2026-12-18          3589.70   8800.0   0.407920   31.4   
1533708 2022-09-30  2026-12-18          3589.70   9000.0   0.398856   29.5   
1533709 2022-09-30  2026-12-18          3589.70   9200.0   0.390185   16.2   
1533710 2022-09-30  2026-12-18          3589.70   9600.0   0.373927   24.5   
1533711 2022-09-30  2026-12-18          3589.70  10000.0   0.358970   21.8   

           Bid   Ttl  Volatility     R  
725110   886.3     1  

In [3]:
# Columns fed to the model at every timestep.
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
# Derived from the feature list so the reshape below can never disagree with it.
num_features = len(features)

# NOTE(review): "2022-09-01" is presumably the train/test cut-off date, and the
# literal 5 has the same value as seq_length. If create_train_test expects the
# sequence length here, pass seq_length instead; confirm against preprocessing.
df_train_orginal, df_test_orginal = create_train_test(df_read, features, "2022-09-01", 5)

# Split each frame into model inputs (x) and targets (y).
train_x_org, train_y_org = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

# Inputs and targets are scaled separately; the unscaled train targets
# (train_y_org) are kept because they are needed later to undo the scaling.
train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

# Reshape the inputs to (samples, seq_length, num_features).
train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

Train_x shape: (519593, 5, 4), train_y shape: (519593, 2)
Test_x shape: (121672, 5, 4), test_y shape: (121672, 2)


In [4]:
from keras.callbacks import EarlyStopping
# Hyperparameters handed to create_model; the two shape entries reuse the values
# chosen when the training tensors were built.
config = dict(
    units=100,
    dropout=0.1,
    recurrent_dropout=0.1,
    learning_rate=0.001,
    layers=5,
    seq_length=seq_length,
    num_features=num_features,
)

def trainer(train_x, train_y, model, epochs=10, minibatch_size=1024, patience=10):
    """Fit ``model`` on the training data with early stopping on the training loss.

    NOTE: this definition replaces the ``trainer`` name imported from the
    ``trainer`` module at the top of the notebook for every cell run after it.

    Args:
        train_x: Model inputs, passed to ``model.fit`` unchanged.
        train_y: Training targets, passed to ``model.fit`` unchanged.
        model: Object exposing a Keras-style
            ``fit(x, y, batch_size=..., epochs=..., callbacks=...)`` method.
        epochs: Maximum number of training epochs.
        minibatch_size: Number of samples per gradient update.
        patience: Epochs without a loss improvement of at least ``1e-4`` before
            training stops. With the defaults ``patience`` equals ``epochs``, so
            the callback cannot trigger; pass a smaller value to enable it.

    Returns:
        Whatever ``model.fit`` returns (a ``History`` object for Keras models),
        so the loss curve can be inspected afterwards.
    """
    # Monitors the *training* loss: no validation data is given to fit().
    early_stopping = EarlyStopping(
        monitor='loss',
        mode='min',
        min_delta=1e-4,
        patience=patience,
    )

    return model.fit(
        train_x,
        train_y,
        batch_size=minibatch_size,
        epochs=epochs,
        callbacks=[early_stopping],
    )

# Build the network from the hyperparameter dict (create_model is imported from
# the local lstm module).
model = create_model(config)

# This calls the trainer defined earlier in this cell, which shadows the trainer
# imported from the trainer module at the top of the notebook.
trainer(train_x_scaled, train_y_scaled, model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
from sklearn.preprocessing import MinMaxScaler
def prediction(df_test, test_x, model, train_y_org):
    """Return a copy of ``df_test`` with de-scaled bid/ask predictions attached.

    Args:
        df_test: Test DataFrame with one row per sample in ``test_x``. It is not
            modified; the predictions are added to a copy.
        test_x: Scaled model inputs, passed to ``model`` as a single call.
        model: Callable returning a two-column array-like of scaled predictions.
        train_y_org: Unscaled training targets, used to fit the scaler that maps
            the predictions back to price units.

    Returns:
        A new DataFrame equal to ``df_test`` plus the columns ``Prediction_bid``
        (prediction column 0) and ``Prediction_ask`` (prediction column 1).
    """
    predictions = np.array(model(test_x))
    # Refit a min-max scaler on the unscaled training targets to invert the target
    # scaling. NOTE(review): this presumes min_max_scale fitted the same kind of
    # scaler on the training targets; confirm in preprocessing.
    scaler = MinMaxScaler().fit(train_y_org)
    predictions = scaler.inverse_transform(predictions)
    # NOTE(review): column 0 is treated as bid and column 1 as ask. The frame lists
    # Ask before Bid, so verify that df_to_xy emits targets in (bid, ask) order.
    df_pred = df_test.copy()
    # 1-D slices give plain columns instead of relying on (n, 1) arrays.
    df_pred["Prediction_bid"] = predictions[:, 0]
    df_pred["Prediction_ask"] = predictions[:, 1]
    return df_pred

# prediction returns the test frame with Prediction_bid / Prediction_ask columns
# added; train_y_org (unscaled train targets) is what it uses to undo the scaling.
df_test = prediction(df_test_orginal, test_x_scaled, model, train_y_org)

# info() lists every column with dtype and non-null count; head() previews the rows.
df_test.info()
print(df_test.head())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 121672 entries, 1395271 to 1533711
Data columns (total 34 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Quote_date       121672 non-null  datetime64[ns]
 1   Expire_date      121672 non-null  datetime64[ns]
 2   Underlying_last  121672 non-null  float64       
 3   Strike           121672 non-null  float64       
 4   Moneyness        121672 non-null  float64       
 5   Ask              121672 non-null  float64       
 6   Bid              121672 non-null  float64       
 7   Ttl              121672 non-null  int64         
 8   Volatility       121672 non-null  float64       
 9   R                121672 non-null  float64       
 10  Moneyness-4      121672 non-null  float64       
 11  Ttl-4            121672 non-null  float64       
 12  R-4              121672 non-null  float64       
 13  Volatility-4     121672 non-null  float64       
 14  Moneyness-3  

In [7]:
# Quoted bid/ask next to the model's predictions for the first 20 test rows,
# followed by the full column summary.
print(
    df_test.head(20).loc[:, ["Bid", "Ask", "Prediction_bid", "Prediction_ask"]]
)
df_test.info()

            Bid     Ask  Prediction_bid  Prediction_ask
1395271  2997.2  3004.8     2966.038574     2921.669189
1395272  2797.3  2804.8     2743.442139     2708.014893
1395273  2597.1  2604.6     2526.692627     2497.127441
1395274  2397.1  2405.4     2311.479736     2285.217529
1395275  2197.6  2205.4     2094.024414     2069.406738
1395276  1998.1  2004.9     1872.450684     1848.897949
1395277  1797.7  1805.5     1647.211060     1625.150513
1395278  1597.5  1605.0     1420.696899     1401.313965
1395279  1397.3  1405.0     1196.016235     1180.693481
1395280  1297.3  1305.0     1084.816772     1071.880371
1395281  1197.3  1204.8      974.020264      963.546387
1395282  1150.9  1155.4      918.587463      909.333984
1395283  1100.9  1105.4      863.031799      854.973938
1395284  1051.0  1055.4      807.306580      800.409729
1395285  1000.4  1005.4      751.412109      745.633728
1395286   950.4   955.4      695.412231      690.703613
1395287   900.4   906.1      639.446594      635