In [1]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy
from trainer import trainer
from lstm import create_model
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter tuning
import wandb
from wandb.keras import WandbCallback
wandb.init(project="LSTM option pricing")


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvinje[0m ([33mavogadro[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Capture a dictionary of hyperparameters with config
wandb.config = {
  "lr": 0.001,
  "n_epochs": 100,
  "n_batch": 128
}

In [5]:
path_opt = "./data/options/"
filenames_opt = ["spx_eod_" + str(year) + (str(month) if month >= 10 else "0"+str(month)) + ".txt" for year in range(2022, 2022) for month in range(1, 13)] + ["spx_eod_2022" + (str(month) if month >= 10 else "0" + str(month)) + ".txt" for month in range(1, 10)]
path_r = "./data/rates/"
filenames_r = ["yield-curve-rates-2022.csv", "yield-curve-rates-1990-2021.csv"]

df_read = get_model_dataset(path_opt, filenames_opt, path_r, filenames_r, True)
print(df_read)
df_read.info()

        Quote_date Expire_date  Underlying_last   Strike  Moneyness    Ask  \
725110  2022-05-09  2022-05-10          3993.26   3100.0   1.288148  893.3   
725111  2022-05-09  2022-05-10          3993.26   3200.0   1.247894  792.9   
725112  2022-05-09  2022-05-10          3993.26   3300.0   1.210079  692.2   
725113  2022-05-09  2022-05-10          3993.26   3400.0   1.174488  592.2   
725114  2022-05-09  2022-05-10          3993.26   3500.0   1.140931  493.0   
...            ...         ...              ...      ...        ...    ...   
1533707 2022-09-30  2026-12-18          3589.70   8800.0   0.407920   31.4   
1533708 2022-09-30  2026-12-18          3589.70   9000.0   0.398856   29.5   
1533709 2022-09-30  2026-12-18          3589.70   9200.0   0.390185   16.2   
1533710 2022-09-30  2026-12-18          3589.70   9600.0   0.373927   24.5   
1533711 2022-09-30  2026-12-18          3589.70  10000.0   0.358970   21.8   

           Bid   Ttl  Volatility     R  
725110   886.3     1  

In [6]:
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
num_features = 4

df_train_orginal, df_test_orginal = create_train_test(df_read, features,  "2022-09-01", 5)

train_x_org, train_y_org, = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

Train_x shape: (519593, 5, 4), train_y shape: (519593, 2)
Test_x shape: (121672, 5, 4), test_y shape: (121672, 2)


In [7]:
sweep_configuration = {
    'method': 'random',
    'name': 'sweep2',
    'metric': {
        'goal': 'minimize', 
        'name': 'loss'
		},
    'parameters': {
        "units": {'values': [75, 100, 200]},
        "dropout": {'values': [0.05, 0.1, 0.2]},
        "recurrent_dropout": {'values': [0.05, 0.1, 0.2]},
        "learning_rate": {'values': [0.001, 0.01, 0.1]},
        "layers": {'values': [3, 5, 8]},
        "seq_length": {'values': [5, 10, 15]},
        "num_features": {'values': [5, 10, 15]}
     }
}

sweep_id = wandb.sweep(sweep=sweep_configuration, project="LSTM option pricing")

Create sweep with ID: o19b188y
Sweep URL: https://wandb.ai/avogadro/LSTM%20option%20pricing/sweeps/o19b188y


In [8]:
from keras.callbacks import EarlyStopping
config_norm = {
    "units": 100,
    "dropout": 0.1,
    "recurrent_dropout": 0.1,
    "learning_rate": 0.001,
    "layers": 5,
    "seq_length": seq_length,
    "num_features": num_features
}

def trainer(train_x, train_y, model):
    epochs = 10
    minibatch_size = 1024

    early_stopping = EarlyStopping(
        monitor='loss',
        mode='min',
        min_delta=1e-4,
        patience=10,
    )

    model.fit(
        train_x,
        train_y,
        batch_size = minibatch_size,
        epochs = epochs,
        callbacks = [early_stopping, WandbCallback()]
    )

model = create_model(config_norm)

wandb.agent(sweep_id=sweep_id, function=model)

trainer(train_x_scaled, train_y_scaled, model)

[34m[1mwandb[0m: Agent Starting Run: 11tyspbl with config:
[34m[1mwandb[0m: 	dropout: 0.05
[34m[1mwandb[0m: 	layers: 3
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	num_features: 5
[34m[1mwandb[0m: 	recurrent_dropout: 0.2
[34m[1mwandb[0m: 	seq_length: 15
[34m[1mwandb[0m: 	units: 100
Exception in thread Thread-9:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/wandb/agents/pyagent.py", line 298, in _run_job
    self._function()
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/base_layer.py", line 967, in __call__
    inputs, args, kwargs = self._split_out_first_arg(args, kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/base_layer.py", line 3011, in _split_out_first_arg
    raise ValueError(
ValueError: The first argument to `Layer.call` must always be passed.

During handling

In [None]:
from sklearn.preprocessing import MinMaxScaler
def prediction(df_test, test_x, model, train_y_org):
    predictions = np.array(model(test_x))
    scaler = MinMaxScaler().fit(train_y_org)
    predictions = scaler.inverse_transform(predictions)
    bid, ask = predictions[:, :1], predictions[:, 1:]
    df_test["Prediction_bid"] = bid
    df_test["Prediction_ask"] = ask
    return df_test

df_test = prediction(df_test_orginal, test_x_scaled, model, train_y_org)

df_test.info()
print(df_test.head())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 121672 entries, 1395271 to 1533711
Data columns (total 34 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Quote_date       121672 non-null  datetime64[ns]
 1   Expire_date      121672 non-null  datetime64[ns]
 2   Underlying_last  121672 non-null  float64       
 3   Strike           121672 non-null  float64       
 4   Moneyness        121672 non-null  float64       
 5   Ask              121672 non-null  float64       
 6   Bid              121672 non-null  float64       
 7   Ttl              121672 non-null  int64         
 8   Volatility       121672 non-null  float64       
 9   R                121672 non-null  float64       
 10  Moneyness-4      121672 non-null  float64       
 11  Ttl-4            121672 non-null  float64       
 12  R-4              121672 non-null  float64       
 13  Volatility-4     121672 non-null  float64       
 14  Moneyness-3  

In [None]:
print(df_test.head(20)[["Bid", "Ask", "Prediction_bid", "Prediction_ask"]])
df_test.info()

            Bid     Ask  Prediction_bid  Prediction_ask
1395271  2997.2  3004.8     2958.293945     2938.088623
1395272  2797.3  2804.8     2743.209229     2720.194580
1395273  2597.1  2604.6     2525.886963     2500.249023
1395274  2397.1  2405.4     2305.510254     2278.655273
1395275  2197.6  2205.4     2084.317627     2057.875977
1395276  1998.1  2004.9     1864.251953     1839.673096
1395277  1797.7  1805.5     1644.697632     1623.189331
1395278  1597.5  1605.0     1422.385498     1404.850708
1395279  1397.3  1405.0     1193.704590     1180.607300
1395280  1297.3  1305.0     1076.631226     1065.778442
1395281  1197.3  1204.8      958.215576      949.564575
1395282  1150.9  1155.4      898.755554      891.181519
1395283  1100.9  1105.4      839.310547      832.795837
1395284  1051.0  1055.4      780.044556      774.571716
1395285  1000.4  1005.4      721.139832      716.691528
1395286   950.4   955.4      662.790710      659.352112
1395287   900.4   906.1      605.201416      602