In [26]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy
from trainer import trainer
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter tuning


import wandb
from wandb.keras import WandbCallback
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "/Users/hjalmarvinje/Documents/LSTM for option pricing"
wandb.login()

True

In [7]:
# Load dataset
path_opt = "./data/options/"
filenames_opt = ["spx_eod_" + str(year) + (str(month) if month >= 10 else "0"+str(month)) + ".txt" for year in range(2022, 2022) for month in range(1, 13)] + ["spx_eod_2022" + (str(month) if month >= 10 else "0" + str(month)) + ".txt" for month in range(1, 10)]
path_r = "./data/rates/"
filenames_r = ["yield-curve-rates-2022.csv", "yield-curve-rates-1990-2021.csv"]

df_read = get_model_dataset(path_opt, filenames_opt, path_r, filenames_r, True)
print(df_read)
df_read.info()

        Quote_date Expire_date  Underlying_last   Strike  Moneyness    Ask  \
725110  2022-05-09  2022-05-10          3993.26   3100.0   1.288148  893.3   
725111  2022-05-09  2022-05-10          3993.26   3200.0   1.247894  792.9   
725112  2022-05-09  2022-05-10          3993.26   3300.0   1.210079  692.2   
725113  2022-05-09  2022-05-10          3993.26   3400.0   1.174488  592.2   
725114  2022-05-09  2022-05-10          3993.26   3500.0   1.140931  493.0   
...            ...         ...              ...      ...        ...    ...   
1533707 2022-09-30  2026-12-18          3589.70   8800.0   0.407920   31.4   
1533708 2022-09-30  2026-12-18          3589.70   9000.0   0.398856   29.5   
1533709 2022-09-30  2026-12-18          3589.70   9200.0   0.390185   16.2   
1533710 2022-09-30  2026-12-18          3589.70   9600.0   0.373927   24.5   
1533711 2022-09-30  2026-12-18          3589.70  10000.0   0.358970   21.8   

           Bid   Ttl  Volatility     R  
725110   886.3     1  

In [27]:
# Splitting dataset
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
num_features = 4

df_train_orginal, df_test_orginal = create_train_test(df_read, features,  "2022-09-01", 5)

train_x_org, train_y_org, = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

Train_x shape: (519593, 5, 4), train_y shape: (519593, 2)
Test_x shape: (121672, 5, 4), test_y shape: (121672, 2)


In [34]:
sweep_configuration = {
    'method': 'bayes',
    'name': 'sweep10',
    'metric': {
        'goal': 'minimize', 
        'name': 'val_loss'
		},
    'parameters': {
        "units": {'values': [8, 12, 20, 40, 80]},
        "dropout": {"distribution": "uniform",
            'max': 0.5, 'min': 0.00001},
        "recurrent_dropout": {
            "distribution": "uniform",
            'max': 0.5, 'min': 0.00001},
        "lr": {
            "distribution": "uniform",
            'max': 0.01, 'min': 0.00001},
        "layers": {'values': [2, 4, 7]},
        "epochs": {'values': [50,100,500]},
        "minibatch_size": {'values': [512, 1024, 2048]}
    }
}


In [38]:
# Initialize sweep and creating sweepID
### sweep_id = wandb.sweep(sweep=sweep_configuration, project="LSTM option pricing")
#sweep8: sweep_id = 7finyjz4
#sweep10 
sweep_id = "ioxqmc3k"

Create sweep with ID: ioxqmc3k
Sweep URL: https://wandb.ai/avogadro/LSTM%20option%20pricing/sweeps/ioxqmc3k


In [30]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input
from keras import backend as K
from tensorflow.keras.optimizers import Adam
import keras as KER
from sklearn.model_selection import train_test_split
from keras.activations import linear, relu

In [31]:
def create_model(config):
  """Builds an LSTM model of minimum 2 layers sequentially from a given config dictionary"""
  model = Sequential()


  seq_length = 5
  num_features =  4
  model.add(LSTM(
    units = config.units,
    activation = linear,
    input_shape = (seq_length, num_features),
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = True
  )) 

  for i in range(config.layers - 2):
    model.add(LSTM(
    units = config.units,
    activation = linear,
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = True
  ))

  model.add(LSTM(
    units = config.units,
    activation = linear,
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = False
  ))

  model.add(Dense(
    units = 2,
    activation = relu
  ))  

  model.compile(
    optimizer = Adam(
      learning_rate = config.lr
    ),
    loss = "mse",
    metrics = ["accuracy"]
  )

  return model

In [32]:
from keras.callbacks import EarlyStopping


def trainer(train_x = train_x_scaled, train_y = train_y_scaled, config = None):
    # Initialize a new wandb run
    with wandb.init(config=sweep_configuration):

        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        model = create_model(config)
        
        minibatch_size = config.minibatch_size

        early_stopping = EarlyStopping(
            monitor='loss',
            mode='min',
            min_delta=1e-4,
            patience=10,
        )
        
        wandb_callback = WandbCallback(
            monitor='val_loss',
            mode='min',
            save_model=False
        )

        model.fit(
            train_x,
            train_y,
            batch_size = minibatch_size,
            validation_split = 0.2,
            epochs = config.epochs,
            callbacks = [early_stopping, wandb_callback] 
        )
        


In [39]:
wandb.agent(sweep_id=sweep_id, function=trainer, project="LSTM option pricing", count = 50)

[34m[1mwandb[0m: Agent Starting Run: lcr0co0x with config:
[34m[1mwandb[0m: 	dropout: 0.476704967922829
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	layers: 7
[34m[1mwandb[0m: 	lr: 0.00929512227852874
[34m[1mwandb[0m: 	minibatch_size: 2048
[34m[1mwandb[0m: 	recurrent_dropout: 0.48433080647861143
[34m[1mwandb[0m: 	units: 20


Epoch 1/50
Epoch 2/50
 43/203 [=====>........................] - ETA: 30s - loss: 0.0201 - accuracy: 0.6907