In [2]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy
from trainer import trainer
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameter tuning


import wandb
from wandb.keras import WandbCallback
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "/Users/hjalmarvinje/Documents/LSTM for option pricing"
wandb.login()

True

In [3]:
# Load dataset
path_opt = "./data/options/"
filenames_opt = ["spx_eod_" + str(year) + (str(month) if month >= 10 else "0"+str(month)) + ".txt" for year in range(2022, 2022) for month in range(1, 13)] + ["spx_eod_2022" + (str(month) if month >= 10 else "0" + str(month)) + ".txt" for month in range(1, 10)]
path_r = "./data/rates/"
filenames_r = ["yield-curve-rates-2022.csv", "yield-curve-rates-1990-2021.csv"]

df_read = get_model_dataset(path_opt, filenames_opt, path_r, filenames_r, True)
print(df_read)
df_read.info()

        Quote_date Expire_date  Underlying_last   Strike  Moneyness    Ask  \
725110  2022-05-09  2022-05-10          3993.26   3100.0   1.288148  893.3   
725111  2022-05-09  2022-05-10          3993.26   3200.0   1.247894  792.9   
725112  2022-05-09  2022-05-10          3993.26   3300.0   1.210079  692.2   
725113  2022-05-09  2022-05-10          3993.26   3400.0   1.174488  592.2   
725114  2022-05-09  2022-05-10          3993.26   3500.0   1.140931  493.0   
...            ...         ...              ...      ...        ...    ...   
1533707 2022-09-30  2026-12-18          3589.70   8800.0   0.407920   31.4   
1533708 2022-09-30  2026-12-18          3589.70   9000.0   0.398856   29.5   
1533709 2022-09-30  2026-12-18          3589.70   9200.0   0.390185   16.2   
1533710 2022-09-30  2026-12-18          3589.70   9600.0   0.373927   24.5   
1533711 2022-09-30  2026-12-18          3589.70  10000.0   0.358970   21.8   

           Bid   Ttl  Volatility     R  
725110   886.3     1  

In [4]:
# Splitting dataset
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
num_features = 4

df_train_orginal, df_test_orginal = create_train_test(df_read, features,  "2022-09-01", 5)

train_x_org, train_y_org, = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

Train_x shape: (519593, 5, 4), train_y shape: (519593, 2)
Test_x shape: (121672, 5, 4), test_y shape: (121672, 2)


In [5]:
sweep_configuration = {
    'method': 'random',
    'name': 'sweep3',
    'metric': {
        'goal': 'minimize', 
        'name': 'loss'
		},
    'parameters': {
        "units": {'values': [75, 100, 200]},
     }
}

In [6]:
# Initialize sweep and creating sweepID
sweep_id = wandb.sweep(sweep=sweep_configuration, project="LSTM option pricing")

Create sweep with ID: qbomwqx6
Sweep URL: https://wandb.ai/avogadro/LSTM%20option%20pricing/sweeps/qbomwqx6


In [7]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input
from keras import backend as K
from tensorflow.keras.optimizers import Adam
import keras as KER
from sklearn.model_selection import train_test_split
from keras.activations import linear, relu

In [14]:
def create_model(config):
  """Builds an LSTM model of minimum 2 layers sequentially from a given config dictionary"""
  model = Sequential()

  model.add(LSTM(
    units = config.units,
    activation = linear,
    input_shape = (config.seq_length, config.num_features),
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = True
  )) 

  for i in range(config.layers - 2):
    model.add(LSTM(
    units = config.units,
    activation = linear,
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = True
  ))

  model.add(LSTM(
    units = config.units,
    activation = linear,
    dropout = config.dropout,
    recurrent_dropout = config.recurrent_dropout,
    return_sequences = False
  ))

  model.add(Dense(
    units = 2,
    activation = relu
  ))  

  model.compile(
    optimizer = Adam(
      learning_rate = config.lr
    ),
    loss = "mse",
    metrics = ["accuracy"]
  )

  return model

In [15]:
from keras.callbacks import EarlyStopping


def trainer(train_x = train_x_scaled, train_y = train_y_scaled, config = None):
    # Initialize a new wandb run
    with wandb.init(config=sweep_configuration):

        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        s_config = wandb.config

        model = create_model(s_config, config)
        
        minibatch_size = 1024

        early_stopping = EarlyStopping(
            monitor='loss',
            mode='min',
            min_delta=1e-4,
            patience=10,
        )

        model.fit(
            train_x,
            train_y,
            batch_size = minibatch_size,
            epochs = config.epochs,
            callbacks = [early_stopping, WandbCallback()] 
        )
        


In [16]:
wandb.agent(sweep_id=sweep_id, function=trainer, project="LSTM option pricing", count = 5)

Retry attempt failed:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/urllib3/connection.py", line 156, in _new_conn
    conn = connection.create_connection(
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/urllib3/util/connection.py", line 61, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 918, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/urllib3/connectionpool.py", line 665, in urlopen
    httplib_response = self._make_request(
  File "/

(config.seq_length, config.num_features), (15, 5)


Run av3m4i24 errored: ValueError("in user code:\n\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *\n        return step_function(self, iterator)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01683063431666672, max=1.0)…

(config.seq_length, config.num_features), (15, 15)
Epoch 1/5


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run qeb1cfzn errored: ValueError("in user code:\n\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *\n        return step_function(self, iterator)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:

(config.seq_length, config.num_features), (5, 15)
Epoch 1/5


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run mg26d0e3 errored: ValueError("in user code:\n\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *\n        return step_function(self, iterator)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:

(config.seq_length, config.num_features), (10, 5)


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.180781…

Run feoc3y2t errored: ValueError("in user code:\n\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *\n        return step_function(self, iterator)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:

(config.seq_length, config.num_features), (5, 15)
Epoch 1/5


Run 136nqhtj errored: ValueError("in user code:\n\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:853 train_function  *\n        return step_function(self, iterator)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/keras/engine/training.py:842 step_function  **\n        outputs = model.distribute_strategy.run(run_step, args=(data,))\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run\n        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica\n        return self._call_for_each_replica(fn, args, kwargs)\n    /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:

In [None]:
from sklearn.preprocessing import MinMaxScaler
def prediction(df_test, test_x, model, train_y_org):
    predictions = np.array(model(test_x))
    scaler = MinMaxScaler().fit(train_y_org)
    predictions = scaler.inverse_transform(predictions)
    bid, ask = predictions[:, :1], predictions[:, 1:]
    df_test["Prediction_bid"] = bid
    df_test["Prediction_ask"] = ask
    return df_test

df_test = prediction(df_test_orginal, test_x_scaled, model, train_y_org)

df_test.info()
print(df_test.head())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 121672 entries, 1395271 to 1533711
Data columns (total 34 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   Quote_date       121672 non-null  datetime64[ns]
 1   Expire_date      121672 non-null  datetime64[ns]
 2   Underlying_last  121672 non-null  float64       
 3   Strike           121672 non-null  float64       
 4   Moneyness        121672 non-null  float64       
 5   Ask              121672 non-null  float64       
 6   Bid              121672 non-null  float64       
 7   Ttl              121672 non-null  int64         
 8   Volatility       121672 non-null  float64       
 9   R                121672 non-null  float64       
 10  Moneyness-4      121672 non-null  float64       
 11  Ttl-4            121672 non-null  float64       
 12  R-4              121672 non-null  float64       
 13  Volatility-4     121672 non-null  float64       
 14  Moneyness-3  

In [None]:
print(df_test.head(20)[["Bid", "Ask", "Prediction_bid", "Prediction_ask"]])
df_test.info()

            Bid     Ask  Prediction_bid  Prediction_ask
1395271  2997.2  3004.8     2958.293945     2938.088623
1395272  2797.3  2804.8     2743.209229     2720.194580
1395273  2597.1  2604.6     2525.886963     2500.249023
1395274  2397.1  2405.4     2305.510254     2278.655273
1395275  2197.6  2205.4     2084.317627     2057.875977
1395276  1998.1  2004.9     1864.251953     1839.673096
1395277  1797.7  1805.5     1644.697632     1623.189331
1395278  1597.5  1605.0     1422.385498     1404.850708
1395279  1397.3  1405.0     1193.704590     1180.607300
1395280  1297.3  1305.0     1076.631226     1065.778442
1395281  1197.3  1204.8      958.215576      949.564575
1395282  1150.9  1155.4      898.755554      891.181519
1395283  1100.9  1105.4      839.310547      832.795837
1395284  1051.0  1055.4      780.044556      774.571716
1395285  1000.4  1005.4      721.139832      716.691528
1395286   950.4   955.4      662.790710      659.352112
1395287   900.4   906.1      605.201416      602