In [1]:
import os
import sys
import time
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from model_settings import ms
os.chdir(os.path.abspath(str(Path())))
pd.set_option("display.max_columns",None)
pd.options.display.float_format = '{:.5f}'.format
notebook_dir = str(Path().resolve())
sys.path.append(os.path.join(notebook_dir,'historical_data','historical_generation'))
train_start = time.time()
train_start_datetime = datetime.fromtimestamp(train_start)
train_start_tag = train_start_datetime.strftime('%c')
print("\n"+"#"*18+"\n# training start #\n"+
      "#"*18+"\n"+f"\n{train_start_tag}\n")


##################
# training start #
##################

Thu Oct 24 14:11:35 2024



# Loading data

In [27]:
root = Path().resolve().parent.parent
datadir = os.path.join(root,ms.bloomberg_spx_barrier_dump)
files = [f for f in os.listdir(datadir) if f.endswith('.csv')]
files = [os.path.join(datadir,f) for f in files]
dfs = []
bar = tqdm(total=len(files))
for f in files:
    dfs.append(pd.read_csv(f).iloc[:,1:])
    bar.update(1)
bar.close()
dataset = pd.concat(dfs,ignore_index=True).dropna().reset_index(drop=True)
dataset

100%|██████████████████████████████████████████████████████████████████████████████| 1510/1510 [00:27<00:00, 55.17it/s]


Unnamed: 0,spot_price,strike_price,barrier,days_to_maturity,updown,outin,w,barrier_type_name,rebate,dividend_rate,risk_free_rate,theta,kappa,rho,eta,v0,calculation_date,barrier_price
0,1416.59000,1274.93100,708.29500,60,Down,Out,call,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,146.29070
1,1416.59000,1274.93100,708.29500,60,Down,Out,put,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.48509
2,1416.59000,1274.93100,708.29500,60,Down,In,call,DownIn,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.06683
3,1416.59000,1274.93100,708.29500,60,Down,In,put,DownIn,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.00000
4,1416.59000,1274.93100,708.29500,90,Down,Out,call,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,149.46448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3226675,1425.77000,1568.34700,2138.65500,540,Up,In,put,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,0.35048
3226676,1425.77000,1568.34700,2138.65500,720,Up,Out,call,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,54.54956
3226677,1425.77000,1568.34700,2138.65500,720,Up,Out,put,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,189.96091
3226678,1425.77000,1568.34700,2138.65500,720,Up,In,call,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,51.74802


In [28]:
from model_settings import vanilla_pricer
vanillas = vanilla_pricer()
dataset['calculation_date'] = pd.to_datetime(dataset['calculation_date'],format='%Y-%m-%d')
dataset.dtypes


initializing vanilla pricer
Actual/365 (Fixed) day counter
seed: 123



spot_price                  float64
strike_price                float64
barrier                     float64
days_to_maturity              int64
updown                       object
outin                        object
w                            object
barrier_type_name            object
rebate                      float64
dividend_rate               float64
risk_free_rate              float64
theta                       float64
kappa                       float64
rho                         float64
eta                         float64
v0                          float64
calculation_date     datetime64[ns]
barrier_price               float64
dtype: object

# Preprocessing

In [29]:
from convsklearn import barrier_trainer
price = 'barrier_price'
trainer = barrier_trainer
trainer.activation_function = 'relu'
trainer.solver = 'sgd'
dataset[price] = pd.to_numeric(dataset[price],errors='coerce')
dataset['observed_price'] = np.maximum(dataset[price] + np.random.normal(scale=(0.15)**2,size=dataset.shape[0]),0)
dataset.dtypes

spot_price                  float64
strike_price                float64
barrier                     float64
days_to_maturity              int64
updown                       object
outin                        object
w                            object
barrier_type_name            object
rebate                      float64
dividend_rate               float64
risk_free_rate              float64
theta                       float64
kappa                       float64
rho                         float64
eta                         float64
v0                          float64
calculation_date     datetime64[ns]
barrier_price               float64
observed_price              float64
dtype: object

In [30]:
dataset.dropna()

Unnamed: 0,spot_price,strike_price,barrier,days_to_maturity,updown,outin,w,barrier_type_name,rebate,dividend_rate,risk_free_rate,theta,kappa,rho,eta,v0,calculation_date,barrier_price,observed_price
0,1416.59000,1274.93100,708.29500,60,Down,Out,call,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,146.29070,146.30573
1,1416.59000,1274.93100,708.29500,60,Down,Out,put,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.48509,0.47890
2,1416.59000,1274.93100,708.29500,60,Down,In,call,DownIn,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.06683,0.05142
3,1416.59000,1274.93100,708.29500,60,Down,In,put,DownIn,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,0.00000,0.00000
4,1416.59000,1274.93100,708.29500,90,Down,Out,call,DownOut,0.00000,0.01812,0.04000,0.08296,0.23344,-1.00000,0.09850,0.01066,2007-01-03,149.46448,149.47188
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3226675,1425.77000,1568.34700,2138.65500,540,Up,In,put,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,0.35048,0.32400
3226676,1425.77000,1568.34700,2138.65500,720,Up,Out,call,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,54.54956,54.54717
3226677,1425.77000,1568.34700,2138.65500,720,Up,Out,put,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,189.96091,189.97997
3226678,1425.77000,1568.34700,2138.65500,720,Up,In,call,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,51.74802,51.72121


## Train/test split

In [31]:
unique_dates = dataset['calculation_date'].sort_values(
    ascending=True).unique().tolist()
filter_date = unique_dates[int(0.85*len(unique_dates))]
train_data = dataset[
    (
      # (dataset['calculation_date']>=datetime(2007,1,1))
      #  &
      (dataset['calculation_date']<=filter_date)
    )
].copy()

test_data = dataset[
    (
        (dataset['calculation_date']>filter_date)
        # &
        # (dataset['calculation_date']<=datetime(2012,12,31))
    )
].copy()

In [32]:
test_data

Unnamed: 0,spot_price,strike_price,barrier,days_to_maturity,updown,outin,w,barrier_type_name,rebate,dividend_rate,risk_free_rate,theta,kappa,rho,eta,v0,calculation_date,barrier_price,observed_price
2738520,1347.14000,1212.42600,673.57000,60,Down,Out,call,DownOut,0.00000,0.02116,0.04000,0.05362,1.66744,-1.00000,0.13611,0.01825,2012-02-07,140.09327,140.09781
2738521,1347.14000,1212.42600,673.57000,60,Down,Out,put,DownOut,0.00000,0.02116,0.04000,0.05362,1.66744,-1.00000,0.13611,0.01825,2012-02-07,2.10481,2.10400
2738522,1347.14000,1212.42600,673.57000,60,Down,In,call,DownIn,0.00000,0.02116,0.04000,0.05362,1.66744,-1.00000,0.13611,0.01825,2012-02-07,0.06146,0.11593
2738523,1347.14000,1212.42600,673.57000,60,Down,In,put,DownIn,0.00000,0.02116,0.04000,0.05362,1.66744,-1.00000,0.13611,0.01825,2012-02-07,0.00000,0.00000
2738524,1347.14000,1212.42600,673.57000,90,Down,Out,call,DownOut,0.00000,0.02116,0.04000,0.05362,1.66744,-1.00000,0.13611,0.01825,2012-02-07,144.82074,144.77939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3226675,1425.77000,1568.34700,2138.65500,540,Up,In,put,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,0.35048,0.32400
3226676,1425.77000,1568.34700,2138.65500,720,Up,Out,call,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,54.54956,54.54717
3226677,1425.77000,1568.34700,2138.65500,720,Up,Out,put,UpOut,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,189.96091,189.97997
3226678,1425.77000,1568.34700,2138.65500,720,Up,In,call,UpIn,0.00000,0.02223,0.04000,0.05621,0.89821,-0.41897,0.32487,0.02466,2012-12-31,51.74802,51.72121


In [33]:
test_train_ratio = int(round(100*test_data.shape[0]/train_data.shape[0],0))
print(f"train/test: {100-test_train_ratio}/{test_train_ratio}")

train/test: 82/18


In [34]:
arrs = trainer.get_train_test_arrays(
    train_data, test_data,feature_set = trainer.feature_set, target_name=trainer.target_name)
preprocessor = trainer.preprocess()
train_X = arrs['train_X'] 
train_y = arrs['train_y']
test_X = arrs['test_X']
test_y = arrs['test_y']

# Training

In [35]:
sum([0 for f in ~train_y.isna()])

0

In [36]:
print("features:")
for f in trainer.feature_set:
    print(f"    {f.replace('_',' ')}")
print(f"target:\n    {trainer.target_name.replace('_',' ')}")

features:
    spot price
    strike price
    days to maturity
    risk free rate
    dividend rate
    kappa
    theta
    rho
    eta
    v0
    barrier
    barrier type name
    w
target:
    observed price


In [None]:
model_fit, runtime, specs = trainer.run_dnn(preprocessor,train_X,train_y)
train_end = time.time()
train_runtime = train_end-train_start
print(f"\ncpu: {train_runtime}")


training...

Deep Neural Network
hidden layers sizes: (13, 13, 13)
learning rate: adaptive
activation: relu
solver: sgd
alpha: 0.0001


# Testing

In [None]:
test_data.describe()

In [None]:
insample, outsample, errors = trainer.test_prediction_accuracy(
        model_fit,
        test_data,
        train_data
)
outofsample_RMSE = errors['outofsample_RMSE']

# Saving