In [1]:
import os
import time
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from datetime import datetime
from model_settings import ms
from feature_collector import preprocess_data
from df_collector import df_collector
# Display configuration: never truncate columns, render floats with five decimals.
pd.set_option("display.max_columns", None)
pd.options.display.float_format = '{:.5f}'.format

# The project root is taken to be two directory levels above the working directory;
# the data collector and the trained-model directory are both anchored to it.
root = Path().resolve().parent.parent
df_collector.root = root
models_dir = os.path.join(root, ms.trained_models)

# Record when the run started, both as an epoch timestamp (used later for the
# runtime report) and as a locale-formatted tag for the banner below.
train_start = time.time()
train_start_datetime = datetime.fromtimestamp(train_start)
train_start_tag = train_start_datetime.strftime('%c')
print("\n" + "\n".join([
    "#" * 18,
    "# training start #",
    "#" * 18,
    "",
    train_start_tag,
]))


##################
# training start #
##################

Wed Nov  6 21:14:42 2024


In [2]:
# Name of the target price column; its prefix before the first underscore
# ('barrier') is reused in the tag of the saved model.
pricename = 'barrier_price'
data = df_collector.cboe_spx_barriers()

# split() avoids nesting the same quote character inside the f-string (a
# SyntaxError before Python 3.12) and, unlike slicing with find(), does not
# drop the last character when the name contains no underscore.
filetag = f"cboe spx {pricename.split('_')[0]}"

# Keep only contracts priced strictly between 1% of spot and spot itself.
# Both bounds are applied in a single boolean mask so the frame is copied once.
price_in_range = (
    (data[pricename] < data['spot_price'])
    & (data[pricename] > data['spot_price'] * 0.01)
)
data = data[price_in_range].reset_index(drop=True)
og_count = data.shape[0]  # row count after filtering
print('data collected')
data

data collected


Unnamed: 0,spot_price,strike_price,barrier,days_to_maturity,updown,outin,w,barrier_type_name,rebate,dividend_rate,risk_free_rate,theta,kappa,rho,eta,v0,calculation_date,date,barrier_price
0,4424.00000,3981.60000,2212.00000,60,Down,Out,call,DownOut,0.00000,0.00000,0.04000,0.04949,26.62607,-0.61054,6.49034,0.15574,2022-01-26 14:39:54.042,2022-01-26,519.00142
1,4424.00000,3981.60000,2212.00000,90,Down,Out,call,DownOut,0.00000,0.00000,0.04000,0.04949,26.62607,-0.61054,6.49034,0.15574,2022-01-26 14:39:54.042,2022-01-26,546.03043
2,4424.00000,3981.60000,2212.00000,90,Down,Out,put,DownOut,0.00000,0.00000,0.04000,0.04949,26.62607,-0.61054,6.49034,0.15574,2022-01-26 14:39:54.042,2022-01-26,44.88864
3,4424.00000,3981.60000,2212.00000,180,Down,Out,call,DownOut,0.00000,0.00000,0.04000,0.04949,26.62607,-0.61054,6.49034,0.15574,2022-01-26 14:39:54.042,2022-01-26,621.26178
4,4424.00000,3981.60000,2212.00000,180,Down,Out,put,DownOut,0.00000,0.00000,0.04000,0.04949,26.62607,-0.61054,6.49034,0.15574,2022-01-26 14:39:54.042,2022-01-26,63.62685
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16064306,5813.50000,6394.85000,8720.25000,540,Up,Out,put,UpOut,0.00000,0.00000,0.04000,0.08750,1.98223,-0.65710,1.70557,0.01895,2024-10-15 16:50:04.131,2024-10-15,598.88951
16064307,5813.50000,6394.85000,8720.25000,540,Up,In,call,UpIn,0.00000,0.00000,0.04000,0.08750,1.98223,-0.65710,1.70557,0.01895,2024-10-15 16:50:04.131,2024-10-15,87.93904
16064308,5813.50000,6394.85000,8720.25000,720,Up,Out,call,UpOut,0.00000,0.00000,0.04000,0.08750,1.98223,-0.65710,1.70557,0.01895,2024-10-15 16:50:04.131,2024-10-15,377.39147
16064309,5813.50000,6394.85000,8720.25000,720,Up,Out,put,UpOut,0.00000,0.00000,0.04000,0.08750,1.98223,-0.65710,1.70557,0.01895,2024-10-15 16:50:04.131,2024-10-15,650.45980


In [None]:
# Add zero-mean Gaussian noise (standard deviation 0.002) to every positive price,
# floor the result at zero, and leave non-positive prices at zero. This is
# vectorised rather than using a row-by-row apply with a Python lambda, which
# matters for a frame of this size.
# NOTE(review): the global NumPy RNG is not seeded anywhere in the visible cells,
# so the noise differs between runs — seed it if reproducibility is required.
data['observed_price'] = np.where(
    data[pricename] > 0,
    np.maximum(
        data[pricename] + np.random.normal(scale=0.002, size=len(data)),
        0,
    ),
    0,
)
data = data[data['observed_price']>0]

# Non-zero differences between the model price and the noisy observation.
p = data[pricename]-data['observed_price']
p = p[p!=0]
# Only prices above 0.01 are shown in the price histogram.
plotprices = data['observed_price']
plotprices = plotprices[plotprices>0.01]

# Square-root rule for the number of bins; at least one bin so that an empty
# or tiny selection does not make hist() fail.
fig,axs = plt.subplots(2,figsize=(10,10))
axs[0].hist(p,bins=max(int(len(p)**0.5),1))
axs[0].set_xlabel('absolute perturbation different from zero')
axs[1].hist(plotprices,bins=max(int(len(plotprices)**0.5),1))
axs[1].set_xlabel('contract price')
plt.show()

In [None]:
# NOTE(review): this import lives here rather than with the other imports in the
# first cell; consider moving it there so dependencies are visible up front.
from convsklearn import convsklearn
# Instantiate the trainer with its default constructor arguments.
trainer = convsklearn()
# Print the interactive help text for the trainer object.
help(trainer)
print('instance variables:')
# Last expression of the cell: the instance attribute dict is rendered as output.
trainer.__dict__

In [None]:
# Hand the filtered, noise-perturbed frame to the trainer.
trainer.load_data(data)
# NOTE(review): under IPython's default settings only the last expression of a
# cell is displayed, so this line produces no output — wrap it in display() if
# the attribute dict is meant to be shown, or drop it.
trainer.__dict__
# Displayed as the cell output.
trainer.dataset

In [None]:
# NOTE(review): the meaning of `dec` and `div` is defined inside convsklearn and
# is not visible from this notebook — confirm what dec=0, div=2 select.
trainer.resample_data(dec=0,div=2)
# Show the trainer's dataset after the resampling call.
trainer.dataset

# preprocessing

In [None]:
# Sorted unique values of the 'date' column, wrapped in a Series so that the
# next cell can slice it positionally and use .isin() on it.
dates = pd.Series(np.sort(data['date'].unique()))
dates

In [None]:
# The earliest quarter of the sorted dates (integer division) is the development set.
development_dates = dates[:len(dates)//4]
# Every remaining date is held out for testing.
test_dates = dates[~dates.isin(development_dates)]
# NOTE(review): this is the trainer's own preprocess_data method, not the
# preprocess_data function imported from feature_collector in the first cell.
trainer.preprocess_data(development_dates,test_dates)

# training

In [None]:
# Run the trainer's DNN routine.
# NOTE(review): presumably this fits the network on the development split
# prepared above — confirm in convsklearn.
trainer.run_dnn()

In [None]:
# Label fixed from 'varianbles' so it matches the identical printout made right
# after the trainer was constructed.
print('instance variables:')
# Last expression of the cell: the trainer's attribute dict is rendered as output.
trainer.__dict__

In [None]:
# Display the trainer's `model_fit` attribute.
# NOTE(review): presumably the fitted estimator set by run_dnn() — confirm.
trainer.model_fit

### initial test

In [None]:
# Display the trainer's `test_X` attribute.
# NOTE(review): presumably the held-out feature matrix — confirm.
trainer.test_X

In [None]:
# Keep whatever the trainer's accuracy check returns for later inspection.
# NOTE(review): the return type is defined in convsklearn and not visible here.
train_test = trainer.test_prediction_accuracy()

# saving

In [None]:
# Timestamp the end of the run; the formatted time (down to microseconds)
# prefixes the model tag so every saved run gets its own name.
train_end = time.time()
train_end_tag = datetime.fromtimestamp(train_end).strftime("%Y_%m_%d %H%M%S%f")
file_tag = f"{train_end_tag} {filetag}"
# Each saved model lives in its own sub-directory of the trained-models directory.
files_dir = os.path.join(models_dir, file_tag)

def save_model():
    """Persist the trainer's instance state to ``<files_dir>/<file_tag>.pkl``.

    Reads the notebook-level names ``files_dir``, ``file_tag`` and ``trainer``.
    The output directory is created if it does not exist yet, including any
    missing parent directories, and ``trainer.__dict__`` is dumped with joblib.

    Returns:
        None. The path of the written file is printed.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it also
    # creates a missing trained-models parent directory and cannot fail if the
    # directory appears between the check and the creation.
    os.makedirs(files_dir, exist_ok=True)
    file_path = os.path.join(files_dir, f"{file_tag}.pkl")
    joblib.dump(trainer.__dict__, file_path)
    # Re-applies the display option that the first cell already sets.
    pd.set_option("display.max_columns",None)
    # Report the file actually written, extension included.
    print(f"model saved to {file_path}")

# Seconds elapsed between the start banner and the beginning of the saving cell.
# NOTE(review): this is a difference of time.time() readings, i.e. wall-clock
# time, even though the printed label says "cpu".
train_runtime = train_end - train_start
# Saving is opt-in: the user is told which call to make and what will be saved.
print(f"execute the command 'save_model()' to save the following model: {file_tag}")
print(f"\ncpu: {train_runtime}")