In [1]:
import os
import time
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime
from model_settings import ms
from feature_collector import preprocess_data
from df_collector import df_collector
# Display settings: never truncate columns and render floats with five decimals.
pd.options.display.max_columns = None
pd.set_option("display.float_format", '{:.5f}'.format)

# The project root is two levels above the current working directory;
# df_collector is pointed at the same root.
working_dir = Path().resolve()
root = working_dir.parent.parent
df_collector.root = root
models_dir = os.path.join(root, ms.trained_models)

# Stamp the start of the run (epoch seconds, datetime, and locale-formatted tag).
train_start = time.time()
train_start_datetime = datetime.fromtimestamp(train_start)
train_start_tag = train_start_datetime.strftime('%c')

# Banner: blank line, boxed title, blank line, then the start tag.
banner_rule = "#" * 18
print("\n".join(
    ["", banner_rule, "# training start #", banner_rule, "", train_start_tag]))


##################
# training start #
##################

Wed Nov  6 18:57:07 2024


In [None]:
# NOTE(review): the tag says "asians" while the data is loaded with
# cboe_spx_barriers() - confirm which is intended; this tag becomes part of
# the saved model's folder and file name.
filetag = 'cboe spx asians'

# Seed for the synthetic noise below, so that re-running the cell (or the whole
# notebook) yields the same `observed_price` column.
NOISE_SEED = 0

# Load the dataset and drop its first column (presumably a stored index
# column - TODO confirm).
data = df_collector.cboe_spx_barriers().iloc[:,1:]
print('data collected')

# Keep only rows whose barrier lies below spot. The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not raise
# pandas' SettingWithCopyWarning.
data = data[data['barrier_price']<data['spot_price']].copy()

# observed_price = barrier_price plus Gaussian noise (std 0.1), drawn from a
# seeded generator instead of NumPy's unseeded global state.
noise_rng = np.random.default_rng(NOISE_SEED)
data['observed_price'] = data['barrier_price'].values + noise_rng.normal(scale=0.1,size=data.shape[0])

In [None]:
from convsklearn import convsklearn, barrier_features

# Build a trainer with its default constructor arguments and display its
# attribute dict as the cell output.
trainer = convsklearn()
vars(trainer)

In [None]:
# Pass the filtered dataset to the trainer, then display its attribute dict.
trainer.load_data(data)
vars(trainer)

# preprocessing

In [None]:
# Distinct values of the 'date' column, sorted ascending and wrapped in a
# Series; the Series is displayed as the cell output.
unique_dates = data['date'].unique()
dates = pd.Series(np.sort(unique_dates))
dates

In [None]:
# Minimal split: the first sorted date is used for development and the second
# one for testing.
# Wider split kept for reference: the first quarter of the dates for
# development (dates[:len(dates)//4]) and all remaining dates for testing
# (dates[~dates.isin(development_dates)]).
development_dates = dates.iloc[:1]
test_dates = dates.iloc[1:2]

In [None]:
# Display the trainer's attribute dict before preprocessing.
vars(trainer)

In [None]:
# Run the trainer's preprocessing with the development and test date selections.
# This is the trainer's own method, not the `preprocess_data` function imported
# from feature_collector at the top of the notebook.
trainer.preprocess_data(development_dates,test_dates)

# training

In [None]:
# Train via the trainer, then display its `model_fit` attribute as the cell output.
# NOTE(review): presumably run_dnn() is what populates `model_fit` - confirm in convsklearn.
trainer.run_dnn()
trainer.model_fit

In [None]:
# Display the trainer's attribute dict after training.
vars(trainer)

In [None]:
# Display the trainer's `model_fit` attribute on its own (the same expression
# that ends the training cell).
trainer.model_fit

### initial test

In [None]:
# Keep whatever the trainer's accuracy check returns for later inspection.
# NOTE(review): the return type is not visible from this notebook - confirm in convsklearn.
train_test = trainer.test_prediction_accuracy()

# saving

In [None]:
# Stamp the end of the run and derive the name under which the model is saved:
# "<end timestamp> <filetag>", used for both the folder and the file.
train_end = time.time()
train_end_datetime = datetime.fromtimestamp(train_end)
train_end_tag = train_end_datetime.strftime("%Y_%m_%d %H%M%S%f")
file_tag = f"{train_end_tag} {filetag}"
files_dir = os.path.join(models_dir, file_tag)

def save_model():
    """Pickle the trainer's attribute dict into a run-specific folder.

    Reads the notebook-level names ``files_dir``, ``file_tag`` and ``trainer``.
    The output file is ``<files_dir>/<file_tag>.pkl``, written with
    ``joblib.dump``. Returns nothing; prints the destination (without the
    ``.pkl`` suffix) when done.
    """
    # makedirs also creates missing parent directories (e.g. the models
    # directory itself), and exist_ok=True makes calling this twice harmless.
    os.makedirs(files_dir, exist_ok=True)
    file_dir = os.path.join(files_dir,file_tag)
    joblib.dump(trainer.__dict__,f"{file_dir}.pkl")
    # Re-applies the same display option that the first cell sets.
    pd.set_option("display.max_columns",None)
    print(f"model saved to {file_dir}")

# Elapsed seconds between the start and end stamps of the run.
# NOTE(review): both stamps come from time.time(), so this is wall-clock time
# even though the printed label says "cpu".
train_runtime = train_end-train_start

# Saving is opt-in: tell the user how to trigger it, then report the runtime.
print(f"execute the command 'save_model()' to save the following model: {file_tag}")
print(f"\ncpu: {train_runtime}")