In [1]:
import os
import sys
import time
from datetime import datetime
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from model_settings import ms
import matplotlib.pyplot as plt
# --- Session configuration ---------------------------------------------------
# Re-anchor the process on the absolute form of the current working directory.
os.chdir(os.path.abspath(os.curdir))

# Show every column and render floats with five decimals in DataFrame output.
pd.set_option("display.max_columns", None)
pd.set_option("display.float_format", '{:.5f}'.format)

# Make the historical-generation helpers importable from this notebook.
notebook_dir = str(Path('.').resolve())
sys.path.append(str(Path(notebook_dir, 'historical_data', 'historical_generation')))

# --- Start-of-run timestamp --------------------------------------------------
# train_start is read again after fitting to report the elapsed time.
train_start = time.time()
train_start_datetime = datetime.fromtimestamp(train_start)
train_start_tag = format(train_start_datetime, '%c')

banner_rule = "#" * 18
print(f"\n{banner_rule}\n# training start #\n{banner_rule}\n\n{train_start_tag}\n")


##################
# training start #
##################

Fri Oct 18 23:03:20 2024



# Loading data

In [2]:
# Load the training data from the CSV that sits next to this notebook.
# os.listdir() returns entries in arbitrary order, so the candidates are sorted
# to make the selection reproducible when more than one CSV is present, and a
# directory without any CSV fails with an explicit error instead of a bare
# IndexError.
data_dir = Path().resolve()
csv_names = sorted(name for name in os.listdir(data_dir) if name.endswith('.csv'))
if not csv_names:
    raise FileNotFoundError(f"no .csv file found in {data_dir}")

# The first column is discarded; presumably it is the index written out by a
# previous DataFrame.to_csv call — TODO confirm.
dataset = pd.read_csv(data_dir / csv_names[0]).iloc[:, 1:]

# Preprocessing

In [3]:
import convsklearn

# Column the model is fitted to predict.
target_name = 'asian_price'

# Columns handed to the trainer as categorical inputs.
categorical_features = ['averaging_type', 'w']

# Columns handed to the trainer as numerical inputs (order preserved).
numerical_features = [
    'spot_price', 'strike_price', 'days_to_maturity',
    'risk_free_rate', 'dividend_rate',
    'kappa', 'theta', 'rho', 'eta', 'v0',
    'fixing_frequency', 'n_fixings', 'past_fixings',
]

# Build the sklearn convenience wrapper; every other constructor argument is
# left at its default.
trainer = convsklearn.convsklearn(
    target_name=target_name,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

# Print the wrapper's API reference for the reader.
help(trainer)

Help on convsklearn in module convsklearn.convsklearn object:

class convsklearn(builtins.object)
 |  convsklearn(target_name, numerical_features, categorical_features, n_layers=None, random_state=None, max_iter=1000, solver='sgd', alpha=0.0001, learning_rate='adaptive', activation_function='relu', rf_n_estimators=50, rf_min_samples_leaf=2000)
 |
 |  a proprietary class of convenience wrappers for sklearn
 |
 |  Methods defined here:
 |
 |  __init__(self, target_name, numerical_features, categorical_features, n_layers=None, random_state=None, max_iter=1000, solver='sgd', alpha=0.0001, learning_rate='adaptive', activation_function='relu', rf_n_estimators=50, rf_min_samples_leaf=2000)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |
 |  get_train_test_arrays(self, train_data, test_data, feature_set=None, target_name=None)
 |
 |  plot_model_performance(self, df, X_name, Y_name, xlabel, ylabel, runtime, title)
 |
 |  preprocess(self)
 |
 |  run_dnn(self, preprocess

## Train/test split

In [4]:
# Coerce every numerical feature column to a numeric dtype in one pass;
# values that cannot be parsed become NaN (errors='coerce').
dataset[trainer.numerical_features] = dataset[trainer.numerical_features].apply(
    pd.to_numeric, errors='coerce')

In [5]:
# Distinct calculation dates in ascending order.
unique_dates = dataset['calculation_date'].sort_values().unique().tolist()

# Date located 85% of the way through the ordered list.
# NOTE(review): filter_date is not used by the split below nor by any other
# cell visible here — confirm whether a date-based split was intended.
cutoff_position = int(0.85 * len(unique_dates))
filter_date = unique_dates[cutoff_position]

# Rows with exactly one day to maturity form the test set; all remaining rows
# form the training set.
one_day_to_maturity = dataset['days_to_maturity'] == 1
test_data = dataset[one_day_to_maturity]
train_data = dataset[~one_day_to_maturity]

In [6]:
# Turn the two partitions into feature/target arrays keyed by name.
arrs = trainer.get_train_test_arrays(
    train_data,
    test_data,
    feature_set=trainer.feature_set,
    target_name=trainer.target_name,
)

# Preprocessing object that is handed to the model fit in the training cell.
preprocessor = trainer.preprocess()

train_X, train_y = arrs['train_X'], arrs['train_y']
test_X, test_y = arrs['test_X'], arrs['test_y']

# Disabled: report of the training date range (kept for reference).
# train_startdate = train_data.describe()['calculation_date']['min'].strftime('%A, %Y-%m-%d')
# train_enddate = train_data.describe()['calculation_date']['max'].strftime('%A, %Y-%m-%d')
# print(f"\ntraining from\n{train_startdate}\nto\n{train_enddate}\n")


# Training

In [7]:
# Fit the network on the training arrays; run_dnn returns three values that
# are unpacked as the fitted model, a runtime figure and the model specs.
fit_result = trainer.run_dnn(preprocessor, train_X, train_y)
model_fit, runtime, specs = fit_result

# NOTE(review): train_start was taken with time.time() in the first cell, so
# this figure is wall-clock time elapsed since the notebook started (data
# loading and preprocessing included), not CPU time of the fit — confirm that
# the "cpu" label is intended.
train_end = time.time()
train_runtime = train_end - train_start
print("\ncpu: " + str(train_runtime))


training...

Deep Neural Network
hidden layers sizes: (15, 15, 15)
learning rate: adaptive
activation: relu
solver: sgd
alpha: 0.0001

cpu: 0.41223669052124023


# Testing

In [8]:
# Summary statistics of the held-out rows, displayed as this cell's output
# for a quick sanity check before scoring.
test_data.describe()

Unnamed: 0,difference,vanilla,asian_price,spot_price,strike_price,risk_free_rate,dividend_rate,fixing_frequency,n_fixings,past_fixings,kappa,theta,rho,eta,v0,days_to_maturity,moneyness
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,-5e-05,15.0279,15.02796,100.0,100.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,0.08
std,0.00076,20.49466,20.4945,0.0,36.27381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.43808
min,-0.00168,0.0,0.0,100.0,50.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,-0.5
25%,-0.0007,0.0,0.0,100.0,75.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,-0.25
50%,0.0,0.14772,0.14817,100.0,100.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,0.0
75%,0.00013,25.00822,25.0093,100.0,125.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,0.33333
max,0.00163,50.00548,50.00636,100.0,150.0,0.04,0.0,1.0,1.0,0.0,0.8,0.008,0.2,0.1,0.005,1.0,1.0


In [9]:
# Score the fitted model on both partitions; the call returns three values,
# unpacked here as in-sample results, out-of-sample results and errors.
# NOTE(review): the partitions are passed as (test_data, train_data) — verify
# this order against the method's signature.
insample, outsample, errors = trainer.test_prediction_accuracy(
    model_fit, test_data, train_data)


in sample:
     RSME: 4.402886141232166
     MAE: 3.6791417199574923

out of sample:
     RSME: 4.6063097752321776
     MAE: 3.7393983521613365


# Saving