In [1]:
import os
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from model_settings import ms
from convsklearn import asian_trainer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn import ensemble
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer,TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV


# Notebook-wide handles: `trainer` supplies the feature lists and the
# train/test helpers used by the cells below.
trainer = asian_trainer
root = Path().resolve().parent.parent

# Trained models are looked up under ./trained_models/trained_models relative
# to the current working directory.
models_dir = os.path.join(Path().resolve(),'trained_models','trained_models')

# os.listdir returns entries in arbitrary order; sort them so the printed
# indices are stable between runs and machines. Notebook checkpoint entries
# (anything containing 'ipynb') are skipped.
models = sorted(f for f in os.listdir(models_dir) if 'ipynb' not in f)
for i,m in enumerate(models):
    print(f"{i}     {m}")

0     .DS_Store
1     2024_10_27 08-05-24 Deep Neural Network asian options


In [2]:
# Choose the model to evaluate. Only directories are candidates, so stray
# files such as `.DS_Store` cannot shift the selection, and sorting makes the
# choice independent of os.listdir ordering. The last entry is taken; with
# timestamp-prefixed names like the one listed above this is presumably the
# most recently trained model. Index `model_dirs` differently to pick another.
model_dirs = sorted(
    m for m in models if os.path.isdir(os.path.join(models_dir, m))
)
if not model_dirs:
    raise FileNotFoundError(f"no trained model directories found in {models_dir}")
model = model_dirs[-1]
model_dir = os.path.join(models_dir,model)

# List the model's artefacts, skipping checkpoint entries and HTML exports.
# Sorted so the printed indices are deterministic.
model_files = sorted(
    f for f in os.listdir(model_dir)
    if 'ipynb' not in f and '.html' not in f
)
for i,m in enumerate(model_files):
    print(f"{i}     {m}")

0     2024_10_27 08-05-24 Deep Neural Network asian options insample.csv
1     2024_10_27 08-05-24 Deep Neural Network asian options outsample.csv
2     2024_10_27 08-05-24 Deep Neural Network asian options.pkl
3     2024_10_27 08-05-24 Deep Neural Network asian options.txt


In [3]:
# Load the in-sample (training) split saved alongside the model. The first
# CSV column is dropped — presumably the index written on export (TODO confirm).
insample_name = [name for name in model_files if 'insample' in name][0]
train_data = pd.read_csv(os.path.join(model_dir, insample_name)).iloc[:, 1:].copy()

# Same treatment for the out-of-sample (test) split.
outsample_name = [name for name in model_files if 'outsample' in name][0]
test_data = pd.read_csv(os.path.join(model_dir, outsample_name)).iloc[:, 1:].copy()

# Let the trainer split both frames into feature/target arrays.
arrs = trainer.get_train_test_arrays(train_data, test_data)
test_X, test_y = arrs['test_X'], arrs['test_y']
train_X, train_y = arrs['train_X'], arrs['train_y']

In [4]:
# Separate scaler instances for the features and for the target, so the two
# transformations are never tied to one shared object.
scaler = StandardScaler()
target_scaler = StandardScaler()

# Feature preprocessing: standardise the numerical columns and one-hot encode
# the categorical ones (column lists come from the trainer).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', scaler, trainer.numerical_features),
        # handle_unknown='ignore' encodes a category that was absent at fit
        # time as all zeros instead of raising, so a cross-validation fold or
        # an out-of-sample row with an unseen level cannot abort prediction.
        ('cat', OneHotEncoder(handle_unknown='ignore'), trainer.categorical_features)
    ]
)

# Baseline gradient-boosting hyperparameters.
params = {
    "n_estimators": 500,
    "max_depth": 4,
    # The split threshold is tied to the number of features in the trainer's
    # feature set.
    "min_samples_split": len(trainer.feature_set),
    "learning_rate": 0.01,
    "loss": "squared_error",
}

# Preprocessing + regressor. The step names 'preprocessor' and 'regressor'
# are relied on by the `regressor__regressor__*` grid-search keys.
gbr_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', ensemble.GradientBoostingRegressor(**params))
])

# Wrap the pipeline so the target is standardised before fitting and
# predictions are mapped back to the original scale. The step is still called
# "feature_scaler" to keep parameter names unchanged, although it scales the
# target rather than the features.
gbr = TransformedTargetRegressor(
            regressor=gbr_pipeline,
            transformer=Pipeline([
                ("feature_scaler", target_scaler),
            ])
)

In [None]:
# Hyperparameter grid for the gradient-boosting step. Keys are addressed
# through the target wrapper and then the pipeline, hence the doubled
# 'regressor__' prefix.
gbr_prefix = "regressor__regressor__"
param_grid = {
    gbr_prefix + name: candidates
    for name, candidates in (
        ("n_estimators", [100, 300, 500]),
        ("learning_rate", [0.01, 0.05, 0.1]),
        ("max_depth", [3, 4, 5]),
        ("min_samples_split", [2, 5, 10]),
    )
}

# Exhaustive 5-fold search scored by negative MSE, using all available cores.
search_options = dict(
    cv=5,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
)
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, **search_options)

grid_search.fit(train_X, train_y)
print("Best Parameters:", grid_search.best_params_)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


In [None]:
# Fit the baseline estimator (`gbr` with the fixed `params`) on the training
# arrays. This is not the tuned model; use `grid_search.best_estimator_`
# instead to evaluate the grid-search winner.
gbr_fit = gbr.fit(train_X, train_y)

# Evaluate in-sample and out-of-sample accuracy. The original call passed
# `new_test` / `new_train`, which are never defined in this notebook and raise
# NameError on a fresh kernel; the frames loaded earlier are used instead.
# NOTE(review): assumes test_prediction_accuracy takes (model, test, train)
# data frames, matching the original argument order — confirm against convsklearn.
insample,outsample,errors = trainer.test_prediction_accuracy(gbr_fit,test_data,train_data)