In [10]:
import sys
from pathlib import Path
import numpy as np

current_dir = Path.cwd()
if str(current_dir) not in sys.path:
    sys.path.insert(0, str(current_dir))

print(current_dir)

from src import (
    DataConfig, DataLoader, ModelingStrategy, ReleaseManager, BenchmarkPipeline, create_config
)

np.random.seed(42)

/Users/ivn/Documents/PhD/Transformer Research/Code/Benchmarking


In [11]:
data_config = DataConfig(
    mapping_path = 'data/feature_mapping_train.pkl',
    features_path = "/Users/ivn/Documents/PhD/Transformer Research/Code/Benchmarking/data/db_snapshot_offsite/train_data/processed/train_data_features.feather",
    target_path = "/Users/ivn/Documents/PhD/Transformer Research/Code/Benchmarking/data/db_snapshot_offsite/train_data/train_data/train_data_target.feather",
    split_date="2016-01-01",
)

sku_tuples=[(1912, 7), (377, 1), (715, 7)]

quantiles = [0.5, 0.7, 0.9, 0.95, 0.99]
pipeline = BenchmarkPipeline(data_config)

In [12]:
loader = DataLoader(data_config)
data = loader.load_data()


In [13]:
results_lightning_std = pipeline.run_experiment(
    sku_tuples=sku_tuples,
    modeling_strategy=ModelingStrategy.INDIVIDUAL,
    model_type="xgboost_quantile",
    quantile_alphas=quantiles,
    hyperparameters = {
        "eta": 0.05,
        "max_depth": 8,
        "min_child_weight": 20,
        "subsample": 0.8,
        "colsample_bytree": 0.7,
        "gamma": 1.0,   
        "lambda": 10.0,
        "alpha": 1.0,
        "tree_method": "hist",
        "n_estimators": 500
},
    experiment_name="xgb_quantile_test",
    evaluate_on_test=True
)

100%|██████████| 3/3 [00:00<00:00,  3.81it/s]
Training models: 100%|██████████| 3/3 [00:05<00:00,  1.74s/it]


In [9]:
sample_result = results_lightning_std.training_results[6]
print(f"Model type: {sample_result.model_type}")
print(f"Strategy: {sample_result.modeling_strategy.value}")
print(f"SKU tuples: {sample_result.sku_tuples}")
print(f"Quantile level: {sample_result.quantile_level}")
if sample_result.performance_metrics:
    print(f"quantile_score: {sample_result.performance_metrics.get('quantile_score', 'N/A')}")
print(f"number of models trained: {len(results_lightning_std.training_results)}")

Model type: xgboost_quantile
Strategy: individual
SKU tuples: [(377, 1)]
Quantile level: 0.7
quantile_score: [0.36256971 0.90238266 0.62469134 0.26885844 0.71072459 1.07652683
 0.75566826 0.05417506 0.3071763  0.9561327  0.71911755 1.05563397
 0.26544808 0.94752131 0.40566859 0.64334979 0.58591722 0.42498215
 0.59437351 0.53652195 0.15656685 0.56167184 0.03336295 0.45888019
 0.49446731 0.61494155 0.10506878 0.60145183 0.23541781 0.8907526
 0.4537149  0.68104734 0.46762376 0.85188689 0.47231523 0.58349293
 0.61812701 0.40038414 1.28818159 0.08254652 0.32145138 0.79792199
 0.23931804 0.50033501 0.92318059 0.62207422 0.91969829 0.32377224
 0.14424577 0.1036623  1.12296824 0.7090014  0.38530471 0.7006259
 0.38586388 0.3701324  0.48179179 0.02007762 1.20431912 0.75701065
 0.06447744 0.33362117 0.9532495  0.14417267 0.09678111 0.86001134
 0.18045169 1.13758507 0.51523751 0.88999229 0.46002831 0.74253631
 0.52263154 0.59260479 0.48647894 0.58572702 0.98039246 0.47291397
 0.71521981 0.73186812

In [37]:
release_manager = ReleaseManager()
output_dir = Path("./xgb_releases_2")
release_path = release_manager.create_complete_release(
    experiment_results=results_lightning_std,  # Your ExperimentResults from pipeline
    base_output_dir=output_dir
  )