In [None]:
import yaml
import numpy as np
from sklearn.model_selection import ParameterSampler
from pathlib import Path
import os
from copy import deepcopy

In [None]:
# from vn1.data_loading import load_data
# sales, price = load_data(1,"../data/raw")

In [None]:
# Lag offsets and rolling-mean window sizes fed into the feature-engineering
# specs of the base config (one feature is generated per listed value).
sales_lags = [*range(8), 11, 15, 19, 25, 51]
sales_rolling_mean = [4, 8, 12, 26, 52]
price_lags = [*range(4)]
price_rolling_mean = [4, 8, 12, 26, 52]

# Lags applied to sales when grouped by warehouse / by client.
sales_by_warehouse_lags = [*range(4)]
sales_by_client_lags = [*range(4)]

# Calendar components passed as date features.
date_features = [
    "year",
    "month",
    "day",
]

# Static (non time-varying) columns; every one is flagged as categorical.
static_features = [
    {"name": column, "categorical": True}
    for column in ("Client", "Warehouse", "Product")
]
# TODO: check whether treating Product as non-categorical, or dropping it entirely, gives better results.

In [None]:
def _rolling_mean_features(windows):
    """Return one rolling-mean feature spec per window size in ``windows``."""
    return [
        {"kind": "rolling", "params": {"window": window, "statistic": "mean"}}
        for window in windows
    ]


def _lag_features(lags):
    """Return one lag feature spec per offset in ``lags``."""
    return [{"kind": "lag", "params": {"lag": lag}} for lag in lags]


# Template configuration: engine parameters plus the preprocessing /
# feature-engineering description assembled from the lists defined above.
BASE_CONFIG = {
    "engine_params": {
        "verbose": 0,
    },
    "multi_horizon": True,
    "preprocessing_config": {
        "normalize_price": False,
        "normalize_sales": False,
        "static_feature_list": static_features,
        "date_features": date_features,
        "data_feature_eng_list": [
            # Sales, ungrouped: rolling means first, then lags.
            {
                "source": "sales",
                "groupby": None,
                "feature_eng_list": (
                    _rolling_mean_features(sales_rolling_mean)
                    + _lag_features(sales_lags)
                ),
            },
            # Sales grouped by warehouse: lags only.
            {
                "source": "sales",
                "groupby": ["Warehouse"],
                "feature_eng_list": _lag_features(sales_by_warehouse_lags),
            },
            # Sales grouped by client: lags only.
            {
                "source": "sales",
                "groupby": ["Client"],
                "feature_eng_list": _lag_features(sales_by_client_lags),
            },
            # Price, ungrouped: rolling means first, then lags.
            {
                "source": "price",
                "groupby": None,
                "feature_eng_list": (
                    _rolling_mean_features(price_rolling_mean)
                    + _lag_features(price_lags)
                ),
            },
        ],
    },
}

In [None]:
# with open("../config.yaml","w") as f:
#     yaml.safe_dump(BASE_CONFIG,f)

In [None]:
# Discrete search space: every hyperparameter maps to a list of candidate values.
# Values are plain Python ints/floats (the log-spaced learning rates are
# converted from NumPy scalars) so they can be dumped with yaml.safe_dump.
# NOTE(review): the names look like LightGBM's scikit-learn parameters; if so,
# `subsample` only has an effect when `subsample_freq` > 0 — confirm.
param_grid = dict(
    num_leaves=[*range(20, 150)],
    max_depth=[*range(3, 15)],
    learning_rate=np.logspace(-4, 0, 50).tolist(),  # 50 values from 1e-4 to 1
    n_estimators=[*range(50, 500)],
    min_child_samples=[*range(5, 100)],
    subsample=[0.5 + 0.05 * step for step in range(10)],  # 0.50 .. 0.95
    colsample_bytree=[0.5 + 0.05 * step for step in range(10)],  # 0.50 .. 0.95
)

# Draw `n_iter` random candidates from the grid; the fixed integer seed makes
# the draw repeatable across runs.
n_iter = 100
param_sampler = ParameterSampler(
    param_distributions=param_grid,
    n_iter=n_iter,
    random_state=42,
)

# Materialise the sampled settings as a list of {name: value} dicts.
combinations = [*param_sampler]

In [None]:
# Write one YAML config file per sampled hyperparameter combination.
path = Path("../configs/1_hparams_search")
# exist_ok=True keeps the cell re-runnable: the original os.makedirs(path)
# raised FileExistsError as soon as the directory existed. Files with the
# same index are overwritten on a re-run.
path.mkdir(parents=True, exist_ok=True)

for i, params in enumerate(combinations):
    # Deep copy so each file starts from an unmodified BASE_CONFIG; updating
    # "engine_params" in place would otherwise leak into later iterations.
    config = deepcopy(BASE_CONFIG)
    config["engine_params"].update(params)
    # Zero-pad the index to three digits ("000_config.yaml", "001_config.yaml", ...).
    with open(path / f"{i:03d}_config.yaml", "w") as f:
        yaml.safe_dump(config, f)