In [None]:
import yaml
import numpy as np
from sklearn.model_selection import ParameterSampler
from pathlib import Path
import os
from copy import deepcopy
from helpers import build_data_feature_eng_list

In [None]:
# Template shared by every generated config file. Only
# preprocessing_config["data_feature_eng_list"] differs between variants; it is
# left as None here and filled in on a deep copy when the files are written.
BASE_CONFIG = dict(
    # Fixed model hyper-parameters, identical for all variants.
    engine_params=dict(
        colsample_bytree=0.6,
        learning_rate=0.033932217718953266,
        max_depth=11,
        min_child_samples=48,
        n_estimators=488,
        num_leaves=128,
        subsample=0.5,
        verbose=0,
    ),
    # Horizon-related switches.
    multi_horizon=True,
    include_horizon_feature=True,
    include_horizon_year=True,
    include_horizon_month=True,
    include_horizon_day=True,
    preprocessing_config=dict(
        normalize_price=False,
        normalize_sales=False,
        date_features=["year", "month", "day"],
        # One categorical static feature per identifier column.
        static_feature_list=[
            dict(categorical=True, name=column)
            for column in ("Client", "Warehouse", "Product")
        ],
        # Placeholder: the ONLY entry that should be edited per variant.
        data_feature_eng_list=None,
    ),
)


In [None]:
# Lag / window value sets reused by several variants.
_LONG_LAGS = [0, 1, 2, 3, 4, 5, 6, 7, 11, 15, 19, 25, 51, 103]
_SHORT_LAGS = [0, 1, 2, 3]
_WINDOWS = [4, 8, 12, 26, 52, 104]

# Baseline ("current_best") feature-engineering settings. Every variant below is
# expressed as a small set of overrides on top of this dict, so the difference
# between experiments is visible at a glance. Key order is significant: it is
# the order in which the settings are handed to build_data_feature_eng_list.
_CURRENT_BEST = dict(
    sales_lags=_LONG_LAGS,
    sales_rolling_mean=_WINDOWS,
    sales_rolling_std=_WINDOWS,
    sales_rolling_max=_WINDOWS,
    sales_rolling_min=[],
    # by warehouse
    sales_by_warehouse_lags=_SHORT_LAGS,
    sales_by_warehouse_rolling_mean=[],
    sales_by_warehouse_rolling_std=[],
    sales_by_warehouse_rolling_max=[],
    sales_by_warehouse_rolling_min=[],
    # by client
    sales_by_client_lags=_SHORT_LAGS,
    sales_by_client_rolling_mean=[],
    sales_by_client_rolling_std=[],
    sales_by_client_rolling_max=[],
    sales_by_client_rolling_min=[],
    # by product
    sales_by_product_lags=[],
    sales_by_product_rolling_mean=[],
    sales_by_product_rolling_std=[],
    sales_by_product_rolling_max=[],
    sales_by_product_rolling_min=[],
)


def _variant(**overrides):
    """Return the baseline settings with ``overrides`` applied.

    Args:
        **overrides: Setting name -> list of lags/windows replacing the
            baseline value. Names must already exist in ``_CURRENT_BEST``.

    Returns:
        A new dict with the same keys, in the same order, as ``_CURRENT_BEST``.
        Every value is a fresh list, so no list object is shared between
        variants or between keys of one variant.

    Raises:
        KeyError: If an override name is not a known setting (catches typos
            that would otherwise silently add an extra keyword argument).
    """
    unknown = sorted(set(overrides) - set(_CURRENT_BEST))
    if unknown:
        raise KeyError(f"Unknown feature-engineering setting(s): {unknown}")
    merged = {**_CURRENT_BEST, **overrides}
    # Copy each list so later mutation (or YAML aliasing of repeated objects)
    # cannot couple one variant/key to another.
    return {name: list(values) for name, values in merged.items()}


# Experiment name -> keyword arguments for build_data_feature_eng_list.
# Insertion order matters: it determines the numeric prefix of each output file.
data_configs = {
    "current_best": _variant(),
    "add_rolling_min": _variant(sales_rolling_min=_WINDOWS),
    "more_sales_lags": _variant(
        sales_lags=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 19, 25, 30,
                    35, 40, 51, 61, 71, 86, 103, 125, 156],
    ),
    "even_more_sales_lags": _variant(
        sales_lags=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 19,
                    25, 30, 35, 40, 51, 61, 71, 86, 103, 125, 156],
    ),
    "add_product_lags": _variant(sales_by_product_lags=_SHORT_LAGS),
    "add_product_rolling_mean": _variant(sales_by_product_rolling_mean=_WINDOWS),
    "add_product_rolling_lags_and_rolling_mean": _variant(
        sales_by_product_lags=_SHORT_LAGS,
        sales_by_product_rolling_mean=_WINDOWS,
    ),
    "add_warehouse_rolling_mean": _variant(sales_by_warehouse_rolling_mean=_WINDOWS),
    "add_warehouse_rolling_mean_and_std": _variant(
        sales_by_warehouse_rolling_mean=_WINDOWS,
        sales_by_warehouse_rolling_std=_WINDOWS,
    ),
    "add_client_rolling_mean": _variant(sales_by_client_rolling_mean=_WINDOWS),
    "add_client_rolling_mean_and_std": _variant(
        sales_by_client_rolling_mean=_WINDOWS,
        sales_by_client_rolling_std=_WINDOWS,
    ),
    "add_more_product_lags": _variant(sales_by_product_lags=_LONG_LAGS),
    # This entry used to reuse the key "add_product_rolling_mean", which made
    # the dict literal silently discard the product-only variant above. It
    # enables client AND product rolling means, so it gets its own name.
    # NOTE(review): name chosen from the contents -- confirm the intended label.
    "add_client_and_product_rolling_mean": _variant(
        sales_by_client_rolling_mean=_WINDOWS,
        sales_by_product_rolling_mean=_WINDOWS,
    ),
    "add_more_warehouse_lags": _variant(sales_by_warehouse_lags=_LONG_LAGS),
    "add_more_client_lags": _variant(sales_by_client_lags=_LONG_LAGS),
    "add_more_client_and_warehouse_lags": _variant(
        sales_by_warehouse_lags=_LONG_LAGS,
        sales_by_client_lags=_LONG_LAGS,
    ),
    "add_more_client_and_warehouse_and_product_lags": _variant(
        sales_by_warehouse_lags=_LONG_LAGS,
        sales_by_client_lags=_LONG_LAGS,
        sales_by_product_lags=_LONG_LAGS,
    ),
}


In [None]:
# Directory that receives the generated YAML configs. The path is relative, so
# it resolves against the working directory the notebook is run from.
OUTPUT_FOLDER = Path("../configs/5_data_eng_variations/")
# Create the folder (and any missing parents); a pre-existing folder is fine.
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)

In [None]:
# Write one YAML config per feature-engineering variant. Files are named
# "<index>_<variant name>.yaml", where index is the variant's position in
# data_configs.
for i, (config_name, params) in enumerate(data_configs.items()):
    # Deep copy so filling the placeholder never mutates the shared template.
    config = deepcopy(BASE_CONFIG)
    config["preprocessing_config"]["data_feature_eng_list"] = build_data_feature_eng_list(**params)

    # Zero-pad the index to three digits so the files sort in definition order.
    output_path = OUTPUT_FOLDER / f"{i:03d}_{config_name}.yaml"

    # Explicit encoding keeps the output independent of the platform locale.
    with open(output_path, "w", encoding="utf-8") as f:
        yaml.safe_dump(config, f)