In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from typing import Literal
from pydantic import BaseModel

class Args(BaseModel):
    """Run parameters for this notebook, validated by pydantic on construction."""

    # Company code; later passed to get_company_by_code, get_training_configs
    # and load_hyperparams.
    company: Literal["LMK", "AMK", "GL", "RT"]
    # Target environment; forwarded as `env` to the data-loading helpers.
    env: Literal["dev", "prod"]
    # Gates the Databricks-only setup (auto_setup_env, FeatureStoreClient).
    is_running_on_databricks: bool


In [14]:
# Parameters for this run; edit these values to switch company or environment.
args = Args(
    company="RT",
    env="dev",
    is_running_on_databricks=False
)

# Module-level alias used by the cells below to gate Databricks-only steps.
is_running_on_databricks = args.is_running_on_databricks

In [15]:
from databricks_env import auto_setup_env
# Environment auto-setup is only invoked when the run is flagged as Databricks.
if is_running_on_databricks:
    auto_setup_env()

In [16]:
from dishes_forecasting.spark_context import create_spark_context
# Spark handle passed to every data-loading and training helper below.
spark = create_spark_context()

In [17]:
import logging

from constants.companies import get_company_by_code

# Resolve the company object from the configured code; `company_id` is used by
# the data-loading helpers and `company` itself is passed to train_model.
company_code = args.company
company = get_company_by_code(company_code=company_code)
company_id = company.company_id

In [18]:
from dishes_forecasting.train.configs.feature_lookup_config import feature_lookup_config_list
from dishes_forecasting.train.training_set import create_training_set
from databricks.feature_store import FeatureStoreClient

from dishes_forecasting.train.configs.train_configs import get_training_configs
# Training configuration for the selected company.
train_config = get_training_configs(company_code=args.company)

# A Feature Store client is only instantiated on Databricks; otherwise `fs`
# stays None. The feature store lookup itself is switched off below.
fs = FeatureStoreClient() if is_running_on_databricks else None

# Build the training set together with the primary-key/target frame.
training_set, df_training_pk_target = create_training_set(
    spark=spark,
    env=args.env,
    company_id=company_id,
    train_config=train_config,
    feature_lookup_config_list=feature_lookup_config_list,
    is_use_feature_store=False,
    fs=fs,
)


In [None]:
from dishes_forecasting.train.train_pipeline import train_model
from dishes_forecasting.train.configs.hyper_params import load_hyperparams
# Hyperparameter sets for the selected company (presumably LightGBM, random
# forest and XGBoost, judging by the names — confirm in hyper_params).
params_lgb, params_rf, params_xgb = load_hyperparams(company=args.company)

# Train the pipeline and collect the train/test splits and evaluation outputs.
# `env` comes from `args` (not a hard-coded "dev") so that training runs
# against the same environment the training set was built from.
custom_pipeline, X_train, X_test, y_train, y_test, mape, mae, df_test_metrics, df_test_binned = train_model(
    training_set=training_set,
    params_lgb=params_lgb,
    params_rf=params_rf,
    params_xgb=params_xgb,
    is_running_on_databricks=is_running_on_databricks,
    env=args.env,
    spark=spark,
    train_config=train_config,
    company=company
)

In [None]:
# Display `mae` as returned by train_model (when cells are run top to bottom).
mae

In [None]:
# Display `mape` as returned by train_model (when cells are run top to bottom).
mape

In [None]:
# Display the binned test results returned by train_model.
df_test_binned

In [56]:
import numpy as np
# Predict on the test split and exponentiate the output.
# NOTE(review): presumably the pipeline predicts a log-transformed target — confirm.
y_pred_transformed = np.exp(custom_pipeline.predict(X_test))

In [None]:
from dishes_forecasting.train.training_set import get_training_pk_target
# Load the primary-key/target frame for the configured training week range.
# `env` follows `args.env` (not a hard-coded "dev") so this lookup stays
# consistent with the environment used elsewhere in the notebook.
df_training_target = get_training_pk_target(
    spark=spark,
    env=args.env,
    company_id=company_id,
    min_yyyyww=train_config["train_start_yyyyww"],
    max_yyyyww=train_config["train_end_yyyyww"],
    is_training_set=False,
)

In [None]:
# Display the test feature split returned by train_model.
X_test

In [None]:
from dishes_forecasting.train.metrics import get_test_metrics
# Recompute test metrics from the exponentiated predictions.
# NOTE: this rebinds `mae`, `mape` and `df_test_binned`, replacing the values
# returned by train_model in the training cell above.
df_test, mae, mape, df_test_binned = get_test_metrics(
    spark=spark,
    env=args.env,
    X_test=X_test,
    company_id=company_id,
    y_pred_transformed=y_pred_transformed,
    min_yyyyww=train_config["train_start_yyyyww"],
    max_yyyyww=train_config["train_end_yyyyww"],
    is_normalized=True
)

In [None]:
# Display `mae` as recomputed by get_test_metrics (when cells are run top to bottom).
mae

In [None]:
# Preview a random subset of the prediction columns.
# Assumes `df_test` is a pandas DataFrame. The sample size is capped at the
# frame length, because DataFrame.sample raises when n exceeds the row count
# without replacement. The seed is fixed so that re-running the notebook shows
# the same rows.
df_test[[
    "y_pred_transformed",
    "variation_ratio",
    "product_variation_qty_pred",
    "product_variation_quantity"
]].sample(n=min(50, len(df_test)), random_state=42)