In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import logging

from lmkgroup_ds_utils.db.connector import DB

# Prep work

In [None]:
class args:
    """Run settings for this notebook, grouped as plain class attributes.

    NOTE(review): the name suggests this mirrors the CLI arguments of a
    script version of the pipeline — confirm.
    """

    company = "GL"  # also the key used to select the company config
    db_pw = ""  # forwarded as the DB connector password
    local = True  # forwarded to the DB and blob connectors
    write_to_db = False  # not read anywhere in this notebook
    save_output_locally = True


# Expose the settings the later cells read as plain module-level names.
company, local = args.company, args.local
save_output_locally, db_pw = args.save_output_locally, args.db_pw


In [None]:
from paths import CONFIG_DIR
from utils import read_yaml
# Load the database settings and pick out the parameters of the "read" entry.
db_configs = read_yaml(file_name="db", directory=CONFIG_DIR)
_read_cfg = db_configs["read"]
read_db_name = _read_cfg["db_name"]
read_db_env = _read_cfg["env"]
read_db_settings_path = _read_cfg["db_settings_path"]

# Load the settings of every company, then keep the selected company's entry.
company_configs = read_yaml(file_name="company_configs", directory=CONFIG_DIR)
company_config = company_configs[company]

In [None]:
# Open the read connection with the parameters taken from the db config.
# NOTE(review): no logging configuration is visible in this notebook, so this
# INFO record may be dropped by the default WARNING threshold — confirm.
logging.info("Connecting to db...")
read_db = DB(
    db_name=read_db_name,
    env=read_db_env,
    db_settings_path=read_db_settings_path,
    password=db_pw,
    local=local,
)

# Data

In [None]:
from orders_forecasting.pipeline import get_data_from_db
# Fetch the three input frames the forecasting pipeline works from.
(
    df_order_history,
    df_estimations_total,
    df_estimations_dishes,
) = get_data_from_db(company_config=company_config, read_db=read_db)

In [None]:
import pandas as pd
from orders_forecasting.pipeline import process_and_create_dataset
from orders_forecasting.pipeline import run_model
# Reference date handed to both dataset creation and the model run.
# The original cell kept `prediction_date = None` as a commented-out
# alternative; what `None` selects is not visible here — TODO confirm.
prediction_date = pd.to_datetime("2023-11-01")

# One model is fitted per target. Results are appended to `df_preds` in this
# order, and the final-prediction cell indexes `df_preds` by position.
target_cols = ["num_total_orders", "num_dishes_orders", "perc_dishes_orders"]

# Estimation augmentation is switched on for company "RT" only.
is_augment_estimation = company == "RT"

# The training parameters do not depend on the target, so read the YAML once
# instead of once per loop iteration. `**train_params` below builds a fresh
# keyword dict per call, so the top-level mapping is not modified by the call.
train_params = read_yaml(file_name="train", directory=CONFIG_DIR)

df_preds = []
for target_col in target_cols:
    df_train, df_test, df_holdout = process_and_create_dataset(
        df_order_history=df_order_history,
        df_estimations_total=df_estimations_total,
        df_estimations_dishes=df_estimations_dishes,
        is_augment_estimation=is_augment_estimation,
        company_config=company_config,
        prediction_date=prediction_date,
        target_col=target_col,
    )
    # `df_test` is rebound to the first value returned by `run_model`.
    df_test, _, df_holdout_pred, _ = run_model(
        company=company,
        df_train=df_train,
        df_holdout=df_holdout,
        df_test=df_test,
        target=target_col,
        prediction_date=prediction_date,
        **train_params,
    )
    df_preds.append(df_test)

# After the loop, `target_col`, `df_train`, `df_test`, `df_holdout` and
# `df_holdout_pred` hold the values of the LAST target only; the visualisation
# and upload cells further down read those leftover names.

In [None]:
from orders_forecasting.pipeline import process_final_predictions
# `df_preds` was filled in `target_cols` order, so position 0 is the
# total-orders prediction, 1 the dishes-orders one and 2 the dishes-percentage.
df_final = process_final_predictions(
    prediction_date=prediction_date,
    is_return_additional_col=True,
    df_pred_total_orders=df_preds[0],
    df_pred_dishes_orders=df_preds[1],
    df_pred_dishes_perc=df_preds[2],
)

In [None]:
# Display the result returned by process_final_predictions.
df_final

# Visualization

In [None]:
# Print the .info() summary of df_train. The training loop reassigns df_train
# for every target, so this is the training set of the last target processed
# ("perc_dishes_orders").
df_train.info()

In [None]:
from orders_forecasting.visualisation import plot_train_test_pred
from paths import PROJECT_DIR
# Plot train / holdout / test for the last target handled by the training loop:
# df_train, df_holdout, df_test and target_col all still hold that iteration's
# values. The original passed `target=target`, but no name `target` is defined
# anywhere in this notebook, so the cell raised NameError on a fresh kernel.
fig = plot_train_test_pred(
    df_train=df_train, df_val=df_holdout, df_test=df_test, target=target_col
)

# Combine year and week into one sortable integer (year * 100 + week), computed
# once and reused for both the first test week and the number of distinct weeks.
test_year_week = df_test["year"] * 100 + df_test["week"]
test_start_wk = test_year_week.min()
n_week = test_year_week.nunique()

# Write the figure as an HTML file into the project directory.
html_file_name = f"train_test_pred_{test_start_wk}_{n_week}_wks.html"
html_file_dir = PROJECT_DIR
fig.write_html(f"{html_file_dir}/{html_file_name}")

In [None]:
from lmkgroup_ds_utils.azure.storage import BlobConnector
# Blob-storage client, created with the same `local` flag as the DB connection.
datalake_handler = BlobConnector(local=local)

In [None]:
# Read the "datalake" YAML from the config directory.
datalake_configs = read_yaml(file_name="datalake", directory=CONFIG_DIR)

In [None]:
# Display the loaded datalake settings.
# NOTE(review): if this config can hold account names or keys, clear the cell
# output before sharing the notebook — confirm what the YAML contains.
datalake_configs

In [None]:
from orders_forecasting.pipeline import save_and_upload_results_to_datalake
# Hand the prediction frame and the figure to the upload helper.
# NOTE(review): `df_test` here is the output of the last target in the training
# loop only, not the combined `df_final` — confirm this is the intended upload.
save_and_upload_results_to_datalake(
    company=company,
    df_prediction=df_test,
    visualisation=fig,
    local_file_dir=PROJECT_DIR,
    datalake_handler=datalake_handler,
    datalake_configs=datalake_configs,
)