In [1]:
import logging
import sys

# Configure basic logging: INFO-and-above records are written to stdout so they
# show up in the cell output.
# Timestamps are rendered as year-month-day hour:minute:second. The day field
# must be %d; %y is the two-digit year and would print e.g. "2025-03-25" on
# every day of March 2025.
date_strftime_format = "%Y-%m-%d %H:%M:%S"
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt=date_strftime_format,
)
# Notebook-level logger named after the current module, pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [2]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell execution, so edits to the imported project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from constants.companies import get_company_by_code
# Company to run for; the code is resolved to a company record and its id is
# kept for the data-loading call further down.
company_code = "AMK"
company = get_company_by_code(company_code)
company_id = company.company_id

In [4]:
from reci_pick.train.configs.train_configs import get_company_train_configs
# Per-company training configuration, looked up by company code.
company_configs = get_company_train_configs(
    company_code=company_code,
)
# First week passed to the data-loading call (the `yyyyww` suffix suggests a
# year-week value — TODO confirm the exact encoding).
start_yyyyww = company_configs.train_start_yyyyww


In [None]:
from reci_pick.train.data import get_dataframes

# Load the three source frames (recipes, menu recipes, order history) for the
# selected company, starting at `start_yyyyww`, using the "dev" environment.
dataframes = get_dataframes(
    company_id=company_id,
    start_yyyyww=start_yyyyww,
    env="dev",
)
df_recipes, df_menu_recipes, df_order_history = dataframes


In [None]:
from reci_pick.preprocessing import preprocess_recipes_dataframe
# Run the recipe preprocessing step. It returns the processed recipe frame and
# the preprocessor object, bound here as `fitted_preprocessor`.
(
    df_recipes_processed,
    fitted_preprocessor,
) = preprocess_recipes_dataframe(
    df_recipes=df_recipes,
    company_configs=company_configs,
)

# Preview the first rows of the processed frame as the cell output.
df_recipes_processed.head()

In [7]:
from reci_pick.train.training_data import get_recipe_embeddings
# Build the recipe-id -> embedding lookup from the processed recipes.
# get_recipe_embeddings returns two values; only the first is needed here, so
# the second is discarded into `_`.
id_to_recipe_embedding_lookup, _ = get_recipe_embeddings(
    df_recipes_processed=df_recipes_processed,
    recipe_numeric_features=company_configs.recipe_numeric_features,
)

In [9]:
from reci_pick.preprocessing import  split_train_test
# Split the order history into train and test parts; the number of weeks held
# out comes from the company config's `num_validation_weeks`. The third return
# value is kept as `split_yyyyww`.
(
    df_order_history_train,
    df_order_history_test,
    split_yyyyww,
) = split_train_test(
    df_order_history=df_order_history,
    num_prediction_weeks=company_configs.num_validation_weeks,
)

In [None]:
from reci_pick.train.training_data import get_inputs_for_training
# Assemble the model inputs from the training split of the order history,
# using mean pooling and the training size from the company config.
training_inputs = get_inputs_for_training(
    df_order_history_train=df_order_history_train,
    df_menu_recipes=df_menu_recipes,
    id_to_embedding_lookup=id_to_recipe_embedding_lookup,
    pooling_method="mean",
    training_size=company_configs.training_size,
    is_pad_popular_recipes=True,
    min_recipes_per_user=5,
)
(
    user_embeddings_input,
    recipe_embeddings_input,
    df_train,
    user_embeddings_pooled_dict,
) = training_inputs

# Training labels: the `is_purchase` column of the training frame.
target = df_train["is_purchase"].values

In [None]:
from reci_pick.train.model import train_nn_model
from mlflow.types.schema import Schema, TensorSpec
from mlflow.models import ModelSignature, infer_signature
import numpy as np
import mlflow
import pytz
from datetime import datetime

# Point MLflow at the Databricks tracking server.
# NOTE(review): "sylvia-liu" looks like a personal Databricks profile name;
# other users presumably need to substitute their own — confirm.
mlflow.set_tracking_uri("databricks://sylvia-liu")
# Close any run still active from an earlier execution of this cell so that
# start_run below opens a fresh run.
mlflow.end_run()
mlflow.set_experiment("/Shared/ml_experiments/reci-pick")

# Run names have the form "<company_code>_<UTC timestamp>".
timezone = pytz.timezone("UTC")
timestamp_now = datetime.now(tz=timezone).strftime("%Y-%m-%d-%H:%M:%S")
run_name = f"{company_code}_{timestamp_now}"

with mlflow.start_run(run_name=run_name) as run:
    # Use the notebook's configured logger (not the root logger) so the record
    # carries this module's name like every other message.
    logger.info("Logging preprocessor...")
    # Signature inferred from one raw recipe row and its processed counterpart.
    signature_preprocessor = infer_signature(
        model_input=df_recipes.head(1), model_output=df_recipes_processed.head(1)
    )
    mlflow.sklearn.log_model(
        fitted_preprocessor,
        artifact_path="preprocessor",
        signature=signature_preprocessor
    )

    # Train the network and record the loss/accuracy values it reports.
    trained_model, last_loss, last_accuracy = train_nn_model(
        user_embeddings_input=user_embeddings_input,
        recipe_embeddings_input=recipe_embeddings_input,
        target=target
    )
    mlflow.log_metrics({
        "last_loss": last_loss,
        "last_accuracy": last_accuracy
    })

    # Model signature: two float32 tensor inputs whose second dimension matches
    # the training inputs; -1 leaves the batch dimension unconstrained.
    input_schema = Schema(
        [
            TensorSpec(np.dtype(np.float32), (-1, user_embeddings_input.shape[1]), "user_profile"),
            TensorSpec(np.dtype(np.float32), (-1, recipe_embeddings_input.shape[1]), "recipe_profile"),
        ]
    )
    output_schema = Schema(
        [
            TensorSpec(np.dtype(np.float32), (-1, 1), "predictions"),  # Assuming the model outputs a single prediction per input
        ]
    )

    signature = ModelSignature(inputs=input_schema, outputs=output_schema)
    mlflow.tensorflow.log_model(
        model=trained_model,
        artifact_path="model",
        signature=signature
    )

    # Keep the run id so the logged artifacts can be referenced after the run.
    run_uuid = run.info.run_id

In [None]:
# mlflow.set_tracking_uri(f"databricks://sylvia-liu")
# mlflow.set_registry_uri("databricks-uc")
# mlflow.register_model(f"runs:/{run_uuid}/model", "dev.mloutputs.reci_pick_from_notebook")
# mlflow.register_model(f"runs:/{run_uuid}/preprocessor", registered_model_name_preprocessor)

In [None]:
# from reci_pick.paths import PROJECT_DIR
# model_name = f"{company_code}_{split_yyyyww}.keras"
# model_path = PROJECT_DIR / "data" / "models" / f"{model_name}"
# trained_model.save(model_path)

# Postprocessing & making recommendations

In [48]:
# from tensorflow import keras
# from reci_pick.paths import PROJECT_DIR
# model_path = PROJECT_DIR / "data" / "models" / f"{company_code}_202511.keras"
# trained_model = keras.models.load_model(model_path)

In [None]:
from reci_pick.train.metrics import get_recommendation_precisions
# Evaluate recommendations from the trained model against the held-out orders,
# on 2000 test users. The call returns a precision frame plus separate
# cold-start and non-cold-start precision values.
precision_results = get_recommendation_precisions(
    df_order_history_train=df_order_history_train,
    df_order_history_test=df_order_history_test,
    user_embeddings_pooled_dict=user_embeddings_pooled_dict,
    id_to_recipe_embedding_lookup=id_to_recipe_embedding_lookup,
    df_menu_recipes=df_menu_recipes,
    trained_model=trained_model,
    num_test_users=2000,
)
df_precision, cold_start_precision, non_cold_start_precision = precision_results

In [None]:
# Display the first value returned by get_recommendation_precisions.
df_precision

In [None]:
# Display the cold-start precision returned by get_recommendation_precisions.
cold_start_precision

In [None]:
# Display the non-cold-start precision returned by get_recommendation_precisions.
non_cold_start_precision