In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from constants.companies import get_company_by_code
# Company under inspection; later cells reuse this code for config lookup,
# the registered model name and the chunk count.
company_code = "AMK"
# Look up the company object for the code and keep its `company_id` attribute.
company_id = get_company_by_code(company_code).company_id

In [3]:
from reci_pick.train.configs.train_configs import get_company_train_configs
# Training-side configuration for the selected company.
company_configs = get_company_train_configs(company_code=company_code)
# Training start week from the config; passed to get_dataframes as `start_yyyyww`.
# Presumably an integer in year+week form (e.g. 202521) - TODO confirm.
start_yyyyww = company_configs.train_start_yyyyww

In [None]:
from reci_pick.predict.data import get_dataframes

# Load the four input frames used by the rest of the notebook (recipes, menu
# recipes, order history, active users) for this company, requesting the "dev"
# environment and starting from the training start week.
df_recipes, df_menu_recipes, df_order_history, df_active_users = get_dataframes(
    company_id=company_id,
    start_yyyyww=start_yyyyww,
    env="dev"
)


In [6]:
from reci_pick.predict.configs.predict_configs import get_company_predict_configs
# Prediction-side configuration for the same company; kept separate from the
# training configs loaded above (`company_configs`).
company_predict_configs = get_company_predict_configs(company_code=company_code)

In [None]:
from reci_pick.preprocessing import preprocess_recipes_dataframe
import mlflow
# Use the Databricks Unity Catalog model registry for the model loads in this notebook.
mlflow.set_registry_uri("databricks-uc")
# URI of the fitted preprocessor, taken from the "dev" entry of the predict configs.
preprocessor_uri = company_predict_configs.preprocessor_uri["dev"]
recipe_preprocessor = mlflow.sklearn.load_model(preprocessor_uri)
# Run preprocessing with the already-fitted transformer; the second return
# value is discarded.
# `allergen_id_list` is dropped before preprocessing - presumably because it
# is not a model feature (TODO confirm). The untouched `df_recipes` still
# carries the column for the later post-processing cells.
df_recipes_processed, _ = preprocess_recipes_dataframe(
    df_recipes=df_recipes.drop(columns=["allergen_id_list"]),
    company_configs=company_predict_configs,
    fitted_recipe_transformer=recipe_preprocessor,
)

In [8]:
from reci_pick.train.training_data import get_recipe_embeddings
# Build two lookups from the processed recipes: one for recipe embeddings and
# one for recipe names (presumably both keyed by recipe id - verify against
# get_recipe_embeddings).
# Note: the numeric feature list comes from the *training* configs.
id_to_recipe_embedding_lookup, id_to_name_lookup = get_recipe_embeddings(
    df_recipes_processed=df_recipes_processed,
    recipe_numeric_features=company_configs.recipe_numeric_features
)

In [None]:
from reci_pick.predict.predict_data import get_user_embeddings
# Derive user embeddings from order history, the active-user frame (passed as
# the preferences frame) and the recipe embedding lookup.
# The keys of the returned dict are later used as the list of users to score.
# NOTE(review): the top_n / look_back_weeks / pooling values are hard-coded
# here; confirm they match the settings the model was trained with.
user_embedding_dict = get_user_embeddings(
    df_order_history=df_order_history,
    df_user_preferences=df_active_users,
    id_to_recipe_embedding_lookup=id_to_recipe_embedding_lookup,
    top_n_per_concept=8,
    top_n_per_user=5,
    look_back_weeks=24,
    pooling_method="mean"
)

In [10]:
from reci_pick.predict.predict_data import get_menu_to_predict
# Select the menu rows to generate recommendations for.
# NOTE(review): `prediction_date` is an empty string - presumably a sentinel
# for "use the current date"; confirm against get_menu_to_predict.
menus_to_predict = get_menu_to_predict(
    df_menu_recipes=df_menu_recipes,
    prediction_date="",
    cut_off_day=2,
    num_weeks=4
)
# Distinct recipe ids on those menus; these are the candidates handed to the
# scoring function in later cells.
recipes_to_predict = menus_to_predict["main_recipe_id"].unique()

In [None]:
from reci_pick.postprocessing import map_new_recipes_with_old
# Map recipes on the menus to predict to similar recipes from the menu history,
# using the embedding lookup and a similarity threshold of 0.91.
# The name lookup is passed along as well (presumably for readable output - confirm).
df_similar_recipes = map_new_recipes_with_old(
    df_menu_recipes=df_menu_recipes,
    df_menus_to_predict=menus_to_predict,
    id_to_embedding_lookup=id_to_recipe_embedding_lookup,
    id_to_name_lookup=id_to_name_lookup,
    similarity_threshold=0.91
)

# Display the resulting mapping for inspection.
df_similar_recipes

In [None]:
# Registered-model URI of the form `dev.mloutputs.reci_pick_<lower-cased
# company code>`, resolved through the `champion` alias.
# Relies on `mlflow` having been imported, and the registry URI set, in an
# earlier cell.
model_name = f'models:/dev.mloutputs.reci_pick_{company_code.lower()}@champion'
trained_model = mlflow.tensorflow.load_model(model_name)

In [13]:
import numpy as np

# Every user that has an embedding is a prediction candidate.
user_id_list = np.array(list(user_embedding_dict))
# AMK and RT are split into 10 chunks; every other company into 20.
num_chunks = 10 if company_code in ("AMK", "RT") else 20
# array_split tolerates a user count that is not divisible by num_chunks.
user_chunks = np.array_split(user_id_list, num_chunks)

In [14]:
# First and last menu week covered by the prediction window. In the visible
# cells they are only referenced by the commented-out CSV export path.
start_menu_yyyyww = menus_to_predict["menu_yyyyww"].min()
end_menu_yyyyww = menus_to_predict["menu_yyyyww"].max()

In [None]:
# Display the menus selected for prediction (full frame) for a visual check.
menus_to_predict

In [None]:
from reci_pick.train.model import predict_recipe_scores
from reci_pick.predict.recommendations import make_top_k_menu_recommendations, modify_scores_for_recommendations

# Score, post-process and rank recipes one chunk of users at a time.
# NOTE: only the first two chunks are processed (`[:2]`); drop the slice to
# cover every user. Each iteration overwrites the result frames, so after the
# loop `df_scores_modified` and `df_top_k_recommendations` hold the last
# processed chunk only.
for i, users in enumerate(user_chunks[:2]):
    print(i)
    # Raw model scores for every (user in chunk, candidate recipe) pair.
    df_scores = predict_recipe_scores(
        recipe_ids_to_predict=recipes_to_predict,
        user_billing_agreements=users,
        user_embeddings_pooled_dict=user_embedding_dict,
        id_to_embedding_lookup=id_to_recipe_embedding_lookup,
        model=trained_model
    )
    df_scores_modified = modify_scores_for_recommendations(
        df_scores=df_scores,
        df_order_history=df_order_history,
        df_similar_recipes=df_similar_recipes,
        df_taste_preferences=df_active_users,
        company_configs=company_predict_configs,
        df_recipes=df_recipes,
        df_menu_recipes=df_menu_recipes,
        # Fix: pass the menus selected for prediction. The original passed
        # `df_menu_recipes` here as well, so the same frame was used both as
        # the menu history and as the menus to predict; the other cells pass
        # `menus_to_predict` in this role.
        df_menus_to_predict=menus_to_predict,
    )
    n = 10
    df_top_k_recommendations = make_top_k_menu_recommendations(
        top_k=n, df_menu_scores=df_scores_modified
    )

    # Attach recipe attributes for readability. No `on=` is given, so pandas
    # joins on every column the two frames have in common.
    df_top_k_recommendations = df_top_k_recommendations.merge(
        df_recipes, how="left"
    )

    # Optional per-chunk CSV export, left disabled. `PROJECT_DIR` is not defined
    # in any visible cell and must be provided before re-enabling.
    # file_path = PROJECT_DIR / "data" / "recommendations" / f"{company_code}" / f"{company_code}_{start_menu_yyyyww}_{end_menu_yyyyww}_chunk_{i}.csv"
    # df_top_k_recommendations.to_csv(file_path, index=False)


In [None]:
# Display the recommendations produced by the last loop iteration (full frame).
df_top_k_recommendations

In [None]:
# Week and recipe name of every recommendation for menu week 202521.
df_top_k_recommendations.loc[
    df_top_k_recommendations["menu_yyyyww"] == 202521,
    ["menu_week", "recipe_name"],
]

In [None]:
# Peek at the post-processed scores left over from the last loop iteration.
df_scores_modified.head()

In [None]:

from reci_pick.train.model import predict_recipe_scores
from reci_pick.postprocessing import modify_score_based_on_purchase_history, check_for_preference_violation, penalize_high_menu_occurance

# Single-user walkthrough: scores one hard-coded billing agreement and applies
# three post-processing functions one after another, then ranks the result.
# NOTE: this rebinds `users` and `n`, which the chunked loop cell also assigns.
users = [1350590]
score_df = predict_recipe_scores(
    recipe_ids_to_predict=recipes_to_predict,
    user_billing_agreements=users,
    user_embeddings_pooled_dict=user_embedding_dict,
    id_to_embedding_lookup=id_to_recipe_embedding_lookup,
    model=trained_model
)
# Step 1: adjust scores using the order history, with bonus_factor=0.25 and
# similar-recipe mapping switched on (exact effect is defined in
# reci_pick.postprocessing).
df_score_modified = modify_score_based_on_purchase_history(
    score_df_exploded=score_df,
    df_order_history=df_order_history,
    bonus_factor=0.25,
    is_map_similar_recipes=True,
    df_similar_recipes=df_similar_recipes
)

# Step 2: preference check against the active-user frame. Recipes are reduced
# to one row per `main_recipe_id`, keeping only the main-ingredient and
# allergen columns.
df_score_modified = check_for_preference_violation(
    score_df=df_score_modified,
    df_taste_preference=df_active_users,
    df_recipes=df_recipes[["main_recipe_id", "recipe_main_ingredient_name_english", "allergen_id_list"]].drop_duplicates(subset="main_recipe_id")
)

# Step 3: menu-occurrence penalization on the `score_modified` column.
# A copy of df_menu_recipes is passed - presumably to protect the original
# from in-place changes inside the function (confirm).
df_score_modified = penalize_high_menu_occurance(
    df_menu_recipes=df_menu_recipes.copy(),
    df_menu_to_predict=menus_to_predict,
    df_scores=df_score_modified,
    alpha=-0.85,
    penalization_factor=0.1,
    score_col="score_modified"
)

# Attach the scores to the menus to predict. No `on=` is given, so pandas
# joins on every column the two frames have in common.
df_menu_scores = menus_to_predict.merge(
    df_score_modified,
    how="left"
)

# Order by menu week and billing agreement, highest score first, so that
# `head(n)` per group below keeps the n best-scoring rows.
df_menu_scores = df_menu_scores.sort_values(
    by=["menu_yyyyww", "billing_agreement_id", "score_modified"],
    ascending=[True, True, False]
)
n = 10
df_top_n_recommendations = df_menu_scores.groupby(
    ["menu_year", "menu_week", "menu_yyyyww", "billing_agreement_id"]
).head(n)

# Left-join recipe attributes for readability; the join keys are again the
# columns shared by both frames.
df_top_n_recommendations = df_top_n_recommendations.merge(
    df_recipes, how="left"
)


In [None]:
# Compact view of the single-user recommendations: one row per recommended
# recipe, ordered by billing agreement and menu week, first 40 rows only.
(
    df_top_n_recommendations
    .merge(df_recipes[["main_recipe_id", "recipe_name"]])
    .loc[:, ["billing_agreement_id", "menu_yyyyww", "recipe_name", "score_modified"]]
    .sort_values(by=["billing_agreement_id", "menu_yyyyww"])
    .head(40)
)