In [6]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and local code edits take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:
from constants.companies import get_company_by_code
# Company under inspection; its id is looked up from the short company code.
company_code = "AMK"
company = get_company_by_code(company_code)
company_id = company.company_id

In [8]:
from reci_pick.predict.configs.predict_configs import get_company_predict_configs
# Prediction settings for the selected company; later cells read the
# preprocessor URI, chunk count and scoring options from this object.
company_predict_configs = get_company_predict_configs(
    company_code=company_code,
)

In [None]:
from reci_pick.predict.data import get_dataframes

# Fetch the five input frames for this company with env="dev", starting at
# the configured user-profile start week.
(
    df_recipes,
    df_menu_recipes,
    df_order_history,
    df_active_users,
    df_concept_preferences,
) = get_dataframes(
    company_id=company_id,
    start_yyyyww=company_predict_configs.user_profile_start_yyyyww,
    env="dev",
)

In [None]:
from reci_pick.preprocessing import preprocess_recipes_dataframe
import mlflow
# Load the already-fitted recipe preprocessor registered under the "dev" key
# of the company config (registry URI "databricks-uc").
mlflow.set_registry_uri("databricks-uc")
preprocessor_uri = company_predict_configs.preprocessor_uri["dev"]
recipe_preprocessor = mlflow.sklearn.load_model(preprocessor_uri)

# The two allergen columns are removed before the frame is handed to the
# preprocessing step; the second return value is not needed here.
allergen_columns = ["allergen_preference_id_list", "allergen_name_combinations"]
df_recipes_without_allergens = df_recipes.drop(columns=allergen_columns)
df_recipes_processed, _ = preprocess_recipes_dataframe(
    df_recipes=df_recipes_without_allergens,
    company_configs=company_predict_configs,
    fitted_recipe_transformer=recipe_preprocessor,
)

In [13]:
from reci_pick.train.training_data import get_recipe_embeddings
# Two lookups keyed by recipe id are returned: one for embeddings, one for names.
recipe_lookups = get_recipe_embeddings(
    df_recipes_processed=df_recipes_processed,
    recipe_numeric_features=company_predict_configs.recipe_numeric_features,
)
id_to_recipe_embedding_lookup, id_to_name_lookup = recipe_lookups

In [None]:
from reci_pick.predict.predict_data import get_user_embeddings
# Compute the user embeddings used for scoring.
# The pooling settings are read from the company config, exactly as the
# concept-user embeddings further down do, so both embedding sets are built
# with the same hyper-parameters (previously hard-coded here as 8 / 5 / 24 /
# "mean", which could drift from the config).
user_embedding_dict = get_user_embeddings(
    df_order_history=df_order_history,
    df_user_preferences=df_active_users,
    id_to_recipe_embedding_lookup=id_to_recipe_embedding_lookup,
    top_n_per_concept=company_predict_configs.top_n_per_concept,
    top_n_per_user=company_predict_configs.top_n_per_user,
    look_back_weeks=company_predict_configs.look_back_weeks,
    pooling_method=company_predict_configs.pooling_method,
)

In [None]:
from reci_pick.predict.predict_data import get_menu_to_predict
# Select the menu rows to score (empty prediction_date, cut-off day 2,
# 4 weeks), then collect the distinct main recipe ids appearing on them.
menus_to_predict = get_menu_to_predict(
    df_menu_recipes=df_menu_recipes, prediction_date="", cut_off_day=2, num_weeks=4
)
main_recipe_id_column = menus_to_predict["main_recipe_id"]
recipes_to_predict = main_recipe_id_column.unique()

In [None]:
from reci_pick.postprocessing import map_new_recipes_with_old
# Map recipes on the menus to predict to earlier recipes, using the recipe
# embedding lookup and a similarity threshold of 0.91.
similar_recipe_inputs = dict(
    df_menu_recipes=df_menu_recipes,
    df_menus_to_predict=menus_to_predict,
    id_to_embedding_lookup=id_to_recipe_embedding_lookup,
    id_to_name_lookup=id_to_name_lookup,
)
df_similar_recipes = map_new_recipes_with_old(
    **similar_recipe_inputs,
    similarity_threshold=0.91,
)


In [None]:
from reci_pick.predict.model import get_model_and_version
# Resolve the "champion" alias of this company's registered model in the dev
# catalog and load it together with its version.
registered_model = f"reci_pick_{company_code.lower()}"
model_name = f"models:/dev.mloutputs.{registered_model}@champion"
trained_model, model_version = get_model_and_version(model_uri=model_name)

In [18]:
from reci_pick.predict.predict_data import divide_users_into_chunks
# Every key of the embedding dict is a user to score; split them into the
# number of chunks configured for this company.
users_list = [user_id for user_id in user_embedding_dict.keys()]
user_chunks = divide_users_into_chunks(
    user_id_list=users_list,
    num_chunks=company_predict_configs.num_user_chunks,
)

In [19]:
from reci_pick.predict.predict import prepare_meta_data_menus_predicted
from datetime import datetime
from pytz import timezone
import uuid

# One UTC timestamp string and one random UUID identify this run; both are
# passed to every output-preparation call below.
utc_now = datetime.now(tz=timezone("UTC"))
timestamp_prediction = utc_now.strftime("%Y-%m-%d %H:%M:%S")
run_id = str(uuid.uuid4())

In [None]:
from reci_pick.train.model import predict_recipe_scores
from reci_pick.postprocessing import modify_score_based_on_purchase_history
from reci_pick.predict.outputs import prepare_recommendations_for_output, prepare_outputs
from reci_pick.db import append_pandas_df_to_catalog
from reci_pick.predict.recommendations import make_top_k_menu_recommendations, modify_scores_for_recommendations

# Index of the chunk that contains the billing agreement under inspection.
# Defaults to 0 when the id is in no chunk or when there are no chunks at
# all, so `user_chunk_num` is always (re)assigned by this cell.
target_billing_agreement_id = 1350590
user_chunk_num = next(
    (
        chunk_index
        for chunk_index, chunk in enumerate(user_chunks)
        if target_billing_agreement_id in chunk
    ),
    0,
)

# Only the chunk at `user_chunk_num` is scored; the one-element slice keeps
# the loop form (and yields nothing if the index is out of range).
selected_chunks = user_chunks[user_chunk_num : user_chunk_num + 1]
for i, users in enumerate(selected_chunks):
    print(i)

    # Columns/metadata shared by the score output and the top-k output.
    shared_output_kwargs = dict(
        identifier_col="billing_agreement_id",
        score_col="score_modified",
        model_version=model_version,
        company_id=company_id,
        timestamp_prediction=timestamp_prediction,
        run_id=run_id,
    )

    # 1) Raw model scores for every (user, recipe) pair in this chunk.
    df_scores = predict_recipe_scores(
        recipe_ids_to_predict=recipes_to_predict,
        user_billing_agreements=users,
        user_embeddings_pooled_dict=user_embedding_dict,
        id_to_embedding_lookup=id_to_recipe_embedding_lookup,
        model=trained_model,
    )

    # 2) Adjust the scores using the purchase history and the configured bonus.
    print("Modifying scores to reward repeated dishes...")
    df_score_modified = modify_score_based_on_purchase_history(
        score_df_exploded=df_scores,
        df_order_history=df_order_history,
        bonus_factor=company_predict_configs.repeated_purchase_bonus_factor,
        is_map_similar_recipes=company_predict_configs.is_map_similar_recipes,
        df_similar_recipes=df_similar_recipes,
    )
    df_outputs = prepare_outputs(df_scores=df_score_modified, **shared_output_kwargs)
    # Catalog write intentionally disabled in this notebook:
    # append_pandas_df_to_catalog(
    #     df=df_outputs,
    #     table_name="mloutputs.reci_pick_scores",
    #     env="dev"
    # )

    # 3) Further score adjustments for recommendations, then top 20 per menu.
    df_scores_modified = modify_scores_for_recommendations(
        df_scores=df_score_modified,
        df_taste_preferences=df_active_users,
        company_configs=company_predict_configs,
        df_recipes=df_recipes,
        df_menu_recipes=df_menu_recipes,
        df_menus_to_predict=menus_to_predict,
    )
    df_topk_recommendations = make_top_k_menu_recommendations(
        top_k=20,
        df_menu_scores=df_scores_modified,
        score_col="score_modified",
    )
    df_top_k_outputs = prepare_recommendations_for_output(
        df_topk_recommendations=df_topk_recommendations,
        **shared_output_kwargs,
    )
    # Disabled writes. NOTE(review): these snippets reference names that are not
    # defined anywhere in this notebook (`args`, `PROJECT_DIR`,
    # `start_menu_yyyyww`, `end_menu_yyyyww`, `df_top_k_recommendations`).
    # append_pandas_df_to_catalog(
    #     df=df_top_k_outputs, table_name="mloutputs.reci_pick_recommendations", env=args.env
    # )

    # file_path = PROJECT_DIR / "data" / "recommendations" / f"{company_code}" / f"{company_code}_{start_menu_yyyyww}_{end_menu_yyyyww}_chunk_{i}.csv"
    # df_top_k_recommendations.to_csv(file_path, index=False)


In [None]:
# Display df_recipes with the allergen id column renamed. The renamed frame
# is only shown as the cell output; it is not assigned, so `df_recipes`
# itself keeps its original column name.
allergen_column_renames = {"allergen_preference_id_list": "allergen_preference_id_list_recipe"}
df_recipes.rename(columns=allergen_column_renames)

In [16]:
# create default scores for concept combinations
import pandas as pd
# One row per distinct concept combination found in the order history; the
# surviving row index is reused as a stand-in billing agreement id.
unique_concept_combinations = df_order_history["concept_combination_list"].drop_duplicates()
df_concept_users = unique_concept_combinations.to_frame()
df_concept_users["billing_agreement_id"] = df_concept_users.index

In [None]:
from reci_pick.predict.predict_data import get_cold_start_users_embeddings
from reci_pick.predict.predict import prepare_concept_user_scores_for_output
from reci_pick.train.model import predict_recipe_scores
import logging
from reci_pick.db import append_pandas_df_to_catalog
logging.info("Generating recommendations for concept users....")

# One pseudo-user per distinct concept combination in the order history; the
# surviving row index is reused as its billing agreement id.
concept_combinations = df_order_history["concept_combination_list"].drop_duplicates()
df_concept_users = concept_combinations.to_frame()
df_concept_users["billing_agreement_id"] = df_concept_users.index

# Embedding settings all come from the company config.
embedding_settings = dict(
    top_n_per_user=company_predict_configs.top_n_per_user,
    top_n_per_concept=company_predict_configs.top_n_per_concept,
    look_back_weeks=company_predict_configs.look_back_weeks,
    pooling_method=company_predict_configs.pooling_method,
)
concept_embeddings = get_cold_start_users_embeddings(
    df_non_cold_start_order_history=df_order_history,
    df_cold_start_user_preferences=df_concept_users,
    id_to_recipe_embedding_lookup=id_to_recipe_embedding_lookup,
    **embedding_settings,
)

# Score every recipe to predict for each concept pseudo-user.
concept_user_ids = df_concept_users["billing_agreement_id"]
df_scores_concept = predict_recipe_scores(
    recipe_ids_to_predict=recipes_to_predict,
    user_billing_agreements=concept_user_ids,
    user_embeddings_pooled_dict=concept_embeddings,
    id_to_embedding_lookup=id_to_recipe_embedding_lookup,
    model=trained_model,
)

# Shape the scores for the output table, stamped with this run's metadata.
concept_score_metadata = dict(
    model_version=model_version,
    timestamp_prediction=timestamp_prediction,
    company_code=company_code,
    run_id=run_id,
)
df_scores_concept_outputs = prepare_concept_user_scores_for_output(
    df_scores_concept=df_scores_concept,
    df_concept_users=df_concept_users,
    df_concept_preferences=df_concept_preferences,
    **concept_score_metadata,
)
# Catalog write intentionally disabled in this notebook:
# append_pandas_df_to_catalog(
#     df=df_scores_concept_outputs, table_name="mloutputs.reci_pick_scores_concept_default", env="dev"
# )

In [None]:
from reci_pick.predict.outputs import prepare_concept_recommendations

# Join the concept scores onto the menus to predict (merge keys are the
# columns the two frames share), then keep the 10 best per menu by "score".
df_menu_scores_concept = df_scores_concept.merge(menus_to_predict)
df_recs_concept = make_top_k_menu_recommendations(
    df_menu_scores=df_menu_scores_concept,
    top_k=10,
    score_col="score",
)

# Shape the recommendations for output, stamped with this run's metadata.
concept_recs_metadata = dict(
    model_version=model_version,
    timestamp_prediction=timestamp_prediction,
    company_code=company_code,
    run_id=run_id,
)
df_recs_concept_outputs = prepare_concept_recommendations(
    df_recs_concept=df_recs_concept,
    df_concept_users=df_concept_users,
    df_concept_preferences=df_concept_preferences,
    **concept_recs_metadata,
)

df_recs_concept_outputs

In [None]:
import numpy as np
# Metadata row(s) describing the menus covered by this run.
# `num_users` counts the users in the chunk that the scoring loop actually
# processed (the chunk at `user_chunk_num`), instead of the first two chunks,
# so the metadata matches the predictions produced above. The slice keeps
# this safe when the index is out of range (count is then 0).
scored_chunks = user_chunks[user_chunk_num : user_chunk_num + 1]
df_menus_predicted = prepare_meta_data_menus_predicted(
    df_menus_predicted=menus_to_predict,
    run_id=run_id,
    timestamp_prediction=timestamp_prediction,
    company_code=company_code,
    num_users=sum(len(chunk) for chunk in scored_chunks),
)

df_menus_predicted
# Catalog write intentionally disabled in this notebook:
# append_pandas_df_to_catalog(
#     df=df_menus_predicted, table_name="mloutputs.reci_pick_scores_metadata_menus_predicted", env="dev"
# )

In [4]:
# Spark SQL: recommendations from the most recent run per
# (menu_year, menu_week, company_id), for menu weeks from next week onwards.
#
# The week filter compares (year, week) as a single yyyyww key. Filtering
# `menu_year` and `menu_week` independently drops menus in the first weeks of
# the following year whenever the current week number is high (e.g. in week
# 50, week 2 of next year fails `menu_week >= 51`).
# The reference year is taken from the Thursday of next week
# (`next_day(..., 'Monday') + 3 days`) so that it is the ISO week-year
# belonging to `weekofyear(current_date() + 1 week)`.
# NOTE(review): assumes `menu_year` / `menu_week` are ISO year / ISO week — confirm.
latest_recs_sql = """
with runs as (
    select
        menu_year,
        menu_week,
        run_id,
        company_id,
        created_at,
        row_number() OVER(partition by menu_year, menu_week, company_id order by created_at desc) AS row_num
    from dev.mloutputs.reci_pick_scores_metadata_menus_predicted
    where
        menu_year * 100 + menu_week >= (
            year(next_day(current_date(), 'Monday') + INTERVAL 3 DAYS) * 100
            + weekofyear(current_date() + INTERVAL 1 WEEK)
        )
),

latest_run as (
    select * from runs where row_num = 1
),

recommendations as (
    select
        company_id,
        billing_agreement_id,
        menu_year,
        menu_week,
        main_recipe_ids as main_recipe_id,
        scores as score,
        run_id,
        model_version,
        created_at
    from dev.mloutputs.reci_pick_recommendations
),

latest_recommendations as (
    select
        recommendations.*
    from recommendations
    inner join latest_run
    on recommendations.menu_year = latest_run.menu_year
        and recommendations.menu_week = latest_run.menu_week
        and recommendations.run_id = latest_run.run_id
        and recommendations.company_id = latest_run.company_id
)

select
    menu_year,
    menu_week,
    company_id,
    billing_agreement_id,
    arrays_zip(main_recipe_id, score) as recipes,
    model_version
from latest_recommendations
"""



In [1]:
# SQL text: the first 10 recommended recipes (with scores) per menu week for
# billing agreement 1350590, read from dev.mloutputs.reci_pick_recommendations
# and returned as one row per recipe with its recipe name attached.
#
# Steps, CTE by CTE:
#   top_10_recs     - slice the recipe-id and score arrays to their first 10 entries
#   zipped/exploded - zip the two arrays and explode to one row per (recipe, score)
#   fact_menus      - menu rows from prod.gold.fact_menus with portion_id = 2 and is_dish
#   valid_recipes   - recipes on a menu for the same company / year / week as any
#                     exploded row (the join does not match on recipe id)
#   recipes_in_menu - distinct (main_recipe_id, recipe_name) for those recipes
#   final           - left join of the names onto the exploded rows
#
# NOTE(review): there is no filter on run_id / created_at, so rows from every
# stored run for this billing agreement are returned, not just the latest.
# NOTE(review): if a main_recipe_id maps to more than one recipe_name in
# recipes_in_menu, the final left join will duplicate rows — confirm.
sylvia_top_10_recs = """
    with top_10_recs as (
        select
            company_id,
            billing_agreement_id,
            menu_year,
            menu_week,
            slice(main_recipe_ids, 1, 10) as recs,
            slice(scores, 1, 10) as scores,
            created_at
        from dev.mloutputs.reci_pick_recommendations
        where billing_agreement_id = 1350590
    ),

    zipped as (
        select
            company_id,
            billing_agreement_id,
            menu_year,
            menu_week,
            explode(arrays_zip(recs, scores)) as rec_score,
            created_at
        from top_10_recs
    ),

    exploded as (
        select
            company_id,
            billing_agreement_id,
            menu_year,
            menu_week,
            rec_score.recs as main_recipe_id,
            rec_score.scores as score,
            created_at
        from zipped

    ),

    fact_menus as (
        select
            company_id,
            menu_year,
            menu_week,
            recipe_id,
            fk_dim_recipes
        from
            prod.gold.fact_menus
            where portion_id = 2 -- 4 portions
            and is_dish
    ),
    valid_recipes as (
        select
            fk_dim_recipes
    from exploded
    inner join
        fact_menus
    on fact_menus.menu_week = exploded.menu_week
        and fact_menus.menu_year = exploded.menu_year
        and fact_menus.company_id = exploded.company_id

    ),

    recipes_in_menu as (
        select
            distinct
            main_recipe_id,
            recipe_name
        from prod.gold.dim_recipes
        inner join valid_recipes
        on valid_recipes.fk_dim_recipes = dim_recipes.pk_dim_recipes
    ),

    final as (
        select
            exploded.*,
            recipe_name
        from exploded
        left join recipes_in_menu
        on exploded.main_recipe_id = recipes_in_menu.main_recipe_id

    )

    select * from final
    order by menu_year, menu_week,score desc
"""

In [None]:
from catalog_connector import connection

# Execute the top-10 query through the catalog connection and convert the
# result to a pandas DataFrame.
sylvia_query_result = connection.sql(sylvia_top_10_recs)
df_sylvia_top_10_recs = sylvia_query_result.toPandas()