In [204]:
import plotly.express as px
import pandas as pd

In [205]:
# Load the transaction table. Later cells read its POC, ORDER_ID, SKU_ID,
# ITEMS_PHYS_CASES, ORDER_RANK and ORDER_PRODUCT_RANK columns.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
transactions = pd.read_pickle("data/transactions_ranked.pkl")

In [206]:
# Hold-out definition: for every POC, the order with the largest ORDER_ID.
# NOTE(review): this presumes ORDER_ID grows with time, so that the max is the
# most recent order — confirm against the data source.
POCs_last_order_id = transactions.groupby("POC")["ORDER_ID"].max().reset_index()

# Ground truth for the evaluation at the end of the notebook: the (POC, SKU_ID)
# pairs on the held-out order, ignoring lines whose ITEMS_PHYS_CASES is zero.
POCs_last_order = (
    pd.merge(transactions, POCs_last_order_id, how="inner", on=["POC", "ORDER_ID"])
    .loc[lambda rows: rows["ITEMS_PHYS_CASES"] != 0.0, ["POC", "SKU_ID"]]
    .copy()
)

In [207]:
# Anti-join: remove every row that belongs to a POC's held-out order so the
# remaining history contains nothing from the order being predicted.
# `indicator=True` tags each row with its merge origin; rows tagged
# "left_only" had no match among the held-out (POC, ORDER_ID) keys.
transactions = (
    pd.merge(
        transactions,
        POCs_last_order_id,
        how="left",
        on=["POC", "ORDER_ID"],
        indicator=True,
    )
    .loc[lambda rows: rows["_merge"] == "left_only"]
    .drop(columns="_merge")
)

In [208]:
# One row per (POC, SKU_ID) with the highest ORDER_PRODUCT_RANK and ORDER_RANK
# seen in the remaining history.
to_predict = (
    transactions
    .groupby(["POC", "SKU_ID"])[["ORDER_PRODUCT_RANK", "ORDER_RANK"]]
    .max()
    .reset_index()
)

# Pairs whose ORDER_PRODUCT_RANK is 0 are scored by the first-order model.
# NOTE(review): presumably rank 0 means the POC has not bought the SKU yet — confirm.
# The lookup key is the next order, hence ORDER_RANK + 1.
to_predict_firstorder = (
    to_predict
    .loc[to_predict["ORDER_PRODUCT_RANK"] == 0, ["POC", "SKU_ID", "ORDER_RANK"]]
    .assign(ORDER_RANK=lambda rows: rows["ORDER_RANK"] + 1)
)

# All other pairs are scored by the inter-order model, keyed on the gap between
# the two ranks, again shifted by one for the order being predicted.
to_predict_interorder = (
    to_predict
    .loc[to_predict["ORDER_PRODUCT_RANK"] != 0]
    .assign(
        ORDERS_BETWEEN_ORDERS=lambda rows: rows["ORDER_RANK"] - rows["ORDER_PRODUCT_RANK"] + 1
    )
    [["POC", "SKU_ID", "ORDERS_BETWEEN_ORDERS"]]
    .copy()
)

In [209]:
# Load the two rate tables and keep only the join keys plus RATE_cumsum.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
firstorder_model = (
    pd.read_pickle("models/probabilistic/first_order_rates.pkl")
    .loc[:, ["SKU_ID", "ORDER_RANK", "RATE_cumsum"]]
    .copy()
)
interorders_model = (
    pd.read_pickle("models/probabilistic/inter_orders_rates.pkl")
    .loc[:, ["SKU_ID", "ORDERS_BETWEEN_ORDERS", "RATE_cumsum"]]
    .copy()
)

In [210]:
# Attach the modelled rate to every candidate. The inner joins silently drop
# (POC, SKU_ID) candidates whose key is absent from the corresponding rate table.
prediction_firstorder = pd.merge(
    to_predict_firstorder,
    firstorder_model,
    how="inner",
    on=["SKU_ID", "ORDER_RANK"],
)[["POC", "SKU_ID", "RATE_cumsum"]]
prediction_interorder = pd.merge(
    to_predict_interorder,
    interorders_model,
    how="inner",
    on=["SKU_ID", "ORDERS_BETWEEN_ORDERS"],
)[["POC", "SKU_ID", "RATE_cumsum"]]

# Stack both candidate sets and order them by POC, then rate, both descending.
prediction = (
    pd.concat([prediction_firstorder, prediction_interorder], axis=0, ignore_index=True)
    .sort_values(by=["POC", "RATE_cumsum"], ascending=False)
)

# Rank SKUs within each POC, 1 = highest RATE_cumsum. method="first" breaks ties
# by row order, so every rank is a distinct integer.
prediction["SKUID_rank"] = (
    prediction
    .groupby("POC")["RATE_cumsum"]
    .rank(method="first", ascending=False)
    .astype(int)
)

In [211]:
# Number of SKUs to recommend per POC.
N_predictions = 5

# Keep each POC's N best-ranked SKUs.
prediction = prediction[prediction["SKUID_rank"] <= N_predictions].copy()

# Keep only POCs that have a full set of N recommendations.
# The per-POC row count is computed with a group-size transform instead of
# merging `value_counts().reset_index()`: that merge relied on an implicit join
# key and on the column names pandas >= 2.0 happens to produce ("POC", "count"),
# and breaks on older versions. The final reset_index reproduces the fresh
# 0..n-1 index the merge used to return.
prediction = (
    prediction
    .loc[lambda rows: rows.groupby("POC")["POC"].transform("size") == N_predictions]
    .reset_index(drop=True)
)

In [212]:
# Persist the final recommendations (columns POC, SKU_ID, RATE_cumsum, SKUID_rank).
# NOTE(review): presumably the "predictions/" directory must already exist — confirm.
prediction.to_pickle("predictions/probabilistic.pkl")

Test accuracy: share of predicted (POC, SKU) pairs found in the held-out last order

In [213]:
# Compare the recommendations with what each POC bought on its held-out order.
# The reference frame is deduplicated on the join keys first: if a SKU appears
# on several lines of the same order, the left join would otherwise repeat the
# matching prediction row and overstate both the hits and the denominator.
test_accuracy = prediction.merge(
    POCs_last_order.drop_duplicates(subset=["POC", "SKU_ID"]),
    on=["POC", "SKU_ID"],
    how="left",
    indicator=True,
)

# Share of recommended (POC, SKU_ID) pairs that were bought ("both") versus
# not bought ("left_only"); "right_only" cannot occur in a left join.
test_accuracy["_merge"].value_counts(normalize=True)

_merge
left_only     0.790126
both          0.209874
right_only    0.000000
Name: count, dtype: float64