In [173]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from names import Columns

# Seed for the notebook's `random_state` arguments.
RANDOM_STATE = 12
# Number of products in the synthetic catalogue.
N_PRODUCTS = 10
products_names = ["product_" + str(idx) for idx in range(N_PRODUCTS)]
# Per-product NPV: 10000 for product_0, growing by 500 with each product index.
products_npv = {name: 10000 + 500 * idx for idx, name in enumerate(products_names)}

In [62]:
# Synthetic multilabel data set: one label column per product (N_PRODUCTS classes),
# and every sample carries at least one label (allow_unlabeled=False).
X, y = make_multilabel_classification(
    n_samples=100,
    n_classes=N_PRODUCTS,
    n_labels=2,
    allow_unlabeled=False,
    random_state=RANDOM_STATE,
)

In [63]:
# Hold out a test split (train_test_split's default proportions), seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=RANDOM_STATE)

In [64]:
# One random forest per product label. The forest is seeded so that the scores,
# and every metric derived from them below, are identical on each re-run.
classif = OneVsRestClassifier(RandomForestClassifier(random_state=RANDOM_STATE))
classif.fit(X_train, y_train);



In [65]:
# Per-product scores for every test sample; used below as the ranking scores.
predictions_test = classif.predict_proba(X_test)

In [161]:
# Recommendations in long format: one row per (query, product) with its predicted score.
# The row position inside the test split becomes the query id.
df_recommendations = (
    pd.DataFrame(data=predictions_test, columns=products_names)
    .reset_index(names=Columns.Query)
    .melt(id_vars=Columns.Query, var_name=Columns.Item, value_name=Columns.Score)
    .sort_values(by=Columns.Query)
)

# Ground-truth interactions in long format: only the (query, product) pairs whose label is 1.
df_interactions = (
    pd.DataFrame(data=y_test, columns=products_names)
    .reset_index(names=Columns.Query)
    .melt(id_vars=Columns.Query, var_name=Columns.Item, value_name=Columns.Weight)
    .query(f"{Columns.Weight} == 1")
    .drop(columns=Columns.Weight)
)

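Let $rel^Y_{u,j}$ be a ground-truth relevance indicator that equals 1 if the item recommended to user $u$ at position $j$ of the ordered ranking $Y_{u,k}$ is relevant (i.e. the user actually interacted with it) and 0 otherwise, and let $p_{j}$ be the profit of that item (in this notebook, its value in `products_npv`).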

# Value at k

$$\sum_{u\in U}\sum^{k}_{j=1}rel^{Y}_{u,j}\cdot p_{j}$$

In [175]:
def get_top_k_recommendations(recommendations: pd.DataFrame, k: int) -> pd.DataFrame:
    """Keep the ``k`` best-scored rows of every query.

    Rows are ranked inside each ``Columns.Query`` group by ``Columns.Score`` in
    descending order. Ties are broken by order of appearance
    (``method="first"``), so a group never contributes more than ``k`` rows.

    Args:
        recommendations: Long-format frame with at least the ``Columns.Query``
            and ``Columns.Score`` columns.
        k: Maximum number of rows to keep per query.

    Returns:
        The selected rows of ``recommendations``, in their original order and
        with their original index labels.
    """
    rank_in_query = recommendations.groupby(Columns.Query)[Columns.Score].rank(
        method="first", ascending=False
    )
    # Select by position rather than by index label: a label lookup would also
    # pull in unrelated rows whenever the incoming index contains duplicates.
    keep = (rank_in_query <= k).to_numpy()
    return recommendations[keep]


def merge_recommendations_interactions(
    recommendations: pd.DataFrame,
    interactions: pd.DataFrame
) -> pd.DataFrame:
    """Flag every recommendation that matches a ground-truth interaction.

    Performs a left join of ``recommendations`` with ``interactions`` on
    ``Columns.QueryItem`` and adds a boolean ``relevant`` column.

    Args:
        recommendations: Recommended rows; all of them are kept.
        interactions: Observed interactions; every row counts as relevant.

    Returns:
        ``recommendations`` with the extra columns of ``interactions`` and a
        ``relevant`` column that is ``True`` for hits and ``False`` otherwise.
    """
    merged = pd.merge(
        recommendations,
        interactions.assign(relevant=True),
        on=Columns.QueryItem,
        how="left"
    )
    # The left join leaves NaN where no interaction matched. Every interaction
    # row was tagged True, so "not null" is exactly "is a hit"; this yields a
    # proper boolean column instead of an object column of True/NaN.
    merged["relevant"] = merged["relevant"].notna()
    return merged

In [176]:
# Keep the 3 best-scored products of every query (k = 3 for the metrics below).
top_recommendations = get_top_k_recommendations(df_recommendations, k=3)

In [177]:
# Left-join the top-k recommendations with the observed interactions to mark the hits.
merged_recommendations_interactions = merge_recommendations_interactions(top_recommendations, df_interactions)

In [183]:
# Inspect which products ended up in the top-k lists.
merged_recommendations_interactions[Columns.Item]

0     product_5
1     product_8
2     product_6
3     product_5
4     product_6
        ...    
70    product_5
71    product_6
72    product_6
73    product_2
74    product_8
Name: item_column, Length: 75, dtype: object

In [186]:
# Revenue per recommendation: the product's NPV when the recommendation is a hit, 0 otherwise.
# `.eq(True)` treats the NaN left by the left join as "not a hit", so the column stays
# numeric instead of becoming an object column of numbers and NaN.
is_hit = merged_recommendations_interactions["relevant"].eq(True)
item_npv = merged_recommendations_interactions[Columns.Item].map(products_npv)
merged_recommendations_interactions["revenue"] = item_npv.where(is_hit, 0)

In [191]:
# Total revenue of each query's top-k list.
revenue_by_query = merged_recommendations_interactions.groupby(Columns.Query)["revenue"].sum()

In [193]:
# Value@k (k = 3): revenue summed over all queries.
revenue_by_query.sum()

515500

# Profit-At-Hit

PAH@k indicates the overall profit generated by the recommendations per user, divided by the number of items sold (i.e. the number of relevant recommendations in the top $k$).

In [198]:
# PAH@k: total revenue, divided by the number of hits and then by the number of queries.
total_revenue = revenue_by_query.sum()
n_hits = merged_recommendations_interactions["relevant"].sum()
n_queries = merged_recommendations_interactions[Columns.Query].nunique()
total_revenue / n_hits / n_queries

502.9268292682927