In [73]:
import numpy as np
import pandas as pd

from sklearn.datasets import make_multilabel_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from names import Columns

# Seed passed to the synthetic-data generator and to the train/test split.
RANDOM_STATE = 12
# Number of products; also used as the number of labels in the synthetic dataset.
N_PRODUCTS = 10

# One value per product: starts at 10000 and increases by 500 per product index.
products_npv = list(range(10000, 10000 + 500 * N_PRODUCTS, 500))
# Column / item identifiers: "product_0" ... "product_<N_PRODUCTS - 1>".
products_names = [f"product_{i}" for i in range(N_PRODUCTS)]

In [62]:
# Synthetic multilabel dataset with one label per product.
X, y = make_multilabel_classification(
    # dataset size
    n_samples=100,
    n_classes=N_PRODUCTS,
    # label structure: about two labels per sample, never zero
    n_labels=2,
    allow_unlabeled=False,
    # reproducibility
    random_state=RANDOM_STATE,
)

In [63]:
# Hold out part of the data for evaluation; the fixed seed keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=RANDOM_STATE,
)

In [64]:
# One-vs-rest wrapper: a separate random forest is fitted for every label
# (product). The forest is seeded with the same RANDOM_STATE as the data
# generator and the split; without it the fitted model, and every score
# derived from it, changes on each "Restart & Run All".
classif = OneVsRestClassifier(RandomForestClassifier(random_state=RANDOM_STATE))
# Trailing ';' suppresses the estimator repr in the cell output.
classif.fit(X_train, y_train);



In [65]:
# Scores for the held-out rows; consumed below as a table with one column
# per product (columns=products_names).
predictions_test = classif.predict_proba(X_test)

In [161]:
# Wide layout: one row per test sample, one column per product.
recommendations_wide = pd.DataFrame(data=predictions_test, columns=products_names)
interactions_wide = pd.DataFrame(data=y_test, columns=products_names)

# Long layout: the positional row index becomes the query id and every
# (query, product) pair gets its own row carrying the predicted score.
df_recommendations = (
    recommendations_wide
    .reset_index(names=Columns.Query)
    .melt(id_vars=Columns.Query, var_name=Columns.Item, value_name=Columns.Score)
    .sort_values(by=Columns.Query)
)

# Ground truth in the same long layout, reduced to the positive pairs only;
# the weight column is dropped once it has served as the filter.
df_interactions = (
    interactions_wide
    .reset_index(names=Columns.Query)
    .melt(id_vars=Columns.Query, var_name=Columns.Item, value_name=Columns.Weight)
    .query(f"{Columns.Weight} == 1")
    .drop(columns=Columns.Weight)
)

Let $rel^Y_{u,j}$ be a ground-truth relevance variable that indicates whether the item recommended at position $j$ in the ordered ranking $Y_{u,k}$ is relevant to user $u$, and let $p_{j}$ be the monetary value (e.g. the price or NPV) of that item.

# Revenue at k

$$\sum_{u\in U}\sum^{k}_{j=1}rel^{Y}_{u,j}\cdot p_{j}$$

In [165]:
def get_top_k_recommendations(recommendations: pd.DataFrame, k: int) -> pd.DataFrame:
    """Keep, for every query, the ``k`` rows with the highest score.

    Parameters
    ----------
    recommendations
        Long-format frame containing at least the ``Columns.Query`` and
        ``Columns.Score`` columns.
    k
        Maximum number of rows to keep per query.

    Returns
    -------
    pd.DataFrame
        The selected rows of ``recommendations`` with their original index
        labels and in their original order. A query with fewer than ``k``
        scored rows keeps all of them; rows with a missing score are dropped.
    """
    # method="first" hands out unique ranks 1..n inside each query, so tied
    # scores can never push the selection above k rows (a "dense" ranking gives
    # tied rows the same rank and would let all of them through). Ties are
    # resolved in favour of the row that appears first.
    rank_in_query = recommendations.groupby(Columns.Query)[Columns.Score].rank(
        method="first", ascending=False
    )
    # The ranks are row-aligned with the input, so a positional boolean mask
    # selects exactly the ranked rows even when index labels repeat.
    return recommendations[(rank_in_query <= k).to_numpy()]


def merge_recommendations_interactions(
    recommendations: pd.DataFrame,
    interactions: pd.DataFrame
) -> pd.DataFrame:
    """Attach a ``relevant`` flag to every recommendation row.

    Each row of ``recommendations`` is left-joined on ``Columns.QueryItem``
    against ``interactions``. Rows with a matching interaction get
    ``relevant=True``; rows without a match keep a missing value in that
    column.
    """
    flagged_interactions = interactions.assign(relevant=True)
    return recommendations.merge(
        flagged_interactions,
        how="left",
        on=Columns.QueryItem,
    )

In [166]:
# Inspect the long-format recommendations: one row per (query, item) pair with its score.
df_recommendations

Unnamed: 0,query_column,item_column,score
0,0,product_0,0.00
125,0,product_5,0.61
100,0,product_4,0.00
175,0,product_7,0.03
75,0,product_3,0.11
...,...,...,...
74,24,product_2,0.61
49,24,product_1,0.25
24,24,product_0,0.21
224,24,product_8,0.44


In [164]:
# Spot-check the helper: top-3 rows per query.
# NOTE(review): this cell's execution count (164) is lower than that of the cell
# defining the helper (165), so the stored output may come from an earlier
# definition - re-run the notebook top to bottom to refresh it.
get_top_k_recommendations(df_recommendations, k=3)

Unnamed: 0,query_column,item_column,score
125,0,product_5,0.61
200,0,product_8,0.54
150,0,product_6,0.53
126,1,product_5,0.52
151,1,product_6,0.67
...,...,...,...
173,23,product_6,0.65
174,24,product_6,0.44
99,24,product_3,0.27
74,24,product_2,0.61
