### init environment

In [219]:
# import torch
import numpy as np
from functools import partial

In [339]:
good_count = 20        # общее кол-во товаров
user_count = 100         # кол-во пользователей для которых генерим данные
recommend_count = 10    # кол-во генерируемых рекомендаций для каждого пользователя
price_range = (199, 2490)

# генерируем проверочные данные для заданий
rnd = np.random.default_rng(11)
goods = np.arange(good_count) + 50      # список id существующих товаров
prices = np.round(rnd.random(good_count) * (max(price_range) - min(price_range)) + min(price_range), 2)     # цены товаров

recommends = []         # списки рекомендаций для пользователей
rec_prices = []         # списки цен рекомендованных товаров
boughts = []            # список покупок
b_prices = []           # список цен покупок
for _ in range(user_count):
    indexes = rnd.choice(goods.size, size=recommend_count, replace=False)    # индексы рекомендаций
    recommends.append(goods[indexes])
    rec_prices.append(prices[indexes])

    boughts_count = rnd.integers(good_count) + 1
    indexes = rnd.choice(goods.size, size=boughts_count, replace=False)
    boughts.append(goods[indexes])
    b_prices.append(prices[indexes])

In [338]:
rnd.choice(goods.size, size=(user_count, recommend_count), replace=(True, False), axis=0)

array([[19,  9, 17,  1, 14,  5,  5, 12, 12,  2],
       [ 3,  4, 19,  3, 14,  1,  8,  4,  9, 19],
       [ 7,  2, 17, 15, 12,  0,  2, 19,  1, 17],
       [15,  3,  5, 12,  8, 17, 12,  6,  5,  7],
       [15,  9, 14,  7,  1,  1, 11, 18, 18,  7],
       [13, 10,  4, 11,  5, 19, 17, 18, 15, 16],
       [ 5,  6, 10, 13,  6, 17, 16, 15,  1,  8],
       [ 0,  8, 19,  8,  4, 12,  4,  3, 16,  9],
       [17, 13, 11, 10,  3,  4,  4,  7,  3, 10],
       [ 0,  3,  0,  6,  8,  9, 11,  5,  2,  4],
       [ 0,  1,  3, 19,  2,  7,  1, 13,  5, 14],
       [ 5, 11,  2,  0,  2,  7,  0,  0,  9, 15],
       [ 1, 16, 12, 16, 18,  4,  4,  5,  4,  1],
       [14, 11, 10, 17,  1, 14, 18, 11,  0,  1],
       [11, 12,  4,  2, 15, 16,  2, 18, 10, 17],
       [ 9,  8,  8, 14,  9,  7,  8,  6,  5, 17],
       [ 9,  0,  2, 11,  2,  2, 11,  8,  9, 19],
       [11,  0, 10, 10, 12,  1,  5, 10, 19, 12],
       [ 5, 16,  5,  3,  5, 18,  0, 15, 19, 15],
       [16,  4, 19,  1,  9,  9, 16, 17, 15,  4],
       [ 4,  4, 10, 

### 1. hit rate at k

In [222]:
def hit_rate(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    hit_rate = (flags.sum() > 0) * 1
    return hit_rate


def hit_rate_at_k(recommended_list, bought_list, k=5):
    """ Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных) """
    # с использованием numpy
    flags = np.isin(bought_list, recommended_list[:k])
    return (flags.sum() > 0) * 1

    # без использования numpy
    # return (len(set(bought_list) & set(recommended_list[:k])) > 0) * 1

In [340]:
# check
user = 0
val = hit_rate_at_k(recommends[user], boughts[user], 5)
print(f'Hit rate@k value (k=5): {val}')

Hit rate@k value (k=5): 1


In [224]:
# также можно в hit_rate() как recommended_list передавать нужный slice:
user = 0
val = hit_rate(recommends[user][:5], boughts[user])
print(f'Hit rate@k value (k=5): {val}')

Hit rate@k value (k=5): 1


### money precision at k

In [225]:
def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    """ Доля дохода по рекомендованным объектам
    :param recommended_list - список id рекомендаций
    :param bought_list - список id покупок
    :param prices_recommended - список цен для рекомендаций
    """
    flags = np.isin(recommended_list[:k], bought_list)
    prices = np.array(prices_recommended[:k])
    return flags @ prices / prices.sum()

In [348]:
# check
user = 10
money_precision_at_k(recommends[user], boughts[user], rec_prices[user], k=5)

0.6854211885678256

In [349]:
recommends[user], boughts[user], rec_prices[user]

(array([53, 58, 67, 68, 51, 62, 63, 61, 57, 59]),
 array([58, 66, 63, 50, 57, 51, 61, 55, 56, 67, 59, 62, 52, 64, 65, 54, 60]),
 array([ 264.73, 2371.62, 1372.87, 2070.14, 1342.85, 1717.57,  829.73,
        1370.59,  496.31, 1623.74]))

### recall at k

In [227]:
def recall_at_k(recommended_list, bought_list, k=5):
    """ Recall on top k items """
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / len(bought_list)

In [347]:
# check
user = 10
recall_at_k(recommends[user], boughts[user], k=5)

0.17647058823529413

### money recall at k

In [229]:
def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    """ Доля дохода по релевантным рекомендованным объектам
    :param recommended_list - список id рекомендаций
    :param bought_list - список id покупок
    :param prices_recommended - список цен для рекомендаций
    :param prices_bought - список цен покупок
    """
    flags = np.isin(recommended_list[:k], bought_list)      # get recommend to bought matches
    prices = np.array(prices_recommended[:k])               # get prices of recommended items
    return flags @ prices / np.sum(prices_bought)

In [350]:
# check
user = 10
money_recall_at_k(recommends[user], boughts[user], rec_prices[user], b_prices[user], k=5)

0.23424252219100425

### map at k

In [231]:
def precision_at_k(recommended_list, bought_list, k=5):
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / k

def ap_k(recommended_list, bought_list, k=5):
    # переработано
    flags = np.isin(recommended_list, bought_list)
    if sum(flags) == 0:
        return 0

    sum_ = 0
    for i in range(0, k-1):
        if flags[i]:
            sum_ += precision_at_k(recommended_list, bought_list, k=i+1)
    result = sum_ / sum(flags)
    return result

    # func = partial(precision_at_k, recommended_list, bought_list)
    # rel_items = np.arange(1, k + 1)[flags[:k]]                  # получаем номера релевантных объектов
    # return np.sum(list(map(func, rel_items))) / flags.sum()     # считаем avg precision@k для этих объектов

In [233]:
# v1
def map_k_v1(recommended_list, bought_list, k=5, u=1):
    """ Среднее AP@k по u пользователям """
    apk = []
    for user in range(u):
        apk.append(ap_k(recommended_list[user], bought_list[user]))
    
    return np.mean(apk)

In [234]:
# v2
def map_k_v2(recommended_list, bought_list, k=5, u=1):
    """ Среднее AP@k по u пользователям """
    func = partial(ap_k, k=k)
    apk = list(map(func, recommended_list[:u], bought_list[:u]))
    return np.mean(apk)

In [352]:
%%time
# check
map_k_v1(recommends, boughts, u=50)

CPU times: user 26.9 ms, sys: 0 ns, total: 26.9 ms
Wall time: 42.1 ms


0.29224801587301585

In [353]:
%%time
# check
map_k_v2(recommends, boughts, u=50)

CPU times: user 23.7 ms, sys: 2.61 ms, total: 26.3 ms
Wall time: 102 ms


0.29224801587301585

### mean reciprocal rank

Mean Reciprocal Rank

- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [354]:
def reciprocal_rank(recommended_list, bought_list, n=1, k=5):    
    """ обратный ранг n релевантных рекомендаций среди первых k рекомендаций
    (при n=1 оно как раз должно работать как сказано в задании)
    :param recommended_list - список рекомендаций
    :param bought_list - список покупок
    :param n - учитывать первые n релевантных рекомендаций
    :param k - искать релевантные среди первых k рекомендаций
    """
    flags = np.isin(recommended_list[:k], bought_list)
    ranks = np.arange(1, k + 1)[flags][:n]      # ранги первых n рекомендаций из первых k. равен 0 если рекомендация нерелевантна
    ideal_ranks = np.arange(1, n + 1)
    return (1 / ranks).sum() / (1 / ideal_ranks).sum() if flags.any() else 0

In [355]:
# check
user = 10
reciprocal_rank(recommends[user], boughts[user], n=5)

0.4525547445255474

In [356]:
recommends[user], boughts[user]

(array([53, 58, 67, 68, 51, 62, 63, 61, 57, 59]),
 array([58, 66, 63, 50, 57, 51, 61, 55, 56, 67, 59, 62, 52, 64, 65, 54, 60]))

In [357]:
def mean_reciprocal_rank(recommended_list, bought_list, k=5):
    """ Среднеобратный ранг """
    ranks = []
    for data in zip(recommended_list, bought_list):
        ranks.append(reciprocal_rank(*data, k))
    return np.mean(ranks)

In [359]:
# check
mean_reciprocal_rank(recommends, boughts, k=5)

0.549051094890511

### NDCG@k

Normalized discounted cumulative gain

$$DCG = \frac{1}{|r|} \sum_u{\frac{[bought fact]}{discount(i)}}$$  

$discount(i) = 1$ if $i <= 2$,   
$discount(i) = log_2(i)$ if $i > 2$


(!) Считаем для первых k рекоммендаций   
(!) - существуют вариации с другими $discount(i)$  
i - ранк рекомендованного товара  
|r| - кол-во рекомендованных товаров 

$$NDCG = \frac{DCG}{ideal DCG}$$

In [365]:
N = 5

rnd = np.random.default_rng(77)
# ys_true = torch.randint(0, 5, (N, ))
# ys_pred = torch.rand(N)
ys_true = rnd.integers(5, size=N)
ys_pred = rnd.random(size=N)


In [366]:
ys_true, ys_pred

(array([0, 3, 3, 2, 3]),
 array([0.33445321, 0.3186902 , 0.39015401, 0.80130068, 0.09080164]))

In [367]:
def compute_gain(y_value: float, gain_scheme: str) -> float:
    if gain_scheme == "exp2":
        gain = 2 ** y_value - 1
    elif gain_scheme == "const":
        gain = y_value
    else:
        raise ValueError(f"{gain_scheme} method not supported, only exp2 and const.")
    return float(gain)

In [380]:
def dcg(ys_true, ys_pred, gain_scheme: str = 'const') -> float:
    argsort = np.argsort(ys_pred)[::-1]
    ys_true_sorted = ys_true[argsort]
    ret = 0
    for idx, cur_y in enumerate(ys_true_sorted, 1):
        gain = compute_gain(cur_y, gain_scheme)
        # your code
        
    return ret

In [381]:
dcg(ys_true, ys_pred)

31.0

In [382]:
# def dcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str) -> float:
#     _, argsort = torch.sort(ys_pred, descending=True, dim=0)
#     ys_true_sorted = ys_true[argsort]
#     ret = 0
#     for idx, cur_y in enumerate(ys_true_sorted, 1):
#         gain = compute_gain(cur_y, gain_scheme)
#         # your code
#     return ret

In [None]:
# _, argsort = torch.sort(ys_pred, descending=True, dim=0)

In [None]:
# ys_true_sorted = ys_true[argsort]

In [None]:
# for idx, cur_y in enumerate(ys_true_sorted, 1):
#     print(idx, cur_y)

In [None]:
# def ndcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str = 'const') -> float:
#     pred_dcg = dcg(ys_true, ys_pred, gain_scheme)
#     # your code ideal_dcg = 
    
#     ndcg = pred_dcg / ideal_dcg
#     return ndcg