### init environment

In [595]:
import torch
import numpy as np
from functools import partial

In [634]:
# проверочные данные для заданий 1-4, 6
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43]       # id рекомендованных товаров
bought_list = [521, 32, 143, 991]                                           # id купленных товаров
prices_recommended = [699, 419, 249, 390, 190, 1990, 269, 199, 3990, 299]  # цены рекомендованных товаров
prices_bought = np.array(prices_recommended)[np.isin(recommended_list, bought_list)]     # цены купленных товаров

### 1. hit rate at k

In [597]:
def hit_rate(recommended_list, bought_list):
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)
    flags = np.isin(bought_list, recommended_list)
    hit_rate = (flags.sum() > 0) * 1
    return hit_rate


def hit_rate_at_k(recommended_list, bought_list, k=5):
    """ Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных) """
    # с использованием numpy
    flags = np.isin(bought_list, recommended_list[:k])
    return (flags.sum() > 0) * 1

    # без использования numpy
    # return (len(set(bought_list) & set(recommended_list[:k])) > 0) * 1

In [598]:
# check
val = hit_rate_at_k(recommended_list, bought_list, 5)
print(f'Hit rate@k value (k=5): {val}')

Hit rate@k value (k=5): 1


In [599]:
# также можно в hit_rate() как recommended_list передавать нужный slice:
val = hit_rate(recommended_list[:5], bought_list)
print(f'Hit rate@k value (k=5): {val}')

Hit rate@k value (k=5): 1


### money precision at k

In [600]:
def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    """ Доля дохода по рекомендованным объектам
    :param recommended_list - список id рекомендаций
    :param bought_list - список id покупок
    :param prices_recommended - список цен для рекомендаций
    """
    flags = np.isin(recommended_list[:k], bought_list)
    prices = np.array(prices_recommended[:k])
    return flags @ prices / prices.sum()

In [601]:
# check
money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5)

0.559322033898305

### recall at k

In [602]:
def recall_at_k(recommended_list, bought_list, k=5):
    """ Recall on top k items """
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / len(bought_list)

In [603]:
# check
recall_at_k(recommended_list, bought_list, k=5)

0.5

### money recall at k

In [604]:
def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    """ Доля дохода по релевантным рекомендованным объектам
    :param recommended_list - список id рекомендаций
    :param bought_list - список id покупок
    :param prices_recommended - список цен для рекомендаций
    :param prices_bought - список цен покупок
    """
    flags = np.isin(recommended_list[:k], bought_list)      # get recommend to bought matches
    prices = np.array(prices_recommended[:k])               # get prices of recommended items
    return flags @ prices / np.sum(bought_list)

In [605]:
# check
money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5)

0.6455245998814464

### map at k

In [638]:
def precision_at_k(recommended_list, bought_list, k=5):
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / k

def ap_k(recommended_list, bought_list, k=5):
    # переработано
    flags = np.isin(recommended_list, bought_list)
    if sum(flags) == 0:
        return 0

    # sum_ = 0
    # for i in range(0, k-1):
    #     if flags[i]:
    #         sum_ += precision_at_k(recommended_list, bought_list, k=i+1)
    # result = sum_ / sum(flags)
    # return result

    func = partial(precision_at_k, recommended_list, bought_list)
    rel_items = np.arange(1, k + 1)[flags[:k]]     # получаем номера релевантных объектов
    return np.mean(list(map(func, rel_items)))        # считаем avg precision@k для этих объектов

In [639]:
# данные для проверки
recommended_lists = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43],
                    [146, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43],] #id товаров
bought_lists = [[521, 32, 143, 991], [146, 29]]

In [640]:
# v1
def map_k(recommended_list, bought_list, k=5, u=1):
    """ Среднее AP@k по u пользователям """
    apk = []
    for user in range(u):
        apk.append(ap_k(recommended_list[user], bought_list[user]))
    
    return np.mean(apk)

In [642]:
# v2
def map_k(recommended_list, bought_list, k=5, u=1):
    """ Среднее AP@k по u пользователям """
    func = partial(ap_k, k=k)
    apk = list(map(func, recommended_list[:u], bought_list[:u]))
    return np.mean(apk)

In [697]:
#check
map_k(recommended_lists, bought_lists, u=2)

0.875

### mean reciprocal rank

Mean Reciprocal Rank

- Считаем для первых k рекоммендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [790]:
def reciprocal_rank(recommended_list, bought_list, k=5):
    """ обратный ранг
    :param recommended_list - список рекомендаций
    :param bought_list - список покупок
    """
    flags = np.isin(recommended_list[:k], bought_list)
    rank = flags.argmax() + 1
    return 1 / rank if flags.any() else 0

In [791]:
def mean_reciprocal_rank(recommended_list, bought_list, k=5):
    """ Среднеобратный ранг """
    ranks = []
    for data in zip(recommended_list, bought_list):
        ranks.append(reciprocal_rank(*data, k))
    return np.mean(ranks)

In [802]:
mean_reciprocal_rank(recommended_lists, bought_lists, k=5)

1.0

In [803]:
mean_reciprocal_rank([[1, 2, 3, 4, 5]] * 2, [[2, 3], [7, 5]], k=4)

0.25

### NDCG@k

Normalized discounted cumulative gain

$$DCG = \frac{1}{|r|} \sum_u{\frac{[bought fact]}{discount(i)}}$$  

$discount(i) = 1$ if $i <= 2$,   
$discount(i) = log_2(i)$ if $i > 2$


(!) Считаем для первых k рекоммендаций   
(!) - существуют вариации с другими $discount(i)$  
i - ранк рекомендованного товара  
|r| - кол-во рекомендованных товаров 

$$NDCG = \frac{DCG}{ideal DCG}$$

In [None]:
N = 5
ys_true = torch.randint(0, 5, (N, ))
ys_pred = torch.rand(N)

In [None]:
def compute_gain(y_value: float, gain_scheme: str) -> float:
    if gain_scheme == "exp2":
        gain = 2 ** y_value - 1
    elif gain_scheme == "const":
        gain = y_value
    else:
        raise ValueError(f"{gain_scheme} method not supported, only exp2 and const.")
    return float(gain)

In [None]:
def dcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str) -> float:
    _, argsort = torch.sort(ys_pred, descending=True, dim=0)
    ys_true_sorted = ys_true[argsort]
    ret = 0
    for idx, cur_y in enumerate(ys_true_sorted, 1):
        gain = compute_gain(cur_y, gain_scheme)
        # your code
    return ret

In [None]:
_, argsort = torch.sort(ys_pred, descending=True, dim=0)

In [None]:
ys_true_sorted = ys_true[argsort]

In [None]:
for idx, cur_y in enumerate(ys_true_sorted, 1):
    print(idx, cur_y)

In [None]:
def ndcg(ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str = 'const') -> float:
    pred_dcg = dcg(ys_true, ys_pred, gain_scheme)
    # your code ideal_dcg = 
    
    ndcg = pred_dcg / ideal_dcg
    return ndcg