In [8]:
import pandas as pd
import numpy as np

### 1. Hit rate

In [9]:
# Ranked ids of the items recommended to the user.
recommended_list = [
    143, 156, 1134, 991, 27,
    1543, 3345, 533, 11, 43,
]

# Ids of the items the user actually bought.
bought_list = [521, 32, 143, 991]

In [10]:
def hit_rate(recommended_list, bought_list):
    """Tell whether at least one purchased item was recommended.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items.
    bought_list : sequence
        Ids of the items the user bought.

    Returns
    -------
    numpy integer
        1 if any bought item occurs in ``recommended_list``, otherwise 0.
    """
    purchased = np.array(bought_list)
    recommended = np.array(recommended_list)

    # One boolean per purchase: was this item among the recommendations?
    was_recommended = np.isin(purchased, recommended)

    # Multiplying the numpy bool by 1 yields the 0/1 integer indicator.
    return was_recommended.any() * 1


def hit_rate_at_k(recommended_list, bought_list, k=5):
    """Tell whether at least one purchased item is in the first ``k`` recommendations.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are considered.
    bought_list : sequence
        Ids of the items the user bought.
    k : int, default 5
        Number of leading recommendations to look at.

    Returns
    -------
    numpy integer
        1 if any bought item occurs in the first ``k`` recommendations, otherwise 0.
    """
    top_k = np.array(recommended_list[:k])
    purchased = np.array(bought_list)

    # One boolean per purchase: was this item among the first k recommendations?
    was_recommended = np.isin(purchased, top_k)

    return was_recommended.any() * 1

In [11]:
hit_rate(recommended_list, bought_list)

1

In [12]:
hit_rate_at_k(recommended_list, bought_list, k=5)

1

### 2. MoneyPrecision@k

In [13]:
def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    """Share of the top-``k`` recommendations' total price that falls on bought items.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are used.
    bought_list : sequence
        Ids of the items the user bought.
    prices_recommended : sequence of numbers
        Prices aligned position-by-position with ``recommended_list``;
        only the first ``k`` are used.
    k : int, default 5
        Number of leading recommendations to evaluate.

    Returns
    -------
    numpy float
        Price of the recommended-and-bought items divided by the price of
        all top-``k`` recommendations.
    """
    top_items = np.array(recommended_list[:k])
    top_prices = np.array(prices_recommended[:k])
    purchased = np.array(bought_list)

    # True where the recommended item was actually bought.
    hit_mask = np.isin(top_items, purchased)

    # The dot product with the boolean mask adds up the prices of the hits only.
    hit_revenue = np.dot(hit_mask, top_prices)

    return hit_revenue / top_prices.sum()

In [15]:
# Prices of the recommended items, aligned by position with recommended_list.
prices_recommended = [
    400, 60, 40, 40, 90,
    230, 452, 65, 70, 95,
]

In [16]:
money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5)

0.6984126984126984

### 3. Recall@k, MoneyRecall@k

In [17]:
def recall(recommended_list, bought_list):
    """Fraction of the bought items that appear among the recommendations.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items.
    bought_list : sequence
        Ids of the items the user bought.

    Returns
    -------
    numpy float
        Number of bought items found in ``recommended_list`` divided by the
        number of bought items.
    """
    purchased = np.array(bought_list)
    recommended = np.array(recommended_list)

    # Count the purchases that were covered by the recommendations.
    n_covered = np.isin(purchased, recommended).sum()

    return n_covered / purchased.shape[0]


def recall_at_k(recommended_list, bought_list, k=5):
    """Fraction of the bought items that appear in the first ``k`` recommendations.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are considered.
    bought_list : sequence
        Ids of the items the user bought.
    k : int, default 5
        Number of leading recommendations to look at.

    Returns
    -------
    numpy float
        Number of bought items found in the top ``k`` divided by the number
        of bought items.
    """
    top_k = np.array(recommended_list[:k])
    purchased = np.array(bought_list)

    # Count the purchases that were covered by the top-k recommendations.
    n_covered = np.isin(purchased, top_k).sum()

    return n_covered / purchased.shape[0]
    


def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    """Price of the bought items found in the top ``k``, relative to all money spent.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are used.
    bought_list : sequence
        Ids of the items the user bought.
    prices_recommended : sequence of numbers
        Prices aligned position-by-position with ``recommended_list``;
        only the first ``k`` are used.
    prices_bought : sequence of numbers
        Prices of the bought items; only their total is used.
    k : int, default 5
        Number of leading recommendations to evaluate.

    Returns
    -------
    numpy float
        Sum of ``prices_recommended`` over the recommended-and-bought items
        divided by the sum of ``prices_bought``.
    """
    top_items = np.array(recommended_list[:k])
    top_prices = np.array(prices_recommended[:k])
    purchased = np.array(bought_list)
    spent_total = np.array(prices_bought).sum()

    # True where the recommended item was actually bought.
    hit_mask = np.isin(top_items, purchased)

    # The numerator is priced with the recommendation-side prices.
    hit_revenue = np.dot(hit_mask, top_prices)

    return hit_revenue / spent_total

In [18]:
# Prices of the bought items, aligned by position with bought_list.
# NOTE(review): items 143 and 991 are priced 200 and 350 here but 400 and 40
# in prices_recommended -- toy data; confirm whether the mismatch is intended.
prices_bought = [
    70,
    600,
    200,
    350,
]

In [19]:
recall_at_k(recommended_list,bought_list,k=5)

0.5

In [20]:
money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5)

0.36065573770491804

### 4. MAP@k

In [43]:
# Ranked recommendations, one inner list per user.
recommended_list_3_users = [
    [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43],  # user 1
    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],  # user 2
    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11],  # user 3
]

# Purchases of the same users, in the same order.
bought_list_3_users = [
    [521, 32, 143],  # user 1
    [143, 156, 991, 43, 11],  # user 2
    [1, 2],  # user 3
]


In [44]:
def ap_k(recommended_list, bought_list, k=5):
    """Average precision over the first ``k`` recommendations for one user.

    For every position in the top ``k`` that holds a bought item, the
    precision of the list cut at that position is taken via
    ``precision_at_k``; the result is the mean of those precisions.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items.
    bought_list : sequence
        Ids of the items the user bought.
    k : int, default 5
        Number of leading recommendations to evaluate.

    Returns
    -------
    number
        Mean precision at the hit positions, or 0 when the top ``k``
        contains no bought item.
    """
    purchased = np.array(bought_list)
    top_k = np.array(recommended_list)[:k]

    # Zero-based positions inside the top k that hold a bought item.
    hit_positions = np.flatnonzero(np.isin(top_k, purchased))
    n_hits = hit_positions.size

    if n_hits == 0:
        return 0

    precision_total = 0
    for position in hit_positions:
        # Position i is zero-based, so the cut-off that includes it is i + 1.
        precision_total += precision_at_k(top_k, purchased, k=position + 1)

    return precision_total / n_hits


In [45]:
def precision_at_k(recommended_list, bought_list, k=5):
    """Fraction of the first ``k`` recommendations that the user bought.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are evaluated.
    bought_list : sequence
        Ids of the items the user bought. Repeated ids do not change the result.
    k : int, default 5
        Number of leading recommendations to evaluate.

    Returns
    -------
    float
        Number of top-``k`` recommendations present in ``bought_list`` divided
        by the number of recommendations evaluated; 0.0 when there are none.
    """
    bought_list = np.array(bought_list)
    recommended_list = np.array(recommended_list)[:k]

    # Nothing recommended means nothing can be precise; avoid a 0/0 division.
    if len(recommended_list) == 0:
        return 0.0

    # Test the recommendations against the purchases (not the other way round),
    # so a product bought several times cannot push precision above 1.
    flags = np.isin(recommended_list, bought_list)

    return flags.sum() / len(recommended_list)


In [46]:
def map_k(recommended_list, bought_list, k=5):
    """Mean of ``ap_k`` over several users.

    Parameters
    ----------
    recommended_list : sequence of sequences
        One list of recommended item ids per user.
    bought_list : sequence of sequences
        One list of bought item ids per user, in the same user order.
    k : int, default 5
        Cut-off passed to ``ap_k`` for every user.

    Returns
    -------
    numpy float
        Arithmetic mean of the per-user ``ap_k`` values.
    """
    # Users are paired by position; each pair is scored with the same cut-off.
    per_user_ap = [
        ap_k(user_recs, user_purchases, k)
        for user_recs, user_purchases in zip(recommended_list, bought_list)
    ]

    return np.array(per_user_ap).mean()

In [47]:
map_k(recommended_list_3_users, bought_list_3_users, k=5)

0.3333333333333333

### 5. NDCG@k

In [48]:
def ndcg_at_k(recommended_list, bought_list, k=5):
    """Normalized discounted cumulative gain of the first ``k`` recommendations.

    Relevance is binary (1 if the recommended item was bought). The discount
    of 1-based position ``p`` is ``p`` for ``p <= 2`` and ``log2(p)`` above
    that. The ideal ranking is taken to be ``k`` relevant items in a row,
    regardless of how many items were actually bought.

    Parameters
    ----------
    recommended_list : sequence
        Ids of the recommended items; only the first ``k`` are used. If fewer
        than ``k`` are given, the missing positions count as misses.
    bought_list : sequence
        Ids of the items the user bought.
    k : int, default 5
        Number of ranking positions to evaluate.

    Returns
    -------
    numpy float
        DCG@k divided by the ideal DCG@k, in the range [0, 1].

    Raises
    ------
    ValueError
        If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    recommended_list = np.array(recommended_list[:k])
    bought_list = np.array(bought_list)

    # Always work with exactly k positions: slots with no recommendation stay 0.
    flags = np.zeros(k)
    flags[:len(recommended_list)] = np.isin(recommended_list, bought_list)
    ideal_flags = np.ones(k)

    # Discounts are built for k positions instead of a hard-coded five, so the
    # function works for any cut-off.
    positions = np.arange(1, k + 1)
    discount_list = np.where(positions > 2, np.log2(positions), positions)

    dcg_at_k = np.mean(flags / discount_list)
    ideal_dcg_at_k = np.mean(ideal_flags / discount_list)

    return dcg_at_k / ideal_dcg_at_k

In [49]:
ndcg_at_k(recommended_list, bought_list, k=5)

0.489938890671454

### 6. MRR@k

In [50]:
def reciprocal_rank(recommended_list, bought_list, k=1):
    """Mean reciprocal rank of the first bought item across users.

    Parameters
    ----------
    recommended_list : sequence of sequences
        One list of recommended item ids per user.
    bought_list : sequence of sequences
        One list of bought item ids per user, in the same user order.
    k : int, default 1
        Number of leading recommendations inspected for each user.

    Returns
    -------
    numpy float
        Average over users of ``1 / rank`` of the first bought item within the
        top ``k`` (1-based rank); a user with no hit contributes 0.
    """

    def _user_score(user_recs, user_purchases):
        # Score one user: reciprocal of the 1-based rank of the first hit.
        hits = np.isin(user_recs[:k], user_purchases)
        if not hits.any():
            return 0
        # argmax on a boolean array gives the index of the first True.
        return 1 / (np.argmax(hits) + 1)

    per_user = np.array([
        _user_score(user_recs, user_purchases)
        for user_recs, user_purchases in zip(recommended_list, bought_list)
    ])

    return per_user.mean()

In [51]:
reciprocal_rank(recommended_list_3_users, bought_list_3_users, k=5)

0.3333333333333333

Cumulative Gain at K

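Вновь рассмотрим один объект и $K$ элементов с наибольшим предсказанным рейтингом $\hat{r}(e)$. Cumulative gain at K (CG@K) — базовая метрика ранжирования, которая использует простую идею: чем релевантнее элементы в этом топе, тем лучше:

$$CG@K = \sum_{k=1}^{K} r^{true}\left(\pi^{-1}(k)\right),$$

где $\pi^{-1}(k)$ — элемент, стоящий на позиции $k$ при ранжировании по $\hat{r}(e)$, а $r^{true}(e)$ — его истинная релевантность.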

Эта метрика обладает очевидными недостатками: она не нормализована и не учитывает позицию релевантных элементов.

WTA (winner takes all) – эта метрика равна 1, если топ-рекомендация (с самым большим предсказанным рейтингом) из рассмотренных пользователем получила положительную оценку, и 0 в противном случае;