# Метрики рекомендательных систем

In [1]:
import pandas as pd
import numpy as np

## ML-метрики качества

### 1. Hit rate

Иногда применяется, когда продаются достаточно дорогие товары (например, бытовая техника) 

----
Hit rate = (был ли хотя бы 1 релевантный товар среди рекомендованных)   

Hit rate@k = (был ли хотя бы 1 релевантный товар среди топ-k рекомендованных)

In [15]:
def hit_rate(recommended_list, bought_list):
    """Return 1 if at least one bought item was recommended, otherwise 0.

    Args:
        recommended_list: item ids that were recommended.
        bought_list: item ids the user actually bought.

    Returns:
        A NumPy integer flag: 1 on a hit, 0 otherwise.
    """
    purchases = np.asarray(bought_list)
    recommendations = np.asarray(recommended_list)

    # any() over the membership mask is the same test as "number of hits > 0".
    has_hit = np.isin(purchases, recommendations).any()

    # Multiplying by 1 turns the boolean into an integer flag.
    return has_hit * 1

#   сделать в домашней работе
def hit_rate_at_k(recommended_list, bought_list, k=5):
    """Return 1 if any bought item is among the first k recommendations, else 0.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought.
        k: how many leading recommendations to look at.

    Returns:
        A NumPy integer flag: 1 on a hit within the top k, 0 otherwise.
    """
    purchases = np.asarray(bought_list)
    top_k = np.asarray(recommended_list)[:k]

    # any() over the membership mask is the same test as "number of hits > 0".
    has_hit = np.isin(purchases, top_k).any()

    # Multiplying by 1 turns the boolean into an integer flag.
    return has_hit * 1

In [None]:
# или с использованием функции hit_rate
def hit_rate_at_k(recommended_list, bought_list, k=5):
    """Hit rate restricted to the first k recommendations, via hit_rate."""
    top_k = recommended_list[:k]
    return hit_rate(top_k, bought_list)

In [2]:
# Toy data: item ids used to try out the metrics
recommended_list = [143, 523, 1134, 991, 27, 1543, 3345, 533, 11, 43] # recommended item ids
bought_list = [143, 523, 1134, 991]

In [19]:
# Hit rate over the full recommendation list
hit_rate(recommended_list, bought_list)

1

In [20]:
# Hit rate over the first 5 recommendations only
hit_rate_at_k(recommended_list, bought_list, 5)

1

### 2. Precision

*Precision* - доля релевантных товаров среди рекомендованных = Какой % рекомендованных товаров юзер купил

- Пожалуй, самая приближенная к бизнес-метрикам и самая популярная метрика

---

Precision = (# of recommended items that are relevant) / (# of recommended items)  

Precision@k  = (# of recommended items @k that are relevant) / (# of recommended items @k)

Money Precision@k  = (revenue of recommended items @k that are relevant) / (revenue of recommended items @k)  

**Note:** Обычно k в precision@k достаточно невелико (5-20) и определяется из бизнес-логики. Например, 5 товаров в e-mail рассылке, 20 ответов на первой странице Google и т. д.

In [86]:
def precision(recommended_list, bought_list):
    """Return the share of recommended items that the user bought.

    Precision = (# of recommended items that are relevant) / (# of recommended items)

    Args:
        recommended_list: item ids that were recommended.
        bought_list: item ids the user actually bought (the relevant items).

    Returns:
        A float in [0, 1]; 0.0 when nothing was recommended.
    """
    bought_list = np.asarray(bought_list)
    recommended_list = np.asarray(recommended_list)

    # Nothing recommended: define precision as 0 instead of dividing by zero.
    if len(recommended_list) == 0:
        return 0.0

    # The mask is built over the recommendations (not over the purchases), so
    # an item bought several times cannot push the numerator above the
    # number of recommended items.
    relevant_mask = np.isin(recommended_list, bought_list)

    return relevant_mask.sum() / len(recommended_list)


def precision_at_k(recommended_list, bought_list, k=5):
    """Return the share of the first k recommendations that the user bought.

    Precision@k = (# of recommended items @k that are relevant)
                  / (# of recommended items @k)

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought (the relevant items).
        k: how many leading recommendations to evaluate.

    Returns:
        A float in [0, 1]; 0.0 when the top-k slice is empty (e.g. k=0).
    """
    bought_list = np.asarray(bought_list)
    top_k = np.asarray(recommended_list)[:k]

    # Empty slice: define precision as 0 instead of dividing by zero.
    if len(top_k) == 0:
        return 0.0

    # The mask is built over the recommendations (not over the purchases), so
    # an item bought several times cannot push the numerator above k.
    relevant_mask = np.isin(top_k, bought_list)

    # Divide by the actual slice length: it is shorter than k when fewer than
    # k items were recommended.
    return relevant_mask.sum() / len(top_k)

# сделать дома
def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):
    """Return the revenue share of the top-k recommendations that was bought.

    Money Precision@k = (revenue of recommended items @k that are relevant)
                        / (revenue of recommended items @k)

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought.
        prices_recommended: prices aligned position-by-position with
            recommended_list (same length).
        k: how many leading recommendations to evaluate.

    Returns:
        A float in [0, 1]; 0.0 when the top-k recommendations carry no revenue.
    """
    bought_list = np.asarray(bought_list)
    top_k = np.asarray(recommended_list)[:k]
    prices_k = np.asarray(prices_recommended)[:k]

    total_revenue = prices_k.sum()
    # Avoid dividing by zero for an empty slice or all-zero prices.
    if total_revenue == 0:
        return 0.0

    # A mask over the top-k positions selects each relevant price exactly
    # once, so repeated purchases of the same item are not double-counted and
    # no per-item search through the recommendation list is needed.
    relevant_mask = np.isin(top_k, bought_list)
    relevant_revenue = prices_k[relevant_mask].sum()

    return relevant_revenue / total_revenue

In [36]:
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] # recommended item ids
bought_list = [521, 32, 143, 991]
# presumably one price per entry of recommended_list, in the same order — confirm
prices = [100, 200, 300, 250, 400, 150, 650, 50, 330, 520]

In [87]:
# Store the result under its own name: assigning it to money_precision_at_k
# would rebind the function to a float and break re-running this cell.
money_precision_value = money_precision_at_k(recommended_list, bought_list, prices, k=4)
money_precision_value

0.4117647058823529

### 3. Recall

Recall - доля рекомендованных товаров среди релевантных = Какой % купленных товаров был среди рекомендованных

    Обычно используется для моделей пре-фильтрации товаров (убрать те товары, которые точно не будем рекомендовать)

Recall= (# of recommended items that are relevant) / (# of relevant items)

Recall@k = (# of recommended items @k that are relevant) / (# of relevant items)

Money Recall@k = (revenue of recommended items @k that are relevant) / (revenue of relevant items)

Note: в recall@k число k обычно достаточно большое (50-200), больше чем покупок у среднестатистического юзера

In [None]:
def recall(recommended_list, bought_list):
    """Return the share of bought items that were among the recommendations.

    Recall = (# of recommended items that are relevant) / (# of relevant items)

    Args:
        recommended_list: item ids that were recommended.
        bought_list: item ids the user actually bought (the relevant items).

    Returns:
        The ratio as a NumPy float.
    """
    purchases = np.asarray(bought_list)
    recommendations = np.asarray(recommended_list)

    # Count the purchases that can be found among the recommendations.
    n_covered = np.isin(purchases, recommendations).sum()

    return n_covered / len(purchases)

# сделать дома
def recall_at_k(recommended_list, bought_list, k=5):
    """Recall computed against the first k recommendations only, via recall."""
    top_k = recommended_list[:k]
    return recall(top_k, bought_list)

# сделать дома
def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):
    """Return the share of purchase revenue covered by the top-k recommendations.

    Money Recall@k = (revenue of recommended items @k that are relevant)
                     / (revenue of relevant items)

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought.
        prices_recommended: prices aligned position-by-position with
            recommended_list (same length).
        prices_bought: prices aligned position-by-position with bought_list.
        k: how many leading recommendations to evaluate.

    Returns:
        The revenue ratio as a float; 0.0 when the purchases carry no revenue.
    """
    bought_list = np.asarray(bought_list)
    top_k = np.asarray(recommended_list)[:k]
    prices_k = np.asarray(prices_recommended)[:k]

    # The denominator is priced from prices_bought: bought items need not
    # appear in recommended_list at all, so they cannot be looked up there.
    relevant_revenue = np.asarray(prices_bought).sum()
    # Avoid dividing by zero for no purchases or all-zero prices.
    if relevant_revenue == 0:
        return 0.0

    # A mask over the top-k positions picks the prices of the recommended
    # items that were bought; ids bought but never recommended are skipped.
    relevant_mask = np.isin(top_k, bought_list)
    recommended_revenue = prices_k[relevant_mask].sum()

    return recommended_revenue / relevant_revenue

In [88]:
recommended_list = [143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43] # recommended item ids
bought_list = [521, 32, 143, 991]
# presumably one price per entry of recommended_list, in the same order — confirm
prices = [100, 200, 300, 250, 400, 150, 650, 50, 330, 520]

In [91]:
# Convert the sample data to arrays and cut the recommendations to the top k.
bought_list = np.array(bought_list)
recommended_list = np.array(recommended_list)
k = 4
recommended_list_k = recommended_list[:k]
# NOTE(review): prices_recommended is not assigned in any visible cell —
# presumably it holds the same values as prices; confirm.
prices = np.array(prices_recommended)

In [92]:
# Mark which bought items are among the top-k recommendations, then keep their ids.
flags = np.isin(bought_list, recommended_list_k)
relevant_id = list(bought_list[flags])

In [93]:
# Sum the prices of the bought items found in the top k, locating each price
# by the item's position in recommended_list.
# NOTE(review): prices_recommended is not assigned in any visible cell — confirm its origin.
recommended_revenue = sum([prices_recommended[list(recommended_list).index(id_)] for id_ in relevant_id])
recommended_revenue

350

In [94]:
# Raises ValueError: bought ids that were never recommended (e.g. 521) have no
# position in recommended_list, so list.index cannot find them.
relevant_revenue = sum([prices[list(recommended_list).index(id_)] for id_ in list(bought_list)])

ValueError: 521 is not in list

## Метрики ранжирования

### AP@k
AP@k - average precision at k

$$AP@k = \frac{1}{r} \sum_{i=1}^{k}{[recommended_i \in relevant] \cdot precision@i}$$

- r - кол-во релевантных товаров среди рекомендованных
- Суммируем по всем релевантным товарам
- Зависит от порядка рекомендаций

In [108]:
def ap_k(recommended_list, bought_list, k=5):
    """Return the average precision of the first k recommendations (AP@k).

    For every relevant item in the top k, precision is taken at that item's
    rank; the result is the mean of those precisions over the relevant items
    found in the top k.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought (the relevant items).
        k: how many leading recommendations to evaluate.

    Returns:
        A float in [0, 1]; 0.0 when no top-k recommendation is relevant.
    """
    bought_list = np.asarray(bought_list)
    top_k = np.asarray(recommended_list)[:k]

    # hits[i] is True when the recommendation at rank i + 1 was bought.
    hits = np.isin(top_k, bought_list)
    n_hits = hits.sum()

    if n_hits == 0:
        return 0.0

    # The running hit count divided by the rank gives precision@1..precision@k
    # in one pass, instead of recomputing a precision for every relevant rank.
    ranks = np.arange(1, len(hits) + 1)
    precision_at_rank = np.cumsum(hits) / ranks

    # Only ranks holding a relevant item contribute to the average.
    return precision_at_rank[hits].sum() / n_hits

### MAP@k

MAP@k (Mean Average Precision@k)  
Среднее AP@k по всем юзерам
- Показывает средневзвешенную точность рекомендаций

$$MAP@k = \frac{1}{|U|} \sum_{u \in U}{AP@k_u}$$
  
|U| - кол-во юзеров

In [116]:
# сделать дома
def map_k(recommended_list, bought_list, k=5):
    """Return the mean of AP@k over all users (MAP@k).

    Args:
        recommended_list: one list of recommended item ids per user.
        bought_list: one list of bought item ids per user, in the same user
            order as recommended_list.
        k: how many leading recommendations to evaluate for each user.

    Returns:
        The average of the per-user AP@k values; 0.0 when there are no users.

    Raises:
        IndexError: if bought_list has fewer entries than recommended_list.
    """
    n_users = len(recommended_list)

    # No users: define MAP@k as 0 instead of dividing by zero.
    if n_users == 0:
        return 0.0

    ap_total = 0
    for user_idx, user_recommendations in enumerate(recommended_list):
        # Index bought_list explicitly so a missing user fails loudly rather
        # than being silently dropped from the average.
        ap_total += ap_k(user_recommendations, bought_list[user_idx], k)

    return ap_total / n_users

In [114]:
# now the same data for 3 users: one inner list per user
recommended_list_3_users = [[143, 156, 1134, 991, 27, 1543, 3345, 533, 11, 43], 
                    [1134, 533, 14, 4, 15, 1543, 1, 99, 27, 3345],
                    [991, 3345, 27, 533, 43, 143, 1543, 156, 1134, 11]
                           ]

bought_list_3_users = [[521, 32, 143],  # user 1
                       [143, 156, 991, 43, 11], # user 2
                       [1,2]] # user 3

In [117]:
# Store the result under its own name: assigning it to map_k would rebind the
# function to a float and break re-running this cell.
map_k_value = map_k(recommended_list_3_users, bought_list_3_users, k=5)
map_k_value

0.3333333333333333

### Mean Reciprocal Rank ( MRR@k )


- Считаем для первых k рекомендаций
- Найти ранк первого релевантного предсказания $k_u$
- Посчитать reciprocal rank = $\frac{1}{k_u}$

$$MRR = mean(\frac{1}{k_u})$$

In [None]:
# сделать дома
def reciprocal_rank(recommended_list, bought_list, k=1):
    """Return 1 / rank of the first relevant item among the first k recommendations.

    Args:
        recommended_list: recommended item ids, in ranking order.
        bought_list: item ids the user actually bought (the relevant items).
        k: how many leading recommendations to inspect.

    Returns:
        1 / rank (rank counted from 1) of the first bought item within the
        top k, or 0.0 when none of the top-k recommendations was bought.
    """
    bought_list = np.asarray(bought_list)
    recommended_list_k = np.asarray(recommended_list)[:k]

    # Zero-based positions of the top-k recommendations that were bought.
    hit_positions = np.nonzero(np.isin(recommended_list_k, bought_list))[0]

    if len(hit_positions) == 0:
        return 0.0

    # Positions are zero-based, ranks start at 1.
    first_rank = int(hit_positions[0]) + 1
    return 1.0 / first_rank