# Курс Рекомендательные системы
# Практическое задание урока 2. Бейзлайны и детерминированные алгоритмы item-item

## Загрузка библиотек и модулей

In [1]:
!pip install implicit

Collecting implicit
  Downloading implicit-0.4.8.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 3.1 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Building wheels for collected packages: implicit
  Building wheel for implicit (PEP 517) ... [?25l[?25hdone
  Created wheel for implicit: filename=implicit-0.4.8-cp37-cp37m-linux_x86_64.whl size=4503607 sha256=e10dc470173ce33c2007d7a258428ee675daa09533789b04315314204fd038d6
  Stored in directory: /root/.cache/pip/wheels/88/e6/34/25e73cccbaf1a961154bb562a5f86123b68fdbf40e306073d6
Successfully built implicit
Installing collected packages: implicit
Successfully installed implicit-0.4.8


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix, coo_matrix

# Детерминированные алгоритмы
from implicit.nearest_neighbours import ItemItemRecommender, CosineRecommender, TFIDFRecommender, BM25Recommender

# Метрики
from implicit.evaluation import train_test_split
from implicit.evaluation import precision_at_k, mean_average_precision_at_k, AUC_at_k, ndcg_at_k

## Загрузка данных

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the raw transactions from the mounted Google Drive folder
# and preview the first two rows.
data_path = '/content/drive/My Drive/Colab Notebooks/111_RecSys/less_02/retail_train.csv'
data = pd.read_csv(data_path)
data.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [5]:
# Time-based hold-out: the most recent weeks become the test set.
test_size_weeks = 3

# First week that belongs to the test set. Rows with
# week_no >= max(week_no) - test_size_weeks are held out.
test_start_week = data['week_no'].max() - test_size_weeks

# .copy() turns both parts into independent frames, so later in-place edits
# of data_train do not trigger pandas' SettingWithCopyWarning.
data_train = data[data['week_no'] < test_start_week].copy()
data_test = data[data['week_no'] >= test_start_week].copy()

### Задание 1. Weighted Random Recommendation

Напишите код для случайных рекомендаций, в которых вероятность рекомендовать товар прямо пропорциональна логарифму продаж
- Можно сэмплировать товары случайно, но пропорционально какому-либо весу
- Например, прямо пропорционально популярности. Вес = log(sales_sum товара)

**Решение**

In [6]:
# Ground truth: for every user, the list of distinct items bought
# in the test part of the data.
result = data_test.groupby('user_id')['item_id'].unique().apply(list).reset_index()
result.columns = ['user_id', 'actual']
result.head(5)

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107..."
3,7,"[840386, 889774, 898068, 909714, 929067, 95347..."
4,8,"[835098, 872137, 910439, 924610, 992977, 10412..."


In [7]:
def get_items_weights(df):
    """Build three item-sampling weight tables from a transactions frame.

    Parameters
    ----------
    df : pd.DataFrame
        Transactions with the columns ``item_id``, ``user_id`` and
        ``sales_value``. The weights are computed from this frame (the
        previous version ignored the argument and read a global instead).

    Returns
    -------
    tuple of three pd.DataFrame
        Each frame has the columns ``item_id`` and ``weight``; the weights
        are normalised by their sum.

        1. weight proportional to log(total sales_value of the item);
           items whose total sales_value is below 1 are dropped, because
           their logarithm would be negative;
        2. weight proportional to the number of distinct users who bought
           the item;
        3. weight proportional to the total sales_value of the item.
    """
    # Total sales per item; shared by the log-sales and plain-sales weights.
    sales_by_item = df.groupby('item_id')['sales_value'].sum().reset_index()

    # 1. Weight by the logarithm of sales.
    # .copy() so that adding a column does not write into a filtered slice.
    items_weights = sales_by_item[sales_by_item['sales_value'] >= 1].copy()
    log_sales = np.log(items_weights['sales_value'])
    items_weights['weight'] = log_sales / log_sales.sum()
    items_weights = items_weights[['item_id', 'weight']]

    # 2. Weight by the number of distinct buyers.
    buyers_by_item = df.groupby('item_id')['user_id'].nunique(dropna=False)
    items_weights_1 = (buyers_by_item / buyers_by_item.sum()).rename('weight').reset_index()

    # 3. Weight by total sales value (purchase frequency * price).
    items_weights_2 = sales_by_item[['item_id']].copy()
    items_weights_2['weight'] = sales_by_item['sales_value'] / sales_by_item['sales_value'].sum()

    return items_weights, items_weights_1, items_weights_2

In [8]:
%%time

# Build the three weight tables (log of sales, distinct buyers, total sales).
items_weights, items_weights_1, items_weights_2 = get_items_weights(data_train)

CPU times: user 9.19 s, sys: 534 ms, total: 9.73 s
Wall time: 8.86 s


In [9]:
def weighted_random_recommendation(items_weights, n=5):
    """Draw ``n`` distinct random items, each with probability ``weight``.

    Input
    -----
    items_weights: pd.DataFrame
        Frame with the columns item_id and weight. The weights over all
        items must sum to 1.
    n: int
        How many items to draw (sampling is without replacement).

    Returns
    -------
    list
        The drawn item ids as a plain Python list.
    """
    probabilities = items_weights['weight'].to_list()
    candidates = np.array(items_weights['item_id'])

    drawn = np.random.choice(candidates, size=n, replace=False, p=probabilities)
    return drawn.tolist()

In [10]:
%%time

# Weighted random recommendations, weights proportional to the log of sales.
# The lambda ignores the user id: every user gets an independent random draw.
result['weighted_random_recommendation_logarithm'] = result['user_id'].apply(lambda x: weighted_random_recommendation(items_weights, n=5))

CPU times: user 15.2 s, sys: 116 ms, total: 15.3 s
Wall time: 15.3 s


In [11]:
%%time

# Weighted random recommendations, weighted by purchase frequency (items_weights_1)
result['weighted_random_recommendation_sales'] = result['user_id'].apply(lambda x: weighted_random_recommendation(items_weights_1, n=5))

CPU times: user 15.8 s, sys: 154 ms, total: 15.9 s
Wall time: 15.9 s


In [12]:
%%time

# Weighted random recommendations, weighted by (purchase frequency * price),
# i.e. by the item's total sales_value (items_weights_2)
result['weighted_random_recommendation_sales_value'] = result['user_id'].apply(lambda x: weighted_random_recommendation(items_weights_2, n=5))

CPU times: user 16 s, sys: 189 ms, total: 16.2 s
Wall time: 16.2 s


In [13]:
result.head(2)

Unnamed: 0,user_id,actual,weighted_random_recommendation_logarithm,weighted_random_recommendation_sales,weighted_random_recommendation_sales_value
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[896083, 554622, 1060543, 13158838, 1123045]","[15801374, 845868, 892048, 889700, 1101174]","[914190, 1033913, 913210, 869573, 1040812]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[6773211, 986991, 6442964, 15797145, 1008859]","[819330, 874736, 15452860, 921614, 1053690]","[964968, 928585, 5712216, 1117514, 1049735]"


### Задание 2. Расчет метрик
Рассчитайте Precision@5 для каждого алгоритма с помощью функции из вебинара 1. Какой алгоритм показывает лучшее качество?

**Решение**

In [14]:
def precision_at_k(recommended_list, bought_list, k=5):
    """Precision@k: share of the top-k recommendations that were bought.

    Note: defining this function replaces the ``precision_at_k`` name
    imported from ``implicit.evaluation`` earlier in the notebook.

    Parameters
    ----------
    recommended_list : sequence
        Recommended item ids, best first. Only the first ``k`` are used.
    bought_list : sequence
        Item ids the user actually bought. It is NOT truncated to ``k``.
    k : int
        Number of leading recommendations to evaluate.

    Returns
    -------
    float
        Hits among the top-k recommendations divided by the number of
        recommendations evaluated; 0.0 when there are no recommendations.
    """
    top_k = np.asarray(recommended_list)[:k]

    # No recommendations at all: report 0.0 instead of dividing 0 by 0 (NaN).
    if top_k.size == 0:
        return 0.0

    # Flag the recommended items (not the bought ones), so repeated ids in
    # bought_list cannot inflate the number of hits above len(top_k).
    hits = np.isin(top_k, np.asarray(bought_list))

    return hits.sum() / top_k.size

Рассчитаем Precision@5 для алгоритмов с вебинара и weighted_random_recommendation

In [15]:
# Load the predictions saved during the webinar and preview the first two rows
result_path = '/content/drive/My Drive/Colab Notebooks/111_RecSys/less_02/predictions_basic.csv'
result_webinar = pd.read_csv(result_path)
result_webinar.head(2)

Unnamed: 0,user_id,actual,random_recommendation,popular_recommendation,itemitem,cosine,tfidf,own_purchases
0,1,[ 821867 834484 856942 865456 889248 ...,"[5586238, 1015228, 866118, 2416733, 2603573]","[6534178, 6533889, 1029743, 6534166, 1082185]","[981760, 1127831, 1098066, 826249, 878996]","[981760, 1127831, 1098066, 878996, 826249]","[981760, 1127831, 1098066, 826249, 878996]","[999999, 1082185, 1029743, 995785, 1004906]"
1,3,[ 835476 851057 872021 878302 879948 ...,"[161354, 63027, 1027802, 12263694, 307395]","[6534178, 6533889, 1029743, 6534166, 1082185]","[981760, 995242, 1029743, 840361, 961554]","[981760, 1004906, 961554, 1096036, 1080414]","[981760, 1004906, 859075, 1096036, 961554]","[999999, 1082185, 1098066, 6534178, 1127831]"


In [16]:
def fromStr(stri):
    """Turn a stringified list of ids back into a list of ints.

    The predictions file stores every list as text, e.g. ``"[1, 2, 3]"``
    or ``"[ 1  2\\n 3]"``. The first and last characters (the brackets)
    are dropped, commas and line breaks are removed, and the remaining
    whitespace-separated tokens are converted with ``int``.
    """
    # Delete line breaks and commas in a single pass over the bracket-less text.
    cleaned = stri[1:-1].translate(str.maketrans('', '', '\n,'))
    return list(map(int, cleaned.split()))

In [17]:
# Precision@5 of every algorithm column stored in the webinar predictions.
result_presicion = []
cols = result_webinar.columns

# The ground truth is the same for every algorithm, so parse it only once
# instead of re-parsing the 'actual' string for each algorithm column.
actual_lists = result_webinar['actual'].apply(fromStr)

# Columns 2..7 hold the stored recommendations, one column per algorithm.
for col in cols[2:8]:
    recommended_lists = result_webinar[col].apply(fromStr)
    algoritm_precision = pd.Series([
        precision_at_k(recommended, actual)
        for recommended, actual in zip(recommended_lists, actual_lists)
    ]).mean()
    result_presicion.append([col, algoritm_precision])

In [18]:
# Mean Precision@5 of the three weighted random baselines, in the order:
# log of sales, purchase frequency, total sales value.
prec_weight_random, prec_weight_random_1, prec_weight_random_2 = (
    result.apply(lambda row, col=col: precision_at_k(row[col], row['actual']), axis=1).mean()
    for col in (
        'weighted_random_recommendation_logarithm',
        'weighted_random_recommendation_sales',
        'weighted_random_recommendation_sales_value',
    )
)

# Add them to the table that already holds the webinar algorithms.
result_presicion += [
    ['weighted_random_recommendation_logarithm', prec_weight_random],
    ['weighted_random_recommendation_sales', prec_weight_random_1],
    ['weighted_random_recommendation_sales_value', prec_weight_random_2],
]

Выведем результаты расчета Precision@5 для каждого алгоритма

In [19]:
pd.DataFrame(result_presicion, columns=['algoritm_name','algoritm_precision']).sort_values('algoritm_precision',ascending=False)

Unnamed: 0,algoritm_name,algoritm_precision
5,own_purchases,0.179987
1,popular_recommendation,0.15524
4,tfidf,0.036141
3,cosine,0.03526
2,itemitem,0.033595
8,weighted_random_recommendation_sales_value,0.019001
7,weighted_random_recommendation_sales,0.010774
6,weighted_random_recommendation_logarithm,0.001273
0,random_recommendation,0.000588


### Вывод

Случайные рекомендации и взвешенные случайные рекомендации показывают худшее качество метрики Precision@5. При этом качество метрики по взвешенным случайным рекомендациям лучше, чем без применения взвешивания.

Рекомендации на основе собственных покупок показывают лучшее качество метрики Precision@5.

Можно предположить, что в рассматриваемом датафрейме представлены покупки товаров повседневного спроса. Такие товары пользователь покупает снова и снова, не меняя резко своих привычек. Будущее поведение пользователя в большей мере обусловлено его личным предыдущим поведением, чем поведением других потребителей. Следовательно, алгоритмы с наилучшим Precision@5 (собственные покупки и популярные товары) в основном рекомендуют то, что пользователь купил бы и без рекомендации, поэтому применять их для рекомендаций нерационально: высокий Precision@5 здесь не гарантирует прироста продаж. Лучше было бы протестировать алгоритмы с меньшим качеством Precision@5 и посмотреть результаты по бизнес-метрике.

### Задание 3*. Улучшение бейзлайнов и ItemItem

- Попробуйте улучшить бейзлайны, считая их на топ-5000 товаров
- Попробуйте улучшить разные варианты ItemItemRecommender, выбирая число соседей $K$.

In [20]:
# Ground truth for the top-5000 experiments: the distinct items each user
# bought in the test part of the data.
result_5k = (
    data_test.groupby('user_id')['item_id']
    .unique()
    .apply(list)
    .rename('actual')
    .reset_index()
)
result_5k.head(5)

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107..."
3,7,"[840386, 889774, 898068, 909714, 929067, 95347..."
4,8,"[835098, 872137, 910439, 924610, 992977, 10412..."


In [21]:
print(data_train.item_id.unique().shape)

(86865,)


In [22]:
# Total quantity sold per item over the training period.
popularity = (
    data_train.groupby('item_id')['quantity']
    .sum()
    .reset_index()
    .rename(columns={'quantity': 'n_sold'})
)

# Ids of the 5000 items with the largest sold quantity.
top_5000 = popularity.sort_values('n_sold', ascending=False)['item_id'].iloc[:5000].tolist()

In [23]:
# Collapse every item outside the top-5000 into one placeholder item_id:
# a user who bought such items is treated as having "bought" item 999999.
# .assign() builds a new frame instead of writing into a slice of `data`,
# which is what triggered pandas' SettingWithCopyWarning.
data_train = data_train.assign(
    item_id=data_train['item_id'].where(data_train['item_id'].isin(top_5000), 999999)
)

# Users x items table holding the number of purchase rows per pair.
user_item_matrix = pd.pivot_table(data_train,
                                  index='user_id', columns='item_id',
                                  values='quantity',
                                  aggfunc='count',
                                  fill_value=0
                                 )

# Binarise: only the fact of a purchase matters, not the count.
user_item_matrix[user_item_matrix > 0] = 1
user_item_matrix = user_item_matrix.astype(float)  # dtype required by implicit

# Sparse (CSR) version of the matrix; csr_matrix() already returns CSR.
sparse_user_item = csr_matrix(user_item_matrix)

user_item_matrix.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


item_id,202291,397896,420647,480014,545926,707683,731106,818980,819063,819227,819255,819304,819308,819330,819518,819594,819643,819765,819840,819845,819927,819978,820082,820122,820165,820291,820301,820321,820361,820486,820518,820560,820701,820895,821025,821083,821200,821209,821219,821344,...,13512965,13671759,13672065,13777104,13841744,13842088,13842090,13842214,13842224,13877192,13945141,13945244,13987135,14025185,14043817,14043823,14043825,14043826,14050436,14050460,14050461,14077333,14106445,15452677,15452812,15506577,15511891,15596279,15596488,15596515,15778533,15831255,15926712,15926775,15926844,15926886,15927403,15927661,15927850,16809471
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# ~np.array([True, False])
# Share of non-zero cells of the user-item matrix, in percent.
# NOTE(review): this value is not assigned or printed, and the captured output
# shows only the array from the last line, so the result is discarded.
user_item_matrix.sum().sum() / (user_item_matrix.shape[0] * user_item_matrix.shape[1]) * 100
# Sorted unique item ids of the full dataset.
np.sort(data.item_id.unique())

array([   25671,    26081,    26093, ..., 18000012, 18024155, 18024556])

In [25]:
# Re-index users and items: map the original ids to consecutive
# row/column positions of the matrix, and back.
itemids = user_item_matrix.columns.values
userids = user_item_matrix.index.values

matrix_itemids = np.arange(itemids.shape[0])
matrix_userids = np.arange(userids.shape[0])

# position -> original id
id_to_itemid = {pos: item for pos, item in zip(matrix_itemids, itemids)}
id_to_userid = {pos: user for pos, user in zip(matrix_userids, userids)}

# original id -> position
itemid_to_id = {item: pos for item, pos in zip(itemids, matrix_itemids)}
userid_to_id = {user: pos for user, pos in zip(userids, matrix_userids)}

In [26]:
print(data_train.item_id.unique().shape)

(5001,)


### 1. Улучшение бейзлайнов Weighted Random Recommendation на топ-5000 товаров

In [27]:
def weighted_random_recommendation(items_weights, n=5):
    """Weighted random recommendations.

    Samples ``n`` distinct item ids from ``items_weights['item_id']``,
    using ``items_weights['weight']`` as the sampling probabilities,
    and returns them as a Python list.
    """
    return np.random.choice(
        np.array(items_weights['item_id']),
        size=n,
        replace=False,
        p=items_weights['weight'].to_list(),
    ).tolist()

In [28]:
%%time

# Recompute the weight tables; data_train now contains only the top-5000
# items plus the placeholder item 999999.
# NOTE(review): the placeholder gets a weight as well and shows up among the
# sampled recommendations below — consider excluding it before sampling.
items_weights, items_weights_1, items_weights_2 = get_items_weights(data_train)

CPU times: user 749 ms, sys: 12.7 ms, total: 761 ms
Wall time: 768 ms


In [29]:
%%time

# Weighted random recommendations by the log of sales, on the top-5000 items
result_5k['weighted_random_recommendation_logarithm'] = result_5k['user_id'].apply(lambda x: weighted_random_recommendation(items_weights, n=5))

CPU times: user 1.03 s, sys: 7.4 ms, total: 1.04 s
Wall time: 1.04 s


In [30]:
%%time

# Weighted random recommendations by purchase frequency (items_weights_1)
result_5k['weighted_random_recommendation_sales'] = result_5k['user_id'].apply(lambda x: weighted_random_recommendation(items_weights_1, n=5))

CPU times: user 1.04 s, sys: 7.42 ms, total: 1.05 s
Wall time: 1.06 s


In [31]:
%%time

# Weighted random recommendations by (purchase frequency * price),
# i.e. by the item's total sales_value (items_weights_2)
result_5k['weighted_random_recommendation_sales_value'] = result_5k['user_id'].apply(lambda x: weighted_random_recommendation(items_weights_2, n=5))

CPU times: user 1.14 s, sys: 7.23 ms, total: 1.14 s
Wall time: 1.15 s


In [32]:
result_5k.head(2)

Unnamed: 0,user_id,actual,weighted_random_recommendation_logarithm,weighted_random_recommendation_sales,weighted_random_recommendation_sales_value
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[1025641, 1034176, 1004475, 1138135, 8180870]","[1044759, 892862, 7440663, 871337, 917494]","[999999, 5583547, 6534178, 1043837, 1012284]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[1026258, 1125530, 1044299, 13038913, 1015141]","[921744, 830887, 1082185, 1103476, 941033]","[1132231, 8090537, 7409957, 854852, 999999]"


In [33]:
# Mean Precision@5 of the three weighted random baselines on the top-5000
# items, in the order: log of sales, purchase frequency, total sales value.
prec_weight_random_5k, prec_weight_random_1_5k, prec_weight_random_2_5k = (
    result_5k.apply(lambda row, col=col: precision_at_k(row[col], row['actual']), axis=1).mean()
    for col in (
        'weighted_random_recommendation_logarithm',
        'weighted_random_recommendation_sales',
        'weighted_random_recommendation_sales_value',
    )
)

In [34]:
# [algorithm name, mean Precision@5] pairs for the top-5000 baselines.
result_5k_presicion = [
    ['weighted_random_recommendation_logarithm', prec_weight_random_5k],
    ['weighted_random_recommendation_sales', prec_weight_random_1_5k],
    ['weighted_random_recommendation_sales_value', prec_weight_random_2_5k],
]

In [36]:
pd.DataFrame(result_5k_presicion, columns=['algoritm_name','algoritm_precision']).sort_values('algoritm_precision',ascending=False)

Unnamed: 0,algoritm_name,algoritm_precision
2,weighted_random_recommendation_sales_value,0.031048
1,weighted_random_recommendation_sales,0.014887
0,weighted_random_recommendation_logarithm,0.006366


### 2. Улучшение ItemItemRecommender, выбирая число соседей 𝐾.

In [37]:
%%time
# Mean Precision@5 of ItemItemRecommender for K = 1..9 nearest neighbours.
scores = {}

# The item-user matrix is the same for every K, so build it once instead of
# converting the dense DataFrame to CSR twice on every iteration.
item_user_matrix = sparse_user_item.T.tocsr()

# NOTE(review): the placeholder item 999999 is a regular column of the matrix
# and can therefore be recommended; consider passing it via filter_items.
for i in range(1, 10):
    model = ItemItemRecommender(K=i, num_threads=4)  # K - number of nearest neighbours

    model.fit(item_user_matrix,  # expects the item-user matrix
              show_progress=False)

    # Top-5 recommendations for every test user, mapped back to original item ids.
    result_5k['itemitem'] = result_5k['user_id'].apply(
        lambda x: [id_to_itemid[rec[0]] for rec in
                   model.recommend(userid=userid_to_id[x],
                                   user_items=sparse_user_item,  # user-item matrix
                                   N=5,
                                   filter_already_liked_items=False,
                                   filter_items=None,
                                   recalculate_user=True)])

    scores[str(i)] = result_5k.apply(
        lambda x: precision_at_k(x['itemitem'], x['actual'], 5), axis=1).mean()

CPU times: user 24.6 s, sys: 83.1 ms, total: 24.7 s
Wall time: 16.1 s


In [38]:
# One row per tried K with its mean Precision@5.
metrics = pd.DataFrame({
    'Ki': list(scores.keys()),
    'precision': list(scores.values()),
})
metrics

Unnamed: 0,Ki,precision
0,1,0.179987
1,2,0.192009
2,3,0.186092
3,4,0.144956
4,5,0.136925
5,6,0.142018
6,7,0.144858
7,8,0.147209
8,9,0.148482
