<a href="https://colab.research.google.com/github/bkvkrll/Recommender-systems/blob/main/course_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install implicit==0.4.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting implicit==0.4.4
  Downloading implicit-0.4.4.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 4.9 MB/s 
Building wheels for collected packages: implicit
  Building wheel for implicit (setup.py) ... [?25l[?25hdone
  Created wheel for implicit: filename=implicit-0.4.4-cp37-cp37m-linux_x86_64.whl size=3423220 sha256=80c8f0c865797cb20915bfdebfd72b7578a12bc588e9608e42860568aafa70dd
  Stored in directory: /root/.cache/pip/wheels/44/7e/7d/a17324ea207cfbe76aca878b5b8ca0aa932cf55d163329be37
Successfully built implicit
Installing collected packages: implicit
Successfully installed implicit-0.4.4


# **Import libs**

In [2]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.0.6-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.6 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.0.6


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit import als

# Модель второго уровня
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier, Pool

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import os, sys
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

# Написанные нами функции
from metrics import precision_at_k, recall_at_k
from utils import prefilter_items
from recommenders import MainRecommender

# **Read data**

In [4]:
data = pd.read_csv('retail_train.csv')
item_features = pd.read_csv('product.csv')
user_features = pd.read_csv('hh_demographic.csv')
test_data = pd.read_csv = pd.read_csv('retail_test1.csv')

Process features dataset

In [5]:
ITEM_COL = 'item_id'
USER_COL = 'user_id'

In [6]:
# column processing
item_features.columns = [col.lower() for col in item_features.columns]
user_features.columns = [col.lower() for col in user_features.columns]

item_features.rename(columns={'product_id': ITEM_COL}, inplace=True)
user_features.rename(columns={'household_key': USER_COL }, inplace=True)

In [7]:
# Для user-item матрицы я хочу использовать отношение количества купленного товара,
# деленного на "давность" покупки. Для этого создадим новый признак в data
max_week_no = data['week_no'].max() + 1
data['value'] = data['quantity'] / (max_week_no - data['week_no'])

Split dataset for train, eval, test

In [8]:
# т.к. стоим двухуровневую модель - разбиваем датасет на 3 части
# -- давние покупки -- | -- 6 недель -- | -- 3 недель -- 

VAL_MATCHER_WEEKS = 6
VAL_RANKER_WEEKS = 3

In [9]:
# берем данные для тренировки matching модели
data_train_matcher = data[data['week_no'] < data['week_no'].max() - (VAL_MATCHER_WEEKS + VAL_RANKER_WEEKS)]

# берем данные для валидации matching модели
data_val_matcher = data[(data['week_no'] >= data['week_no'].max() - (VAL_MATCHER_WEEKS + VAL_RANKER_WEEKS)) &
                      (data['week_no'] < data['week_no'].max() - (VAL_RANKER_WEEKS))]


# берем данные для тренировки ranking модели
data_train_ranker = data_val_matcher.copy()

# берем данные для теста ranking, matching модели
data_val_ranker = data[data['week_no'] >= data['week_no'].max() - VAL_RANKER_WEEKS]

In [10]:
# функция для визуализации информации о размерности датасета, количестве юзеров и товаров

def print_stats_data(df_data, name_df):
    print(name_df)
    print(f"Shape: {df_data.shape} Users: {df_data[USER_COL].nunique()} Items: {df_data[ITEM_COL].nunique()}")

In [11]:
print_stats_data(data_train_matcher,'train_matcher')
print_stats_data(data_val_matcher,'val_matcher')
print_stats_data(data_train_ranker,'train_ranker')
print_stats_data(data_val_ranker,'val_ranker')
print_stats_data(test_data, 'final_test')

train_matcher
Shape: (2108779, 13) Users: 2498 Items: 83685
val_matcher
Shape: (169711, 13) Users: 2154 Items: 27649
train_ranker
Shape: (169711, 13) Users: 2154 Items: 27649
val_ranker
Shape: (118314, 13) Users: 2042 Items: 24329
final_test
Shape: (88734, 12) Users: 1885 Items: 20497


Prefilter items

In [12]:
# Воспользуемся функцией prefilter_items, чтобы оставить только топ-3000 самых популярных товаров

n_items_before = data_train_matcher['item_id'].nunique()

data_train_matcher = prefilter_items(data_train_matcher, take_n_popular=3000)

n_items_after = data_train_matcher['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['price'] = data['sales_value'] / (np.maximum(data['quantity'], 1))


Decreased # items from 83685 to 3001


Make cold-start to warm-start

In [13]:
# В условиях задания сказано, что нужно использовать только "теплых юзеров", 
# проведем фильтрацию и уберем из всех датасетов "холодных" юзеров

# ищем общих пользователей
common_users = list(set(data_train_matcher.user_id.values)&set(data_val_matcher.user_id.values)&set(data_val_ranker.user_id.values)&set(test_data.user_id.values))

data_train_matcher = data_train_matcher[data_train_matcher.user_id.isin(common_users)]
data_val_matcher = data_val_matcher[data_val_matcher.user_id.isin(common_users)]
data_train_ranker = data_train_ranker[data_train_ranker.user_id.isin(common_users)]
data_val_ranker = data_val_ranker[data_val_ranker.user_id.isin(common_users)]
test_data = test_data[test_data.user_id.isin(common_users)]

print_stats_data(data_train_matcher,'train_matcher')
print_stats_data(data_val_matcher,'val_matcher')
print_stats_data(data_train_ranker,'train_ranker')
print_stats_data(data_val_ranker,'val_ranker')
print_stats_data(test_data,'final_test')

train_matcher
Shape: (742044, 14) Users: 1663 Items: 3001
val_matcher
Shape: (153229, 13) Users: 1663 Items: 26453
train_ranker
Shape: (153229, 13) Users: 1663 Items: 26453
val_ranker
Shape: (108983, 13) Users: 1663 Items: 23346
final_test
Shape: (83656, 12) Users: 1663 Items: 19981


In [14]:
# создадим экземпляр класса MainRecommender (данный класс делали в течение курса)
# в классе я поменял n_factors у als (сделал n_factors=50)
# также изменил значение гиперпараметра values при создании user_item_matrix
# user_item_matrix заполнена данными из data['value']

recommender = MainRecommender(data_train_matcher)

GPU training requires factor size to be a multiple of 32. Increasing factors from 20 to 32.


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/3001 [00:00<?, ?it/s]

In [15]:
# создадим функция для вычисления целевой метрики precision@5

def calc_precision(df_data, top_k):
    for col_name in df_data.columns[2:]:
        yield col_name, df_data.apply(lambda row: precision_at_k(row[col_name], row[ACTUAL_COL], k=top_k), axis=1).mean()

Вариант 1 get_als_recommendations 

Подготовка данных для трейна

In [16]:
# взяли пользователей из трейна для ранжирования
df_match_candidates = pd.DataFrame(data_train_ranker[USER_COL].unique())
df_match_candidates.columns = [USER_COL]

In [17]:
# собираем кандитатов с первого этапа (matcher)
# количество рекомендуемых товаров для последующего ранжирования = 30
N_PREDICT=30
df_match_candidates['candidates'] = df_match_candidates[USER_COL].apply(lambda x: recommender.get_als_recommendations(x, N=N_PREDICT))

In [18]:
# пример кандидатов als по 2-м юзерам
df_match_candidates.head(2)

Unnamed: 0,user_id,candidates
0,2070,"[899624, 1056509, 1046545, 12810391, 893018, 5..."
1,2021,"[950935, 819255, 1119454, 883932, 998556, 8996..."


In [19]:
# изменим формат представления данных по кандидатам
df_items = df_match_candidates.apply(lambda x: pd.Series(x['candidates']), axis=1).stack().reset_index(level=1, drop=True)
df_items.name = 'item_id'
df_match_candidates = df_match_candidates.drop('candidates', axis=1).join(df_items)
df_match_candidates.head()

Unnamed: 0,user_id,item_id
0,2070,899624
0,2070,1056509
0,2070,1046545
0,2070,12810391
0,2070,893018


Check warm start

In [20]:
# проверим, что кандидаты посчитаны только для "теплых" юзеров
print_stats_data(df_match_candidates, 'match_candidates')

match_candidates
Shape: (49890, 2) Users: 1663 Items: 2888


Подготавливаем фичи для обучения модели

In [21]:
# Заранее создам в трейне 2 новых признака (флага): наличие скидки в магазине / скидка по купону
data_train_ranker['retail_discount_flag'] = np.where(data_train_ranker.retail_disc != 0, 1, 0)
data_train_ranker['coupon_discount_flag'] = np.where(data_train_ranker.coupon_disc != 0, 1, 0)

In [22]:
df_train_ranker = data_train_ranker.copy()

df_train_ranker['target'] = 1  # тут только покупки 

df_train_ranker = df_match_candidates.merge(df_train_ranker, on=[USER_COL, ITEM_COL], how='left')

df_train_ranker['target'].fillna(0, inplace= True)

df_train_ranker.head(4)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,value,retail_discount_flag,coupon_discount_flag,target
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,0.0,0.0,0.2,1.0,0.0,1.0
1,2070,1056509,,,,,,,,,,,,,,0.0
2,2070,1046545,,,,,,,,,,,,,,0.0
3,2070,12810391,40941470000.0,619.0,1.0,17.59,311.0,-22.07,2015.0,89.0,0.0,0.0,0.142857,1.0,0.0,1.0


In [25]:
# присоединяем фичи юзеров и айтемов
df_train_ranker = df_train_ranker.merge(item_features, on='item_id', how='left')
df_train_ranker = df_train_ranker.merge(user_features, on='user_id', how='left')

df_train_ranker.head()

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,POTATOES,POTATOES RUSSET (BULK&BAG),10 LB,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
1,2070,1056509,,,,,,,,,...,MILK BY-PRODUCTS,COTTAGE CHEESE,24 OZ,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
2,2070,1046545,,,,,,,,,...,POTATOES,POTATOES RUSSET (BULK&BAG),10 LB,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
3,2070,12810391,40941470000.0,619.0,1.0,17.59,311.0,-22.07,2015.0,89.0,...,PORK,ENHANCED,,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
4,2070,893018,,,,,,,,,...,CHEESE,IWS SINGLE CHEESE,16OZ,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown


In [26]:
# Посчитаем общее количество покупок, общую сумму покупок и средний чек по каждому юзеру
user_sum_all_amounts = data_train_ranker.groupby('user_id')['sales_value'].sum().reset_index()
user_all_quantity = data_train_ranker.groupby('user_id')['quantity'].count().reset_index()
user_new_features = user_sum_all_amounts.merge(user_all_quantity, on=[USER_COL], how='left')
user_new_features['av_check'] = user_new_features['sales_value'] / user_new_features['quantity']
user_new_features.rename(columns={'sales_value': 'all_sales_sum', 'quantity': 'user_total_quantity'}, inplace=True)

# добавим новые признаки в обучающую выборку
df_train_ranker = df_train_ranker.merge(user_new_features, on='user_id', how='left')
df_train_ranker

df_train_ranker.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc,all_sales_sum,user_total_quantity,av_check
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown,617.29,204,3.025931
1,2070,1056509,,,,,,,,,...,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown,617.29,204,3.025931


In [27]:
# Посчитаем количество и сумму покупок в разрезе юзеров и commodities
sales_of_cat_per_user = df_train_ranker.groupby(['user_id', 'commodity_desc'])[['sales_value', 'quantity']].sum().reset_index()
sales_of_cat_per_user.rename(columns={'sales_value': 'user_sales_in_category', 'quantity': 'commodity_quantity' }, inplace=True)

# добавим новые признаки в обучающую выборку
df_train_ranker = df_train_ranker.merge(sales_of_cat_per_user, on=['user_id', 'commodity_desc'], how='left')
df_train_ranker.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc,all_sales_sum,user_total_quantity,av_check,user_sales_in_category,commodity_quantity
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,50-74K,Unknown,Unknown,1,None/Unknown,617.29,204,3.025931,2.69,1.0
1,2070,1056509,,,,,,,,,...,50-74K,Unknown,Unknown,1,None/Unknown,617.29,204,3.025931,0.0,0.0


In [28]:
# посчитаем для каждого юзера долю его покупок в каждом commodity, а также посчитаем среднее количество покупок .юзером каждого commodity в неделю
# добавим эти признаки в обучающую выборку
df_train_ranker['share_of_cat_per_user'] = df_train_ranker['user_sales_in_category'] / df_train_ranker['all_sales_sum']
df_train_ranker['commodity_purchases_per_week'] = df_train_ranker['commodity_quantity'] / VAL_MATCHER_WEEKS
df_train_ranker.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,hh_comp_desc,household_size_desc,kid_category_desc,all_sales_sum,user_total_quantity,av_check,user_sales_in_category,commodity_quantity,share_of_cat_per_user,commodity_purchases_per_week
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,Unknown,1,None/Unknown,617.29,204,3.025931,2.69,1.0,0.004358,0.166667
1,2070,1056509,,,,,,,,,...,Unknown,1,None/Unknown,617.29,204,3.025931,0.0,0.0,0.0,0.0


In [29]:
# Посчитаем общее количество покупок по скидкам
discount_purch_per_user = df_train_ranker.groupby(['user_id'])[['retail_discount_flag', 'coupon_discount_flag']].sum().reset_index()
discount_purch_per_user.rename(columns={'retail_discount_flag': 'user_retail_discount_flag', 'coupon_discount_flag': 'user_coupon_discount_flag' }, inplace=True)
discount_purch_per_user['discount_purchases_count'] = discount_purch_per_user['user_retail_discount_flag'] + discount_purch_per_user['user_coupon_discount_flag'] 


# добавим эти признаки в обучающую выборку
df_train_ranker = df_train_ranker.merge(discount_purch_per_user, on=['user_id',], how='left')
df_train_ranker.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,all_sales_sum,user_total_quantity,av_check,user_sales_in_category,commodity_quantity,share_of_cat_per_user,commodity_purchases_per_week,user_retail_discount_flag,user_coupon_discount_flag,discount_purchases_count
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,617.29,204,3.025931,2.69,1.0,0.004358,0.166667,2.0,0.0,2.0
1,2070,1056509,,,,,,,,,...,617.29,204,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0


In [30]:
# Добавим признак: доля покупок по скидкам
df_train_ranker['discount_purchases_share'] = df_train_ranker['discount_purchases_count'] / df_train_ranker['user_total_quantity']
df_train_ranker.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,user_total_quantity,av_check,user_sales_in_category,commodity_quantity,share_of_cat_per_user,commodity_purchases_per_week,user_retail_discount_flag,user_coupon_discount_flag,discount_purchases_count,discount_purchases_share
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,204,3.025931,2.69,1.0,0.004358,0.166667,2.0,0.0,2.0,0.009804
1,2070,1056509,,,,,,,,,...,204,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.009804


In [31]:
#Подготовим обучающую выборку (в т.ч. удалим некоторые признаки) и выделим таргет
X_train = df_train_ranker.drop(['target', 'basket_id', 'store_id', 'curr_size_of_product'], axis=1)
y_train = df_train_ranker[['target']]

In [32]:
# Выделим категориальные признаки
cat_feats = ['manufacturer',
 'department',
 'brand',
 'commodity_desc',
 'sub_commodity_desc',
 'age_desc',
 'marital_status_code',
 'income_desc',
 'homeowner_desc',
 'hh_comp_desc',
 'household_size_desc',
 'kid_category_desc']

X_train[cat_feats] = X_train[cat_feats].astype('category')

cat_feats

['manufacturer',
 'department',
 'brand',
 'commodity_desc',
 'sub_commodity_desc',
 'age_desc',
 'marital_status_code',
 'income_desc',
 'homeowner_desc',
 'hh_comp_desc',
 'household_size_desc',
 'kid_category_desc']

In [33]:
# Обучим модель
lgb = LGBMClassifier(objective='binary',
                     max_depth=8,
                     n_estimators=300,
                     learning_rate=0.05,
                     categorical_column=cat_feats)

lgb.fit(X_train, y_train)

train_preds = lgb.predict_proba(X_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [34]:
df_ranker_predict = df_train_ranker.copy()
df_ranker_predict['proba_item_purchase'] = train_preds[:,1]
df_ranker_predict.head(2)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,av_check,user_sales_in_category,commodity_quantity,share_of_cat_per_user,commodity_purchases_per_week,user_retail_discount_flag,user_coupon_discount_flag,discount_purchases_count,discount_purchases_share,proba_item_purchase
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,3.025931,2.69,1.0,0.004358,0.166667,2.0,0.0,2.0,0.009804,0.9999997
1,2070,1056509,,,,,,,,,...,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.009804,3.94837e-08


In [35]:
# Посмотрим предсказания по конкретному юзеру
df_ranker_predict.loc[df_ranker_predict['user_id']==2070].sort_values('proba_item_purchase', ascending=False)

Unnamed: 0,user_id,item_id,basket_id,day,quantity,sales_value,store_id,retail_disc,trans_time,week_no,...,av_check,user_sales_in_category,commodity_quantity,share_of_cat_per_user,commodity_purchases_per_week,user_retail_discount_flag,user_coupon_discount_flag,discount_purchases_count,discount_purchases_share,proba_item_purchase
0,2070,899624,41160130000.0,630.0,1.0,2.69,311.0,-0.3,14.0,91.0,...,3.025931,2.69,1.0,0.004358,0.166667,2.0,0.0,2.0,0.009804,0.9999997
3,2070,12810391,40941470000.0,619.0,1.0,17.59,311.0,-22.07,2015.0,89.0,...,3.025931,17.59,1.0,0.028496,0.166667,2.0,0.0,2.0,0.009804,0.9999997
11,2070,865456,40826480000.0,610.0,1.0,3.99,311.0,0.0,1300.0,88.0,...,3.025931,3.99,1.0,0.006464,0.166667,2.0,0.0,2.0,0.009804,0.9999997
29,2070,12695224,,,,,,,,,...,3.025931,3.99,1.0,0.006464,0.166667,2.0,0.0,2.0,0.009804,3.94837e-08
4,2070,893018,,,,,,,,,...,3.025931,3.99,1.0,0.006464,0.166667,2.0,0.0,2.0,0.009804,3.94837e-08
2,2070,1046545,,,,,,,,,...,3.025931,2.69,1.0,0.004358,0.166667,2.0,0.0,2.0,0.009804,3.94837e-08
25,2070,1053016,,,,,,,,,...,3.025931,3.99,1.0,0.006464,0.166667,2.0,0.0,2.0,0.009804,3.94837e-08
7,2070,1051323,,,,,,,,,...,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.009804,3.94837e-08
24,2070,1042942,,,,,,,,,...,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.009804,3.94837e-08
23,2070,1001702,,,,,,,,,...,3.025931,0.0,0.0,0.0,0.0,2.0,0.0,2.0,0.009804,3.94837e-08


In [36]:
ACTUAL_COL = 'actual'
TOPK_PRECISION = 5

# списки фактических покупок из тестового датасета для оценки качества модели курсового проекта
result_eval_ranker = test_data.groupby(USER_COL)[ITEM_COL].unique().reset_index()
result_eval_ranker.columns=[USER_COL, ACTUAL_COL]
result_eval_ranker.head(2)

Unnamed: 0,user_id,actual
0,1,"[880007, 883616, 931136, 938004, 940947, 94726..."
1,6,"[956902, 960791, 1037863, 1119051, 1137688, 84..."


In [37]:
%%time
result_eval_ranker['als_rec'] = result_eval_ranker[USER_COL].apply(lambda x: recommender.get_als_recommendations(x, N=N_PREDICT))

CPU times: user 10.3 s, sys: 8.37 s, total: 18.7 s
Wall time: 9.54 s


In [38]:
sorted(calc_precision(result_eval_ranker, TOPK_PRECISION), key=lambda x: x[1], reverse=True)

[('als_rec', 0.08370414912808212)]

In [39]:
# функция ранжирования с помощью обученной модели
def rerank(user_id):
    return df_ranker_predict[df_ranker_predict[USER_COL]==user_id].sort_values('proba_item_purchase', ascending=False).head(5).item_id.tolist()

In [40]:
result_eval_ranker['reranked_own_rec'] = result_eval_ranker[USER_COL].apply(lambda user_id: rerank(user_id))

In [41]:
result_eval_ranker['reranked_own_rec']

0             [856942, 856942, 9297615, 5577022, 9655212]
1           [1082185, 1024306, 1082185, 6548453, 6548453]
2        [9338009, 5592610, 13039088, 12810393, 10285022]
3       [10344725, 10282046, 12301109, 12172240, 12301...
4           [1056005, 1029743, 1106523, 9469110, 9677100]
                              ...                        
1658           [12810393, 916122, 916122, 899624, 999858]
1659       [1051323, 1081177, 1029743, 1079067, 12810391]
1660        [9487885, 8291322, 8019233, 7104690, 6513604]
1661         [5569327, 1060872, 904129, 1060872, 5568378]
1662      [12385477, 12386122, 9836353, 9707498, 9707240]
Name: reranked_own_rec, Length: 1663, dtype: object

In [42]:
print(*sorted(calc_precision(result_eval_ranker, TOPK_PRECISION), key=lambda x: x[1], reverse=True), sep='\n')

('reranked_own_rec', 0.13048707155742578)
('als_rec', 0.08370414912808212)
