### Основное

Дедлайн - 13 ноября 23:59
Целевая метрика precision@5
Бейзлайн решения - MainRecommender
Сдаем ссылку на github с решением. На github должен быть файл recommendations.csv (user_id | [rec_1, rec_2, ...] с рекомендациями. rec_i - реальные id item-ов (из retail_train.csv)
Hints:

#### Сначала просто попробуйте разные параметры MainRecommender:

N в топ-N товарах при формировании user-item матирцы (сейчас топ-5000)
Различные веса в user-item матрице (0/1, кол-во покупок, log(кол-во покупок + 1), сумма покупки, ...)
Разные взвешивания матрицы (TF-IDF, BM25 - у него есть параметры)
Разные смешивания рекомендаций (обратите внимание на бейзлайн - прошлые покупки юзера)
Сделайте MVP - минимально рабочий продукт - (пусть даже top-popular), а потом его улучшайте

Если вы делаете двухуровневую модель - следите за валидацией

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit import als

# Модель второго уровня
from lightgbm import LGBMClassifier

import os, sys
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

# Написанные нами функции
from src.metrics import precision_at_k, recall_at_k
from src.utils import prefilter_items
from src.recommenders import MainRecommender

In [2]:
data = pd.read_csv('data/retail_train.csv')
item_features = pd.read_csv('data/product.csv')
user_features = pd.read_csv('data/hh_demographic.csv')

# column processing
item_features.columns = [col.lower() for col in item_features.columns]
user_features.columns = [col.lower() for col in user_features.columns]

item_features.rename(columns={'product_id': 'item_id'}, inplace=True)
user_features.rename(columns={'household_key': 'user_id'}, inplace=True)


# Важна схема обучения и валидации!
# -- давние покупки -- | -- 6 недель -- | -- 3 недель -- 
# подобрать размер 2-ого датасета (6 недель) --> learning curve (зависимость метрики recall@k от размера датасета)
val_lvl_1_size_weeks = 6
val_lvl_2_size_weeks = 3

data_train_lvl_1 = data[data['week_no'] < data['week_no'].max() - (val_lvl_1_size_weeks + val_lvl_2_size_weeks)]
data_val_lvl_1 = data[(data['week_no'] >= data['week_no'].max() - (val_lvl_1_size_weeks + val_lvl_2_size_weeks)) &
                      (data['week_no'] < data['week_no'].max() - (val_lvl_2_size_weeks))]

data_train_lvl_2 = data_val_lvl_1.copy()  # Для наглядности. Далее мы добавим изменения, и они будут отличаться
data_val_lvl_2 = data[data['week_no'] >= data['week_no'].max() - val_lvl_2_size_weeks]

data_train_lvl_1.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [3]:
n_items_before = data_train_lvl_1['item_id'].nunique()

data_train_lvl_1 = prefilter_items(data_train_lvl_1, item_features=item_features, take_n_popular=4000) #5000

n_items_after = data_train_lvl_1['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

Decreased # items from 83685 to 4001


In [4]:
%%time
# parameters for grid search
params = {'factor': [25, 50, 75, 100, 125, 150, 175], #10 - слишком мало
          'l_reg': [0.001, 0.01, 0.05, 0.1],
          'itr': [10, 15, 25, 35]
         }
recommender = MainRecommender(data_train_lvl_1, params)



Step 1 of 112. Fitting model for factor=25, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.43 sec.
Get recommendation time: 18.08 sec.

Step 2 of 112. Fitting model for factor=25, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.08 sec.
Get recommendation time: 20.2 sec.

Step 3 of 112. Fitting model for factor=25, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.44 sec.
Get recommendation time: 18.18 sec.

Step 4 of 112. Fitting model for factor=25, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.68 sec.
Get recommendation time: 18.77 sec.

Step 5 of 112. Fitting model for factor=25, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.31 sec.
Get recommendation time: 17.83 sec.

Step 6 of 112. Fitting model for factor=25, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 1.96 sec.
Get recommendation time: 17.85 sec.

Step 7 of 112. Fitting model for factor=25, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.32 sec.
Get recommendation time: 17.85 sec.

Step 8 of 112. Fitting model for factor=25, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.63 sec.
Get recommendation time: 17.99 sec.

Step 9 of 112. Fitting model for factor=25, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.35 sec.
Get recommendation time: 17.89 sec.

Step 10 of 112. Fitting model for factor=25, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 1.96 sec.
Get recommendation time: 17.99 sec.

Step 11 of 112. Fitting model for factor=25, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.51 sec.
Get recommendation time: 18.21 sec.

Step 12 of 112. Fitting model for factor=25, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.63 sec.
Get recommendation time: 17.85 sec.

Step 13 of 112. Fitting model for factor=25, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.36 sec.
Get recommendation time: 17.85 sec.

Step 14 of 112. Fitting model for factor=25, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 1.94 sec.
Get recommendation time: 17.94 sec.

Step 15 of 112. Fitting model for factor=25, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.32 sec.
Get recommendation time: 18.31 sec.

Step 16 of 112. Fitting model for factor=25, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.49 sec.
Get recommendation time: 17.84 sec.

Step 17 of 112. Fitting model for factor=50, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.43 sec.
Get recommendation time: 19.9 sec.

Step 18 of 112. Fitting model for factor=50, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.15 sec.
Get recommendation time: 20.19 sec.

Step 19 of 112. Fitting model for factor=50, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.51 sec.
Get recommendation time: 19.96 sec.

Step 20 of 112. Fitting model for factor=50, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.02 sec.
Get recommendation time: 19.92 sec.

Step 21 of 112. Fitting model for factor=50, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.57 sec.
Get recommendation time: 20.0 sec.

Step 22 of 112. Fitting model for factor=50, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.12 sec.
Get recommendation time: 20.35 sec.

Step 23 of 112. Fitting model for factor=50, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.79 sec.
Get recommendation time: 21.19 sec.

Step 24 of 112. Fitting model for factor=50, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.29 sec.
Get recommendation time: 22.19 sec.

Step 25 of 112. Fitting model for factor=50, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.51 sec.
Get recommendation time: 20.04 sec.

Step 26 of 112. Fitting model for factor=50, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.12 sec.
Get recommendation time: 19.98 sec.

Step 27 of 112. Fitting model for factor=50, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.45 sec.
Get recommendation time: 20.0 sec.

Step 28 of 112. Fitting model for factor=50, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.88 sec.
Get recommendation time: 20.04 sec.

Step 29 of 112. Fitting model for factor=50, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.42 sec.
Get recommendation time: 19.9 sec.

Step 30 of 112. Fitting model for factor=50, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.1 sec.
Get recommendation time: 19.9 sec.

Step 31 of 112. Fitting model for factor=50, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.41 sec.
Get recommendation time: 20.11 sec.

Step 32 of 112. Fitting model for factor=50, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 4.87 sec.
Get recommendation time: 19.86 sec.

Step 33 of 112. Fitting model for factor=75, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.59 sec.
Get recommendation time: 23.73 sec.

Step 34 of 112. Fitting model for factor=75, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.38 sec.
Get recommendation time: 23.86 sec.

Step 35 of 112. Fitting model for factor=75, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.99 sec.
Get recommendation time: 24.02 sec.

Step 36 of 112. Fitting model for factor=75, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.64 sec.
Get recommendation time: 23.98 sec.

Step 37 of 112. Fitting model for factor=75, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.57 sec.
Get recommendation time: 23.96 sec.

Step 38 of 112. Fitting model for factor=75, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.34 sec.
Get recommendation time: 24.0 sec.

Step 39 of 112. Fitting model for factor=75, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.01 sec.
Get recommendation time: 23.77 sec.

Step 40 of 112. Fitting model for factor=75, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.76 sec.
Get recommendation time: 23.95 sec.

Step 41 of 112. Fitting model for factor=75, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.59 sec.
Get recommendation time: 24.04 sec.

Step 42 of 112. Fitting model for factor=75, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.38 sec.
Get recommendation time: 23.92 sec.

Step 43 of 112. Fitting model for factor=75, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 3.84 sec.
Get recommendation time: 24.0 sec.

Step 44 of 112. Fitting model for factor=75, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.55 sec.
Get recommendation time: 23.94 sec.

Step 45 of 112. Fitting model for factor=75, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.6 sec.
Get recommendation time: 24.05 sec.

Step 46 of 112. Fitting model for factor=75, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.4 sec.
Get recommendation time: 23.83 sec.

Step 47 of 112. Fitting model for factor=75, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.1 sec.
Get recommendation time: 24.1 sec.

Step 48 of 112. Fitting model for factor=75, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.39 sec.
Get recommendation time: 23.92 sec.

Step 49 of 112. Fitting model for factor=100, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.89 sec.
Get recommendation time: 28.41 sec.

Step 50 of 112. Fitting model for factor=100, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.65 sec.
Get recommendation time: 28.31 sec.

Step 51 of 112. Fitting model for factor=100, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.28 sec.
Get recommendation time: 28.32 sec.

Step 52 of 112. Fitting model for factor=100, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.03 sec.
Get recommendation time: 28.43 sec.

Step 53 of 112. Fitting model for factor=100, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.73 sec.
Get recommendation time: 28.26 sec.

Step 54 of 112. Fitting model for factor=100, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.55 sec.
Get recommendation time: 28.24 sec.

Step 55 of 112. Fitting model for factor=100, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.29 sec.
Get recommendation time: 28.32 sec.

Step 56 of 112. Fitting model for factor=100, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 5.99 sec.
Get recommendation time: 28.3 sec.

Step 57 of 112. Fitting model for factor=100, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.9 sec.
Get recommendation time: 28.35 sec.

Step 58 of 112. Fitting model for factor=100, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.59 sec.
Get recommendation time: 28.61 sec.

Step 59 of 112. Fitting model for factor=100, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.34 sec.
Get recommendation time: 28.2 sec.

Step 60 of 112. Fitting model for factor=100, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.22 sec.
Get recommendation time: 28.85 sec.

Step 61 of 112. Fitting model for factor=100, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 1.77 sec.
Get recommendation time: 28.23 sec.

Step 62 of 112. Fitting model for factor=100, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.63 sec.
Get recommendation time: 28.31 sec.

Step 63 of 112. Fitting model for factor=100, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.28 sec.
Get recommendation time: 28.64 sec.

Step 64 of 112. Fitting model for factor=100, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.0 sec.
Get recommendation time: 28.36 sec.

Step 65 of 112. Fitting model for factor=125, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.02 sec.
Get recommendation time: 33.09 sec.

Step 66 of 112. Fitting model for factor=125, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.91 sec.
Get recommendation time: 33.94 sec.

Step 67 of 112. Fitting model for factor=125, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 5.01 sec.
Get recommendation time: 33.31 sec.

Step 68 of 112. Fitting model for factor=125, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.81 sec.
Get recommendation time: 32.95 sec.

Step 69 of 112. Fitting model for factor=125, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.03 sec.
Get recommendation time: 32.94 sec.

Step 70 of 112. Fitting model for factor=125, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.0 sec.
Get recommendation time: 33.09 sec.

Step 71 of 112. Fitting model for factor=125, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 5.05 sec.
Get recommendation time: 33.18 sec.

Step 72 of 112. Fitting model for factor=125, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 7.05 sec.
Get recommendation time: 33.16 sec.

Step 73 of 112. Fitting model for factor=125, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.02 sec.
Get recommendation time: 33.2 sec.

Step 74 of 112. Fitting model for factor=125, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.97 sec.
Get recommendation time: 33.35 sec.

Step 75 of 112. Fitting model for factor=125, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.87 sec.
Get recommendation time: 33.38 sec.

Step 76 of 112. Fitting model for factor=125, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.81 sec.
Get recommendation time: 33.2 sec.

Step 77 of 112. Fitting model for factor=125, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.03 sec.
Get recommendation time: 33.73 sec.

Step 78 of 112. Fitting model for factor=125, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 2.87 sec.
Get recommendation time: 33.87 sec.

Step 79 of 112. Fitting model for factor=125, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 4.83 sec.
Get recommendation time: 33.91 sec.

Step 80 of 112. Fitting model for factor=125, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 6.83 sec.
Get recommendation time: 33.3 sec.

Step 81 of 112. Fitting model for factor=150, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.17 sec.
Get recommendation time: 41.09 sec.

Step 82 of 112. Fitting model for factor=150, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.24 sec.
Get recommendation time: 40.5 sec.

Step 83 of 112. Fitting model for factor=150, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 5.41 sec.
Get recommendation time: 40.57 sec.

Step 84 of 112. Fitting model for factor=150, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 7.58 sec.
Get recommendation time: 39.86 sec.

Step 85 of 112. Fitting model for factor=150, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.28 sec.
Get recommendation time: 39.98 sec.

Step 86 of 112. Fitting model for factor=150, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.31 sec.
Get recommendation time: 40.33 sec.

Step 87 of 112. Fitting model for factor=150, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 5.75 sec.
Get recommendation time: 40.47 sec.

Step 88 of 112. Fitting model for factor=150, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 7.84 sec.
Get recommendation time: 40.67 sec.

Step 89 of 112. Fitting model for factor=150, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.21 sec.
Get recommendation time: 40.18 sec.

Step 90 of 112. Fitting model for factor=150, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.34 sec.
Get recommendation time: 39.96 sec.

Step 91 of 112. Fitting model for factor=150, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 5.42 sec.
Get recommendation time: 40.45 sec.

Step 92 of 112. Fitting model for factor=150, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 7.56 sec.
Get recommendation time: 40.34 sec.

Step 93 of 112. Fitting model for factor=150, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.22 sec.
Get recommendation time: 39.77 sec.

Step 94 of 112. Fitting model for factor=150, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.33 sec.
Get recommendation time: 40.44 sec.

Step 95 of 112. Fitting model for factor=150, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 6.48 sec.
Get recommendation time: 40.97 sec.

Step 96 of 112. Fitting model for factor=150, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 7.82 sec.
Get recommendation time: 40.34 sec.

Step 97 of 112. Fitting model for factor=175, l_reg=0.001, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.44 sec.
Get recommendation time: 47.07 sec.

Step 98 of 112. Fitting model for factor=175, l_reg=0.001, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.8 sec.
Get recommendation time: 46.68 sec.

Step 99 of 112. Fitting model for factor=175, l_reg=0.001, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 6.28 sec.
Get recommendation time: 46.52 sec.

Step 100 of 112. Fitting model for factor=175, l_reg=0.001, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 8.81 sec.
Get recommendation time: 46.37 sec.

Step 101 of 112. Fitting model for factor=175, l_reg=0.01, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.51 sec.
Get recommendation time: 47.19 sec.

Step 102 of 112. Fitting model for factor=175, l_reg=0.01, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 4.39 sec.
Get recommendation time: 46.6 sec.

Step 103 of 112. Fitting model for factor=175, l_reg=0.01, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 6.25 sec.
Get recommendation time: 45.32 sec.

Step 104 of 112. Fitting model for factor=175, l_reg=0.01, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 8.77 sec.
Get recommendation time: 46.09 sec.

Step 105 of 112. Fitting model for factor=175, l_reg=0.05, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.55 sec.
Get recommendation time: 46.01 sec.

Step 106 of 112. Fitting model for factor=175, l_reg=0.05, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.76 sec.
Get recommendation time: 47.32 sec.

Step 107 of 112. Fitting model for factor=175, l_reg=0.05, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 6.18 sec.
Get recommendation time: 45.69 sec.

Step 108 of 112. Fitting model for factor=175, l_reg=0.05, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 8.72 sec.
Get recommendation time: 45.07 sec.

Step 109 of 112. Fitting model for factor=175, l_reg=0.1, iterations=10:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10.0), HTML(value='')))


Fit time: 2.5 sec.
Get recommendation time: 46.09 sec.

Step 110 of 112. Fitting model for factor=175, l_reg=0.1, iterations=15:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Fit time: 3.9 sec.
Get recommendation time: 47.42 sec.

Step 111 of 112. Fitting model for factor=175, l_reg=0.1, iterations=25:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=25.0), HTML(value='')))


Fit time: 6.15 sec.
Get recommendation time: 46.1 sec.

Step 112 of 112. Fitting model for factor=175, l_reg=0.1, iterations=35:

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))


Fit time: 8.66 sec.
Get recommendation time: 45.56 sec.

                     params  result_train  result_test  fit_time  \
109   als_f-175_lr-0.1_i-15      0.923126     0.169929      3.90   
110   als_f-175_lr-0.1_i-25      0.922084     0.173496      6.15   
107  als_f-175_lr-0.05_i-35      0.921924     0.172069      8.72   
106  als_f-175_lr-0.05_i-25      0.921764     0.175229      6.18   
111   als_f-175_lr-0.1_i-35      0.921363     0.173089      8.66   
..                      ...           ...          ...       ...   
14     als_f-25_lr-0.1_i-25      0.473347     0.106830      3.32   
1    als_f-25_lr-0.001_i-15      0.472946     0.106116      2.08   
3    als_f-25_lr-0.001_i-35      0.471824     0.103772      4.68   
6     als_f-25_lr-0.01_i-25      0.471663     0.106524      3.32   
2    als_f-25_lr-0.001_i-25      0.470782     0.103874      3.44   

     get_rec_time                                           cv_model  \
109         47.42  <implicit.cpu.als.AlternatingLeast

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4001.0), HTML(value='')))


CPU times: user 4h 10min 42s, sys: 2min 33s, total: 4h 13min 16s
Wall time: 1h 3min 50s


In [9]:
result_lvl_1 = data_val_lvl_1.groupby('user_id')['item_id'].unique().reset_index()
result_lvl_1.columns=['user_id', 'actual']
result_lvl_1.head(2)

Unnamed: 0,user_id,actual
0,1,"[853529, 865456, 867607, 872137, 874905, 87524..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870..."


In [10]:
%%time
_als_recs = []
_own_recs = []
_sim_itm_recs = []
_sim_usr_recs = []
d_recs = dict()
for uid in result_lvl_1['user_id']:
    try:
        _als_recs.append([uid, recommender.get_als_recommendations(uid, N=5)])
    except IndexError:
        _als_recs.append([uid, []])
    try:
        _own_recs.append([uid, recommender.get_own_recommendations(uid, N=5)])
    except (IndexError, ValueError):
        _own_recs.append([uid, []])
    try:
        _sim_itm_recs.append([uid, recommender.get_similar_items_recommendation(uid, N=5)])
    except IndexError:
        _sim_itm_recs.append([uid, []])
    try:
        _sim_usr_recs.append([uid, recommender.get_similar_users_recommendation(uid, N=5)])
    except (IndexError, ValueError):
        _sim_usr_recs.append([uid, []])
d_recs['als'] = _als_recs
d_recs['own'] = _own_recs
d_recs['sim_itm'] = _sim_itm_recs
d_recs['sim_usr'] = _sim_usr_recs
als_recs = pd.DataFrame(d_recs['als'], columns=['uid', 'als'])
own_recs = pd.DataFrame(d_recs['own'], columns=['uid', 'own'])
sim_itm_recs = pd.DataFrame(d_recs['sim_itm'], columns=['uid', 'sim_itm'])
sim_usr_recs = pd.DataFrame(d_recs['sim_usr'], columns=['uid', 'sim_usr'])

CPU times: user 2min 35s, sys: 1.41 s, total: 2min 36s
Wall time: 39 s


In [11]:
result_lvl_1_als = pd.merge(result_lvl_1, als_recs, left_on='user_id', right_on='uid', how='left')
result_lvl_1_als.drop(['uid'], axis=1, inplace=True) 

result_lvl_1_own = pd.merge(result_lvl_1, own_recs, left_on='user_id', right_on='uid', how='left')
result_lvl_1_own.drop(['uid'], axis=1, inplace=True) 

result_lvl_1_sim_itm = pd.merge(result_lvl_1, sim_itm_recs, left_on='user_id', right_on='uid', how='left')
result_lvl_1_sim_itm.drop(['uid'], axis=1, inplace=True) 

result_lvl_1_sim_usr = pd.merge(result_lvl_1, sim_usr_recs, left_on='user_id', right_on='uid', how='left')
result_lvl_1_sim_usr.drop(['uid'], axis=1, inplace=True) 

In [12]:
print(f"recall@k-als: {result_lvl_1_als.apply(lambda row: recall_at_k(row['als'], row['actual']), axis=1).mean()}")
print(f"precision@k-als: {result_lvl_1_als.apply(lambda row: precision_at_k(row['als'], row['actual']), axis=1).mean()}")
print(f"recall@k-own: {result_lvl_1_own.apply(lambda row: recall_at_k(row['own'], row['actual']), axis=1).mean()}")
print(f"recall@k-sim-itm: {result_lvl_1_sim_itm.apply(lambda row: recall_at_k(row['sim_itm'], row['actual']), axis=1).mean()}")
print(f"recall@k-sim-usr: {result_lvl_1_sim_usr.apply(lambda row: recall_at_k(row['sim_usr'], row['actual']), axis=1).mean()}")

recall@k-als: 0.012500804161864863
precision@k-als: 0.11399349139934825
recall@k-own: 0.018946149142845963


  precision = flags.sum() / recommended_list.size


recall@k-sim-itm: 0.0071301557655890045
recall@k-sim-usr: 0.000637689606739862


In [13]:
result_lvl_1_als['recall@k-als'] = result_lvl_1_als.apply(lambda row: recall_at_k(row['als'], row['actual']), axis=1)
result_lvl_1_als.sort_values('recall@k-als', ascending=False, inplace=True)
#result_lvl_1_als_top = result_lvl_1_als.head(50)

result_lvl_1_als['precision@k-als'] = result_lvl_1_als.apply(lambda row: precision_at_k(row['als'], row['actual']), axis=1)
#result_lvl_1_als.sort_values('precision@k-als', ascending=False, inplace=True)
result_lvl_1_als_top = result_lvl_1_als.head(50)

result_lvl_1_own['recall@k-own'] = result_lvl_1_own.apply(lambda row: recall_at_k(row['own'], row['actual']), axis=1)
result_lvl_1_own.sort_values('recall@k-own', ascending=False, inplace=True)
result_lvl_1_own_top = result_lvl_1_own.head(50)

result_lvl_1_sim_itm['recall@k-sim-itm'] = result_lvl_1_sim_itm.apply(lambda row: recall_at_k(row['sim_itm'], row['actual']), axis=1)
result_lvl_1_sim_itm.sort_values('recall@k-sim-itm', ascending=False, inplace=True)
result_lvl_1_sim_itm_top = result_lvl_1_sim_itm.head(50)

result_lvl_1_sim_usr['recall@k-sim-usr'] = result_lvl_1_sim_usr.apply(lambda row: recall_at_k(row['sim_usr'], row['actual']), axis=1)
result_lvl_1_sim_usr.sort_values('recall@k-sim-usr', ascending=False, inplace=True)
result_lvl_1_sim_usr_top = result_lvl_1_sim_usr.head(50)

In [17]:
result_lvl_1_als_top.head()

Unnamed: 0,user_id,actual,als,recall@k-als,precision@k-als
501,589,[5569230],"[5569230, 1102845, 832442, 1072693, 1021324]",1.0,0.2
2095,2433,"[1082185, 9268695]","[1082185, 965267, 1026118, 919681, 831557]",0.5,0.2
1074,1251,"[879948, 901067]","[901067, 5569172, 10285022, 1048373, 880888]",0.5,0.2
2090,2428,"[944486, 6533681]","[996444, 6533681, 924531, 1053329, 1020604]",0.5,0.2
1270,1477,"[961979, 1029743]","[9926758, 961979, 1125278, 986912, 999250]",0.5,0.2


In [15]:
print(f"recall@k-als-top: {result_lvl_1_als.head(50).apply(lambda row: recall_at_k(row['als'], row['actual']), axis=1).mean()}")
print(f"recall@k-own-top: {result_lvl_1_own.head(50).apply(lambda row: recall_at_k(row['own'], row['actual']), axis=1).mean()}")
print(f"recall@k-sim-itm-top: {result_lvl_1_sim_itm.head(50).apply(lambda row: recall_at_k(row['sim_itm'], row['actual']), axis=1).mean()}")
print(f"recall@k-sim-usr-top: {result_lvl_1_sim_usr.head(50).apply(lambda row: recall_at_k(row['sim_usr'], row['actual']), axis=1).mean()}")

recall@k-als-top: 0.19188297063008902
recall@k-own-top: 0.2627805661771177
recall@k-sim-itm-top: 0.14559079685996795
recall@k-sim-usr-top: 0.02747166825835325


In [16]:
recs_output = result_lvl_1_als.copy()
recs_output.drop(['actual', 'recall@k-als', 'precision@k-als'], axis=1, inplace=True)
recs_output.sort_index(inplace=True)
recs_output.rename(columns={'als': 'recommendations'}, inplace=True)
recs_output.to_csv('recommendations_als.csv')

### Задание 2.

Обучите модель 2-ого уровня, при этом:<br>
    - Добавьте минимум по 2 фичи для юзера, товара и пары юзер-товар<br>
    - Измерьте отдельно precision@5 модели 1-ого уровня и двухуровневой модели на data_val_lvl_2<br>
    - Вырос ли precision@5 при использовании двухуровневой модели?

In [18]:
users_lvl_2 = pd.DataFrame(data_train_lvl_2['user_id'].unique())
users_lvl_2.columns = ['user_id']

# Пока только warm start
train_users = result_lvl_1_als_top['user_id'].unique()
users_lvl_2 = users_lvl_2[users_lvl_2['user_id'].isin(train_users)]

users_lvl_2['candidates'] = users_lvl_2['user_id'].apply(lambda x: recommender.get_own_recommendations(x, N=50))

In [19]:
users_lvl_2.head(2)

Unnamed: 0,user_id,candidates
159,930,"[917033, 1016800, 1050741, 854716, 851188, 801..."
215,1449,"[918046, 6463710, 9365106, 1004385, 861246, 74..."


In [20]:
s = users_lvl_2.apply(lambda x: pd.Series(x['candidates']), axis=1).stack().reset_index(level=1, drop=True)
s.name = 'item_id'

users_lvl_2 = users_lvl_2.drop('candidates', axis=1).join(s)
users_lvl_2['flag'] = 1

users_lvl_2.head(4)

Unnamed: 0,user_id,item_id,flag
159,930,917033,1
159,930,1016800,1
159,930,1050741,1
159,930,854716,1


In [21]:
users_lvl_2.shape[0]

2500

In [22]:
users_lvl_2['user_id'].nunique()

50

In [23]:
targets_lvl_2 = data_train_lvl_2[['user_id', 'item_id']].copy()
targets_lvl_2['target'] = 1  # тут только покупки 

targets_lvl_2 = users_lvl_2.merge(targets_lvl_2, on=['user_id', 'item_id'], how='left')

targets_lvl_2['target'].fillna(0, inplace= True)
targets_lvl_2.drop('flag', axis=1, inplace=True)

In [24]:
targets_lvl_2.head(2)

Unnamed: 0,user_id,item_id,target
0,930,917033,0.0
1,930,1016800,0.0


In [25]:
targets_lvl_2['target'].mean()

0.06551588858375834

In [26]:
item_features.head(2)

Unnamed: 0,item_id,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,


In [27]:
user_features.head(2)

Unnamed: 0,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc,user_id
0,65+,A,35-49K,Homeowner,2 Adults No Kids,2,None/Unknown,1
1,45-54,A,50-74K,Homeowner,2 Adults No Kids,2,None/Unknown,7


In [28]:
targets_lvl_2 = targets_lvl_2.merge(item_features, on='item_id', how='left')
targets_lvl_2 = targets_lvl_2.merge(user_features, on='user_id', how='left')

targets_lvl_2.head()

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc
0,930,917033,0.0,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ,,,,,,,
1,930,1016800,0.0,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 12/18&15PK CAN CAR,12 OZ,,,,,,,
2,930,1050741,1.0,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 20PK&24PK CAN CARB,12 OZ,,,,,,,
3,930,1050741,1.0,103,GROCERY,National,SOFT DRINKS,SOFT DRINKS 20PK&24PK CAN CARB,12 OZ,,,,,,,
4,930,854716,0.0,2,GROCERY,National,SOFT DRINKS,SOFT DRINKS 20PK&24PK CAN CARB,12 OZ,,,,,,,


In [29]:
X_train = targets_lvl_2.drop('target', axis=1)
y_train = targets_lvl_2[['target']]

In [30]:
cat_feats = X_train.columns[2:].tolist()
X_train[cat_feats] = X_train[cat_feats].astype('category')

cat_feats

['manufacturer',
 'department',
 'brand',
 'commodity_desc',
 'sub_commodity_desc',
 'curr_size_of_product',
 'age_desc',
 'marital_status_code',
 'income_desc',
 'homeowner_desc',
 'hh_comp_desc',
 'household_size_desc',
 'kid_category_desc']

In [2]:
lgb = LGBMClassifier(objective='binary', max_depth=7, categorical_column=cat_feats)
lgb.fit(X_train, y_train)

train_preds = lgb.predict(X_train)

NameError: name 'LGBMClassifier' is not defined

In [1]:
X_train

NameError: name 'X_train' is not defined

In [None]:
len(train_preds)

In [None]:
result_lvl_2 = data_val_lvl_2.groupby('user_id')['item_id'].unique().reset_index()
result_lvl_2.columns=['user_id', 'actual']
result_lvl_2.head(2)

In [None]:
%%time
_own_recs_2 = []
d_recs_2 = dict()
for uid in result_lvl_2['user_id']:
    try:
        _own_recs_2.append([uid, recommender.get_own_recommendations(uid, N=5)])
    except (IndexError, ValueError):
        _own_recs_2.append([uid, []])
d_recs_2['own2'] = _own_recs_2
own_recs_2 = pd.DataFrame(d_recs_2['own2'], columns=['uid', 'own2'])

In [None]:
result_lvl_2_own = pd.merge(result_lvl_2, own_recs_2, left_on='user_id', right_on='uid', how='left')
result_lvl_2_own.drop(['uid'], axis=1, inplace=True)
#result_lvl_2_own.replace(np.NaN, '[]', inplace=True)

In [None]:
result_lvl_2_own

In [None]:
result_lvl_1_own.apply(lambda row: precision_at_k(row['own'], row['actual']), axis=1).mean()

In [None]:
result_lvl_1_own.head(50).apply(lambda row: precision_at_k(row['own'], row['actual']), axis=1).mean()

In [None]:
result_lvl_2_own.apply(lambda row: precision_at_k(row['own2'], row['actual']), axis=1).mean()

In [None]:
X_tr_p = X_train.copy()
X_tr_p['preds'] = pd.Series(train_preds, index=X_tr_p.index)
X_tr_p = X_tr_p[X_tr_p.preds != 0]
lv2 = X_tr_p.groupby('user_id')['item_id'].apply(list).reset_index()
lv2

In [None]:
result_lvl_2_preds = pd.merge(result_lvl_2, lv2, left_on='user_id', right_on='user_id', how='left')
result_lvl_2_preds.replace(np.NaN, '[]', inplace=True)
result_lvl_2_preds.head()

In [None]:
result_lvl_2_preds.apply(lambda row: precision_at_k(row['item_id'], row['actual']), axis=1).mean()

In [None]:
targets_val_lvl_2 = data_val_lvl_2[['user_id', 'item_id']].copy()
targets_val_lvl_2['target'] = 1  # тут только покупки 

targets_val_lvl_2 = users_lvl_2.merge(targets_val_lvl_2, on=['user_id', 'item_id'], how='left')

targets_val_lvl_2['target'].fillna(0, inplace= True)
targets_val_lvl_2.drop('flag', axis=1, inplace=True)

In [None]:
targets_val_lvl_2 = targets_val_lvl_2.merge(item_features, on='item_id', how='left')
targets_val_lvl_2 = targets_val_lvl_2.merge(user_features, on='user_id', how='left')

In [None]:
X_val = targets_val_lvl_2.drop('target', axis=1)
y_val = targets_val_lvl_2[['target']]

In [None]:
v_cat_feats = X_val.columns[2:].tolist()
X_val[v_cat_feats] = X_val[v_cat_feats].astype('category')

In [None]:
val_preds = lgb.predict(X_val)

In [None]:
X_val_p = X_val.copy()
X_val_p['preds'] = pd.Series(val_preds, index=X_val_p.index)
X_val_p = X_val_p[X_val_p.preds != 0]
lv2_v = X_val_p.groupby('user_id')['item_id'].apply(list).reset_index()
lv2_v

In [None]:
result_val_lvl_2_preds = pd.merge(result_lvl_2, lv2_v, left_on='user_id', right_on='user_id', how='left')
result_val_lvl_2_preds.replace(np.NaN, '[]', inplace=True)
result_val_lvl_2_preds.head()

In [None]:
result_val_lvl_2_preds.apply(lambda row: precision_at_k(row['item_id'], row['actual']), axis=1).mean()