## Подготовка к ДЗ_06

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit import als

# Модель второго уровня
from lightgbm import LGBMClassifier

import os, sys
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

# Написанные нами функции
from metrics import precision_at_k, recall_at_k
from utils import prefilter_items
from recommenders import MainRecommender

from tqdm.notebook import tqdm

### Read data

In [2]:
data = pd.read_csv('./data/retail_train.csv')
item_features = pd.read_csv('./data/product.csv')
user_features = pd.read_csv('./data/hh_demographic.csv')

### Process features dataset

In [3]:
ITEM_COL = 'item_id'
USER_COL = 'user_id'

# column processing
item_features.columns = [col.lower() for col in item_features.columns]
user_features.columns = [col.lower() for col in user_features.columns]

item_features.rename(columns={'product_id': ITEM_COL}, inplace=True)
user_features.rename(columns={'household_key': USER_COL }, inplace=True)

### Split dataset for train, eval, test

In [4]:
# Важна схема обучения и валидации!
# -- давние покупки -- | -- 6 недель -- | -- 3 недель -- 
# подобрать размер 2-ого датасета (6 недель) --> learning curve (зависимость метрики recall@k от размера датасета)

VAL_MATCHER_WEEKS = 6
VAL_RANKER_WEEKS = 3

# берем данные для тренировки matching модели
data_train_matcher = data[data['week_no'] < data['week_no'].max() - (VAL_MATCHER_WEEKS + VAL_RANKER_WEEKS)]

# берем данные для валидации matching модели
data_val_matcher = data[(data['week_no'] >= data['week_no'].max() - (VAL_MATCHER_WEEKS + VAL_RANKER_WEEKS)) &
                      (data['week_no'] < data['week_no'].max() - (VAL_RANKER_WEEKS))]


# берем данные для тренировки ranking модели
data_train_ranker = data_val_matcher.copy()  # Для наглядности. Далее мы добавим изменения, и они будут отличаться

# берем данные для теста ranking, matching модели
data_val_ranker = data[data['week_no'] >= data['week_no'].max() - VAL_RANKER_WEEKS]

In [5]:
def print_stats_data(df_data, name_df):
    print(name_df)
    print(f"Shape: {df_data.shape} Users: {df_data[USER_COL].nunique()} Items: {df_data[ITEM_COL].nunique()}")

In [6]:
print_stats_data(data_train_matcher,'train_matcher')
print_stats_data(data_val_matcher,'val_matcher')
print_stats_data(data_train_ranker,'train_ranker')
print_stats_data(data_val_ranker,'val_ranker')
# выведем разброс по пользователям и товарам

train_matcher
Shape: (2108779, 12) Users: 2498 Items: 83685
val_matcher
Shape: (169711, 12) Users: 2154 Items: 27649
train_ranker
Shape: (169711, 12) Users: 2154 Items: 27649
val_ranker
Shape: (118314, 12) Users: 2042 Items: 24329


In [7]:
data_train_matcher.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [8]:
data_val_matcher.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
2104867,2070,40618492260,594,1019940,1,1.0,311,-0.29,40,86,0.0,0.0
2107468,2021,40618753059,594,840361,1,0.99,443,0.0,101,86,0.0,0.0


In [9]:
data_train_ranker.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
2104867,2070,40618492260,594,1019940,1,1.0,311,-0.29,40,86,0.0,0.0
2107468,2021,40618753059,594,840361,1,0.99,443,0.0,101,86,0.0,0.0


In [10]:
data_val_ranker.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
2277416,338,41260573635,636,840173,1,1.99,369,0.0,112,92,0.0,0.0
2277417,338,41260573635,636,1037348,1,0.89,369,-0.3,112,92,0.0,0.0


### Prefilter items

In [11]:
n_items_before = data_train_matcher['item_id'].nunique()

data_train_matcher = prefilter_items(data_train_matcher, item_features=item_features, take_n_popular=5000)

n_items_after = data_train_matcher['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

Decreased # items from 83685 to 5001


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['price'] = data['sales_value'] / (np.maximum(data['quantity'], 1))


### Make cold-start to warm-start

In [12]:
# ищем общих пользователей
common_users = data_train_matcher.user_id.values

data_val_matcher = data_val_matcher[data_val_matcher.user_id.isin(common_users)]
data_train_ranker = data_train_ranker[data_train_ranker.user_id.isin(common_users)]
data_val_ranker = data_val_ranker[data_val_ranker.user_id.isin(common_users)]

print_stats_data(data_train_matcher,'train_matcher')
print_stats_data(data_val_matcher,'val_matcher')
print_stats_data(data_train_ranker,'train_ranker')
print_stats_data(data_val_ranker,'val_ranker')

train_matcher
Shape: (861404, 13) Users: 2495 Items: 5001
val_matcher
Shape: (169615, 12) Users: 2151 Items: 27644
train_ranker
Shape: (169615, 12) Users: 2151 Items: 27644
val_ranker
Shape: (118282, 12) Users: 2040 Items: 24325


### Init/train recommender

In [13]:
recommender = MainRecommender(data_train_matcher)



  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/5001 [00:00<?, ?it/s]

# Задание 1

A) Попробуйте различные варианты генерации кандидатов. Какие из них дают наибольший recall@k ?
- Пока пробуем отобрать 50 кандидатов (k=50)
- Качество измеряем на data_val_matcher: следующие 6 недель после трейна

Дают ли own recommendtions + top-popular лучший recall?  

In [14]:
ACTUAL_COL = 'actual'

In [15]:
result_eval_matcher = data_val_matcher.groupby(USER_COL)[ITEM_COL].unique().reset_index()
result_eval_matcher.columns=[USER_COL, ACTUAL_COL]
result_eval_matcher.head(2)

Unnamed: 0,user_id,actual
0,1,"[853529, 865456, 867607, 872137, 874905, 87524..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870..."


In [16]:
# N = Neighbors
N_PREDICT = 500

In [17]:
methods = [
    recommender.get_similar_users_recommendation,
    recommender.get_similar_items_recommendation,
    recommender.get_own_recommendations,
    recommender.get_als_recommendations
    ]

In [18]:
%%time

for method in tqdm(methods):
    print(method.__name__)
    result_eval_matcher[method.__name__] = result_eval_matcher[USER_COL].apply(lambda x: method(x, N=N_PREDICT))

  0%|          | 0/4 [00:00<?, ?it/s]

get_similar_users_recommendation
get_similar_items_recommendation
get_own_recommendations
get_als_recommendations
CPU times: user 51min 43s, sys: 9min 19s, total: 1h 1min 2s
Wall time: 19min 16s


In [19]:
recall_array_at_5, recall_array_at_50 = list(), list()

for col in result_eval_matcher.iloc[:,2:]:
    recall_array_at_5.append(result_eval_matcher.apply(lambda row: recall_at_k(row[col], row['actual'], k=5), axis=1).mean())
    recall_array_at_50.append(result_eval_matcher.apply(lambda row: recall_at_k(row[col], row['actual'], k=50), axis=1).mean())

In [20]:
pd.DataFrame({'Algoritm': result_eval_matcher.columns[2:],
              'Recall@5': recall_array_at_5,
              'Recall@50': recall_array_at_50}
            )

Unnamed: 0,Algoritm,Recall@5,Recall@50
0,get_similar_users_recommendation,0.001536,0.007573
1,get_similar_items_recommendation,0.005955,0.03412
2,get_own_recommendations,0.018202,0.065257
3,get_als_recommendations,0.013284,0.048083


get_own_recomendations дают лучший результат

B)* Как зависит recall@k от k? Постройте для одной схемы генерации кандидатов эту зависимость для k = {20, 50, 100, 200, 500}  
C)* Исходя из прошлого вопроса, как вы думаете, какое значение k является наиболее разумным?

In [21]:
result_multi_k = data_val_matcher.groupby(USER_COL)[ITEM_COL].unique().reset_index()
result_multi_k.columns=[USER_COL, ACTUAL_COL]
result_multi_k.head(2)

Unnamed: 0,user_id,actual
0,1,"[853529, 865456, 867607, 872137, 874905, 87524..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870..."


In [22]:
N = [20, 50, 100, 200, 500]
for n in tqdm(N):
    result_multi_k[n] = result_multi_k[USER_COL].apply(lambda x: recommender.get_own_recommendations(x, N=n))

  0%|          | 0/5 [00:00<?, ?it/s]

In [23]:
result_multi_k.head(2)

Unnamed: 0,user_id,actual,20,50,100,200,500
0,1,"[853529, 865456, 867607, 872137, 874905, 87524...","[856942, 9297615, 5577022, 877391, 9655212, 88...","[856942, 9297615, 5577022, 877391, 9655212, 88...","[856942, 9297615, 5577022, 877391, 9655212, 88...","[856942, 9297615, 5577022, 877391, 9655212, 88...","[856942, 9297615, 5577022, 877391, 9655212, 88..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870...","[911974, 1076580, 1103898, 5567582, 1056620, 9...","[911974, 1076580, 1103898, 5567582, 1056620, 9...","[911974, 1076580, 1103898, 5567582, 1056620, 9...","[911974, 1076580, 1103898, 5567582, 1056620, 9...","[911974, 1076580, 1103898, 5567582, 1056620, 9..."


In [24]:
recall_array_multi_k = []

for n in N:
    recall_array_multi_k.append(result_multi_k.apply(lambda row: recall_at_k(row[n], row[ACTUAL_COL], k=n), axis=1).mean())
    
pd.DataFrame({'N': N, 'Recall@N': recall_array_multi_k})

Unnamed: 0,N,Recall@N
0,20,0.039284
1,50,0.065257
2,100,0.096045
3,200,0.135373
4,500,0.182053


Recall возвращает процент купленых среди рекомендованных нами товаров. Чем больше количество товаров мы рекомендуем, тем больше recall. В связи с этим, нужно выбирать K такого же размера как и формируемая нами рекомендация, если дальше мы переходим к модели второго уровня. Если у нас будет модель второго уровня для ранжирования, тогда К нужно брать размером с переходящую выборку. Что бы в ней содержалось как можно больше правильных кандидатов.

# Задание 2

Обучите модель 2-ого уровня, при этом:

- Добавьте минимум по 2 фичи для юзера, товара и пары юзер-товар

- Измерьте отдельно precision@5 модели 1-ого уровня и двухуровневой модели на data_val_ranker

- Вырос ли precision@5 при использовании двухуровневой модели?

### Подготовка данных для трейна

In [25]:
N_PREDICT = 500

In [26]:
df_match_candidates = pd.DataFrame(data_train_ranker[USER_COL].unique())
df_match_candidates.columns = [USER_COL]

# собираем кандитатов с первого этапа (matcher)
df_match_candidates['candidates'] = df_match_candidates[USER_COL].apply(lambda x: recommender.get_own_recommendations(x, N=N_PREDICT))

df_match_candidates.head(2)

Unnamed: 0,user_id,candidates
0,2070,"[1105426, 1097350, 879194, 948640, 928263, 944..."
1,2021,"[950935, 1119454, 835578, 863762, 1019142, 102..."


In [27]:
df_items = df_match_candidates.apply(lambda x: pd.Series(x['candidates']), axis=1).stack().reset_index(level=1, drop=True)
df_items.name = ITEM_COL

df_match_candidates = df_match_candidates.drop('candidates', axis=1).join(df_items)

df_match_candidates.head(4)

Unnamed: 0,user_id,item_id
0,2070,1105426
0,2070,1097350
0,2070,879194
0,2070,948640


### Check warm start

In [28]:
print_stats_data(df_match_candidates, 'match_candidates')

match_candidates
Shape: (1075500, 2) Users: 2151 Items: 4628


### Создаем трейн сет для ранжирования с учетом кандидатов с этапа 1 

In [29]:
df_ranker_train = data_train_ranker[[USER_COL, ITEM_COL]].copy()
df_ranker_train['target'] = 1  # тут только покупки 

df_ranker_train.head()

Unnamed: 0,user_id,item_id,target
2104867,2070,1019940,1
2107468,2021,840361,1
2107469,2021,856060,1
2107470,2021,869344,1
2107471,2021,896862,1


#### Не хватает нулей в датасете, поэтому добавляем наших кандитатов в качество нулей

In [30]:
df_ranker_train = df_match_candidates.merge(df_ranker_train, on=[USER_COL, ITEM_COL], how='left')

# чистим дубликаты
df_ranker_train = df_ranker_train.drop_duplicates(subset=[USER_COL, ITEM_COL])

df_ranker_train['target'].fillna(0, inplace= True)

df_ranker_train.target.value_counts()

0.0    989501
1.0     26185
Name: target, dtype: int64

In [31]:
df_ranker_train.head(2)

Unnamed: 0,user_id,item_id,target
0,2070,1105426,0.0
1,2070,1097350,0.0


In [32]:
df_ranker_train['target'].mean()

0.02578060542332965

### Подготавливаем фичи для обучения модели

In [33]:
df_ranker_train = df_ranker_train.merge(item_features, on=ITEM_COL, how='left')
df_ranker_train = df_ranker_train.merge(user_features, on=USER_COL, how='left')

df_ranker_train.head(2)

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc
0,2070,1105426,0.0,69,DELI,Private,SANDWICHES,SANDWICHES - (COLD),,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
1,2070,1097350,0.0,2468,GROCERY,National,DOMESTIC WINE,VALUE GLASS WINE,4 LTR,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown


In [34]:
df_ranker_train.head(2)

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc
0,2070,1105426,0.0,69,DELI,Private,SANDWICHES,SANDWICHES - (COLD),,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown
1,2070,1097350,0.0,2468,GROCERY,National,DOMESTIC WINE,VALUE GLASS WINE,4 LTR,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown


In [35]:
average_bill = data_train_ranker.groupby(['user_id', 'basket_id'], as_index=False)['sales_value'].sum()
average_bill = average_bill.groupby('user_id', as_index=False).mean()
average_bill = average_bill.drop('basket_id', axis=1)
average_bill.rename(columns={'sales_value': 'average_bill'}, inplace=True)
df_ranker_train = df_ranker_train.merge(average_bill, on='user_id', how='left')

In [36]:
num_pur = data_train_ranker.merge(item_features, on='item_id', how='left')
num_pur = num_pur.groupby(['user_id', 'department'], as_index=False)['quantity'].sum()
num_pur.rename(columns={'quantity': 'num_pur'}, inplace=True)
num_pur['num_pur'].fillna(0, inplace=True)
df_ranker_train = df_ranker_train.merge(num_pur, on=['user_id', 'department'], how='left')

In [37]:
price = data_train_ranker.loc[:,('item_id', 'quantity', 'sales_value')]
price = price.groupby('item_id', as_index=False)[['quantity', 'sales_value']].sum()
price['price'] = price['sales_value'] / price['quantity']
price = price.drop(['quantity', 'sales_value'], axis=1)
df_ranker_train = df_ranker_train.merge(price, on='item_id', how='left')

In [38]:
goods_per_week = data_train_ranker.groupby(['item_id', 'week_no'], as_index=False)['quantity'].sum()
goods_per_week = goods_per_week.groupby('item_id', as_index=False)['quantity'].mean()
goods_per_week.rename(columns={'quantity': 'goods_per_week'}, inplace=True)
goods_per_week['goods_per_week'].fillna(0, inplace=True)
df_ranker_train = df_ranker_train.merge(goods_per_week, on='item_id', how='left')

In [39]:
df_ranker_train.head(2)

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,marital_status_code,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc,average_bill,num_pur,price,goods_per_week
0,2070,1105426,0.0,69,DELI,Private,SANDWICHES,SANDWICHES - (COLD),,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,3.0,3.99,1.666667
1,2070,1097350,0.0,2468,GROCERY,National,DOMESTIC WINE,VALUE GLASS WINE,4 LTR,45-54,U,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,213.0,10.99,1.0


In [40]:
X_train = df_ranker_train.drop('target', axis=1)
y_train = df_ranker_train[['target']]

In [41]:
cat_feats = X_train.columns[2:].tolist()
X_train[cat_feats] = X_train[cat_feats].astype('category')

cat_feats

['manufacturer',
 'department',
 'brand',
 'commodity_desc',
 'sub_commodity_desc',
 'curr_size_of_product',
 'age_desc',
 'marital_status_code',
 'income_desc',
 'homeowner_desc',
 'hh_comp_desc',
 'household_size_desc',
 'kid_category_desc',
 'average_bill',
 'num_pur',
 'price',
 'goods_per_week']

### Обучение модели ранжирования

In [42]:
lgb = LGBMClassifier(objective='binary',
                     max_depth=8,
                     n_estimators=300,
                     learning_rate=0.05,
                     categorical_column=cat_feats)

lgb.fit(X_train, y_train)

train_preds = lgb.predict_proba(X_train)

  return f(*args, **kwargs)


In [43]:
df_ranker_predict = df_ranker_train.copy()

In [44]:
df_ranker_predict['proba_item_purchase'] = train_preds[:,1]

Мы обучили модель ранжирования на покупках из сета data_train_ranker и на кандитатах от own_recommendations, что является тренировочным сетом, и теперь наша задача предсказать и оценить именно на тестовом сете.

In [45]:
df_ranker_predict.head()

Unnamed: 0,user_id,item_id,target,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product,age_desc,...,income_desc,homeowner_desc,hh_comp_desc,household_size_desc,kid_category_desc,average_bill,num_pur,price,goods_per_week,proba_item_purchase
0,2070,1105426,0.0,69,DELI,Private,SANDWICHES,SANDWICHES - (COLD),,45-54,...,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,3.0,3.99,1.666667,0.042092
1,2070,1097350,0.0,2468,GROCERY,National,DOMESTIC WINE,VALUE GLASS WINE,4 LTR,45-54,...,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,213.0,10.99,1.0,0.026125
2,2070,879194,0.0,69,DRUG GM,Private,DIAPERS & DISPOSABLES,BABY DIAPERS,14 CT,45-54,...,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,31.0,,,0.042652
3,2070,948640,0.0,1213,DRUG GM,National,ORAL HYGIENE PRODUCTS,WHITENING SYSTEMS,3 OZ,45-54,...,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,31.0,,,0.004805
4,2070,928263,0.0,69,DRUG GM,Private,DIAPERS & DISPOSABLES,BABY DIAPERS,13 CT,45-54,...,50-74K,Unknown,Unknown,1,None/Unknown,14.355581,31.0,7.99,2.2,0.031278


### Evaluation on test dataset

In [46]:
result_eval_ranker = data_val_ranker.groupby(USER_COL)[ITEM_COL].unique().reset_index()
result_eval_ranker.columns=[USER_COL, ACTUAL_COL]
result_eval_ranker.head(2)

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."


### Eval matching on test dataset

In [47]:
%%time
result_eval_ranker['own_rec'] = result_eval_ranker[USER_COL].apply(lambda x: recommender.get_own_recommendations(x, N=N_PREDICT))

CPU times: user 2.32 s, sys: 16.4 ms, total: 2.34 s
Wall time: 2.31 s


In [48]:
def calc_precision(df_data, top_k):
    for col_name in df_data.columns[2:]:
        yield col_name, df_data.apply(lambda row: precision_at_k(row[col_name], row[ACTUAL_COL], k=top_k), axis=1).mean()

In [49]:
TOPK_PRECISION = 5

In [50]:
# померяем precision только модели матчинга, чтобы понимать влияение ранжирования на метрики

sorted(calc_precision(result_eval_ranker, TOPK_PRECISION), key=lambda x: x[1], reverse=True)

[('own_rec', 0.1444117647058813)]

### Eval re-ranked matched result on test dataset
    Вспомним df_match_candidates сет, который был получен own_recommendations на юзерах, набор пользователей мы фиксировали и он одинаков, значи и прогноз одинаков, поэтому мы можем использовать этот датафрейм для переранжирования.

In [51]:
def rerank(user_id):
    return df_ranker_predict[df_ranker_predict[USER_COL]==user_id].sort_values('proba_item_purchase', ascending=False).head(5).item_id.tolist()

In [52]:
result_eval_ranker['reranked_own_rec'] = result_eval_ranker[USER_COL].apply(lambda user_id: rerank(user_id))

In [53]:
print(*sorted(calc_precision(result_eval_ranker, TOPK_PRECISION), key=lambda x: x[1], reverse=True), sep='\n')

('reranked_own_rec', 0.21785900783289597)
('own_rec', 0.1444117647058813)


  return flags.sum() / len(recommended_list)


результат с 
N_PREDICT = 500 (обучение в 10 раз дольше, чем N=50)
с 4 фичами average_bill num_pur price goods_per_week

* ('reranked_own_rec', 0.21785900783289597)
* ('own_rec', 0.1444117647058813)

результат с 
N_PREDICT = 500 (обучение в 10 раз дольше, чем N=50)
с 3 фичами average_bill num_pur price

* ('reranked_own_rec', 0.21493472584856121)
* ('own_rec', 0.1444117647058813)

результат с 
N_PREDICT = 500 (обучение в 10 раз дольше, чем N=50)
с 2 фичами average_bill num_pur

* ('reranked_own_rec', 0.19634464751957956)
* ('own_rec', 0.1444117647058813)

результат с 
N_PREDICT = 500 (обучение в 10 раз дольше, чем N=50)
с 1 фичей average_bill

* ('reranked_own_rec', 0.1989556135770208)
* ('own_rec', 0.1444117647058813)

результат с 
N_PREDICT = 50
с 1 фичей average_bill

* ('reranked_own_rec', 0.16417754569190462)
* ('own_rec', 0.1444117647058813)

без добавления фичей - prescision@5 вырос при использовании двухуровневой модели
* ('reranked_own_rec', 0.15331592689294912)
* ('own_rec', 0.1444117647058813)