In [1]:
import numpy as np
import pandas as pd

from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import bm25_weight

from functools import partial
from sklearn.preprocessing import StandardScaler
from sklearn.base import TransformerMixin

from typing import List

## load & split

In [2]:
purchases = pd.read_csv('retail_train.csv')
purchases.head(3)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0
2,2375,26984851472,1,1036325,1,0.99,364,-0.3,1631,1,0.0,0.0


In [3]:
# train/test split
test_size_weeks = 3

train = purchases[purchases['week_no'] < purchases['week_no'].max() - test_size_weeks].copy()
test = purchases[purchases['week_no'] >= purchases['week_no'].max() - test_size_weeks].copy()

In [4]:
products = pd.read_csv('product.csv')
products.columns = products.columns.str.lower()
products.head(3)

Unnamed: 0,product_id,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,
2,26093,69,PASTRY,Private,BREAD,BREAD:ITALIAN/FRENCH,


In [5]:
# prepare result DataFrame
true_values = test.groupby('user_id')['item_id'].unique().reset_index()
true_values.columns=['user_id', 'actual']
true_values.head(3)

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107..."


## Used functions

In [6]:
class BColor:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def cprint(msg, color: BColor):
    print(f'{color}{msg}{BColor.ENDC}')

In [7]:
# Топ популярных товаров
def popularity_measure(source, fields: List[str], k=5000, beta: List[float] = None, add_target=None, scaler=None):
    """ Расчет оценки важности товара в покупке и отбор топ K наиболее популярных товаров
    :param source - исходные данные
    :param fields - признаки, по которым измеряется мера важности товара
    :param k - количество товаров, отбираемых в топ
    :param beta - множители значимости для каждого признака в оценке
    :param add_target - название финального признака. Признак не добавляется, если target = None
    :param scaler - класс масштабирования данных
    """
    b = [1.] * len(fields) if beta is None else np.array(beta)
    assert len(fields) == len(b), '`fields` and `beta` dimensions must equal'
    assert issubclass(StandardScaler, TransformerMixin) or scaler is None, 'scaler must be a subclass of TransformerMixin'
    _df = source[['item_id']].copy()
    prepared = scaler().fit_transform(source[fields]) * b if scaler else source[fields] * b
    values = np.linalg.norm(prepared, ord=2, axis=1)
    _df['popularity'] = values
    if add_target:
        source.loc[:, add_target] = values
    popularity = _df.groupby('item_id')['popularity'].sum()
    return popularity.sort_values(ascending=False).head(k).index.tolist()

In [8]:
def check_model(uim, mdl_params, rec_params, res, ttl='als'):
    """
    :param uim: user-item matrix
    :param mdl_params: model init parameters
    :param rec_params: recommendation parameters
    :param res: true values, including user_id
    :param ttl: model title
    :return: predicted values (DataFrame)
    """
    mdl = AlternatingLeastSquares(**mdl_params)
    mdl.fit(uim.T, show_progress=False)
    # rec_params['user_items'] = uim
    res[ttl] = res['user_id'].apply(partial(recommender, mdl=mdl, params=rec_params))
    return mdl

In [9]:
# предсказатель-интерпретатор
def recommender(user_id, mdl, params):
    uid = userid_to_id.get(user_id, None)
    if uid is None:
        return list()
    rec_score = mdl.recommend(userid_to_id[user_id], **params)
    return [id_to_itemid[rec[0]] for rec in rec_score]

In [10]:
# метрики
def precision_at_k(recommended_list, bought_list, k=5):
    flags = np.isin(bought_list, recommended_list[:k])
    return flags.sum() / k

def ap_k(recommended_list, bought_list, k=5):
    flags = np.isin(recommended_list, bought_list)
    if sum(flags) == 0:
        return 0

    func = partial(precision_at_k, recommended_list, bought_list)
    rel_items = np.arange(1, k + 1)[flags[:k]]
    return np.sum(list(map(func, rel_items))) / flags.sum()

In [11]:
# сериализация расчета метрики
def calc_metric(metric_func, source: pd.DataFrame):
    """ Подсчет метрики
    :param metric_func - функция измерения метрики. Первый аргумент - рекомендации, второй - актуальные значения
    :param source - данные для подсчета метрики
    """
    def metric_wrapper(pred, act):
        return metric_func(pred, act) if len(pred) != 0 else 0

    metric = pd.DataFrame()
    for col in source.columns:
        if col == 'user_id':
            metric[col] = source[col]
        elif col == 'actual':
            continue
        else:
            metric[col] = source[[col, 'actual']].apply(lambda row: metric_wrapper(*row.values), axis=1)
    return metric

In [12]:
def compare_metrics(res, saveto=None):
    pr_at_k = calc_metric(partial(precision_at_k, k=5), res)
    ap_at_k = calc_metric(lambda pred, act: ap_k(pred, act, k=min(5, len(pred))), res)
    smr = pd.DataFrame([pr_at_k.mean(), ap_at_k.mean()], index=['precision@k', 'map@k']).drop(columns='user_id')
    if saveto:
        smr.T.to_csv(saveto)
    return smr

In [13]:
# def plot_weight_curve(data: pd.Series, p1=True):
#     """ Построение графиков весов """
#     _val = data.sort_values(ascending=False).values
#     fig, ax = plt.subplots(1, 2, figsize=(12, 3))
#     fig.suptitle(f'Weights curve for {data.name}')
#     ax[0].set_title('clean')
#     ax[0].plot(_val)
#     ax[1].set_title('log-scaled')
#     ax[1].plot(np.log1p(_val) if p1 else np.log(_val))
#     plt.show()

In [14]:
# def apply_weights(uim, wl, axis, top):
#     assert len(wl) == len(axis), 'weights and axis lists must be same-dimensional'
#     res_mat = []
#     for ax, w in zip(axis, wl):
#         if ax in [1, 'u', 'user']:
#             mat = csr_matrix((uim.T * w).T).tocsr()
#         elif ax in [0, 'i', 'item']:
#             w = w[w.index.isin(top)]
#             w[-1] = 1
#             mat = csr_matrix(uim * w).tocsr()
#         else:
#             mat = csr_matrix(uim).tocsr()
#         res_mat.append(mat)
#     return res_mat

## Baseline

In [43]:
# %%time
# # стандартный топ5000 по кол-ву проданных единиц
# top5k = popularity_measure(train, ['quantity'], k=5000, add_target='popularity')
# top5k[:7]

In [44]:
%%time
# берем топ5000 по оценке популярности товара
top5k = popularity_measure(train, ['quantity', 'sales_value'], beta=[1., 1.], k=5000, add_target='popularity', scaler=StandardScaler)
top5k[:7]

CPU times: user 675 ms, sys: 520 ms, total: 1.19 s
Wall time: 5.4 s


[6534178, 6533889, 1082185, 6534166, 6533765, 995242, 981760]

In [45]:
# обработка товаров не из топа
top_train = train.copy()
top_train.loc[~top_train['item_id'].isin(top5k), 'item_id'] = -1     # товары не из топ5000 превращаем в один товар
top_train.head(3)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,popularity
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0,0.415439
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0,0.549036
2,2375,26984851472,1,1036325,1,0.99,364,-0.3,1631,1,0.0,0.0,0.509067


In [54]:
# подготовка обучающих данных: составление таблицы user-item на основе популярности товара для пользователя
user_item_matrix = pd.pivot_table(top_train,
                                  index='user_id',
                                  columns='item_id',
                                  values='popularity',
                                  aggfunc='sum',
                                  fill_value=0
                                 )

user_item_matrix = user_item_matrix.astype(float)

# переведем в формат sparse matrix
sparse_user_item = csr_matrix(user_item_matrix).tocsr()

In [19]:
user_item_matrix.head(3)

item_id,-1,202291,397896,420647,480014,545926,707683,731106,818980,819063,...,15778533,15831255,15926712,15926775,15926844,15926886,15927403,15927661,15927850,16809471
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,768.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,268.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,233.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
# remap dictionaries
id_to_itemid = dict(enumerate(user_item_matrix.columns.values))
id_to_userid = dict(enumerate(user_item_matrix.index.values))

itemid_to_id = {v: k for k, v in id_to_itemid.items()}
userid_to_id = {v: k for k, v in id_to_userid.items()}

In [56]:
# подготовка параметров обучения модели и параметров предсказателя
model_params = {'factors': 44,
                'regularization': 0.001,
                'iterations': 15,
                'calculate_training_loss': True,
                'use_gpu': False,
                'random_state': 23}

recommender_params = {
    'user_items': sparse_user_item,
    'N': 5, # кол-во рекомендаций
    'filter_already_liked_items': False,
    'filter_items': [itemid_to_id[-1]],
    'recalculate_user': True
}

# sparse_user_item = csr_matrix(bm25_weight(user_item_matrix.T).T).tocsr()

In [57]:
%%time
# обучение
baseline = true_values.copy()
model = check_model(sparse_user_item, model_params, recommender_params, baseline, 'als_baseline')

CPU times: user 1min 24s, sys: 49.5 s, total: 2min 14s
Wall time: 1min 40s


In [27]:
compare_metrics(baseline)

Unnamed: 0,als_baseline
precision@k,0.207738
map@k,0.346116


## Similar-Item recommender

In [36]:
def filter_top_for_users(items, users, measure='popularity', k=5):
    """ Get users top purchases
    :param items: data grouped by users and items
    :param users: user ids array
    :param measure: ranging measure
    :param k: number of items to find
    :return ungrouped dataframe
    """
    filter_mask = (items['user_id'].isin(users)) & (items['item_id'] != -1)
    return items[filter_mask].sort_values(by=['user_id', measure], ascending=[True, False]).groupby('user_id').head(k)

In [None]:
def get_nearest_items(mdl, itemid, k):
    """ Get top K the nearest items to the given
    :param mdl: ALS fitted model
    :param itemid: real item id
    :param k: number of items to find
    :return: list of similar items
    """
    return [id_to_itemid[idx] for idx, _ in mdl.similar_items(itemid=itemid_to_id[itemid], N=k + 1)[1:]]

In [28]:
def basic_item_filter(items, placeholder=-1):
    """ Из списка товаров берем первый, отличный от товара-заглушки, а если таких нет, то возвращаем заглушку """
    return result[0] if (result := [item for item in items if item != -1]) else placeholder

def private_label_item_filter(items, placeholder=-1):
    """ Из списка товаров берем сука только мыло и веревку """
    # убираем товары-заглушки
    if not len(goods := [idx for idx in items if idx != -1]):
        return placeholder
    # фильтруем товары private brand
    filter_mask = products['product_id'].isin(goods) & (products['brand'] == 'Private')
    private_brand = products.loc[filter_mask, 'product_id'].values
    # если такие есть - даем ближайший в рекомендацию, иначе даем просто ближайший
    return private_brand[0] if private_brand.size else goods[0]

In [29]:
def similar_item_recommend(mdl, users, data, measure='popularity', k=5,
                           filter_func=basic_item_filter, placeholder=(), title='similar_items'):
    """ Recommend similar items based on top K purchases
    :param mdl: ALS fitted model
    :param users: user ids to recommend for
    :param data: source dataset
    :param measure: target field in the dataset
    :param k: number of items to recommend
    :param filter_func: additional filters like func(items: list) -> list
    :param placeholder: value to use if no predictions available
    :param title: name of target column
    :return: list of predictions for given user
    """
    # по userid получаем топ покупок пользователей
    all_items = data.groupby(['user_id', 'item_id'])[measure].sum().reset_index()
    items = filter_top_for_users(all_items, users, measure, k)

    # для каждого товара из топа пользователя находим ближайшие K товаров из топ5к
    sim = items['item_id'].apply(lambda x: get_nearest_items(mdl, x, 5))
    # на список похожих товаров применяем нужный фильтр:
    #   по умолчанию берем первый товар, который не-заглушка. если таких нет, то берем указанный placeholder
    if filter_func and callable(filter_func):
        items[title] = sim.apply(filter_func)
    # набор почти готовых рекомендаций
    sim = items.groupby('user_id')[title].unique().reset_index()

    # если пользователь покупал меньше, чем мы хотим порекомендовать, или вообще ничего не покупал
    sizes = items.groupby('user_id').size()
    user_array = np.array(users)
    if (low_pred := user_array[np.isin(users, sizes[sizes < 5].index)]).any():
        cprint(f"Some users have less than {k} predictions!", BColor.WARNING)
        print(low_pred)
        # какая-то обработка подобных ситуаций
        # как вариант, на 1 купленный товар рекомендовать от 2 до k похожих на него, в зависимости от кол-ва купленных
    if (nan_pred := user_array[~np.isin(users, sizes.index)]).any():
        cprint(f"Some users have no predictions at all!", BColor.FAIL)
        print(nan_pred)
        # какая-то обработка подобных ситуаций
        # но пока просто пишем пустоту
        fix_pred = pd.DataFrame({'user_id': nan_pred, title: [np.array(placeholder)] * nan_pred.size})
        sim = pd.concat([sim, fix_pred])

    return sim

In [966]:
# init predictions
predictions = baseline.copy()

In [967]:
%%time
# базовые рекомендации по similar item
basic_similar_items = similar_item_recommend(model, baseline['user_id'], top_train,
                                             k=5, title='basic_similar_items')
predictions = predictions.merge(basic_similar_items, on='user_id', how='left')

[93mSome users have less than 5 predictions![0m
[  75  729 1105 1539 1565 1636 1984 2120 2259 2364 2373 2404]
[91mSome users have no predictions at all![0m
[ 650  954 1987]
CPU times: user 7.6 s, sys: 5.73 s, total: 13.3 s
Wall time: 2.06 s


In [968]:
%%time
# рекомендации по similar item предпочтительно private brand
brand_similar_items = similar_item_recommend(model, baseline['user_id'], top_train,
                                             k=5, title='brand_similar_items', filter_func=private_label_item_filter)
predictions = predictions.merge(brand_similar_items, on='user_id', how='left')

[93mSome users have less than 5 predictions![0m
[  75  729 1105 1539 1565 1636 1984 2120 2259 2364 2373 2404]
[91mSome users have no predictions at all![0m
[ 650  954 1987]
CPU times: user 1min 8s, sys: 5.95 s, total: 1min 14s
Wall time: 1min 3s


In [969]:
predictions.head(3)

Unnamed: 0,user_id,actual,als_baseline,basic_similar_items,brand_similar_items
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[995242, 1082185, 1005186, 5978656, 6534178]","[6534178, 880310, 1029743, 10150194, 1022053]","[981760, 880310, 826249, 10150194, 1022053]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[5569230, 6534178, 1133018, 1082185, 1053690]","[1082185, 12810391, 1075979, 12263692, 847573]","[1062425, 1026586, 896974, 12263692, 847573]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[866211, 1127831, 878996, 834484, 854852]","[1082185, 6534178, 948650, 825541, 1135476]","[1062425, 981760, 948650, 825541, 1135476]"


In [970]:
# расчет метрик
compare_metrics(predictions)

Unnamed: 0,als_baseline,basic_similar_items,brand_similar_items
precision@k,0.207444,0.122233,0.06288
map@k,0.345969,0.26392,0.112231


In [971]:
metrics = pd.read_csv('metrics_similar_items.csv')
metrics[metrics['metric'] == 'precision@k']

Unnamed: 0,mode,metric,als_baseline,basic_similar_items,brand_similar_items
0,default,precision@k,0.167091,0.111361,0.076494
2,default (bm25),precision@k,0.201469,0.130558,0.085211
4,mixed top5k,precision@k,0.207444,0.122233,0.06288
6,mixed top5k (bm25),precision@k,0.222625,0.101273,0.077669


## Similar-User recommender

In [24]:
def get_nearest_users(mdl, userid, k):
    """ Get top K the nearest users to the given
    :param mdl: ALS fitted model
    :param userid: real user id
    :param k: number of users to find
    :return: list of similar users
    """
    return [id_to_userid[idx] for idx, _ in mdl.similar_users(userid=userid_to_id[userid], N=k + 1)[1:]]

In [58]:
def basic_user_filter(items, k, placeholder=()):
    """ Из списка товаров берем K первых, отличный от товара-заглушки, а если таких нет, то возвращаем заглушку """
    return result[:k] if (result := [item for item in items if item != -1]) else placeholder

def private_label_user_filter(items, k, placeholder=()):
    """ Из списка товаров берем сука только мыло и веревку """
    # убираем товары-заглушки
    if not (goods := pd.Series([idx for idx in items if idx != -1], dtype=np.int64)).size:
        return placeholder
    # собираем и присоединяем информацию по брендам товаров
    brand_map = products.set_index('product_id')['brand'].to_dict()
    prods = pd.DataFrame({'item_id': goods.values, 'brand': goods.map(brand_map)})
    # фильтруем private brand brand_map топ, не нарушая исходной сортировки
    filter_mask = prods['brand'] == 'Private'
    return pd.concat([prods[filter_mask], prods[~filter_mask]])['item_id'].head(k).values

In [26]:
def similar_user_recommend(mdl, users, data, measure='popularity', k=5,
                           filter_func=basic_user_filter, placeholder=(), title='similar_users'):
    """"""
    # для каждого юзера из запроса находим K ближайших
    sim = pd.Series(users).apply(lambda uid: get_nearest_users(mdl, userid=uid, k=k))
    # для каждого пользователя в запросе составляем общий список товаров из топ К покупок каждого ближайшего пользователя
    # полученные списки содержат наиболее релевантные товары ближайшего(-их) пользователя(-ей)
    all_items = data.groupby(['user_id', 'item_id'])[measure].sum().reset_index()
    items = sim.apply(lambda x: filter_top_for_users(all_items, x, measure, k)['item_id'].drop_duplicates().values)
    # теперь можем дополнительно отфильтровать полученные списки
    #   по умолчанию берем первые К товаров
    items = items.apply(lambda val: filter_func(val, k) if filter_func and callable(filter_func) else lambda x: x[:k])
    # если похожие пользователи мало покупали, то рекомендаций может не хватить
    sizes = items.apply(len)
    if (low_pred := items.index[sizes < k]).any():
        cprint(f"Some users have less than {k} predictions!", BColor.WARNING)
        print(low_pred.tolist())
        # какая-то обработка подобных ситуаций
    if (nan_pred := items.index[sizes == 0]).any():
        cprint(f"Some users have no predictions at all!", BColor.FAIL)
        print(nan_pred.tolist())
        # какая-то обработка подобных ситуаций
    items.name = title
    items.index = users
    return items

In [38]:
# init predictions
predictions = baseline.copy()

In [59]:
%%time
# базовые рекомендации по similar user
basic_similar_users = similar_user_recommend(model, baseline['user_id'], top_train, measure='popularity',
                                             k=5, title='basic_similar_users')
predictions = predictions.merge(basic_similar_users, on='user_id', how='left')

[93mSome users have less than 5 predictions![0m
[511, 770, 1144, 1281, 1625]
CPU times: user 39.7 s, sys: 929 ms, total: 40.7 s
Wall time: 1min 8s


In [60]:
%%time
# рекомендации по similar user предпочтительно private brand
brand_similar_users = similar_user_recommend(model, baseline['user_id'], top_train, measure='popularity',
                                             k=5, title='brand_similar_users', filter_func=private_label_user_filter)
predictions = predictions.merge(brand_similar_users, on='user_id', how='left')

[93mSome users have less than 5 predictions![0m
[511, 770, 1144, 1281, 1625]
CPU times: user 8min 1s, sys: 1.58 s, total: 8min 2s
Wall time: 9min 31s


In [61]:
predictions.head(3)

Unnamed: 0,user_id,actual,als_baseline,basic_similar_users_x,brand_similar_users_x,basic_similar_users_y,brand_similar_users_y,basic_similar_users,brand_similar_users
0,1,"[821867, 834484, 856942, 865456, 889248, 90795...","[1082185, 6534178, 995242, 981760, 1058997]","[6534178, 913449, 1082185, 1040807, 964521]","[6534178, 6533889, 1058997, 1029743, 995242]","[6534178, 1070015, 994928, 1082185, 12263692]","[6534178, 1070015, 994928, 911409, 969494]","[995242, 1082185, 908531, 904240, 1024306]","[995242, 908531, 904240, 862349, 994928]"
1,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[6534178, 1029743, 1106523, 995242, 1082185]","[1080414, 1022872, 1124729, 1085604, 1065593]","[6534178, 953476, 6534166, 1133018, 995242]","[1107597, 6533362, 9553017, 1062128, 942525]","[6534178, 6533765, 1070015, 1029743, 7160764]","[5569230, 949965, 897298, 1021324, 844179]","[907143, 1102139, 6534178, 1070015, 851254]"
2,6,"[920308, 926804, 946489, 1006718, 1017061, 107...","[1082185, 6534178, 1127831, 1007195, 1024306]","[1133018, 908846, 1007195, 1028816, 995242]","[1133018, 995242, 6534178, 6534166, 1029743]","[1028816, 961979, 1034176, 962615, 828106]","[961979, 6534178, 994928, 981760, 904240]","[5996007, 12262978, 1107420, 980353, 1092363]","[5996007, 980353, 1092363, 6534178, 6533765]"


In [62]:
# расчет метрик
compare_metrics(predictions)

Unnamed: 0,als_baseline,basic_similar_users_x,brand_similar_users_x,basic_similar_users_y,brand_similar_users_y,basic_similar_users,brand_similar_users
precision@k,0.201469,0.086582,0.100098,0.090597,0.09569,0.118511,0.114398
map@k,0.357767,0.199868,0.224635,0.199992,0.217748,0.287378,0.285605


In [64]:
metrics = pd.read_csv('metrics_similar_users.csv')
metrics[metrics['metric'] == 'precision@k']

Unnamed: 0,mode,metric,als_baseline,basic_similar_users,brand_similar_users
0,default,precision@k,0.167091,0.117042,0.125661
2,default (bm25),precision@k,0.201469,0.086582,0.100098
4,mixed top5k,precision@k,0.207444,0.118511,0.114398
6,mixed top5k (bm25),precision@k,0.222625,0.090597,0.09569


In [None]:
#