# Вебинар 6. Двухуровневые модели рекомендаций


Код для src, utils, metrics вы можете скачать из [этого](https://github.com/geangohn/recsys-tutorial) github репозитория

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Для работы с матрицами
from scipy.sparse import csr_matrix
from implicit.nearest_neighbours import bm25_weight, tfidf_weight

# Матричная факторизация
from implicit import als

# Модель второго уровня
from lightgbm import LGBMClassifier

import os, sys
module_path = os.path.abspath(os.path.join(os.pardir))
if module_path not in sys.path:
    sys.path.append(module_path)

# Написанные нами функции
from metrics import precision_at_k, recall_at_k
from utils import prefilter_items
#from recommenders import MainRecommender
from implicit.als import AlternatingLeastSquares

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv('../retail_train.csv')
item_features = pd.read_csv('../product.csv')
user_features = pd.read_csv('../hh_demographic.csv')

# column processing
item_features.columns = [col.lower() for col in item_features.columns]
user_features.columns = [col.lower() for col in user_features.columns]

item_features.rename(columns={'product_id': 'item_id'}, inplace=True)
user_features.rename(columns={'household_key': 'user_id'}, inplace=True)


# Важна схема обучения и валидации!
# -- давние покупки -- | -- 6 недель -- | -- 3 недель -- 
# подобрать размер 2-ого датасета (6 недель) --> learning curve (зависимость метрики recall@k от размера датасета)
val_lvl_1_size_weeks = 6
val_lvl_2_size_weeks = 3

data_train_lvl_1 = data[data['week_no'] < data['week_no'].max() - (val_lvl_1_size_weeks + val_lvl_2_size_weeks)]
data_val_lvl_1 = data[(data['week_no'] >= data['week_no'].max() - (val_lvl_1_size_weeks + val_lvl_2_size_weeks)) &
                      (data['week_no'] < data['week_no'].max() - (val_lvl_2_size_weeks))]

data_train_lvl_2 = data_val_lvl_1.copy()  # Для наглядности. Далее мы добавим изменения, и они будут отличаться
data_val_lvl_2 = data[data['week_no'] >= data['week_no'].max() - val_lvl_2_size_weeks]

data_train_lvl_1.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [3]:
n_items_before = data_train_lvl_1['item_id'].nunique()

prefiltered_train_lvl_1 = prefilter_items(data_train_lvl_1)

n_items_after = prefiltered_train_lvl_1['item_id'].nunique()
print('Decreased # items from {} to {}'.format(n_items_before, n_items_after))

Decreased # items from 83685 to 5000


In [4]:
prefiltered_train_lvl_1.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [5]:
user_item_matrix = pd.pivot_table(prefiltered_train_lvl_1, index='user_id', columns='item_id', values='quantity',
                                      aggfunc='count', fill_value=0)
user_item_matrix

item_id,202291,397896,420647,480014,545926,707683,731106,818980,819063,819255,...,15511891,15596279,15596488,15596515,15778533,15926844,15926886,15927403,15927661,15927850
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,1,...,0,2,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2497,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2498,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2499,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


### Задание 1



Дают ли own recommendtions + top-popular лучший recall?  

 
C)* Исходя из прошлого вопроса, как вы думаете, какое значение k является наиболее разумным?


In [6]:
result_lvl_1 = data_val_lvl_1.groupby('user_id')['item_id'].unique().reset_index()
result_lvl_1.columns=['user_id', 'actual']
result_lvl_1.head(2)

Unnamed: 0,user_id,actual
0,1,"[853529, 865456, 867607, 872137, 874905, 87524..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870..."


A) Попробуйте различные варианты генерации кандидатов. Какие из них дают наибольший recall@k ?
- Пока пробуем отобрать 50 кандидатов (k=50)
- Качество измеряем на data_val_lvl_1: следующие 6 недель после трейна
B)* Как зависит recall@k от k? Постройте для одной схемы генерации кандидатов эту зависимость для k = {20, 50, 100, 200, 500} 

In [7]:
def prepare_matrx(data):
        
    user_item_matrix = pd.pivot_table(data, index='user_id', columns='item_id', values='quantity',
                                      aggfunc='count', fill_value=0)
    
    user_item_matrix = user_item_matrix.astype(float)  # необходимый тип матрицы для implicit
    
    return user_item_matrix

In [8]:
def prepare_dict(user_item_matrix):
    userids = user_item_matrix.index.values
    itemids = user_item_matrix.columns.values
    matrix_userids = np.arange(len(userids))
    matrix_itemids = np.arange(len(itemids))

    id_to_itemid = dict(zip(matrix_itemids, itemids))
    id_to_userid = dict(zip(matrix_userids, userids))
    itemid_to_id = dict(zip(itemids, matrix_itemids))
    userid_to_id = dict(zip(userids, matrix_userids))

    return id_to_itemid, id_to_userid, itemid_to_id, userid_to_id

In [9]:
def upd_dict(self, user_id):
    if user_id not in userid_to_id.keys():
        max_id = max(list(userid_to_id.values()))
        max_id += 1
        
        userid_to_id.update({user_id: max_id})
        id_to_userid.update({max_id: user_id})
    return id_to_itemid, id_to_userid, itemid_to_id, userid_to_id

In [10]:
def fit(data, n_factors=20, regularization=0.001, iterations=15, num_threads=4):
    user_item_matrix = prepare_matrx(data)
    user_item_matrix = bm25_weight(user_item_matrix.T).T
    model = AlternatingLeastSquares(factors=n_factors, regularization=regularization,
                                    iterations=iterations, num_threads=num_threads)
    model.fit(csr_matrix(user_item_matrix).T.tocsr())

    return model

In [11]:
def get_als_recommends(prefiltered_data, user, unfiltered_data, model, N=50):
    if user in prefiltered_data['user_id'].unique():
        prefiltered_matrix = prepare_matrx(prefiltered_data)
        id_to_itemid, id_to_userid, itemid_to_id, userid_to_id = prepare_dict(prefiltered_matrix)
        prefiltered_matrix = bm25_weight(prefiltered_matrix.T).T
        print(f" old {userid_to_id[user]}")
        rec = model.recommend(userid=userid_to_id[user], user_items=csr_matrix(prefiltered_matrix).tocsr(),
                              N=N, filter_already_liked_items=False, recalculate_user=True)
        rec_ids = [id_to_itemid[f[0]] for f in rec]
        
    else:
        
        new_df = pd.concat([prefiltered_data, unfiltered_data.loc[unfiltered_data['user_id']==user, :]])
        prefiltered_matrix = prepare_matrx(new_df)
        id_to_itemid, id_to_userid, itemid_to_id, userid_to_id = prepare_dict(prefiltered_matrix)
        prefiltered_matrix = bm25_weight(prefiltered_matrix.T).T
        rec = als_model.recommend(userid=userid_to_id[user], user_items=csr_matrix(new_df).tocsr(),
                              N=N, filter_already_liked_items=False, recalculate_user=True)
        rec_ids = [id_to_itemid[f[0]] for f in rec]
        
    return rec_ids

In [12]:
als_model = fit(prefiltered_train_lvl_1)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:04<00:00,  3.04it/s]


In [13]:
num_candidates = [20, 50, 100, 200, 500]

In [14]:
%%time

for k in num_candidates:
    column_name = f'als{k}'
    result_lvl_1[column_name] = result_lvl_1['user_id'].apply(lambda x: get_als_recommends(prefiltered_train_lvl_1,
                                                                                     x, data_val_lvl_1, als_model, N=k))


 old 0
 old 1
 old 3
 old 5
 old 6
 old 7
 old 8
 old 12
 old 13
 old 14
 old 15
 old 16
 old 17
 old 18
 old 19
 old 20
 old 21
 old 22
 old 23
 old 24
 old 25
 old 26
 old 27
 old 28
 old 29
 old 30
 old 31
 old 32
 old 33
 old 34
 old 35
 old 36
 old 38
 old 39
 old 40
 old 41
 old 42
 old 43
 old 45
 old 46
 old 48
 old 50
 old 51
 old 52
 old 54
 old 55
 old 56
 old 57
 old 58
 old 61
 old 62
 old 63
 old 64
 old 65
 old 66
 old 67
 old 68
 old 69
 old 70
 old 72
 old 73
 old 74
 old 75
 old 76
 old 77
 old 78
 old 79
 old 80
 old 81
 old 82
 old 83
 old 84
 old 85
 old 86
 old 89
 old 90
 old 92
 old 93
 old 95
 old 96
 old 97
 old 98
 old 99
 old 101
 old 102
 old 103
 old 104
 old 105
 old 106
 old 107
 old 108
 old 109
 old 110
 old 111
 old 112
 old 113
 old 114
 old 115
 old 116
 old 117
 old 119
 old 120
 old 121
 old 122
 old 123
 old 124
 old 125
 old 126
 old 127
 old 129
 old 130
 old 131
 old 132
 old 133
 old 134
 old 135
 old 136
 old 137
 old 138
 old 140
 old 141
 

 old 1067
 old 1068
 old 1070
 old 1071
 old 1072
 old 1073
 old 1074
 old 1075
 old 1076
 old 1077
 old 1078
 old 1079
 old 1080
 old 1081
 old 1082
 old 1083
 old 1084
 old 1086
 old 1087
 old 1088
 old 1089
 old 1090
 old 1092
 old 1093
 old 1094
 old 1096
 old 1097
 old 1098
 old 1099
 old 1100
 old 1101
 old 1102
 old 1103
 old 1104
 old 1106
 old 1107
 old 1108
 old 1109
 old 1110
 old 1111
 old 1113
 old 1114
 old 1115
 old 1116
 old 1117
 old 1118
 old 1119
 old 1120
 old 1121
 old 1122
 old 1123
 old 1124
 old 1125
 old 1126
 old 1127
 old 1128
 old 1129
 old 1130
 old 1131
 old 1132
 old 1133
 old 1135
 old 1136
 old 1137
 old 1138
 old 1139
 old 1141
 old 1142
 old 1143
 old 1144
 old 1145
 old 1146
 old 1147
 old 1148
 old 1149
 old 1150
 old 1151
 old 1153
 old 1154
 old 1155
 old 1156
 old 1157
 old 1158
 old 1159
 old 1161
 old 1162
 old 1163
 old 1164
 old 1165
 old 1166
 old 1167
 old 1168
 old 1169
 old 1170
 old 1171
 old 1174
 old 1175
 old 1176
 old 1177
 old 1178


 old 2000
 old 2001
 old 2002
 old 2004
 old 2005
 old 2006
 old 2007
 old 2008
 old 2009
 old 2010
 old 2011
 old 2012
 old 2014
 old 2015
 old 2016
 old 2017
 old 2020
 old 2021
 old 2022
 old 2023
 old 2024
 old 2025
 old 2027
 old 2028
 old 2030
 old 2032
 old 2033
 old 2034
 old 2035
 old 2036
 old 2037
 old 2040
 old 2042
 old 2043
 old 2044
 old 2045
 old 2047
 old 2049
 old 2050
 old 2051
 old 2052
 old 2055
 old 2056
 old 2057
 old 2058
 old 2061
 old 2062
 old 2063
 old 2064
 old 2065
 old 2066
 old 2067
 old 2069
 old 2070
 old 2071
 old 2072
 old 2073
 old 2074
 old 2075
 old 2076
 old 2077
 old 2080
 old 2081
 old 2082
 old 2084
 old 2085
 old 2087
 old 2088
 old 2089
 old 2091
 old 2094
 old 2095
 old 2096
 old 2097
 old 2098
 old 2099
 old 2100
 old 2101
 old 2102
 old 2103
 old 2104
 old 2106
 old 2107
 old 2108
 old 2109
 old 2110
 old 2111
 old 2112
 old 2113
 old 2114
 old 2116
 old 2117
 old 2118
 old 2119
 old 2120
 old 2121
 old 2122
 old 2123
 old 2124
 old 2125


 old 542
 old 543
 old 544
 old 545
 old 547
 old 548
 old 549
 old 550
 old 551
 old 553
 old 554
 old 556
 old 557
 old 558
 old 559
 old 560
 old 563
 old 564
 old 565
 old 567
 old 569
 old 570
 old 571
 old 572
 old 573
 old 574
 old 575
 old 576
 old 577
 old 578
 old 579
 old 581
 old 582
 old 583
 old 584
 old 585
 old 586
 old 587
 old 588
 old 589
 old 590
 old 591
 old 592
 old 593
 old 594
 old 596
 old 597
 old 598
 old 599
 old 600
 old 601
 old 602
 old 603
 old 604
 old 605
 old 607
 old 608
 old 610
 old 611
 old 612
 old 613
 old 614
 old 616
 old 617
 old 618
 old 619
 old 620
 old 621
 old 622
 old 623
 old 624
 old 625
 old 626
 old 627
 old 628
 old 630
 old 631
 old 632
 old 633
 old 634
 old 636
 old 638
 old 639
 old 641
 old 642
 old 643
 old 646
 old 647
 old 648
 old 649
 old 650
 old 652
 old 653
 old 654
 old 655
 old 656
 old 657
 old 658
 old 659
 old 660
 old 661
 old 662
 old 663
 old 664
 old 665
 old 666
 old 667
 old 668
 old 669
 old 670
 old 673
 

 old 1533
 old 1534
 old 1535
 old 1536
 old 1537
 old 1538
 old 1539
 old 1541
 old 1542
 old 1544
 old 1545
 old 1546
 old 1547
 old 1548
 old 1549
 old 1550
 old 1552
 old 1553
 old 1554
 old 1555
 old 1556
 old 1557
 old 1559
 old 1560
 old 1561
 old 1562
 old 1563
 old 1564
 old 1565
 old 1566
 old 1567
 old 1568
 old 1569
 old 1570
 old 1571
 old 1573
 old 1574
 old 1575
 old 1576
 old 1577
 old 1578
 old 1579
 old 1580
 old 1581
 old 1582
 old 1583
 old 1584
 old 1585
 old 1586
 old 1587
 old 1588
 old 1589
 old 1591
 old 1593
 old 1594
 old 1595
 old 1596
 old 1598
 old 1600
 old 1601
 old 1602
 old 1603
 old 1605
 old 1607
 old 1608
 old 1609
 old 1610
 old 1611
 old 1612
 old 1613
 old 1614
 old 1616
 old 1618
 old 1619
 old 1620
 old 1621
 old 1622
 old 1623
 old 1624
 old 1626
 old 1627
 old 1628
 old 1629
 old 1631
 old 1632
 old 1633
 old 1636
 old 1637
 old 1638
 old 1639
 old 1640
 old 1641
 old 1642
 old 1643
 old 1645
 old 1646
 old 1647
 old 1648
 old 1649
 old 1650


 old 2483
 old 0
 old 1
 old 3
 old 5
 old 6
 old 7
 old 8
 old 12
 old 13
 old 14
 old 15
 old 16
 old 17
 old 18
 old 19
 old 20
 old 21
 old 22
 old 23
 old 24
 old 25
 old 26
 old 27
 old 28
 old 29
 old 30
 old 31
 old 32
 old 33
 old 34
 old 35
 old 36
 old 38
 old 39
 old 40
 old 41
 old 42
 old 43
 old 45
 old 46
 old 48
 old 50
 old 51
 old 52
 old 54
 old 55
 old 56
 old 57
 old 58
 old 61
 old 62
 old 63
 old 64
 old 65
 old 66
 old 67
 old 68
 old 69
 old 70
 old 72
 old 73
 old 74
 old 75
 old 76
 old 77
 old 78
 old 79
 old 80
 old 81
 old 82
 old 83
 old 84
 old 85
 old 86
 old 89
 old 90
 old 92
 old 93
 old 95
 old 96
 old 97
 old 98
 old 99
 old 101
 old 102
 old 103
 old 104
 old 105
 old 106
 old 107
 old 108
 old 109
 old 110
 old 111
 old 112
 old 113
 old 114
 old 115
 old 116
 old 117
 old 119
 old 120
 old 121
 old 122
 old 123
 old 124
 old 125
 old 126
 old 127
 old 129
 old 130
 old 131
 old 132
 old 133
 old 134
 old 135
 old 136
 old 137
 old 138
 old 140


 old 1066
 old 1067
 old 1068
 old 1070
 old 1071
 old 1072
 old 1073
 old 1074
 old 1075
 old 1076
 old 1077
 old 1078
 old 1079
 old 1080
 old 1081
 old 1082
 old 1083
 old 1084
 old 1086
 old 1087
 old 1088
 old 1089
 old 1090
 old 1092
 old 1093
 old 1094
 old 1096
 old 1097
 old 1098
 old 1099
 old 1100
 old 1101
 old 1102
 old 1103
 old 1104
 old 1106
 old 1107
 old 1108
 old 1109
 old 1110
 old 1111
 old 1113
 old 1114
 old 1115
 old 1116
 old 1117
 old 1118
 old 1119
 old 1120
 old 1121
 old 1122
 old 1123
 old 1124
 old 1125
 old 1126
 old 1127
 old 1128
 old 1129
 old 1130
 old 1131
 old 1132
 old 1133
 old 1135
 old 1136
 old 1137
 old 1138
 old 1139
 old 1141
 old 1142
 old 1143
 old 1144
 old 1145
 old 1146
 old 1147
 old 1148
 old 1149
 old 1150
 old 1151
 old 1153
 old 1154
 old 1155
 old 1156
 old 1157
 old 1158
 old 1159
 old 1161
 old 1162
 old 1163
 old 1164
 old 1165
 old 1166
 old 1167
 old 1168
 old 1169
 old 1170
 old 1171
 old 1174
 old 1175
 old 1176
 old 1177


 old 1999
 old 2000
 old 2001
 old 2002
 old 2004
 old 2005
 old 2006
 old 2007
 old 2008
 old 2009
 old 2010
 old 2011
 old 2012
 old 2014
 old 2015
 old 2016
 old 2017
 old 2020
 old 2021
 old 2022
 old 2023
 old 2024
 old 2025
 old 2027
 old 2028
 old 2030
 old 2032
 old 2033
 old 2034
 old 2035
 old 2036
 old 2037
 old 2040
 old 2042
 old 2043
 old 2044
 old 2045
 old 2047
 old 2049
 old 2050
 old 2051
 old 2052
 old 2055
 old 2056
 old 2057
 old 2058
 old 2061
 old 2062
 old 2063
 old 2064
 old 2065
 old 2066
 old 2067
 old 2069
 old 2070
 old 2071
 old 2072
 old 2073
 old 2074
 old 2075
 old 2076
 old 2077
 old 2080
 old 2081
 old 2082
 old 2084
 old 2085
 old 2087
 old 2088
 old 2089
 old 2091
 old 2094
 old 2095
 old 2096
 old 2097
 old 2098
 old 2099
 old 2100
 old 2101
 old 2102
 old 2103
 old 2104
 old 2106
 old 2107
 old 2108
 old 2109
 old 2110
 old 2111
 old 2112
 old 2113
 old 2114
 old 2116
 old 2117
 old 2118
 old 2119
 old 2120
 old 2121
 old 2122
 old 2123
 old 2124


 old 541
 old 542
 old 543
 old 544
 old 545
 old 547
 old 548
 old 549
 old 550
 old 551
 old 553
 old 554
 old 556
 old 557
 old 558
 old 559
 old 560
 old 563
 old 564
 old 565
 old 567
 old 569
 old 570
 old 571
 old 572
 old 573
 old 574
 old 575
 old 576
 old 577
 old 578
 old 579
 old 581
 old 582
 old 583
 old 584
 old 585
 old 586
 old 587
 old 588
 old 589
 old 590
 old 591
 old 592
 old 593
 old 594
 old 596
 old 597
 old 598
 old 599
 old 600
 old 601
 old 602
 old 603
 old 604
 old 605
 old 607
 old 608
 old 610
 old 611
 old 612
 old 613
 old 614
 old 616
 old 617
 old 618
 old 619
 old 620
 old 621
 old 622
 old 623
 old 624
 old 625
 old 626
 old 627
 old 628
 old 630
 old 631
 old 632
 old 633
 old 634
 old 636
 old 638
 old 639
 old 641
 old 642
 old 643
 old 646
 old 647
 old 648
 old 649
 old 650
 old 652
 old 653
 old 654
 old 655
 old 656
 old 657
 old 658
 old 659
 old 660
 old 661
 old 662
 old 663
 old 664
 old 665
 old 666
 old 667
 old 668
 old 669
 old 670
 

 old 1532
 old 1533
 old 1534
 old 1535
 old 1536
 old 1537
 old 1538
 old 1539
 old 1541
 old 1542
 old 1544
 old 1545
 old 1546
 old 1547
 old 1548
 old 1549
 old 1550
 old 1552
 old 1553
 old 1554
 old 1555
 old 1556
 old 1557
 old 1559
 old 1560
 old 1561
 old 1562
 old 1563
 old 1564
 old 1565
 old 1566
 old 1567
 old 1568
 old 1569
 old 1570
 old 1571
 old 1573
 old 1574
 old 1575
 old 1576
 old 1577
 old 1578
 old 1579
 old 1580
 old 1581
 old 1582
 old 1583
 old 1584
 old 1585
 old 1586
 old 1587
 old 1588
 old 1589
 old 1591
 old 1593
 old 1594
 old 1595
 old 1596
 old 1598
 old 1600
 old 1601
 old 1602
 old 1603
 old 1605
 old 1607
 old 1608
 old 1609
 old 1610
 old 1611
 old 1612
 old 1613
 old 1614
 old 1616
 old 1618
 old 1619
 old 1620
 old 1621
 old 1622
 old 1623
 old 1624
 old 1626
 old 1627
 old 1628
 old 1629
 old 1631
 old 1632
 old 1633
 old 1636
 old 1637
 old 1638
 old 1639
 old 1640
 old 1641
 old 1642
 old 1643
 old 1645
 old 1646
 old 1647
 old 1648
 old 1649


 old 2482
 old 2483
 old 0
 old 1
 old 3
 old 5
 old 6
 old 7
 old 8
 old 12
 old 13
 old 14
 old 15
 old 16
 old 17
 old 18
 old 19
 old 20
 old 21
 old 22
 old 23
 old 24
 old 25
 old 26
 old 27
 old 28
 old 29
 old 30
 old 31
 old 32
 old 33
 old 34
 old 35
 old 36
 old 38
 old 39
 old 40
 old 41
 old 42
 old 43
 old 45
 old 46
 old 48
 old 50
 old 51
 old 52
 old 54
 old 55
 old 56
 old 57
 old 58
 old 61
 old 62
 old 63
 old 64
 old 65
 old 66
 old 67
 old 68
 old 69
 old 70
 old 72
 old 73
 old 74
 old 75
 old 76
 old 77
 old 78
 old 79
 old 80
 old 81
 old 82
 old 83
 old 84
 old 85
 old 86
 old 89
 old 90
 old 92
 old 93
 old 95
 old 96
 old 97
 old 98
 old 99
 old 101
 old 102
 old 103
 old 104
 old 105
 old 106
 old 107
 old 108
 old 109
 old 110
 old 111
 old 112
 old 113
 old 114
 old 115
 old 116
 old 117
 old 119
 old 120
 old 121
 old 122
 old 123
 old 124
 old 125
 old 126
 old 127
 old 129
 old 130
 old 131
 old 132
 old 133
 old 134
 old 135
 old 136
 old 137
 old 138

 old 1065
 old 1066
 old 1067
 old 1068
 old 1070
 old 1071
 old 1072
 old 1073
 old 1074
 old 1075
 old 1076
 old 1077
 old 1078
 old 1079
 old 1080
 old 1081
 old 1082
 old 1083
 old 1084
 old 1086
 old 1087
 old 1088
 old 1089
 old 1090
 old 1092
 old 1093
 old 1094
 old 1096
 old 1097
 old 1098
 old 1099
 old 1100
 old 1101
 old 1102
 old 1103
 old 1104
 old 1106
 old 1107
 old 1108
 old 1109
 old 1110
 old 1111
 old 1113
 old 1114
 old 1115
 old 1116
 old 1117
 old 1118
 old 1119
 old 1120
 old 1121
 old 1122
 old 1123
 old 1124
 old 1125
 old 1126
 old 1127
 old 1128
 old 1129
 old 1130
 old 1131
 old 1132
 old 1133
 old 1135
 old 1136
 old 1137
 old 1138
 old 1139
 old 1141
 old 1142
 old 1143
 old 1144
 old 1145
 old 1146
 old 1147
 old 1148
 old 1149
 old 1150
 old 1151
 old 1153
 old 1154
 old 1155
 old 1156
 old 1157
 old 1158
 old 1159
 old 1161
 old 1162
 old 1163
 old 1164
 old 1165
 old 1166
 old 1167
 old 1168
 old 1169
 old 1170
 old 1171
 old 1174
 old 1175
 old 1176


 old 1998
 old 1999
 old 2000
 old 2001
 old 2002
 old 2004
 old 2005
 old 2006
 old 2007
 old 2008
 old 2009
 old 2010
 old 2011
 old 2012
 old 2014
 old 2015
 old 2016
 old 2017
 old 2020
 old 2021
 old 2022
 old 2023
 old 2024
 old 2025
 old 2027
 old 2028
 old 2030
 old 2032
 old 2033
 old 2034
 old 2035
 old 2036
 old 2037
 old 2040
 old 2042
 old 2043
 old 2044
 old 2045
 old 2047
 old 2049
 old 2050
 old 2051
 old 2052
 old 2055
 old 2056
 old 2057
 old 2058
 old 2061
 old 2062
 old 2063
 old 2064
 old 2065
 old 2066
 old 2067
 old 2069
 old 2070
 old 2071
 old 2072
 old 2073
 old 2074
 old 2075
 old 2076
 old 2077
 old 2080
 old 2081
 old 2082
 old 2084
 old 2085
 old 2087
 old 2088
 old 2089
 old 2091
 old 2094
 old 2095
 old 2096
 old 2097
 old 2098
 old 2099
 old 2100
 old 2101
 old 2102
 old 2103
 old 2104
 old 2106
 old 2107
 old 2108
 old 2109
 old 2110
 old 2111
 old 2112
 old 2113
 old 2114
 old 2116
 old 2117
 old 2118
 old 2119
 old 2120
 old 2121
 old 2122
 old 2123


In [16]:
result_lvl_1

Unnamed: 0,user_id,actual,als20,als50,als100,als200,als500
0,1,"[853529, 865456, 867607, 872137, 874905, 87524...","[885290, 1062572, 1028166, 1082185, 898121, 10...","[885290, 1062572, 1028166, 1082185, 898121, 10...","[885290, 1062572, 1028166, 1082185, 898121, 10...","[885290, 1062572, 1028166, 1082185, 898121, 10...","[885290, 1062572, 1028166, 1082185, 898121, 10..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870...","[1041259, 1082185, 1033142, 916122, 834484, 90...","[1041259, 1082185, 1033142, 916122, 834484, 90...","[1041259, 1082185, 1033142, 916122, 834484, 90...","[1041259, 1082185, 1033142, 916122, 834484, 90...","[1041259, 1082185, 1033142, 916122, 834484, 90..."
2,4,"[883932, 970760, 1035676, 1055863, 1097610, 67...","[902172, 846550, 891423, 883932, 999714, 11194...","[902172, 846550, 891423, 883932, 999714, 11194...","[902172, 846550, 891423, 883932, 999714, 11194...","[902172, 846550, 891423, 883932, 999714, 11194...","[902172, 846550, 891423, 883932, 999714, 11194..."
3,6,"[1024306, 1102949, 6548453, 835394, 940804, 96...","[1082185, 878996, 1024306, 965267, 930118, 863...","[1082185, 878996, 1024306, 965267, 930118, 863...","[1082185, 878996, 1024306, 965267, 930118, 863...","[1082185, 878996, 1024306, 965267, 930118, 863...","[1082185, 878996, 1024306, 965267, 930118, 863..."
4,7,"[836281, 843306, 845294, 914190, 920456, 93886...","[853643, 1029504, 857390, 1123086, 1003188, 10...","[853643, 1029504, 857390, 1123086, 1003188, 10...","[853643, 1029504, 857390, 1123086, 1003188, 10...","[853643, 1029504, 857390, 1123086, 1003188, 10...","[853643, 1029504, 857390, 1123086, 1003188, 10..."
...,...,...,...,...,...,...,...
2149,2496,"[831509, 867188, 1013623, 1048851, 5592734, 16...","[844179, 1020581, 12810393, 1004906, 8065410, ...","[844179, 1020581, 12810393, 1004906, 8065410, ...","[844179, 1020581, 12810393, 1004906, 8065410, ...","[844179, 1020581, 12810393, 1004906, 8065410, ...","[844179, 1020581, 12810393, 1004906, 8065410, ..."
2150,2497,"[820291, 824759, 838797, 859010, 859075, 86077...","[1098066, 826249, 981760, 5569230, 9707498, 89...","[1098066, 826249, 981760, 5569230, 9707498, 89...","[1098066, 826249, 981760, 5569230, 9707498, 89...","[1098066, 826249, 981760, 5569230, 9707498, 89...","[1098066, 826249, 981760, 5569230, 9707498, 89..."
2151,2498,"[865511, 962991, 1076374, 1102358, 5564901, 15...","[1077490, 886787, 892844, 823721, 916122, 7024...","[1077490, 886787, 892844, 823721, 916122, 7024...","[1077490, 886787, 892844, 823721, 916122, 7024...","[1077490, 886787, 892844, 823721, 916122, 7024...","[1077490, 886787, 892844, 823721, 916122, 7024..."
2152,2499,"[861282, 921744, 1050968, 13842089, 828837, 86...","[826249, 883404, 1098066, 1029743, 859075, 913...","[826249, 883404, 1098066, 1029743, 859075, 913...","[826249, 883404, 1098066, 1029743, 859075, 913...","[826249, 883404, 1098066, 1029743, 859075, 913...","[826249, 883404, 1098066, 1029743, 859075, 913..."


In [24]:
%%time

for k in num_candidates:
    column_name = f'als{k}'
    print(f"precision at k = {k} from {k} candidates is {result_lvl_1.apply(lambda row: precision_at_k(row[column_name], row['actual'], k=k), axis=1).mean()}")
    print(f"precision at k = 5 from {k} candidates is {result_lvl_1.apply(lambda row: precision_at_k(row[column_name], row['actual'], k=5), axis=1).mean()}")

precision at k = 20 from 20 candidates is 0.12453574744661096
precision at k = 5 from 20 candidates is 0.17790157845868154
precision at k = 50 from 50 candidates is 0.09171773444753946
precision at k = 5 from 50 candidates is 0.17790157845868154
precision at k = 100 from 100 candidates is 0.07071030640668524
precision at k = 5 from 100 candidates is 0.17790157845868154
precision at k = 200 from 200 candidates is 0.053033890436397405
precision at k = 5 from 200 candidates is 0.17790157845868154
precision at k = 500 from 500 candidates is 0.034356545961002787
precision at k = 5 from 500 candidates is 0.17790157845868154
CPU times: total: 2.55 s
Wall time: 2.56 s


In [26]:
%%time

for k in num_candidates:
    column_name = f'als{k}'
    print(f"recall_at_k = {k} from {k} candidates is {result_lvl_1.apply(lambda row: recall_at_k(row[column_name], row['actual'], k=k), axis=1).mean()}")
    print(f"recall_at_k = 5 from {k} candidates is {result_lvl_1.apply(lambda row: recall_at_k(row[column_name], row['actual'], k=5), axis=1).mean()}")

recall_at_k = 20 from 20 candidates is 0.04446346191756893
recall_at_k = 5 from 20 candidates is 0.017867107189893615
recall_at_k = 50 from 50 candidates is 0.07949832552178829
recall_at_k = 5 from 50 candidates is 0.017867107189893615
recall_at_k = 100 from 100 candidates is 0.11822696135050792
recall_at_k = 5 from 100 candidates is 0.017867107189893615
recall_at_k = 200 from 200 candidates is 0.17142455811772303
recall_at_k = 5 from 200 candidates is 0.017867107189893615
recall_at_k = 500 from 500 candidates is 0.27015727575936027
recall_at_k = 5 from 500 candidates is 0.017867107189893615
CPU times: total: 2.7 s
Wall time: 2.7 s


In [27]:
result_lvl_1.to_csv('../als_test_recommendations.csv')

In [None]:
# your_code

In [None]:
### Финальный проект

Мы уже прошли всю необходимуб теорию для финального проекта. Проект осуществляется на данных из вебинара (данные считаны в начале ДЗ).
Рекомендуем вам **начать делать проект сразу после этого домашнего задания**
- Целевая метрика - precision@5. Порог для уcпешной сдачи проекта precision@5 > 25%
- Будет public тестовый датасет, на котором вы сможете измерять метрику
- Также будет private тестовый датасет для измерения финального качества
- НЕ обязательно, но крайне желательно использовать 2-ух уровневые рекоммендательные системы в проекте
- Вы сдаете код проекта в виде github репозитория и csv файл с рекомендациями 