In [1]:
import pandas as pd

In [283]:
# Show the installed pandas version so the outputs below can be reproduced.
pd.__version__

'1.1.3'

Небольшой интернет-магазин попросил вас добавить ранжирование товаров в блок "Смотрели ранее" - в нем теперь надо показывать не последние просмотренные пользователем товары, а те товары из просмотренных, которые он наиболее вероятно купит. Качество вашего решения будет оцениваться по количеству покупок в сравнении с прошлым решением в ходе А/В теста, т.к. по доходу от продаж статзначимость будет достигаться дольше из-за разброса цен. Таким образом, ничего заранее не зная про корреляцию оффлайновых и онлайновых метрик качества, в начале проекта вы можете лишь постараться оптимизировать recall@k и precision@k.

Это задание посвящено построению простых бейзлайнов для этой задачи: ранжирование просмотренных товаров по частоте просмотров и по частоте покупок. Эти бейзлайны, с одной стороны, могут помочь вам грубо оценить возможный эффект от ранжирования товаров в блоке - например, чтобы вписать какие-то числа в коммерческое предложение заказчику, а с другой стороны, могут оказаться самым хорошим вариантом, если данных очень мало (недостаточно для обучения даже простых моделей).

# Входные данные

Вам дается две выборки с пользовательскими сессиями - id-шниками просмотренных и id-шниками купленных товаров. Одна выборка будет использоваться для обучения (оценки популярностей товаров), а другая - для теста.

В файлах записаны сессии по одной в каждой строке. Формат сессии: id просмотренных товаров, разделённые запятой, затем идёт точка с запятой, после чего следуют id купленных товаров (если такие имеются), также разделённые запятой. Например, `1,2,3,4;` или `1,2,3,4;5,6`.

Гарантируется, что среди id купленных товаров все различные.

Важно:

- Сессии, в которых пользователь ничего не купил, исключаем из оценки качества.
- Если товар не встречался в обучающей выборке, его популярность равна 0.
- Рекомендуем разные товары. И их число должно быть не больше, чем количество различных просмотренных пользователем товаров.
- Рекомендаций всегда не больше, чем минимум из двух чисел: количество просмотренных пользователем товаров и k в recall@k / precision@k.

In [641]:
# Load the training sessions: one session per line, "viewed ids;bought ids".
# dtype=str keeps both columns as raw comma-separated strings regardless of type
# inference, so the later x.split(',') never receives a number; empty buy_id
# fields are still read as NaN.
raw_train = pd.read_table('coursera_sessions_train.txt', names=['view_id', 'buy_id'],
                          delimiter=';', dtype=str)
raw_train

Unnamed: 0,view_id,buy_id
0,012345,
1,9101191112911,
2,161718192021,
3,2425262724,
4,343536343735363738393839,
...,...,...
49995,32291605203229138220,32291
49996,6047928288102804102805,
49997,98020025980200259802002598020025,
49998,"8844,42500,8838,8172,29237,352,8847,6681,8827,...",


In [642]:
# Load the test sessions: one session per line, "viewed ids;bought ids".
# dtype=str keeps both columns as raw comma-separated strings regardless of type
# inference, so the later x.split(',') never receives a number; empty buy_id
# fields are still read as NaN.
raw_test = pd.read_table('coursera_sessions_test.txt', names=['view_id', 'buy_id'],
                         delimiter=';', dtype=str)
raw_test

Unnamed: 0,view_id,buy_id
0,678,
1,131415,
2,2223,
3,282930313233,
4,4041,
...,...,...
49995,376371836318131,
49996,"35402,10150,35402,10150,35402,88962,49526,3540...",
49997,16113451611345161134516130853161,
49998,262361657226236,


In [643]:
# Keep only the rows without missing values, i.e. the sessions that have purchases.
data_train = raw_train.loc[raw_train.notna().all(axis=1)]
data_train

Unnamed: 0,view_id,buy_id
7,59606162606364656661676867,676063
10,848586878889849091929386,86
19,138198199127,199
30,303304305306307308309310311312,303
33,352353352,352
...,...,...
49943,"41795,4337,4335,4337,4335,24087,4335,41795,240...",4335
49964,6366152696366589563665895,5895
49981,64552259312807,259352807
49991,919212025150632174250632025134927,91921


In [644]:
# Keep only the rows without missing values, i.e. the sessions that have purchases.
data_test = raw_test.loc[raw_test.notna().all(axis=1)]
data_test

Unnamed: 0,view_id,buy_id
7,63686970666159616668,6663
14,158159160159161162,162
19,200201202203204,201205
34,371372371,371373
40,422,422
...,...,...
49943,"2859,2854,88887,96997,4439,28645,99975,34601,1...",24907102691184964333
49944,"77655,23249,1306,47450,26157,58205,47450,58205...",58205311169482
49945,605384443066252444306053866251,6625244430
49946,4981576363,49815


# Подсчёт популярности товаров

Превращаем строки в списки int'ов

In [645]:
# Parse each training session's comma-separated viewed ids into a list of ints.
view_train = raw_train['view_id'].apply(
    lambda raw_ids: [int(token) for token in raw_ids.split(',')]
)
view_train

0                                       [0, 1, 2, 3, 4, 5]
1                            [9, 10, 11, 9, 11, 12, 9, 11]
2                                 [16, 17, 18, 19, 20, 21]
3                                     [24, 25, 26, 27, 24]
4         [34, 35, 36, 34, 37, 35, 36, 37, 38, 39, 38, 39]
                               ...                        
49995                         [32291, 60520, 32291, 38220]
49996                       [60479, 28288, 102804, 102805]
49997     [980, 20025, 980, 20025, 980, 20025, 980, 20025]
49998    [8844, 42500, 8838, 8172, 29237, 352, 8847, 66...
49999                               [39047, 102806, 27774]
Name: view_id, Length: 50000, dtype: object

In [646]:
# Parse each training session's comma-separated purchased ids into a list of ints.
buy_train = data_train['buy_id'].apply(
    lambda raw_ids: [int(token) for token in raw_ids.split(',')]
)
buy_train

7         [67, 60, 63]
10                [86]
19               [199]
30               [303]
33               [352]
             ...      
49943           [4335]
49964           [5895]
49981    [25935, 2807]
49991          [91921]
49995          [32291]
Name: buy_id, Length: 3608, dtype: object

In [647]:
# Parse each test session's comma-separated viewed ids into a list of ints.
view_test = raw_test['view_id'].apply(
    lambda raw_ids: [int(token) for token in raw_ids.split(',')]
)
view_test

0                                                [6, 7, 8]
1                                             [13, 14, 15]
2                                                 [22, 23]
3                                 [28, 29, 30, 31, 32, 33]
4                                                 [40, 41]
                               ...                        
49995                                [37637, 18363, 18131]
49996    [35402, 10150, 35402, 10150, 35402, 88962, 495...
49997    [161, 1345, 161, 1345, 161, 1345, 161, 30853, ...
49998                                [26236, 16572, 26236]
49999                      [80626, 102807, 102808, 102809]
Name: view_id, Length: 50000, dtype: object

In [648]:
# Parse each test session's comma-separated purchased ids into a list of ints.
buy_test = data_test['buy_id'].apply(
    lambda raw_ids: [int(token) for token in raw_ids.split(',')]
)
buy_test

7                            [66, 63]
14                              [162]
19                         [201, 205]
34                         [371, 373]
40                              [422]
                     ...             
49943    [24907, 102691, 18496, 4333]
49944            [58205, 3111, 69482]
49945                  [66252, 44430]
49946                         [49815]
49985                         [21841]
Name: buy_id, Length: 3665, dtype: object

Разворачиваем списки

In [649]:
# Flatten the per-session lists into one row per viewed item (the session index is repeated).
view_id_train = view_train.explode()
view_id_train

0             0
0             1
0             2
0             3
0             4
          ...  
49998      8827
49998      8844
49999     39047
49999    102806
49999     27774
Name: view_id, Length: 356177, dtype: object

In [651]:
# Flatten the per-session lists into one row per purchased item (the session index is repeated).
buy_id_train = buy_train.explode()
buy_id_train

7           67
7           60
7           63
10          86
19         199
         ...  
49964     5895
49981    25935
49981     2807
49991    91921
49995    32291
Name: buy_id, Length: 5374, dtype: object

Считаем количество просмотров товаров по всем сессиям

In [652]:
# Number of views per item over all training sessions, most viewed first.
# Used below as the popularity lookup for ranking by views.
most_viewed_train = view_id_train.value_counts()
most_viewed_train

73        677
158       641
204       396
262       387
162       318
         ... 
34464       1
73826       1
73827       1
73828       1
102806      1
Name: view_id, Length: 77064, dtype: int64

Считаем количество покупок товаров по всем сессиям

In [653]:
# Number of purchases per item over the training sessions, most purchased first.
# Used below as the popularity lookup for ranking by purchases.
most_bought_train = buy_id_train.value_counts()
most_bought_train

158      14
204      12
73       11
3324     11
977      10
         ..
62509     1
5106      1
5105      1
21485     1
49152     1
Name: buy_id, Length: 4479, dtype: int64

In [654]:
# Distribution of the purchase counts: how many items were bought once, twice, and so on.
most_bought_train.value_counts()

1     3895
2      431
3       87
4       33
5       17
6        5
10       3
11       2
7        2
14       1
9        1
12       1
8        1
Name: buy_id, dtype: int64

# Рекомендации

Формируем рекомендации:
- сортировка по популярности из справочника (по убыванию)
- при равной популярности — по порядку просмотра товаров
- убираем повторы

In [655]:
def recommend_by(by, vocab):
    """Rank the distinct items of one session by popularity.

    Parameters
    ----------
    by : iterable of hashable
        Item ids in the order the user viewed them; repeats are allowed.
    vocab : pandas.Series or dict
        Popularity lookup mapping item id -> count. Any object exposing
        ``.get(key, default)`` works.

    Returns
    -------
    list
        Every distinct id from ``by`` exactly once, most popular first.
        Ids missing from ``vocab`` are scored 0; equally popular ids keep
        the order in which they were first viewed.
    """
    # dict.fromkeys drops repeats while preserving first-occurrence order.
    unique_items = list(dict.fromkeys(by))
    # sorted() is stable (also with reverse=True), so ties stay in viewing order.
    return sorted(unique_items, key=lambda item: vocab.get(item, 0), reverse=True)

## views

### train

Рекомендации по просмотрам на обучающей выборке

In [656]:
%%time
# Rank the viewed items of every training session with purchases by training view counts.
recommendations_by_views_train = (
    view_train.loc[data_train.index]
    .apply(lambda viewed: recommend_by(viewed, most_viewed_train))
)
recommendations_by_views_train

Wall time: 5.73 s


7                 [63, 64, 60, 61, 65, 66, 67, 68, 59, 62]
10                [85, 93, 89, 90, 84, 92, 86, 87, 91, 88]
19                                    [127, 138, 198, 199]
30       [303, 306, 304, 307, 309, 310, 305, 308, 311, ...
33                                              [352, 353]
                               ...                        
49943    [4335, 4337, 24073, 41795, 57885, 24087, 4344,...
49964                                  [5895, 6366, 15269]
49981                                 [2807, 25931, 64552]
49991                   [5063, 21742, 20251, 91921, 34927]
49995                                [32291, 38220, 60520]
Name: view_id, Length: 3608, dtype: object

In [657]:
# Name the Series so it becomes the 'recommendations_id' column once converted to a DataFrame.
# The result is rebound instead of mutating the object with inplace=True.
recommendations_by_views_train = recommendations_by_views_train.rename('recommendations_id')
recommendations_by_views_train

7                 [63, 64, 60, 61, 65, 66, 67, 68, 59, 62]
10                [85, 93, 89, 90, 84, 92, 86, 87, 91, 88]
19                                    [127, 138, 198, 199]
30       [303, 306, 304, 307, 309, 310, 305, 308, 311, ...
33                                              [352, 353]
                               ...                        
49943    [4335, 4337, 24073, 41795, 57885, 24087, 4344,...
49964                                  [5895, 6366, 15269]
49981                                 [2807, 25931, 64552]
49991                   [5063, 21742, 20251, 91921, 34927]
49995                                [32291, 38220, 60520]
Name: recommendations_id, Length: 3608, dtype: object

### test

In [658]:
%%time
# Rank the viewed items of every test session with purchases by training view counts.
recommendations_by_views_test = (
    view_test.loc[data_test.index]
    .apply(lambda viewed: recommend_by(viewed, most_viewed_train))
)
recommendations_by_views_test

Wall time: 5.87 s


7                             [63, 68, 66, 61, 59, 69, 70]
14                               [158, 162, 160, 159, 161]
19                               [204, 202, 203, 200, 201]
34                                              [371, 372]
40                                                   [422]
                               ...                        
49943    [4335, 4333, 24907, 606, 2854, 4439, 96997, 34...
49944    [1262, 16237, 7318, 1261, 1306, 3111, 26157, 4...
49945                         [60538, 44430, 66252, 66251]
49946                                       [49815, 76363]
49985                                [17711, 21841, 22562]
Name: view_id, Length: 3665, dtype: object

In [659]:
# Name the Series so it becomes the 'recommendations_id' column once converted to a DataFrame.
# The result is rebound instead of mutating the object with inplace=True.
recommendations_by_views_test = recommendations_by_views_test.rename('recommendations_id')
recommendations_by_views_test

7                             [63, 68, 66, 61, 59, 69, 70]
14                               [158, 162, 160, 159, 161]
19                               [204, 202, 203, 200, 201]
34                                              [371, 372]
40                                                   [422]
                               ...                        
49943    [4335, 4333, 24907, 606, 2854, 4439, 96997, 34...
49944    [1262, 16237, 7318, 1261, 1306, 3111, 26157, 4...
49945                         [60538, 44430, 66252, 66251]
49946                                       [49815, 76363]
49985                                [17711, 21841, 22562]
Name: recommendations_id, Length: 3665, dtype: object

## purchases

### train

In [660]:
%%time
# Rank the viewed items of every training session with purchases by training purchase counts.
recommendations_by_purchases_train = (
    view_train.loc[data_train.index]
    .apply(lambda viewed: recommend_by(viewed, most_bought_train))
)
recommendations_by_purchases_train

Wall time: 5.87 s


7                 [60, 63, 67, 59, 61, 62, 64, 65, 66, 68]
10                [86, 85, 93, 84, 87, 88, 89, 90, 91, 92]
19                                    [138, 199, 127, 198]
30       [303, 304, 305, 306, 307, 308, 309, 310, 311, ...
33                                              [352, 353]
                               ...                        
49943    [4335, 41795, 4337, 24087, 24073, 4344, 57885,...
49964                                  [5895, 6366, 15269]
49981                                 [2807, 64552, 25931]
49991                   [91921, 5063, 20251, 21742, 34927]
49995                                [32291, 60520, 38220]
Name: view_id, Length: 3608, dtype: object

In [661]:
# Name the Series so it becomes the 'recommendations_id' column once converted to a DataFrame.
# The result is rebound instead of mutating the object with inplace=True.
recommendations_by_purchases_train = recommendations_by_purchases_train.rename('recommendations_id')
recommendations_by_purchases_train

7                 [60, 63, 67, 59, 61, 62, 64, 65, 66, 68]
10                [86, 85, 93, 84, 87, 88, 89, 90, 91, 92]
19                                    [138, 199, 127, 198]
30       [303, 304, 305, 306, 307, 308, 309, 310, 311, ...
33                                              [352, 353]
                               ...                        
49943    [4335, 41795, 4337, 24087, 24073, 4344, 57885,...
49964                                  [5895, 6366, 15269]
49981                                 [2807, 64552, 25931]
49991                   [91921, 5063, 20251, 21742, 34927]
49995                                [32291, 60520, 38220]
Name: recommendations_id, Length: 3608, dtype: object

### test

In [662]:
%%time
# Rank the viewed items of every test session with purchases by training purchase counts.
recommendations_by_purchases_test = (
    view_test.loc[data_test.index]
    .apply(lambda viewed: recommend_by(viewed, most_bought_train))
)
recommendations_by_purchases_test

Wall time: 5.85 s


7                             [63, 68, 69, 70, 66, 61, 59]
14                               [158, 162, 160, 159, 161]
19                               [204, 202, 200, 201, 203]
34                                              [371, 372]
40                                                   [422]
                               ...                        
49943    [4335, 4333, 606, 2859, 2854, 88887, 96997, 44...
49944    [1262, 1306, 13520, 77655, 23249, 47450, 26157...
49945                         [60538, 44430, 66252, 66251]
49946                                       [49815, 76363]
49985                                [21841, 17711, 22562]
Name: view_id, Length: 3665, dtype: object

In [663]:
# Name the Series so it becomes the 'recommendations_id' column once converted to a DataFrame.
# The result is rebound instead of mutating the object with inplace=True.
recommendations_by_purchases_test = recommendations_by_purchases_test.rename('recommendations_id')
recommendations_by_purchases_test

7                             [63, 68, 69, 70, 66, 61, 59]
14                               [158, 162, 160, 159, 161]
19                               [204, 202, 200, 201, 203]
34                                              [371, 372]
40                                                   [422]
                               ...                        
49943    [4335, 4333, 606, 2859, 2854, 88887, 96997, 44...
49944    [1262, 1306, 13520, 77655, 23249, 47450, 26157...
49945                         [60538, 44430, 66252, 66251]
49946                                       [49815, 76363]
49985                                [21841, 17711, 22562]
Name: recommendations_id, Length: 3665, dtype: object

# Метрики

In [664]:
# Empty results table: one row per metric, one column per (ranking signal, sample) pair.
# Every cell starts as NaN and is filled in by the cells below.
metrics = pd.DataFrame(
    index=['average_recall_1', 'average_precision_1', 'average_recall_5', 'average_precision_5'],
    columns=['views_train', 'views_test', 'purch_train', 'purch_test'],
)
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,,,,
average_precision_1,,,,
average_recall_5,,,,
average_precision_5,,,,


## views

### train

In [665]:
# Wrap the recommendations in a DataFrame so the purchases can be attached as a second column.
recommendations_by_views_train = pd.DataFrame(recommendations_by_views_train)

In [666]:
# Put each session's purchases next to its recommendations (aligned on the session index).
recommendations_by_views_train = recommendations_by_views_train.assign(buy_id=buy_train)
recommendations_by_views_train

Unnamed: 0,recommendations_id,buy_id
7,"[63, 64, 60, 61, 65, 66, 67, 68, 59, 62]","[67, 60, 63]"
10,"[85, 93, 89, 90, 84, 92, 86, 87, 91, 88]",[86]
19,"[127, 138, 198, 199]",[199]
30,"[303, 306, 304, 307, 309, 310, 305, 308, 311, ...",[303]
33,"[352, 353]",[352]
...,...,...
49943,"[4335, 4337, 24073, 41795, 57885, 24087, 4344,...",[4335]
49964,"[5895, 6366, 15269]",[5895]
49981,"[2807, 25931, 64552]","[25935, 2807]"
49991,"[5063, 21742, 20251, 91921, 34927]",[91921]


#### AverageRecall@1

In [667]:
def average_recall(recommendations, purchases, at):
    """Recall@k for a single session.

    Parameters
    ----------
    recommendations : sequence
        Ranked recommended item ids; only the first ``at`` are considered.
    purchases : sequence
        Item ids bought in the session; must be non-empty, otherwise a
        ZeroDivisionError is raised.
    at : int
        Cut-off k.

    Returns
    -------
    float
        Number of purchased items found among the top ``at`` recommendations,
        divided by the number of purchases. Averaging over sessions is left
        to the caller.
    """
    top_items = set(recommendations[:at])
    hits = top_items.intersection(set(purchases))
    return len(hits) / len(purchases)

In [668]:
# Per-session recall@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_1_views_train = recommendations_by_views_train.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_recall_at_1_views_train

7        0.333333
10       0.000000
19       0.000000
30       1.000000
33       1.000000
           ...   
49943    1.000000
49964    1.000000
49981    0.500000
49991    0.000000
49995    1.000000
Length: 3608, dtype: float64

In [669]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_1', 'views_train'] = average_recall_at_1_views_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,,,
average_precision_1,,,,
average_recall_5,,,,
average_precision_5,,,,


#### AveragePrecision@1

In [670]:
def average_precision(recommendations, purchases, at):
    """Precision@k for a single session.

    Parameters
    ----------
    recommendations : sequence
        Ranked recommended item ids; only the first ``at`` are considered.
    purchases : sequence
        Item ids bought in the session.
    at : int
        Cut-off k; also the denominator, even when fewer than ``at`` items
        are recommended. Must be non-zero.

    Returns
    -------
    float
        Number of purchased items found among the top ``at`` recommendations,
        divided by ``at``. Averaging over sessions is left to the caller.
    """
    top_items = set(recommendations[:at])
    hits = top_items.intersection(set(purchases))
    return len(hits) / at

In [671]:
# Per-session precision@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_1_views_train = recommendations_by_views_train.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_precision_at_1_views_train

7        1.0
10       0.0
19       0.0
30       1.0
33       1.0
        ... 
49943    1.0
49964    1.0
49981    1.0
49991    0.0
49995    1.0
Length: 3608, dtype: float64

In [672]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_1', 'views_train'] = average_precision_at_1_views_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,,,
average_precision_1,0.512195,,,
average_recall_5,,,,
average_precision_5,,,,


#### AverageRecall@5

In [673]:
# Per-session recall@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_5_views_train = recommendations_by_views_train.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_recall_at_5_views_train

7        0.666667
10       0.000000
19       1.000000
30       1.000000
33       1.000000
           ...   
49943    1.000000
49964    1.000000
49981    0.500000
49991    1.000000
49995    1.000000
Length: 3608, dtype: float64

In [674]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_5', 'views_train'] = average_recall_at_5_views_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,,,
average_precision_1,0.512195,,,
average_recall_5,0.824692,,,
average_precision_5,,,,


#### AveragePrecision@5

In [675]:
# Per-session precision@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_5_views_train = recommendations_by_views_train.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_precision_at_5_views_train

7        0.4
10       0.0
19       0.2
30       0.2
33       0.2
        ... 
49943    0.2
49964    0.2
49981    0.2
49991    0.2
49995    0.2
Length: 3608, dtype: float64

In [676]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_5', 'views_train'] = average_precision_at_5_views_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,,,
average_precision_1,0.512195,,,
average_recall_5,0.824692,,,
average_precision_5,0.212528,,,


### test

In [677]:
# Wrap the recommendations in a DataFrame so the purchases can be attached as a second column.
recommendations_by_views_test = pd.DataFrame(recommendations_by_views_test)

In [678]:
# Put each session's purchases next to its recommendations (aligned on the session index).
recommendations_by_views_test = recommendations_by_views_test.assign(buy_id=buy_test)
recommendations_by_views_test

Unnamed: 0,recommendations_id,buy_id
7,"[63, 68, 66, 61, 59, 69, 70]","[66, 63]"
14,"[158, 162, 160, 159, 161]",[162]
19,"[204, 202, 203, 200, 201]","[201, 205]"
34,"[371, 372]","[371, 373]"
40,[422],[422]
...,...,...
49943,"[4335, 4333, 24907, 606, 2854, 4439, 96997, 34...","[24907, 102691, 18496, 4333]"
49944,"[1262, 16237, 7318, 1261, 1306, 3111, 26157, 4...","[58205, 3111, 69482]"
49945,"[60538, 44430, 66252, 66251]","[66252, 44430]"
49946,"[49815, 76363]",[49815]


#### AverageRecall@1

In [679]:
# Per-session recall@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_1_views_test = recommendations_by_views_test.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_recall_at_1_views_test

7        0.5
14       0.0
19       0.0
34       0.5
40       1.0
        ... 
49943    0.0
49944    0.0
49945    0.0
49946    1.0
49985    0.0
Length: 3665, dtype: float64

In [680]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_1', 'views_test'] = average_recall_at_1_views_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,,
average_precision_1,0.512195,,,
average_recall_5,0.824692,,,
average_precision_5,0.212528,,,


#### AveragePrecision@1

In [681]:
# Per-session precision@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_1_views_test = recommendations_by_views_test.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_precision_at_1_views_test

7        1.0
14       0.0
19       0.0
34       1.0
40       1.0
        ... 
49943    0.0
49944    0.0
49945    0.0
49946    1.0
49985    0.0
Length: 3665, dtype: float64

In [682]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_1', 'views_test'] = average_precision_at_1_views_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,,
average_precision_1,0.512195,0.48131,,
average_recall_5,0.824692,,,
average_precision_5,0.212528,,,


#### AverageRecall@5

In [683]:
# Per-session recall@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_5_views_test = recommendations_by_views_test.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_recall_at_5_views_test

7        1.0
14       1.0
19       0.5
34       0.5
40       1.0
        ... 
49943    0.5
49944    0.0
49945    1.0
49946    1.0
49985    1.0
Length: 3665, dtype: float64

In [684]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_5', 'views_test'] = average_recall_at_5_views_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,,
average_precision_1,0.512195,0.48131,,
average_recall_5,0.824692,0.800034,,
average_precision_5,0.212528,,,


#### AveragePrecision@5

In [685]:
# Per-session precision@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_5_views_test = recommendations_by_views_test.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_precision_at_5_views_test

7        0.4
14       0.2
19       0.2
34       0.2
40       0.2
        ... 
49943    0.4
49944    0.0
49945    0.4
49946    0.2
49985    0.2
Length: 3665, dtype: float64

In [686]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_5', 'views_test'] = average_precision_at_5_views_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,,
average_precision_1,0.512195,0.48131,,
average_recall_5,0.824692,0.800034,,
average_precision_5,0.212528,0.203765,,


## purchases

### train

In [687]:
# Wrap the recommendations in a DataFrame so the purchases can be attached as a second column.
recommendations_by_purchases_train = pd.DataFrame(recommendations_by_purchases_train)

In [688]:
# Put each session's purchases next to its recommendations (aligned on the session index).
recommendations_by_purchases_train = recommendations_by_purchases_train.assign(buy_id=buy_train)
recommendations_by_purchases_train

Unnamed: 0,recommendations_id,buy_id
7,"[60, 63, 67, 59, 61, 62, 64, 65, 66, 68]","[67, 60, 63]"
10,"[86, 85, 93, 84, 87, 88, 89, 90, 91, 92]",[86]
19,"[138, 199, 127, 198]",[199]
30,"[303, 304, 305, 306, 307, 308, 309, 310, 311, ...",[303]
33,"[352, 353]",[352]
...,...,...
49943,"[4335, 41795, 4337, 24087, 24073, 4344, 57885,...",[4335]
49964,"[5895, 6366, 15269]",[5895]
49981,"[2807, 64552, 25931]","[25935, 2807]"
49991,"[91921, 5063, 20251, 21742, 34927]",[91921]


#### AverageRecall@1

In [689]:
# Per-session recall@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_1_purchases_train = recommendations_by_purchases_train.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_recall_at_1_purchases_train

7        0.333333
10       1.000000
19       0.000000
30       1.000000
33       1.000000
           ...   
49943    1.000000
49964    1.000000
49981    0.500000
49991    1.000000
49995    1.000000
Length: 3608, dtype: float64

In [690]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_1', 'purch_train'] = average_recall_at_1_purchases_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,
average_precision_1,0.512195,0.48131,,
average_recall_5,0.824692,0.800034,,
average_precision_5,0.212528,0.203765,,


#### AveragePrecision@1

In [691]:
# Per-session precision@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_1_purchases_train = recommendations_by_purchases_train.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_precision_at_1_purchases_train

7        1.0
10       1.0
19       0.0
30       1.0
33       1.0
        ... 
49943    1.0
49964    1.0
49981    1.0
49991    1.0
49995    1.0
Length: 3608, dtype: float64

In [692]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_1', 'purch_train'] = average_precision_at_1_purchases_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,
average_precision_1,0.512195,0.48131,0.803769,
average_recall_5,0.824692,0.800034,,
average_precision_5,0.212528,0.203765,,


#### AverageRecall@5

In [693]:
# Per-session recall@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_5_purchases_train = recommendations_by_purchases_train.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_recall_at_5_purchases_train

7        1.0
10       1.0
19       1.0
30       1.0
33       1.0
        ... 
49943    1.0
49964    1.0
49981    0.5
49991    1.0
49995    1.0
Length: 3608, dtype: float64

In [694]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_5', 'purch_train'] = average_recall_at_5_purchases_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,
average_precision_1,0.512195,0.48131,0.803769,
average_recall_5,0.824692,0.800034,0.926307,
average_precision_5,0.212528,0.203765,,


#### AveragePrecision@5

In [695]:
# Per-session precision@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_5_purchases_train = recommendations_by_purchases_train.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_precision_at_5_purchases_train

7        0.6
10       0.2
19       0.2
30       0.2
33       0.2
        ... 
49943    0.2
49964    0.2
49981    0.2
49991    0.2
49995    0.2
Length: 3608, dtype: float64

In [696]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_5', 'purch_train'] = average_precision_at_5_purchases_train.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,
average_precision_1,0.512195,0.48131,0.803769,
average_recall_5,0.824692,0.800034,0.926307,
average_precision_5,0.212528,0.203765,0.25255,


### test

In [697]:
# Wrap the recommendations in a DataFrame so the purchases can be attached as a second column.
recommendations_by_purchases_test = pd.DataFrame(recommendations_by_purchases_test)

In [698]:
# Put each session's purchases next to its recommendations (aligned on the session index).
recommendations_by_purchases_test = recommendations_by_purchases_test.assign(buy_id=buy_test)
recommendations_by_purchases_test

Unnamed: 0,recommendations_id,buy_id
7,"[63, 68, 69, 70, 66, 61, 59]","[66, 63]"
14,"[158, 162, 160, 159, 161]",[162]
19,"[204, 202, 200, 201, 203]","[201, 205]"
34,"[371, 372]","[371, 373]"
40,[422],[422]
...,...,...
49943,"[4335, 4333, 606, 2859, 2854, 88887, 96997, 44...","[24907, 102691, 18496, 4333]"
49944,"[1262, 1306, 13520, 77655, 23249, 47450, 26157...","[58205, 3111, 69482]"
49945,"[60538, 44430, 66252, 66251]","[66252, 44430]"
49946,"[49815, 76363]",[49815]


#### AverageRecall@1

In [699]:
# Per-session recall@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_1_purchases_test = recommendations_by_purchases_test.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_recall_at_1_purchases_test

7        0.5
14       0.0
19       0.0
34       0.5
40       1.0
        ... 
49943    0.0
49944    0.0
49945    0.0
49946    1.0
49985    1.0
Length: 3665, dtype: float64

In [700]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_1', 'purch_test'] = average_recall_at_1_purchases_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,0.46062
average_precision_1,0.512195,0.48131,0.803769,
average_recall_5,0.824692,0.800034,0.926307,
average_precision_5,0.212528,0.203765,0.25255,


#### AveragePrecision@1

In [701]:
# Per-session precision@1. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_1_purchases_test = recommendations_by_purchases_test.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=1),
    axis=1,
)
average_precision_at_1_purchases_test

7        1.0
14       0.0
19       0.0
34       1.0
40       1.0
        ... 
49943    0.0
49944    0.0
49945    0.0
49946    1.0
49985    1.0
Length: 3665, dtype: float64

In [702]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_1', 'purch_test'] = average_precision_at_1_purchases_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,0.46062
average_precision_1,0.512195,0.48131,0.803769,0.527694
average_recall_5,0.824692,0.800034,0.926307,
average_precision_5,0.212528,0.203765,0.25255,


#### AverageRecall@5

In [703]:
# Per-session recall@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_recall_at_5_purchases_test = recommendations_by_purchases_test.apply(
    lambda row: average_recall(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_recall_at_5_purchases_test

7        1.00
14       1.00
19       0.50
34       0.50
40       1.00
         ... 
49943    0.25
49944    0.00
49945    1.00
49946    1.00
49985    1.00
Length: 3665, dtype: float64

In [704]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_recall_5', 'purch_test'] = average_recall_at_5_purchases_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,0.46062
average_precision_1,0.512195,0.48131,0.803769,0.527694
average_recall_5,0.824692,0.800034,0.926307,0.820187
average_precision_5,0.212528,0.203765,0.25255,


#### AveragePrecision@5

In [705]:
# Per-session precision@5. Columns are read by label instead of by position,
# so the result does not depend on the column order.
average_precision_at_5_purchases_test = recommendations_by_purchases_test.apply(
    lambda row: average_precision(row['recommendations_id'], row['buy_id'], at=5),
    axis=1,
)
average_precision_at_5_purchases_test

7        0.4
14       0.2
19       0.2
34       0.2
40       0.2
        ... 
49943    0.2
49944    0.0
49945    0.4
49946    0.2
49985    0.2
Length: 3665, dtype: float64

In [706]:
# Store the mean over sessions with one .loc assignment; chained attribute
# assignment may write to a temporary copy instead of `metrics`.
metrics.loc['average_precision_5', 'purch_test'] = average_precision_at_5_purchases_test.mean()
metrics

Unnamed: 0,views_train,views_test,purch_train,purch_test
average_recall_1,0.442634,0.417333,0.688449,0.46062
average_precision_1,0.512195,0.48131,0.803769,0.527694
average_recall_5,0.824692,0.800034,0.926307,0.820187
average_precision_5,0.212528,0.203765,0.25255,0.210095
