In [6]:
import pandas as pd
from hnmchallenge.data_reader import DataReader
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from hnmchallenge.dataset import Dataset
from hnmchallenge.filtered_dataset import FilterdDataset
from hnmchallenge.stratified_dataset import StratifiedDataset
from hnmchallenge.models.top_pop import TopPop
from hnmchallenge.evaluation.python_evaluation import map_at_k, recall_at_k
from hnmchallenge.constant import *
from hnmchallenge.models.sgmc.sgmc import SGMC
from hnmchallenge.models.ease.ease import EASE
from hnmchallenge.models.itemknn.itemknn import ItemKNN 

In [7]:
import logging
# Configure the root logger at DEBUG level, then grab this notebook's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

In [8]:
# Project helpers: `dataset` is used below to fetch the holdout split, and
# `dr` is used below for the preprocessed-data path and the filtered full data.
dataset = StratifiedDataset()
dr = DataReader()

In [9]:
# Load the holdout split from the stratified dataset; it is grouped by
# customer (DEFAULT_USER_COL) and article (DEFAULT_ITEM_COL) in the next cell.
holdout = dataset.get_holdout()

In [10]:
# One list of holdout article ids per customer (duplicates are kept).
item_per_user = (
    holdout
    .groupby(DEFAULT_USER_COL)[DEFAULT_ITEM_COL]
    .apply(list)
)

In [11]:
# Convert the Series into a one-column DataFrame so it can be merged with
# the recommendations further down.
item_per_user_df = item_per_user.to_frame()

In [12]:
# Preview: one row per customer_id holding that customer's holdout article_id list.
item_per_user_df

Unnamed: 0_level_0,article_id
customer_id,Unnamed: 1_level_1
0,"[1652, 7053, 11572]"
1,"[3161, 8254, 16695, 13392, 2427]"
2,"[8443, 3023, 7068, 8089, 3215, 7193]"
3,"[632, 3]"
4,[4]
...,...
1136201,[17418]
1136202,[20154]
1136203,[4770]
1136204,[17478]


In [16]:
# Load precomputed per-customer recommendation lists from the preprocessed-data
# directory (presumably 100 cosine-similarity recommendations per customer,
# judging by the file name -- TODO confirm).
recs = pd.read_feather(dr.get_preprocessed_data_path()/"cosine_recs_100.feather")

In [17]:
# Preview: one row per customer_id with a `recs` list of recommended article ids.
recs

Unnamed: 0,customer_id,recs
0,0,"[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ..."
1,1,"[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782..."
2,2,"[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309..."
3,3,"[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6..."
4,5,"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725..."
...,...,...
962725,1136200,"[20583, 17333, 3328, 18900, 16949, 21549, 2049..."
962726,1136201,"[17418, 20780, 14892, 15765, 17869, 17715, 128..."
962727,1136203,"[19923, 20246, 20091, 19496, 16864, 21001, 199..."
962728,1136204,"[17478, 20806, 20946, 18373, 18249, 17488, 215..."


In [18]:
# Inner join (the pandas default): customers that have a holdout but no
# recommendation row are dropped from the evaluation frame.
final_df = item_per_user_df.reset_index().merge(recs, on=DEFAULT_USER_COL)

In [21]:
# Preview: holdout lists (article_id) side by side with the recommendations (recs).
final_df

Unnamed: 0,customer_id,article_id,recs
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ..."
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782..."
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309..."
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6..."
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725..."
...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049..."
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128..."
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199..."
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215..."


In [22]:
# Share of each customer's holdout articles that appear among the recommendations.
# NOTE(review): np.intersect1d returns unique values, while len(article_id)
# counts repeated holdout articles, so customers whose holdout contains
# duplicates get a lower ratio -- confirm this is the intended definition.
final_df["hit_all"] = final_df.apply(
    lambda row: len(np.intersect1d(row["article_id"], row["recs"])) / len(row["article_id"]),
    axis=1,
)

In [23]:
# Preview with the per-customer hit_all ratio added.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0
...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0


In [25]:
# Average of the per-customer hit_all ratio over the customers kept by the merge.
print("Recall@100")
final_df["hit_all"].mean()

Recall@100


0.30180611594459295

In [26]:
# Customers with at least one holdout article among their recommendations.
final_df.loc[final_df["hit_all"].gt(0)]

Unnamed: 0,customer_id,article_id,recs,hit_all
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.200000
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.500000
5,6,"[1891, 12146, 8234, 4443, 1655, 12235]","[4133, 2204, 8796, 451, 2012, 3360, 7317, 7177...",0.166667
6,7,"[391, 391, 7312, 7312, 10256, 7995, 8823, 1013...","[7116, 7117, 73, 6761, 1482, 7597, 2352, 401, ...",0.086957
8,9,[79],"[11, 10, 19, 2041, 228, 261, 2055, 7270, 9702,...",1.000000
...,...,...,...,...
962717,1136190,"[19321, 9294]","[7954, 8835, 16601, 8285, 8923, 8532, 3468, 69...",0.500000
962719,1136192,[18777],"[6241, 6431, 6491, 6680, 7289, 3747, 4214, 743...",1.000000
962721,1136194,"[14732, 81]","[21495, 18303, 19410, 15523, 15679, 20865, 207...",0.500000
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.000000


In [29]:
# Holdout articles that were actually recommended (np.intersect1d yields
# sorted, unique values).
final_df["recs_hit"] = final_df.apply(
    lambda row: np.intersect1d(row["article_id"], row["recs"]),
    axis=1,
)

In [30]:
# Preview with recs_hit (the holdout articles that were recommended) added.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[]
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161]
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[]
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3]
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[]
...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[]
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418]
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[]
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478]


In [31]:
# Load the filtered full data from the data reader; it is grouped per
# customer in the next cell.
fd = dr.get_filtered_full_data()

In [37]:
# Every article of the filtered full data per customer, as a list (duplicates
# kept). The Series is named "all_items" so the later merge adds a column
# with that name.
all_article_id = (
    fd
    .groupby(DEFAULT_USER_COL)[DEFAULT_ITEM_COL]
    .apply(list)
    .rename("all_items")
)

In [38]:
# Preview: per-customer list of all articles (repeated articles appear several times).
all_article_id

customer_id
0          [0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4...
1          [1, 482, 759, 2006, 339, 339, 6018, 7534, 7534...
2          [2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54...
3                              [3, 3, 3, 2596, 3936, 632, 3]
4                                                        [4]
                                 ...                        
1136201                                       [17418, 17418]
1136202                                              [20154]
1136203                                 [19923, 20246, 4770]
1136204                                       [17478, 17478]
1136205               [10033, 7580, 1765, 838, 12955, 15003]
Name: all_items, Length: 1136206, dtype: object

In [39]:
# Attach each customer's full article list as the `all_items` column.
# This rebinds final_df, so re-running the cell merges `all_items` a second time.
final_df = final_df.merge(all_article_id, on=DEFAULT_USER_COL)

In [45]:
# Cleanup for a stale duplicate column. Because the per-customer Series is
# renamed to "all_items" before merging, the merge normally produces no
# "article_id_y" column; errors="ignore" makes this a no-op in that case
# instead of raising KeyError on a clean top-to-bottom run.
final_df = final_df.drop(columns=["article_id_y"], errors="ignore")

In [46]:
# Preview with the all_items column attached.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[],"[0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4..."
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161],"[1, 482, 759, 2006, 339, 339, 6018, 7534, 7534..."
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[],"[2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54..."
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3],"[3, 3, 3, 2596, 3936, 632, 3]"
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[],"[5, 991]"
...,...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[],"[3328, 17333, 20583, 22036]"
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418],"[17418, 17418]"
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[],"[19923, 20246, 4770]"
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478],"[17478, 17478]"


In [66]:
import collections
from tqdm import tqdm_notebook

In [57]:
def multiple_buy_items(x):
    """Return the items of ``x`` that occur more than once, with multiplicity.

    Every repeated item is emitted as many times as it appears in ``x``.
    Copies of the same item are grouped together, and groups follow the order
    in which each item first appears. Items that occur exactly once are left
    out.

    Parameters
    ----------
    x : iterable of hashable scalars
        Item ids of one customer; duplicates are allowed.

    Returns
    -------
    numpy.ndarray
        1-D array of the repeated items. The result is always an array (empty
        when nothing repeats), so callers see one consistent return type; the
        previous version returned a plain list ``[]`` in the empty case.
    """
    counts = collections.Counter(x)
    # Keep only the items seen more than once, in first-appearance order.
    repeated = [(item, n) for item, n in counts.items() if n > 1]
    if not repeated:
        # Empty result that keeps the dtype of the input items.
        return np.empty(0, dtype=np.asarray(list(counts)).dtype)
    items, reps = zip(*repeated)
    # A single vectorised repeat replaces the per-item repeat + concatenate.
    return np.repeat(items, reps)

In [None]:
# Articles that occur more than once in the customer's full article list.
final_df["mb_items"] = final_df.apply(
    lambda row: multiple_buy_items(row["all_items"]),
    axis=1,
)

In [63]:
# Preview with mb_items (articles occurring more than once in all_items) added.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items,mb_items
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[],"[0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4...","[0, 0, 0, 1952, 1952]"
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161],"[1, 482, 759, 2006, 339, 339, 6018, 7534, 7534...","[339, 339, 7534, 7534, 3161, 3161]"
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[],"[2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54...",[]
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3],"[3, 3, 3, 2596, 3936, 632, 3]","[3, 3, 3, 3]"
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[],"[5, 991]",[]
...,...,...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[],"[3328, 17333, 20583, 22036]",[]
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418],"[17418, 17418]","[17418, 17418]"
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[],"[19923, 20246, 4770]",[]
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478],"[17478, 17478]","[17478, 17478]"


In [64]:
# Articles that occur exactly once in the customer's full article list
# (np.setdiff1d yields sorted, unique values).
final_df["sb_items"] = final_df.apply(
    lambda row: np.setdiff1d(row["all_items"], row["mb_items"]),
    axis=1,
)

In [65]:
# Preview with sb_items (articles occurring exactly once in all_items) added.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items,mb_items,sb_items
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[],"[0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4...","[0, 0, 0, 1952, 1952]","[644, 1482, 1638, 1652, 1797, 4861, 7053, 9027..."
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161],"[1, 482, 759, 2006, 339, 339, 6018, 7534, 7534...","[339, 339, 7534, 7534, 3161, 3161]","[1, 482, 759, 981, 1482, 2006, 2427, 2997, 350..."
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[],"[2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54...",[],"[2, 9, 196, 248, 249, 309, 326, 2055, 2238, 30..."
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3],"[3, 3, 3, 2596, 3936, 632, 3]","[3, 3, 3, 3]","[632, 2596, 3936]"
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[],"[5, 991]",[],"[5, 991]"
...,...,...,...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[],"[3328, 17333, 20583, 22036]",[],"[3328, 17333, 20583, 22036]"
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418],"[17418, 17418]","[17418, 17418]",[]
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[],"[19923, 20246, 4770]",[],"[4770, 19923, 20246]"
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478],"[17478, 17478]","[17478, 17478]",[]


In [69]:
# Holdout articles that are not among the customer's repeated (mb_items) articles.
final_df["holdout_sb_items"] = final_df.apply(
    lambda row: np.setdiff1d(row["article_id"], row["mb_items"]),
    axis=1,
)

In [70]:
# Preview with holdout_sb_items added.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items,mb_items,sb_items,holdout_sb_items
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[],"[0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4...","[0, 0, 0, 1952, 1952]","[644, 1482, 1638, 1652, 1797, 4861, 7053, 9027...","[1652, 7053, 11572]"
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161],"[1, 482, 759, 2006, 339, 339, 6018, 7534, 7534...","[339, 339, 7534, 7534, 3161, 3161]","[1, 482, 759, 981, 1482, 2006, 2427, 2997, 350...","[2427, 8254, 13392, 16695]"
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[],"[2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54...",[],"[2, 9, 196, 248, 249, 309, 326, 2055, 2238, 30...","[3023, 3215, 7068, 7193, 8089, 8443]"
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3],"[3, 3, 3, 2596, 3936, 632, 3]","[3, 3, 3, 3]","[632, 2596, 3936]",[632]
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[],"[5, 991]",[],"[5, 991]",[991]
...,...,...,...,...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[],"[3328, 17333, 20583, 22036]",[],"[3328, 17333, 20583, 22036]",[22036]
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418],"[17418, 17418]","[17418, 17418]",[],[]
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[],"[19923, 20246, 4770]",[],"[4770, 19923, 20246]",[4770]
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478],"[17478, 17478]","[17478, 17478]",[],[]


In [72]:
# Hits restricted to non-repeated holdout articles, divided by the full
# holdout length (the same denominator as hit_all).
final_df["hit_sb"] = final_df.apply(
    lambda row: len(np.intersect1d(row["recs_hit"], row["holdout_sb_items"])) / len(row["article_id"]),
    axis=1,
)

In [74]:
# Customers with at least one hit on a non-repeated holdout article.
final_df.loc[final_df["hit_sb"].gt(0)]

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items,mb_items,sb_items,holdout_sb_items,hit_sb
5,6,"[1891, 12146, 8234, 4443, 1655, 12235]","[4133, 2204, 8796, 451, 2012, 3360, 7317, 7177...",0.166667,[1891],"[6, 1990, 2229, 2603, 1928, 1950, 3597, 1638, ...",[],"[6, 309, 451, 1496, 1638, 1655, 1891, 1928, 19...","[1655, 1891, 4443, 8234, 12146, 12235]",0.166667
8,9,[79],"[11, 10, 19, 2041, 228, 261, 2055, 7270, 9702,...",1.000000,[79],"[10, 11, 79]",[],"[10, 11, 79]",[79],1.000000
9,10,"[11127, 7270, 19, 15804, 12893, 12893, 3629, 1...","[11, 9316, 11232, 19, 9702, 12042, 8850, 4612,...",0.625000,"[19, 7270, 11035, 11127, 12893]","[11, 2041, 2055, 308, 1711, 2347, 3133, 2216, ...","[11, 11, 19, 19, 19, 11232, 11232, 9316, 9316,...","[64, 308, 1523, 1711, 2041, 2055, 2216, 2347, ...","[3629, 7270, 11035, 11127, 15804]",0.375000
13,14,"[3899, 7343, 7565, 2162, 2162, 308, 486, 7600,...","[523, 1638, 308, 7, 1531, 1567, 1742, 340, 371...",0.222222,"[308, 3899]","[15, 16, 17, 523, 15, 714, 523, 523, 456, 1409...","[15, 15, 523, 523, 523, 523, 308, 308, 1638, 1...","[7, 14, 16, 17, 130, 207, 271, 318, 339, 340, ...","[486, 3899, 7343, 7565, 7597, 7600]",0.111111
17,18,"[7941, 14497, 10210, 14454, 12853, 7137, 15672...","[2997, 8537, 7943, 9174, 6638, 2729, 611, 3225...",0.125000,[7941],"[23, 24, 616, 1366, 1138, 910, 4229, 4663, 467...","[1138, 1138]","[23, 24, 365, 611, 616, 689, 834, 910, 1366, 1...","[7137, 7941, 10210, 10910, 12853, 14454, 14497...",0.125000
...,...,...,...,...,...,...,...,...,...,...
962714,1136184,[4705],"[4971, 19664, 4077, 18844, 20084, 4577, 4705, ...",1.000000,[4705],"[4077, 19664, 4971, 4705]",[],"[4077, 4705, 4971, 19664]",[4705],1.000000
962716,1136189,[19221],"[17085, 17115, 17084, 16952, 17301, 17223, 170...",1.000000,[19221],"[17085, 19221]",[],"[17085, 19221]",[19221],1.000000
962717,1136190,"[19321, 9294]","[7954, 8835, 16601, 8285, 8923, 8532, 3468, 69...",0.500000,[9294],"[7954, 8285, 8835, 16601, 19321, 9294]",[],"[7954, 8285, 8835, 9294, 16601, 19321]","[9294, 19321]",0.500000
962719,1136192,[18777],"[6241, 6431, 6491, 6680, 7289, 3747, 4214, 743...",1.000000,[18777],"[6241, 18777]",[],"[6241, 18777]",[18777],1.000000


In [75]:
# Mean per-customer share of holdout articles that were recommended and are
# not among the customer's repeated (mb_items) articles.
final_df["hit_sb"].mean()

0.18945345688655238

In [84]:
# Overall mean hit ratio, shown again for comparison with the hit_sb mean.
final_df["hit_all"].mean()

0.30180611594459295

In [85]:
# Preview of the frame with every derived column, including hit_sb.
final_df

Unnamed: 0,customer_id,article_id,recs,hit_all,recs_hit,all_items,mb_items,sb_items,holdout_sb_items,hit_sb
0,0,"[1652, 7053, 11572]","[1482, 1638, 1797, 4861, 9027, 1952, 0, 1488, ...",0.0,[],"[0, 0, 0, 644, 1952, 1952, 1482, 1638, 1797, 4...","[0, 0, 0, 1952, 1952]","[644, 1482, 1638, 1652, 1797, 4861, 7053, 9027...","[1652, 7053, 11572]",0.0
1,1,"[3161, 8254, 16695, 13392, 2427]","[3161, 7534, 2997, 3503, 1482, 7628, 6992, 782...",0.2,[3161],"[1, 482, 759, 2006, 339, 339, 6018, 7534, 7534...","[339, 339, 7534, 7534, 3161, 3161]","[1, 482, 759, 981, 1482, 2006, 2427, 2997, 350...","[2427, 8254, 13392, 16695]",0.0
2,2,"[8443, 3023, 7068, 8089, 3215, 7193]","[248, 7818, 8074, 196, 7135, 249, 4151, 9, 309...",0.0,[],"[2, 326, 2055, 2238, 4151, 309, 5786, 5174, 54...",[],"[2, 9, 196, 248, 249, 309, 326, 2055, 2238, 30...","[3023, 3215, 7068, 7193, 8089, 8443]",0.0
3,3,"[632, 3]","[3, 2596, 3936, 3935, 2872, 621, 376, 16595, 6...",0.5,[3],"[3, 3, 3, 2596, 3936, 632, 3]","[3, 3, 3, 3]","[632, 2596, 3936]",[632],0.0
4,5,[991],"[5, 499, 69, 123, 16014, 2005, 3554, 2008, 725...",0.0,[],"[5, 991]",[],"[5, 991]",[991],0.0
...,...,...,...,...,...,...,...,...,...,...
962725,1136200,[22036],"[20583, 17333, 3328, 18900, 16949, 21549, 2049...",0.0,[],"[3328, 17333, 20583, 22036]",[],"[3328, 17333, 20583, 22036]",[22036],0.0
962726,1136201,[17418],"[17418, 20780, 14892, 15765, 17869, 17715, 128...",1.0,[17418],"[17418, 17418]","[17418, 17418]",[],[],0.0
962727,1136203,[4770],"[19923, 20246, 20091, 19496, 16864, 21001, 199...",0.0,[],"[19923, 20246, 4770]",[],"[4770, 19923, 20246]",[4770],0.0
962728,1136204,[17478],"[17478, 20806, 20946, 18373, 18249, 17488, 215...",1.0,[17478],"[17478, 17478]","[17478, 17478]",[],[],0.0


In [83]:
# Long format: one row per (customer, recommended article). This reads the
# same feather file that `recs` was loaded from.
(
    pd.read_feather(dr.get_preprocessed_data_path() / "cosine_recs_100.feather")
    .explode("recs")
)

Unnamed: 0,customer_id,recs
0,0,1482
0,0,1638
0,0,1797
0,0,4861
0,0,9027
...,...,...
962729,1136205,4459
962729,1136205,5696
962729,1136205,146
962729,1136205,3115
