In [1]:
import gc
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from scipy import sparse

import amazon_dataset

# Every DMRL artefact (scores, splits, id tables) lives in one dataset directory.
DMRL_DIR = Path('data') / 'DMRL-Clothing_Shoes_and_Jewelry'

# Score archives written by the two models being compared.
DMRL_RES_PATH = DMRL_DIR / 'best_scores.npz'
SEMMACRID_RES_PATH = Path('data') / 'SEM-MacridVAE-Clothing_Shoes_and_Jewelry-run' / 'eval.npz'

# Interaction splits and the user / item id tables.
TRAIN_PATH = DMRL_DIR / 'train.txt'
TEST_PATH = DMRL_DIR / 'test.txt'
ITEMS_PATH = DMRL_DIR / 'items.txt'
USERS_PATH = DMRL_DIR / 'users.txt'

In [2]:
# Fail fast, before any of the expensive loading below, and name every missing
# input at once instead of stopping at the first bare AssertionError.
required_paths = (
    DMRL_RES_PATH,
    SEMMACRID_RES_PATH,
    TRAIN_PATH,
    TEST_PATH,
    ITEMS_PATH,
    USERS_PATH,
)
missing_paths = [str(path) for path in required_paths if not path.exists()]
assert not missing_paths, f"Missing input files: {missing_paths}"

#  Load all data

In [3]:
# Load the first entry stored in the DMRL archive and stack it with np.vstack into
# a single 2-D array (the shape shown below is 23318 x 38493).
# presumably the entry is a sequence of per-batch score blocks — TODO confirm.
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code; only load archives produced by trusted runs.
dmrl_results: NDArray[np.floating] = np.vstack(next(iter(np.load(DMRL_RES_PATH, allow_pickle=True).values())))
dmrl_results.shape

(23318, 38493)

In [4]:
# Load the first entry stored in the SEM-MacridVAE archive; unlike the DMRL scores it
# is used as-is, without stacking (the shape shown below is 23318 x 38493).
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects, which can
# execute code; only load archives produced by trusted runs.
sem_macrid_results: NDArray[np.floating] = next(iter(np.load(SEMMACRID_RES_PATH, allow_pickle=True).values()))
sem_macrid_results.shape

(23318, 38493)

In [5]:
# Both score arrays must have identical shapes: the cells below index them with the
# same training mask and compare them row by row.
assert sem_macrid_results.shape == dmrl_results.shape

Get users

In [6]:
# Load the user table and check it has exactly one row per row of the score matrix.
# As the display below shows, column 'Unnamed: 0' holds the raw user id string and
# column '0' an integer — presumably the matrix row index; verify against the exporter.
users = pd.read_csv(USERS_PATH)
assert len(users) == sem_macrid_results.shape[0]
users

Unnamed: 0.1,Unnamed: 0,0
0,A30NKRF3KBGA06,0
1,AL0XGCBE6Z22M,1
2,AMT5LF0TKY67C,2
3,A2BY8EVXA3NRHD,3
4,AWE6KR1ELIYQ3,4
...,...,...
23313,A1MFBF49ZFMH2N,23313
23314,A36AF5I7D0VO8F,23314
23315,A35ZS7JT3G9B8,23315
23316,A3GC94SEKQI3QU,23316


Get items

In [7]:
# Load the item table and check it has exactly one row per column of the score matrix.
# As the display below shows, column 'Unnamed: 0' holds the product ASIN and
# 'item_id' an integer — presumably the matrix column index; verify against the exporter.
items = pd.read_csv(ITEMS_PATH)
assert len(items) == sem_macrid_results.shape[1]
items

Unnamed: 0.1,Unnamed: 0,item_id
0,B000B6AV7K,0
1,B0143D7EE4,1
2,B0105V2DEY,2
3,B014EY21H2,3
4,B005LUROIK,4
...,...,...
38488,B00A9R2P7A,38488
38489,B017HK485S,38489
38490,B008H7UKYY,38490
38491,B006K6PJTK,38491


In [8]:
def load_interaction_matrix(file: Path, shape: Tuple[int, int]) -> sparse.csr_matrix:
    """Build a sparse user x item interaction matrix from a CSV file.

    The file is read with ``pd.read_csv`` and must provide ``user`` and ``item``
    columns holding the row and column index of each interaction.

    Args:
        file: Path of the CSV file to read.
        shape: ``(n_users, n_items)`` shape of the resulting matrix.

    Returns:
        A float CSR matrix with one unit of weight per listed interaction;
        a (user, item) pair listed several times accumulates its weights.
    """
    interactions = pd.read_csv(file)
    user_idx = interactions['user']
    item_idx = interactions['item']
    # One unit of weight per interaction row, created in the dtype of the user column.
    weights = np.ones_like(user_idx)
    return sparse.csr_matrix((weights, (user_idx, item_idx)), shape=shape, dtype=float)


# Both splits are laid out on the same (n_users, n_items) grid as the score matrices.
interaction_shape = (len(users), len(items))
train = load_interaction_matrix(TRAIN_PATH, interaction_shape)
test = load_interaction_matrix(TEST_PATH, interaction_shape)

# Analyze results

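Mask out the items each user already interacted with during training by setting their scores to −∞, so they can never appear among the top recommendations.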

In [9]:
# Training interactions must never be recommended back to the user: give them the
# lowest possible score so they cannot reach the top-k.
# The (row, col) index arrays of the training matrix are computed once and reused
# for both models instead of calling train.nonzero() twice.
train_rows, train_cols = train.nonzero()
dmrl_results[train_rows, train_cols] = -np.inf
sem_macrid_results[train_rows, train_cols] = -np.inf

Analyze how much the two models agree in their top-5 recommendations for each user.

In [10]:
k = 5


def top_k_items(scores: NDArray[np.floating], k: int) -> NDArray[np.integer]:
    """Return, for every row of ``scores``, the column indices of its ``k`` largest values.

    The indices come from ``argpartition``, so the ``k`` columns within each row
    are in no particular order.
    """
    return scores.argpartition(kth=-k, axis=1)[:, -k:]


best_dmrl = top_k_items(dmrl_results, k)
best_sem_macridvae = top_k_items(sem_macrid_results, k)

In [12]:
# Column index of the single highest-scoring item for every user (row).
# presumably a sanity check: each value should also appear in that user's row of best_dmrl.
dmrl_results.argmax(axis=1)

array([19594,  8913, 32930, ..., 24574, 13147, 34301])

The dense score arrays are very memory-intensive, so we delete them now that the top-k indices have been extracted.

In [14]:
# Only the top-k index arrays are needed from here on: drop the dense score
# matrices and force a collection so their memory is released promptly.
# (`gc` is imported with the other imports in the first cell.)
del sem_macrid_results, dmrl_results
gc.collect()

0

In [16]:
# One row per user; each row holds the column indices of that user's k best-scored
# items under DMRL (argpartition output, so not sorted by score within the row).
best_dmrl

array([[11253, 19594, 15314, 29368, 12176],
       [32501, 34301,  2121, 23339,  8913],
       [13181, 30484,  3235, 32930, 36164],
       ...,
       [ 3482,  6105,  4866, 14556, 24574],
       [32060,  5807, 24346, 13147, 32438],
       [ 6217, 33684, 27597, 13116, 34301]])

In [17]:
# One row per user; each row holds the column indices of that user's k best-scored
# items under SEM-MacridVAE (argpartition output, so not sorted by score within the row).
best_sem_macridvae

array([[ 8703, 10284, 26373, 38308,  1752],
       [  854, 30552, 26624, 32501, 34301],
       [ 8703, 10284,  1752, 38308, 26373],
       ...,
       [ 8703, 26373, 10284, 38308,  1752],
       [ 8703, 10284, 26373,  1752, 38308],
       [16534, 13116, 34301, 33684, 27597]])

In [46]:
# Share of all top-k recommendation slots taken by each item under SEM-MacridVAE,
# keeping the 20 most recommended items.
# NOTE: despite the column name, normalize=True yields fractions in [0, 1], not percents.
best_counts_semmacridvae = (
    pd.Series(best_sem_macridvae.flatten())
    .value_counts(normalize=True)
    .to_frame('percentage')
    .head(20)
)
best_counts_semmacridvae

Unnamed: 0,percentage
1752,0.086302
38308,0.083978
26373,0.070135
10284,0.066121
8703,0.06456
34301,0.029874
32501,0.027481
30552,0.025868
854,0.021666
26624,0.021271


In [47]:
# Share of all top-k recommendation slots taken by each item under DMRL,
# keeping the 20 most recommended items.
# NOTE: despite the column name, normalize=True yields fractions in [0, 1], not percents.
best_counts_dmrl = (
    pd.Series(best_dmrl.flatten())
    .value_counts(normalize=True)
    .to_frame('percentage')
    .head(20)
)
best_counts_dmrl

Unnamed: 0,percentage
6027,0.006896
21557,0.005412
17254,0.005344
921,0.005078
6112,0.005035
24346,0.004992
24956,0.004975
31411,0.004975
8963,0.004666
18492,0.00452


In [48]:
# Items that appear among the 20 most-recommended items of both models.
set(best_counts_dmrl.index).intersection(best_counts_semmacridvae.index)

{6027, 6112, 17254, 38308}

In [32]:
# Boolean Series indexed by item index: True where DMRL places the item in more
# than 10 users' top-k lists.
# NOTE(review): the saved output below is an integer array, which this expression
# cannot produce — it looks stale from an earlier version of the cell; re-run to confirm.
pd.Series(best_dmrl.flatten()).value_counts() > 10

array([21799,     0,     2, ..., 38484,     3, 38482])

In [91]:
# Number of test interactions per item, showing the 20 most frequent items.
test_item_counts = np.asarray(test.sum(axis=0)).ravel().astype(int)
pd.Series(test_item_counts).sort_values(ascending=False).head(20)

18031    168
38308    165
26373    164
3520     163
8703     160
34043    153
28568    153
10284    152
6388     152
5372     146
1752     140
12593    140
13038    138
23510    131
33683    130
307      123
19838    118
35166     91
31411     82
6536      78
dtype: int64

In [21]:
# Number of training interactions per item, most frequent first (kept as floats).
train_item_counts = np.asarray(train.sum(axis=0)).ravel()
pd.Series(train_item_counts).sort_values(ascending=False)

12593    617.0
33683    598.0
19838    590.0
307      569.0
10284    567.0
         ...  
31833      0.0
8552       0.0
8557       0.0
8558       0.0
21618      0.0
Length: 38493, dtype: float64

In [72]:
# Inspect the row at position 2 of the item table to read the ASIN stored next to item_id 2.
items.iloc[2]

Unnamed: 0    B0105V2DEY
item_id                2
Name: 2, dtype: object

In [74]:
# Load product metadata for the category through the project-local amazon_dataset module.
# presumably one row per product, identified by its `asin` column (see the display
# below) — verify against amazon_dataset.products_df.
some_products = amazon_dataset.products_df('Clothing_Shoes_and_Jewelry')
some_products

Unnamed: 0_level_0,asin,description,title,brand,main_cat,rank,price,image_slug,image_url,feature,category,tech_detail
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
47,5120053084,Top Length (Neck to Bottom Hem) Small - 29 inc...,sofsy Soft-Touch Rayon Blend Tie Front Nursing...,,,"87,615inClothing,ShoesJewelry(",,"[51HJbA8UG2L, 51FufN7RbSL, 51vKjwQ6eAL, 410fEp...",[https://images-na.ssl-images-amazon.com/image...,[SAVE 10% WHEN YOU ADD 2 OR MORE sofsy PRODUCT...,"[Clothing, Shoes & Jewelry, Women, Maternity, ...",
144,7709260373,,LJYH Children's Collar Motorcycle Faux Leather...,LJYH,,"15,484inClothing,ShoesJewelry(",$23.99 - $29.99,"[41MAnSZ8QTL, 51qL9W098pL, 51QWVSqlAML, 51eOan...",[https://images-na.ssl-images-amazon.com/image...,"[100% New Faux Leather Coat, Fashion New Slim ...","[Clothing, Shoes & Jewelry, Boys, Clothing, Ja...",
284,B00001TOXD,Includes One Broom. This broom goes great with...,Adult Witch Broom,,Toys & Games,,$4.99,[21VAYWoNedL],[https://m.media-amazon.com/images/I/21VAYWoNe...,[Brand new authentic licensed Pegan Witch broo...,"[Clothing, Shoes & Jewelry, Costumes & Accesso...",
316,B00004U1J2,Tired of having your kid running around the ho...,Buzz Lightyear Jet Pack,,Toys & Games,,$7.41,[31y%2BZwVQJJL],[https://images-na.ssl-images-amazon.com/image...,"[Polyester, Imported, 16"" high, 12"" wide, Qual...","[Clothing, Shoes & Jewelry, Costumes & Accesso...",
333,B00004VWJ3,Birkenstock's Black Super Birki Clog is made o...,"Birki's Super Pu, Black, 44 M EU (13 Women /11...",Birki's,,">#3,164 in Patio, Lawn & Garden (See Top 100 i...",,"[51g%2BtUP7YSL, 41Jn0zjKRuL, 41ElBt2gWdL, 31Aw...",[https://images-na.ssl-images-amazon.com/image...,[Birkenstock SUPER BIRKI BLACK PU 44],"[Clothing, Shoes & Jewelry, Women, Shoes, Mule...",
...,...,...,...,...,...,...,...,...,...,...,...,...
2684954,B01HJDBMUM,When you need a fashionable shoe for your son-...,Deer Stags Kid's Brilliant Dress Comfort Oxfor...,,,"343,935inClothing,ShoesJewelry(",$16.80 - $40.00,"[51Q9Np4I4OL, 41JgD3Klb6L, 31hD-OLssoL, 41ABLP...",[https://images-na.ssl-images-amazon.com/image...,"[100% Manmade, Synthetic sole, QUALITY YOU CAN...","[Clothing, Shoes & Jewelry, Girls, Shoes, Oxfo...",
2684955,B01HJCZ02O,"Dynamic in design, the honor sneaker is comfor...",Dansko Women's Honor Sneaker,,,"44,554inClothing,ShoesJewelry(",$74.93 - $221.43,"[51wGv%2BSmTHL, 41AE5SULqtL, 311xxeLqZ2L, 41Js...",[https://images-na.ssl-images-amazon.com/image...,"[100% Leather, Imported, Rubber sole, Shaft me...","[Clothing, Shoes & Jewelry, Women, Shoes, Fash...",
2684979,B01HJDZM30,Part of our Performance Fishing Gear line of p...,Columbia PFG Mesh Snap Back Ball Cap,Columbia,Sports & Outdoors,"5,346inSportsOutdoors(",$20.90,"[51cajtyCrBL, 416yFSOGLdL, 41t%2Bt3iFPYL, 513t...",[https://images-na.ssl-images-amazon.com/image...,"[blend, ADVANCED TECHNOLOGY: Columbia's PFG Me...","[Clothing, Shoes & Jewelry, Men, Accessories, ...",
2684987,B01HJDVCJI,An edgy take on Adidas running-inspired herita...,adidas Originals Men's Tubular Shadow Fashion ...,,,"74,828inClothing,ShoesJewelry(",$48.45 - $199.00,"[51wvO%2BYYxLL, 51VDqEjkM1L, 31Pab7SYh7L, 41gg...",[https://images-na.ssl-images-amazon.com/image...,"[100% Leather and Textile, Imported, Synthetic...","[Clothing, Shoes & Jewelry, Men, Shoes, Fashio...",


In [78]:
# Title of the product with ASIN B0105V2DEY (the ASIN listed for item_id 2 in `items`).
asin_mask = some_products['asin'] == 'B0105V2DEY'
some_products.loc[asin_mask, 'title'].iloc[0]

'Slim Compact Leather Key Holder Wallet Pouch Gifts Him Her Men Women'

In [80]:
# Full metadata record of the product with ASIN B0105V2DEY (item_id 2 in `items`).
asin_mask = some_products['asin'] == 'B0105V2DEY'
some_products[asin_mask].iloc[0]

asin                                                  B0105V2DEY
description                                                 None
title          Slim Compact Leather Key Holder Wallet Pouch G...
brand                                                Rustic Town
main_cat                                         Office Products
rank                             788,760inClothing,ShoesJewelry(
price                                                       None
image_slug     [41%2B7pqP-ZHL, 514%2BxE-dulL, 41xjNc9npzL, 51...
image_url      [https://images-na.ssl-images-amazon.com/image...
feature        [Leather, Imported, Soft and Supple Feel of a ...
category       [Clothing, Shoes & Jewelry, Men, Accessories, ...
tech_detail                                                 None
Name: 1997316, dtype: object