In [1]:
import sys
sys.path = ['../RecStudio'] + sys.path
import pandas as pd 
import numpy as np 
import torch
from recstudio.data.advance_dataset import KDDCUPSeqDataset, KDDCUPSessionDataset
import pickle

In [2]:
def _load_cache(path):
    """Read the file at ``path`` in binary mode and return the unpickled object.

    Args:
        path: Location of a pickle file.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.
    """
    # pickle.load can execute arbitrary code while deserialising, so this
    # should only ever be pointed at cache files that are trusted.
    with open(path, 'rb') as cache_file:
        return pickle.load(cache_file)

In [3]:
def load_datasets_from_cache(data_dir):
    """Rebuild ``KDDCUPSessionDataset`` objects from a pickled cache.

    The object unpickled from ``data_dir`` is walked element by element. For
    each cached element a new ``KDDCUPSessionDataset(None, data_dir, None, True)``
    is created and every attribute found in the cached element's ``__dict__``
    is copied onto it.

    Args:
        data_dir: Path passed both to ``_load_cache`` and to the dataset
            constructor (the notebook passes the path of a cache file).

    Returns:
        list: The rebuilt datasets, in the same order as in the cache.
    """
    restored = []
    for cached in _load_cache(data_dir):
        fresh = KDDCUPSessionDataset(None, data_dir, None, True)
        # Overwrite the freshly constructed state with the cached state.
        for attr_name in cached.__dict__:
            setattr(fresh, attr_name, getattr(cached, attr_name))
        restored.append(fresh)
    return restored

In [4]:
# Load the product table; later cells filter it by `id` and `locale` to read
# product titles when spot-checking the embeddings.
product_data = pd.read_csv('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/products_train.csv')

# DE: reorder item embeddings by the dataset's `DE_index`

In [5]:
# Load the DE item embeddings and prepend a single all-zero row, so every
# original row moves down by one and row 0 becomes a zero vector.
# NOTE(review): the zero row is float64; if item.npy is float32 the
# concatenation upcasts the whole matrix — confirm this is intended.
DE_product_vectors = np.load('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_bert_results_DE/results/item_reps/item.npy')
padding_vector = np.zeros((1, DE_product_vectors.shape[-1]))
DE_product_vectors = np.concatenate((padding_vector, DE_product_vectors), axis=0)

In [6]:
# Shape after padding: the row count includes the prepended zero row.
DE_product_vectors.shape

(518328, 768)

In [7]:
# Restore the datasets stored in this cache file and keep only the first one.
# presumably this is the cache built for the DE locale — the hash alone does not say; TODO confirm
DE_dataset = load_datasets_from_cache('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/b5aeac4e5b9ff0518bbcb59a28086594')[0]

In [8]:
# Per-item `DE_index` column of the dataset's item features, cast with `.long()`
# so it can be used as an integer index in the next cell.
# presumably each entry is that item's row in the padded DE_product_vectors — TODO confirm
DE_index = DE_dataset.item_feat.get_col('DE_index').long()

In [9]:
# Gather embedding rows by index: row i of the result is
# DE_product_vectors[DE_index[i]].
# NOTE(review): DE_index comes from `.long()` (presumably a torch tensor) and is
# used directly as a NumPy index — confirm this implicit conversion is intended.
reordered_DE_product_vectors = DE_product_vectors[DE_index]

In [10]:
# Use the embedding at row 10 as the query for the similarity spot check.
vector = reordered_DE_product_vectors[10]

In [11]:
# Show the reordered and the original (padded) matrices side by side; apart
# from the shared zero row 0, their rows are expected to differ in order.
reordered_DE_product_vectors, DE_product_vectors

(array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.17863712,  0.10916085,  0.11707042, ...,  0.10832006,
         -0.27978522,  0.05633187],
        [ 0.17450115, -0.10964729,  0.20953828, ..., -0.20026118,
          0.06307486, -0.12778322],
        ...,
        [ 0.37622038,  0.61418992, -0.17060213, ...,  0.33777729,
          0.20072481, -0.52602822],
        [-0.26937884, -0.18768312,  0.56726915, ...,  0.12342592,
         -0.21520002, -0.20621392],
        [-0.15914454,  0.61516678,  0.16783659, ...,  0.29599333,
         -0.59034371, -0.27012023]]),
 array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.0163502 , -0.0811897 , -0.02968303, ...,  0.17686917,
         -0.32891601, -0.06422292],
        [ 0.38436505,  0.39604807, -0.33651093, ..., -0.09598921,
          0.20652246, -0.30377069],
        ...,
        [-0.10901906, -0.04847462,  0.17508447, ..., -

In [12]:
# Score every row by inner product with `vector`, then list the ten row ids
# with the highest scores (negating turns the ascending argsort into descending).
np.argsort(-(reordered_DE_product_vectors @ vector))[:10]

array([    10, 310442,  61126, 392279, 117775, 500305, 179647, 333226,
       129831, 181296])

In [13]:
# Translate the query row (10) and its best-scoring other row (310442) back
# into product-id tokens.
DE_dataset.field2tokens['product_id'][10], DE_dataset.field2tokens['product_id'][310442]

('B00175X9QE', 'B000S5JVRU')

In [14]:
# Title of the query product.
# NOTE(review): the filter is on `id` only and takes the first match; if an id
# can occur under more than one locale this may not be the DE row — confirm.
product_data.query("id=='B00175X9QE'").iloc[0]['title']

'Herlitz 8770307 Musterbeutelklammer, Metall, Rundkopf, 100 Stück in Hängebox, messing'

In [15]:
# Title of the nearest neighbour, to eyeball whether it resembles the query.
# NOTE(review): the filter is on `id` only and takes the first match; if an id
# can occur under more than one locale this may not be the DE row — confirm.
product_data.query("id=='B000S5JVRU'").iloc[0]['title']

'Herlitz Musterbeutelklammer Flachkopf, 60 Stück in Hängebox, metall'

In [16]:
# Last look at the reordered matrix and its shape before writing it to disk.
reordered_DE_product_vectors, reordered_DE_product_vectors.shape

(array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.17863712,  0.10916085,  0.11707042, ...,  0.10832006,
         -0.27978522,  0.05633187],
        [ 0.17450115, -0.10964729,  0.20953828, ..., -0.20026118,
          0.06307486, -0.12778322],
        ...,
        [ 0.37622038,  0.61418992, -0.17060213, ...,  0.33777729,
          0.20072481, -0.52602822],
        [-0.26937884, -0.18768312,  0.56726915, ...,  0.12342592,
         -0.21520002, -0.20621392],
        [-0.15914454,  0.61516678,  0.16783659, ...,  0.29599333,
         -0.59034371, -0.27012023]]),
 (518328, 768))

In [17]:
# Write the reordered matrix into the same directory as the source item.npy.
np.save('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_bert_results_DE/results/item_reps/reordered_item.npy', reordered_DE_product_vectors)

# JP: reorder item embeddings by the dataset's `JP_index`

In [41]:
# Load the JP item embeddings and prepend a single all-zero row, so every
# original row moves down by one and row 0 becomes a zero vector.
# NOTE(review): the zero row is float64; if item.npy is float32 the
# concatenation upcasts the whole matrix — confirm this is intended.
JP_product_vectors = np.load('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_bert_results_JP/results/item_reps/item.npy')
padding_vector = np.zeros((1, JP_product_vectors.shape[-1]))
JP_product_vectors = np.concatenate((padding_vector, JP_product_vectors), axis=0)

In [42]:
# Shape after padding: the row count includes the prepended zero row.
JP_product_vectors.shape

(395010, 768)

In [43]:
# Restore the datasets stored in this cache file and keep only the first one.
# presumably this is the cache built for the JP locale — the hash alone does not say; TODO confirm
JP_dataset = load_datasets_from_cache('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/2536617955df215e0047f5b220d1c012')[0]

In [44]:
# Per-item `JP_index` column of the dataset's item features, cast with `.long()`
# so it can be used as an integer index in the next cell.
# presumably each entry is that item's row in the padded JP_product_vectors — TODO confirm
JP_index = JP_dataset.item_feat.get_col('JP_index').long()

In [45]:
# Gather embedding rows by index: row i of the result is
# JP_product_vectors[JP_index[i]].
# NOTE(review): JP_index comes from `.long()` (presumably a torch tensor) and is
# used directly as a NumPy index — confirm this implicit conversion is intended.
reordered_JP_product_vectors = JP_product_vectors[JP_index]

In [46]:
# Last look at the reordered matrix and its shape before writing it to disk.
reordered_JP_product_vectors, reordered_JP_product_vectors.shape

(array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [-0.04782737, -0.14124656, -0.03755919, ...,  0.18743366,
         -0.41416973,  0.05504756],
        [-0.37269983, -0.18318939,  0.12348562, ...,  0.29210562,
         -0.26847038,  0.06414852],
        ...,
        [-0.29770693, -0.13189454, -0.03135134, ...,  0.03924929,
         -0.15332511, -0.42025369],
        [-0.00445627,  0.24775715, -0.45316821, ..., -0.04333911,
         -0.22731598,  0.00165353],
        [ 0.02761289,  0.01399995,  0.16871327, ..., -0.19703799,
         -0.0800084 ,  0.15992028]]),
 (395010, 768))

In [47]:
# Write the reordered matrix into the same directory as the source item.npy.
np.save('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_bert_results_JP/results/item_reps/reordered_item.npy', reordered_JP_product_vectors)

# UK: reorder item embeddings by the dataset's `UK_index`

In [27]:
# Load the UK item embeddings and prepend a single all-zero row, so every
# original row moves down by one and row 0 becomes a zero vector.
# NOTE(review): the zero row is float64; if item.npy is float32 the
# concatenation upcasts the whole matrix — confirm this is intended.
UK_product_vectors = np.load('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_roberta_results_UK/results/item_reps/item.npy')
padding_vector = np.zeros((1, UK_product_vectors.shape[-1]))
UK_product_vectors = np.concatenate((padding_vector, UK_product_vectors), axis=0)

In [28]:
# Shape after padding: the row count includes the prepended zero row.
UK_product_vectors.shape

(500181, 768)

In [29]:
# Use row 2000 of the padded (not yet reordered) UK matrix as a query vector.
one_vector = UK_product_vectors[2000]

In [30]:
# Score every row by inner product with `one_vector`, then list the ten row ids
# with the highest scores (negating turns the ascending argsort into descending).
np.argsort(-(UK_product_vectors @ one_vector))[:10]

array([  2000, 112424, 272461, 472272, 202301, 391722, 232505, 303370,
       469652, 147888])

In [31]:
# Keep only the product rows whose `locale` is 'UK'.
UK_product_data = product_data.query("locale=='UK'")

In [32]:
# Display the UK subset (pandas truncates the middle rows in the rendered output).
UK_product_data

Unnamed: 0,id,locale,title,price,brand,color,size,model,material,author,desc
913336,B087LZNPHS,UK,"SOCHOW Sherpa Fleece Throw Blanket, Double-Sid...",24.99,SOCHOW,Teal Green,127cm×150cm,,100% Polyester,,COLOR: The sherpa throw blanket is available i...
913337,B08THFN1KX,UK,Hippowarehouse Personalised Photo Printed Mous...,9.95,Hippowarehouse,White,240mm x 190mm x 60mm,50245-Mat-Perso,Rubber,,Competitively priced
913338,0804185328,UK,"500 Easy Recipes for Every Machine, Both Stove...",16.49,Clarkson Potter,White,,,,"Scarbrough, Mark",
913339,B09VBKDBW6,UK,"TYHJOY Mini Bag Sealer, Handheld Vacuum Heat S...",11.99,TYHJOY,Black,,FBA-sealer-black,Acrylonitrile Butadiene Styrene,,【AFTER-SALE】This handheld food heat sealer sho...
913340,B096ZW8B49,UK,Lucosobie Steering Wheel Lock - Car Anti-Theft...,26.99,Lucosobie,Black,,,Alloy Steel,,🔐【 Anti-Friction & Customer First】Each box of ...
...,...,...,...,...,...,...,...,...,...,...,...
1413511,B08D7KW8VK,UK,TOMHOUSEE Anime Cosplay Short Straight Hair Wi...,9.99,TOMHOUSEE,Deep Grey Yuki,,,Synthetic,,
1413512,B073WXLXR9,UK,Crystals NEW brilliant ink twister bingo dabbe...,8.99,CRYSTALS,"Orange,blue,green,pink,red,purple",,,Plastic,,
1413513,1529393833,UK,"Before I Do: the new, funny and unexpected lov...",4.50,Hodder Paperbacks,,,,,"Cousens, Sophie",
1413514,B0B3TJ1NDN,UK,"Black iPhone Charger Cable, iPhone Charger Bra...",4.49,AA-TECH,Black,2M,brd-ip-black-2022,Nylon Braided,,Added Protection: An additional layer of prote...


In [33]:
# Title for the query vector: position 1999 here pairs with vector row 2000
# because one padding row was prepended to the embedding matrix.
# presumably item.npy rows follow the row order of UK_product_data — TODO confirm
UK_product_data.iloc[1999]['title']

'Lest We Forget Flag Remembrance Day Flag Poppy Flag 3 X 5 ft Remembrance Sunday Poppy Flag Remembrance Day Decorations for Heroes Soldiers Outdoors Street Square Lest We Forget Banner'

In [34]:
# Title for the nearest neighbour (vector row 112424, shifted by one for the
# prepended padding row), to compare against the query's title.
# presumably item.npy rows follow the row order of UK_product_data — TODO confirm
UK_product_data.iloc[112423]['title']

'Lest We Forget Flag Remembrance Day Flag Poppy Flag 3 X 5 ft Remembrance Sunday Poppy Flag Remembrance Day Decorations for Heroes Soldiers Outdoors Street Square Lest We Forget Banner'

In [35]:
# Restore the datasets stored in this cache file and keep only the first one.
# presumably this is the cache built for the UK locale — the hash alone does not say; TODO confirm
UK_dataset = load_datasets_from_cache('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/8d133ea55ad67bd3efd625dfeff0fb1d')[0]

In [36]:
# Number of product-id tokens in the dataset, for comparison with the row
# count of the padded UK embedding matrix.
len(UK_dataset.field2tokens['product_id'])

500181

In [37]:
# Per-item `UK_index` column of the dataset's item features, cast with `.long()`
# so it can be used as an integer index in the next cell.
# presumably each entry is that item's row in the padded UK_product_vectors — TODO confirm
UK_index = UK_dataset.item_feat.get_col('UK_index').long()

In [38]:
# Gather embedding rows by index: row i of the result is
# UK_product_vectors[UK_index[i]].
# NOTE(review): UK_index comes from `.long()` (presumably a torch tensor) and is
# used directly as a NumPy index — confirm this implicit conversion is intended.
reordered_UK_product_vectors = UK_product_vectors[UK_index]

In [39]:
# Last look at the reordered matrix before writing it to disk.
reordered_UK_product_vectors

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.01448025,  0.35636491, -0.22841541, ..., -0.40260869,
         0.30051389,  0.16629395],
       [-0.01257956,  0.34611082, -0.21633583, ..., -0.37115097,
         0.28949398,  0.20995954],
       ...,
       [-0.09949617,  0.06758122, -0.13347711, ..., -0.70578253,
         0.4333744 ,  0.04405852],
       [-0.12538552,  0.27691829, -0.06401664, ..., -0.19322231,
         0.41759366,  0.29057989],
       [ 0.08841106,  0.22024369, -0.24605125, ..., -0.27180609,
         0.32305729,  0.36370665]])

In [40]:
# Write the reordered matrix into the same directory as the source item.npy.
np.save('/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/phase2_task1_roberta_results_UK/results/item_reps/reordered_item.npy', reordered_UK_product_vectors)