In [1]:
import pickle
import time
import warnings

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

from inference.vectorized import *

## Load data from instacart30k

In [2]:
# Locations of the training baskets and of the test samples
train_basket_path = 'data/instacart_30k/train_baskets.csv'
test_sample_path = 'data/instacart_30k/test_samples.csv'

train_baskets = pd.read_csv(train_basket_path)
test_samples = pd.read_csv(test_sample_path)

# basket_id -> list of item ids, ordered by add_to_cart_order
all_baskets = train_baskets[['basket_id', 'item_id', 'add_to_cart_order']].drop_duplicates()
basket_items = (
    all_baskets
    .sort_values(['basket_id', 'add_to_cart_order'])
    .groupby(['basket_id'])['item_id']
    .apply(list)
    .reset_index(name='items')
)
items_by_basket = dict(zip(basket_items['basket_id'], basket_items['items']))

# user_id -> list of basket ids belonging to that user
user_baskets = (
    train_baskets[['basket_id', 'user_id']]
    .drop_duplicates()
    .groupby(['user_id'])['basket_id']
    .apply(list)
    .reset_index(name='baskets')
)
baskets_by_user = dict(zip(user_baskets['user_id'], user_baskets['baskets']))

user_sim_dict = {}
user_neighbors = {}

# Load the precomputed user-similarity dictionary from disk
with open('data/instacart_30k/user_sim_50.pickle', 'rb') as pickle_file:
    user_sim_dict = pickle.load(pickle_file)

# Each key is indexed as (user, neighbor); collect the neighbors per user,
# in the iteration order of the similarity dictionary
for pair in user_sim_dict:
    user_neighbors.setdefault(pair[0], []).append(pair[1])

## Functions to precompute the static part of the model

In [6]:
def baskets_of_user(user_id, baskets_by_user, items_by_basket):
    """Return the item lists of every basket recorded for `user_id`.

    Parameters
    ----------
    user_id : hashable
        Key into `baskets_by_user`.
    baskets_by_user : mapping
        Maps a user id to the sequence of that user's basket ids.
    items_by_basket : mapping
        Maps a basket id to the basket's items.

    Returns
    -------
    list
        One entry per basket id of the user, in the same order as stored in
        `baskets_by_user[user_id]`.

    Raises
    ------
    KeyError
        If `user_id` or one of its basket ids is missing from the mappings.
    """
    item_lists = []
    for basket_id in baskets_by_user[user_id]:
        item_lists.append(items_by_basket[basket_id])
    return item_lists

def precompute_for_user(user_id, baskets_by_user, items_by_basket, num_items, beta,
                        neighbors_by_user=None):
    """Precompute the query-independent part of the model for one user.

    The user's own history vector and co-occurrence matrix are blended with
    the corresponding quantities derived from the user's neighbors:

        h_s_u = beta * h_u + (1 - beta) * h_N_u
        C_s_u = beta * C_u + (1 - beta) * C_N_u

    Parameters
    ----------
    user_id : hashable
        User to precompute for; must be a key of `baskets_by_user` and of the
        neighbor mapping.
    baskets_by_user : mapping
        Maps a user id to that user's basket ids.
    items_by_basket : mapping
        Maps a basket id to the basket's items.
    num_items : int
        Passed through to the vectorization helpers.
        # presumably the size of the item dimension -- confirm against
        # inference.vectorized
    beta : float
        Weight of the user's own statistics; neighbors get `1 - beta`.
    neighbors_by_user : mapping, optional
        Maps a user id to an iterable of neighbor user ids. When omitted, the
        notebook-level `user_neighbors` mapping is used, which preserves the
        behavior of existing callers.

    Returns
    -------
    tuple
        `(h_s_u, C_s_u)`, the blended history vector and co-occurrence matrix.

    Raises
    ------
    KeyError
        If `user_id`, a neighbor, or one of their basket ids is unknown.
    """
    if neighbors_by_user is None:
        # Fall back to the global so that existing five-argument calls keep
        # working; passing the mapping explicitly avoids the hidden
        # dependency on notebook state.
        neighbors_by_user = user_neighbors

    # The user's own baskets, converted by the as_sparse_vector helper
    B_u = [as_sparse_vector(basket, num_items)
           for basket in baskets_of_user(user_id, baskets_by_user, items_by_basket)]
    h_u = history_vector(B_u, num_items)
    C_u = basket_cooccurrence_matrix(B_u, num_items)

    # Raw (non-vectorized) baskets of each neighbor, one list per neighbor
    baskets_of_similar_users = [
        baskets_of_user(neighbor, baskets_by_user, items_by_basket)
        for neighbor in neighbors_by_user[user_id]
    ]

    h_N_u, C_N_u = history_and_coocc_from_neighbors(h_u, C_u, baskets_of_similar_users, num_items)

    # Blend own statistics with the neighborhood statistics
    h_s_u = beta * h_u + (1 - beta) * h_N_u
    C_s_u = beta * C_u + (1 - beta) * C_N_u

    return h_s_u, C_s_u

## Benchmark configuration

In [8]:
# Seed for NumPy's global random generator used when sampling users and baskets
seed = 42

# One more than the largest item id occurring in the training baskets
num_items = train_baskets.item_id.max() + 1

# alpha: weight of the history term versus the co-occurrence term at query time
alpha = 0.3
# beta: weight of a user's own statistics versus those of the neighbors
beta = 0.3

# Number of users sampled for the benchmark
num_users_for_benchmark = 500
# Number of incomplete baskets (queries) drawn per sampled user
num_incomplete_baskets_for_user = 5

In [13]:
np.random.seed(seed)

# Choose a random set of users (candidate ids are sorted before sampling)
users = np.random.choice(sorted(test_samples.user_id.unique()),
                         size=num_users_for_benchmark, replace=False)

# Per-query wall-clock durations in seconds
durations = []

for user in users:
    # Precompute static parts of model for users, not measured, can happen offline
    h_s_u, C_s_u = precompute_for_user(user, baskets_by_user, items_by_basket, num_items, beta)
    # Choose physical representation for the matrices
    h_s_u_opt = h_s_u.tocsc(copy=True)
    C_s_u_opt = C_s_u.tocsc(copy=True)

    print(f"model_size\t{user}\t{h_s_u.count_nonzero()}\t{C_s_u.count_nonzero()}")

    # NOTE(review): `eval` executes arbitrary expressions found in the CSV.
    # If `input_items` only ever holds Python literals, `ast.literal_eval`
    # would be a safe drop-in -- confirm the column format before switching.
    all_incomplete_baskets = test_samples[test_samples.user_id==user].input_items.apply(eval).tolist()

    # Users with fewer than two test samples are skipped, as in the original
    # benchmark, so the measured population stays the same.
    if len(all_incomplete_baskets) > 1:
        # Choose a random set of incomplete baskets as query inputs (drawn
        # with replacement). Sample *indices* rather than the nested list
        # itself: np.random.choice requires a 1-D array-like, and a list of
        # item lists is coerced to a 2-D array when all baskets have equal
        # length (or rejected as ragged by recent NumPy versions).
        basket_indices = np.random.choice(len(all_incomplete_baskets),
                                          size=num_incomplete_baskets_for_user)
        incomplete_baskets = [all_incomplete_baskets[i] for i in basket_indices]

        for incomplete_basket in incomplete_baskets:

            # Query starts; perf_counter is the high-resolution monotonic
            # clock intended for measuring short intervals
            start_time = time.perf_counter()

            # Compute selection vector from incomplete basket
            f_c = selection_vector(incomplete_basket, num_items).tocsc(copy=True)
            # Compute scores for all potential items to recommend
            scores = alpha * h_s_u_opt + (1-alpha) * C_s_u_opt * f_c

            # Query ends
            duration = time.perf_counter() - start_time

            # Log duration (milliseconds) together with the query size
            print(f"latency\t{user}\t{len(incomplete_basket)}\t{duration*1000}")
            durations.append(duration)

model_size,15729	2274	2136
latency,15729	3	0.8666515350341797
latency,15729	3	1.0700225830078125
latency,15729	7	0.9779930114746094
latency,15729	8	0.7851123809814453
latency,15729	8	1.0039806365966797
model_size,154203	2474	144
latency,154203	7	0.6349086761474609
latency,154203	3	0.6871223449707031
latency,154203	3	0.7729530334472656
latency,154203	4	0.9829998016357422
latency,154203	4	0.7562637329101562
model_size,160752	1718	562
latency,160752	18	0.49996376037597656
latency,160752	21	1.0838508605957031
latency,160752	26	1.0638236999511719
latency,160752	25	0.9350776672363281
latency,160752	9	0.8690357208251953
model_size,172207	2347	2080
latency,172207	6	0.6589889526367188
latency,172207	4	0.6127357482910156
latency,172207	4	1.0280609130859375
latency,172207	4	0.7007122039794922
latency,172207	4	0.7622241973876953
model_size,18128	1882	1118
latency,18128	18	0.7100105285644531
latency,18128	31	0.8482933044433594
latency,18128	13	1.3015270233154297
latency,18128	4	0.6957054138183594
l

model_size,102471	1928	1716
latency,102471	5	0.7641315460205078
latency,102471	4	1.077890396118164
latency,102471	4	1.0190010070800781
latency,102471	6	0.6940364837646484
latency,102471	6	0.518798828125
model_size,56339	1692	1200
model_size,19584	1803	304
latency,19584	8	1.123666763305664
latency,19584	15	1.2202262878417969
latency,19584	7	0.9548664093017578
latency,19584	3	0.7081031799316406
latency,19584	10	0.61798095703125
model_size,15906	2545	2954
latency,15906	30	0.7431507110595703
latency,15906	25	0.7410049438476562
latency,15906	34	0.7278919219970703
latency,15906	7	0.7750988006591797
latency,15906	28	0.7557868957519531
model_size,60680	2080	1772
latency,60680	9	0.6499290466308594
latency,60680	12	1.0750293731689453
latency,60680	5	0.8020401000976562
latency,60680	10	0.7920265197753906
latency,60680	7	0.9047985076904297
model_size,54178	2361	5000
latency,54178	4	0.8728504180908203
latency,54178	4	0.8199214935302734
latency,54178	12	0.6709098815917969
latency,54178	11	0.76484680

model_size,5909	2303	754
latency,5909	6	0.6768703460693359
latency,5909	3	1.0399818420410156
latency,5909	3	0.9639263153076172
latency,5909	7	0.743865966796875
latency,5909	8	0.6978511810302734
model_size,109247	1751	178
model_size,141071	1815	1190
latency,141071	5	0.4210472106933594
latency,141071	3	0.8609294891357422
latency,141071	5	1.3501644134521484
latency,141071	4	0.7719993591308594
latency,141071	3	0.6349086761474609
model_size,64746	337	60
latency,64746	4	1.0979175567626953
latency,64746	4	0.9648799896240234
latency,64746	4	0.9112358093261719
latency,64746	3	0.7901191711425781
latency,64746	4	0.84686279296875
model_size,193417	2055	168
latency,193417	3	0.6520748138427734
latency,193417	3	0.8521080017089844
latency,193417	4	0.8738040924072266
latency,193417	4	0.6961822509765625
latency,193417	4	0.8358955383300781
model_size,61119	1947	130
latency,61119	4	0.8428096771240234
latency,61119	5	1.1289119720458984
latency,61119	4	0.9400844573974609
latency,61119	6	0.7829666137695312
l

model_size,52445	2087	506
latency,52445	3	0.4830360412597656
latency,52445	3	1.0256767272949219
latency,52445	4	1.0008811950683594
latency,52445	6	0.988006591796875
latency,52445	4	1.0187625885009766
model_size,104851	2508	884
latency,104851	8	0.6551742553710938
latency,104851	6	0.9279251098632812
latency,104851	4	0.7979869842529297
latency,104851	4	0.8399486541748047
latency,104851	10	0.7939338684082031
model_size,115801	2179	680
latency,115801	15	0.6940364837646484
latency,115801	9	1.0590553283691406
latency,115801	7	0.7419586181640625
latency,115801	19	0.9889602661132812
latency,115801	15	0.6909370422363281
model_size,95902	2274	870
latency,95902	6	0.6999969482421875
latency,95902	5	1.2090206146240234
latency,95902	6	1.0459423065185547
latency,95902	4	0.7498264312744141
latency,95902	6	0.6670951843261719
model_size,1583	2202	554
latency,1583	5	1.0170936584472656
latency,1583	5	1.4619827270507812
latency,1583	3	0.7429122924804688
latency,1583	4	0.885009765625
latency,1583	3	0.8151531

model_size,131838	2006	976
latency,131838	10	0.7238388061523438
latency,131838	16	0.9009838104248047
latency,131838	17	0.7250308990478516
latency,131838	7	0.7560253143310547
latency,131838	17	0.682830810546875
model_size,187407	2490	2596
latency,187407	8	0.7801055908203125
latency,187407	3	0.8490085601806641
latency,187407	6	0.7219314575195312
latency,187407	11	0.7371902465820312
latency,187407	6	0.6341934204101562
model_size,153346	1777	1412
latency,153346	4	0.6499290466308594
latency,153346	8	0.9839534759521484
latency,153346	4	0.7309913635253906
latency,153346	5	0.7307529449462891
latency,153346	9	0.7202625274658203
model_size,77063	2461	4028
latency,77063	6	0.6349086761474609
latency,77063	3	0.7481575012207031
latency,77063	11	0.7228851318359375
latency,77063	8	0.6527900695800781
latency,77063	11	0.6768703460693359
model_size,3685	2310	1258
latency,3685	21	0.8499622344970703
latency,3685	7	1.1131763458251953
latency,3685	27	1.0700225830078125
latency,3685	8	0.9760856628417969
laten

model_size,184608	1907	1248
latency,184608	4	0.6401538848876953
latency,184608	3	1.0678768157958984
latency,184608	4	1.1000633239746094
latency,184608	4	0.9469985961914062
latency,184608	3	1.0309219360351562
model_size,64123	1752	262
latency,64123	4	0.8552074432373047
latency,64123	4	1.1069774627685547
latency,64123	4	0.9539127349853516
latency,64123	3	0.8242130279541016
latency,64123	3	0.6649494171142578
model_size,175378	2097	2814
latency,175378	13	0.6279945373535156
latency,175378	7	0.6279945373535156
latency,175378	6	0.7421970367431641
latency,175378	3	0.6299018859863281
latency,175378	3	0.782012939453125
model_size,102138	2084	900
latency,102138	16	0.9238719940185547
latency,102138	17	0.9679794311523438
latency,102138	9	1.0857582092285156
latency,102138	6	0.8640289306640625
latency,102138	3	0.6852149963378906
model_size,53603	2448	86
latency,53603	8	0.5660057067871094
latency,53603	13	1.0528564453125
latency,53603	14	1.1887550354003906
latency,53603	17	0.8111000061035156
latency,5

model_size,39884	1510	368
latency,39884	12	0.8521080017089844
latency,39884	8	1.199960708618164
latency,39884	12	0.8528232574462891
latency,39884	7	0.6811618804931641
latency,39884	8	0.6601810455322266
model_size,70410	2096	1270
latency,70410	5	0.8189678192138672
latency,70410	8	1.1560916900634766
latency,70410	9	1.0690689086914062
latency,70410	7	0.8356571197509766
latency,70410	4	0.8780956268310547
model_size,157149	2194	1620
latency,157149	3	0.6330013275146484
latency,157149	3	1.2540817260742188
latency,157149	4	1.1019706726074219
latency,157149	3	0.9260177612304688
latency,157149	4	0.8230209350585938
model_size,1967	2298	404
latency,1967	3	0.5109310150146484
latency,1967	3	0.8580684661865234
latency,1967	4	0.8521080017089844
latency,1967	3	0.8330345153808594
latency,1967	3	0.7479190826416016
model_size,153978	2167	640
latency,153978	6	0.6520748138427734
latency,153978	6	0.9329319000244141
latency,153978	7	1.1489391326904297
latency,153978	9	0.9751319885253906
latency,153978	6	0.767

model_size,88023	1892	672
latency,88023	11	0.5190372467041016
latency,88023	6	0.7469654083251953
latency,88023	22	0.8952617645263672
latency,88023	17	0.9791851043701172
latency,88023	6	0.6761550903320312
model_size,10120	1280	440
latency,10120	5	0.7228851318359375
latency,10120	7	0.7488727569580078
latency,10120	4	0.701904296875
latency,10120	8	0.7488727569580078
latency,10120	8	0.6191730499267578
model_size,7647	2198	1626
latency,7647	3	0.5519390106201172
latency,7647	4	1.188039779663086
latency,7647	3	1.1188983917236328
latency,7647	4	1.1398792266845703
latency,7647	3	0.9610652923583984
model_size,28605	2082	1080
latency,28605	4	0.6518363952636719
latency,28605	6	1.207113265991211
latency,28605	5	1.0678768157958984
latency,28605	4	0.8599758148193359
latency,28605	4	0.7779598236083984
model_size,75698	2496	1412
latency,75698	12	0.6320476531982422
latency,75698	9	0.9310245513916016
latency,75698	12	1.0995864868164062
latency,75698	5	1.2011528015136719
latency,75698	10	3.426074981689453

model_size,89639	2105	574
latency,89639	3	0.6480216979980469
latency,89639	4	0.7770061492919922
latency,89639	3	0.8871555328369141
latency,89639	5	0.9660720825195312
latency,89639	5	0.7102489471435547
model_size,91053	2034	88
latency,91053	6	0.9951591491699219
latency,91053	8	1.0609626770019531
latency,91053	7	1.008749008178711
latency,91053	5	0.7870197296142578
latency,91053	8	0.61798095703125
model_size,200683	2532	6164
latency,200683	4	0.9241104125976562
latency,200683	3	1.0509490966796875
latency,200683	9	1.528024673461914
latency,200683	3	0.9388923645019531
latency,200683	3	0.9641647338867188
model_size,52904	2277	2864
latency,52904	3	0.6940364837646484
latency,52904	6	0.9579658508300781
latency,52904	10	0.7700920104980469
latency,52904	4	0.7140636444091797
latency,52904	13	0.946044921875
model_size,180291	414	104
latency,180291	3	0.8549690246582031
latency,180291	4	0.7407665252685547
latency,180291	4	0.7550716400146484
latency,180291	3	0.7071495056152344
latency,180291	4	0.737905

model_size,71482	2118	3434
latency,71482	12	0.6442070007324219
latency,71482	20	0.8678436279296875
latency,71482	9	0.8950233459472656
latency,71482	3	0.6709098815917969
latency,71482	5	0.7128715515136719
model_size,161416	1833	4124
latency,161416	30	0.8108615875244141
latency,161416	3	1.0609626770019531
latency,161416	19	0.7340908050537109
latency,161416	17	0.7879734039306641
latency,161416	7	0.6589889526367188
model_size,134560	1905	132
latency,134560	4	0.6031990051269531
latency,134560	5	0.9469985961914062
latency,134560	4	0.8120536804199219
latency,134560	5	1.271963119506836
latency,134560	5	1.0268688201904297
model_size,78711	1778	198
latency,78711	3	0.9028911590576172
latency,78711	7	0.7529258728027344
latency,78711	3	0.6432533264160156
latency,78711	4	0.6000995635986328
latency,78711	4	0.640869140625
model_size,82817	2087	98
latency,82817	5	0.7319450378417969
latency,82817	8	1.2540817260742188
latency,82817	13	1.1720657348632812
latency,82817	8	0.7390975952148438
latency,82817	5	

model_size,156685	2349	910
latency,156685	5	0.5230903625488281
latency,156685	4	0.9295940399169922
latency,156685	5	0.9582042694091797
latency,156685	6	0.8440017700195312
latency,156685	5	0.6139278411865234
model_size,21171	1780	2502
latency,21171	5	0.9129047393798828
latency,21171	4	1.0781288146972656
latency,21171	5	0.8509159088134766
latency,21171	3	0.9207725524902344
latency,21171	4	0.7817745208740234
model_size,70673	1962	62
model_size,150722	2644	6870
latency,150722	15	0.8680820465087891
latency,150722	25	1.001119613647461
latency,150722	3	0.885009765625
latency,150722	21	0.7739067077636719
latency,150722	15	0.7390975952148438
model_size,43957	1795	828
latency,43957	16	0.858306884765625
latency,43957	19	0.8728504180908203
latency,43957	3	0.7891654968261719
latency,43957	7	0.8709430694580078
latency,43957	16	0.9417533874511719
model_size,63740	2026	1100
latency,63740	28	0.9949207305908203
latency,63740	28	1.1751651763916016
latency,63740	20	0.8761882781982422
latency,63740	15	0.75

model_size,146360	410	56
latency,146360	4	0.9059906005859375
latency,146360	3	1.0459423065185547
latency,146360	3	0.9322166442871094
latency,146360	5	0.7288455963134766
latency,146360	4	0.7102489471435547
model_size,103913	1695	1810
latency,103913	3	0.7948875427246094
latency,103913	4	0.8640289306640625
latency,103913	4	0.6968975067138672
latency,103913	5	0.5970001220703125
latency,103913	4	0.5869865417480469
model_size,161341	1940	398
latency,161341	6	0.8029937744140625
latency,161341	5	0.9467601776123047
latency,161341	5	0.8509159088134766
latency,161341	5	0.7600784301757812
latency,161341	4	0.8039474487304688
model_size,126480	1910	1628
latency,126480	3	0.9338855743408203
latency,126480	7	0.7729530334472656
latency,126480	12	0.7970333099365234
latency,126480	7	0.8320808410644531
latency,126480	7	0.72479248046875
model_size,153249	2425	6192
latency,153249	6	0.8718967437744141
latency,153249	5	0.8440017700195312
latency,153249	5	0.7941722869873047
latency,153249	4	0.8018016815185547
l

## p90 response time in ms

In [14]:
# 90th percentile of the measured query durations, converted from seconds to ms
1000 * np.percentile(durations, 90)

1.0996341705322266

## median response time in ms

In [15]:
# Median of the measured query durations, converted from seconds to ms
1000 * np.median(durations)

0.8101463317871094