## Toy Function for inference pipelines

In [1]:
import os
import gc
import heapq
import pickle
import numba as nb
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

In [None]:
## steps to make inferences
## 1. read in a test set row, look at its aids and ops
## 2. give time weight and seq weight for each aid it has interaction with
## 3. based on the aids, search the similar items of aids in the fullSimMatrix.
## 4. get all the sim scores and combine with the seq weights and time weight found in step #2
## 5. Find the top 20 items of each user in step 4, use heap_topk to reduce memory overflow.

In [2]:
fullSimMatrix = nb.typed.Dict.empty(
        key_type = nb.types.int64,
        value_type = nb.typeof(nb.typed.Dict.empty(key_type = nb.types.int64, value_type = nb.types.float64)))

inner_dict_1 = nb.typed.Dict.empty(key_type = nb.types.int64, value_type = nb.types.float64)
inner_dict_1[2] = 100.0
inner_dict_1[4] = 120.0
inner_dict_1[6] = 80.0
inner_dict_1[8] = 90.0

inner_dict_3 = nb.typed.Dict.empty(key_type = nb.types.int64, value_type = nb.types.float64)
inner_dict_3[2] = 102.0
inner_dict_3[4] = 800.0
inner_dict_3[6] = 400.0
inner_dict_3[8] = 600.0

itemid_set1 = [1, 3]  # 5, 7, 9, 11]
dict_set = [inner_dict_1, inner_dict_3]

for idx, item1 in enumerate(itemid_set1):
    fullSimMatrix[item1] = dict_set[idx] 


In [13]:
fullSimMatrix

DictType[int64,DictType[int64,float64]<iv=None>]<iv=None>({1: {2: 100.0, 4: 120.0, 6: 80.0, 8: 90.0}, 3: {2: 102.0, 4: 800.0, 6: 400.0, 8: 600.0}})

In [7]:
## create test set data
aids = [1, 2, 2, 1]
ops = [0, 0, 1, 0]
length = 4

In [18]:
@nb.jit(nopython = True)
def heap_topk(item_cnt_dict, cap):
    """
    get the top cap(k) elements of the cnt dict based on value, using a min-heap structure
    """
    q = [(np.float64(0), np.int64(0)) for _ in range(0)]  ## generate empty queue to implement a heap, 
    for item_ref, sim_score in item_cnt_dict.items():   ## read in the dict in heap structure
        heapq.heappush(q, (sim_score, item_ref))   ## push the <sim_score, item_ref_id> pair into min-heap, using sim_score for order
        if len(q) > cap:
            heapq.heappop(q)
            
    res = [heapq.heappop(q)[1] for _ in range(len(q))][::-1]
    
    return res

In [27]:
@nb.jit(nopython=True, parallel=True)
def inference_single_session(candidate_aids, ops, result, full_sim_matrix, test_ops_weights):
    ## 1. read in a test set row, look at its aids and ops
    ## 2. give time weight and seq weight for each aid it has interaction with, 
    ## in other words, the value of unique_aids_weights shows how important the action is,

    ## record all potential aid that might be relevant
    potential_to_recommend = nb.typed.Dict.empty(key_type=nb.types.int64, value_type=nb.types.float64)
    print("hit here")
    for idx, candidate in enumerate(candidate_aids):
        action_realtime_weight = test_ops_weights[ops[idx]]  ## replace by some helper function that define the importance of an action, given by ts, ops, seq etc
        
        ## load the potential items to recommend, ## 3. based on the aids, search the similar items of aids in the fullSimMatrix.
        for similar_item in full_sim_matrix[candidate]:
            if similar_item not in potential_to_recommend:
                potential_to_recommend[similar_item] = 0
            ## step 4
            potential_to_recommend[similar_item] += full_sim_matrix[candidate][similar_item] * action_realtime_weight
        
    ## step 5, get the top 20 of potential_to_recommend, and store it as a list and save it in result. 
    result[0] = heap_topk(potential_to_recommend, 2)  ## TODO: replace with result[session_id] = ...


In [28]:
result_inf = nb.typed.Dict.empty(
    key_type = nb.types.int64,
    value_type = nb.types.int64[:])

inference_single_session(aids, ops, result_inf, fullSimMatrix, np.array([1.0, 6.0, 3.0]))

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1mNo implementation of function Function(<built-in function setitem>) found for signature:
 
 >>> setitem(DictType[int64,array(int64, 1d, A)]<iv=None>, Literal[int](0), list(int64)<iv=None>)
 
There are 16 candidate implementations:
[1m      - Of which 14 did not match due to:
      Overload of function 'setitem': File: <numerous>: Line N/A.
        With argument(s): '(DictType[int64,array(int64, 1d, A)]<iv=None>, int64, list(int64)<iv=None>)':[0m
[1m       No match.[0m
[1m      - Of which 2 did not match due to:
      Overload in function 'impl_setitem': File: numba/typed/dictobject.py: Line 695.
        With argument(s): '(DictType[int64,array(int64, 1d, A)]<iv=None>, int64, list(int64)<iv=None>)':[0m
[1m       Rejected as the implementation raised a specific error:
         NumbaNotImplementedError: Failed in nopython mode pipeline (step: native lowering)
       [1m[1mCannot cast list(int64)<iv=None> to array(int64, 1d, A): %"inserted.parent" = insertvalue {i8*, i8*} %"inserted.meminfo.1", i8* %"arg.value.1", 1[0m
       [0m[1mDuring: lowering "castedval = call $12load_global.4(value, $16load_deref.6, func=$12load_global.4, args=[Var(value, dictobject.py:704), Var($16load_deref.6, dictobject.py:705)], kws=(), vararg=None, varkwarg=None, target=None)" at /Users/itong1900/opt/anaconda3/lib/python3.8/site-packages/numba/typed/dictobject.py (705)[0m[0m
  raised from /Users/itong1900/opt/anaconda3/lib/python3.8/site-packages/numba/core/base.py:704
[0m
[0m[1mDuring: typing of staticsetitem at <ipython-input-27-e4895f48f1c3> (22)[0m
[1m
File "<ipython-input-27-e4895f48f1c3>", line 22:[0m
[1mdef inference_single_session(candidate_aids, ops, result, full_sim_matrix, test_ops_weights):
    <source elided>
    ## step 5, get the top 20 of potential_to_recommend, and store it as a list and save it in result. 
[1m    result[0] = heap_topk(potential_to_recommend, 2)  ## TODO: replace with result[session_id] = ...
[0m    [1m^[0m[0m


In [16]:
np.array([1.0, 6.0, 3.0])

array([1., 6., 3.])