In [1]:
import pickle
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from model import Actor, Critic, DRRAveStateRepresentation, PMF
from utils.history_buffer import HistoryBuffer

In [2]:
def seed_all(cuda, seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.empty_cache()
        torch.cuda.manual_seed(seed=seed)
    
def discretize_reward_(x):
    x = x.item()
    if x<=1:
        return torch.tensor(1.0, dtype=torch.float64)
    elif x<=2:
        return torch.tensor(2.0, dtype=torch.float64)
    else:
        return torch.tensor(3.0, dtype=torch.float64)
    
def calculate_ndcg(rel, irel):
    dcg = 0
    idcg = 0
    for i, (r, ir) in enumerate(zip(rel, irel)):
        dcg += (r)/np.log2(i+2)
        idcg += (ir)/np.log2(i+2)
    return dcg, idcg

def calc_cos_similarity(a, b, eps=1e-6):
    a = candidate_items[a]
    b = candidate_items[b]
    
    a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
    a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
    b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
    sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
    return sim_mt

In [3]:
STATE_REP_WEIGHT = 'results/220623-075948/state_rep_net.weights'
ACTION_NET_WEIGHT = 'results/220623-075948/actor_net.weights'
TRAINED_PMF_WEIGHT = 'trained/eComm_ratio_0.800000_bs_256_e_25_wd_0.100000_lr_0.000100_trained_pmf.pt'
NUM_USERS, NUM_ITEMS = 5309, 15184
embedding_feature_size = 100
history_buffer_size = 5
cuda = False
device = torch.device('cpu')

In [4]:
seed_all(cuda, 0)

In [5]:
state_rep_net = DRRAveStateRepresentation(n_items=history_buffer_size, item_features=embedding_feature_size, user_features=embedding_feature_size)
state_rep_net.load_state_dict(torch.load(STATE_REP_WEIGHT))

actor_net = Actor(in_features=3 * embedding_feature_size, out_features=embedding_feature_size)
actor_net.load_state_dict(torch.load(ACTION_NET_WEIGHT))

reward_function = PMF(NUM_USERS, NUM_ITEMS, embedding_feature_size, is_sparse=False, no_cuda=True)

reward_function.load_state_dict(torch.load(TRAINED_PMF_WEIGHT))
for param in reward_function.parameters():
    param.requires_grad = False
    
user_embeddings, item_embeddings = reward_function.user_embeddings.weight.data, reward_function.item_embeddings.weight.data

In [6]:
data_df = pd.read_csv('dataset/eComm-sample-data2.csv', usecols=['user_id', 'user_id_num', 'product_id', 'product_id_num', 'event_type', 'event_time', 'cat_1', 'cat_2'])
event_type_to_num = {'view': 1, 'cart': 2, 'purchase': 3}
data_df['behavior'] = data_df['event_type'].apply(lambda x : event_type_to_num[x])

items = dict(zip(data_df['product_id'], data_df['product_id_num']))
users = dict(zip(data_df['user_id'], data_df['user_id_num']))

NUM_USERS, NUM_ITEMS = len(users), len(items)
print(NUM_USERS, NUM_ITEMS)

data = data_df.loc[:, ['user_id_num', 'product_id_num', 'behavior', 'event_time']].values

np.random.shuffle(data)
train_data = torch.from_numpy(data[:int(0.8 * data.shape[0])])
test_data = torch.from_numpy(data[int(0.8 * data.shape[0]):])
print("Data imported, shuffled, and split into Train/Test, ratio=", 0.8)
print("Train data shape: ", train_data.shape)
print("Test data shape: ", test_data.shape)

5309 15184
Data imported, shuffled, and split into Train/Test, ratio= 0.8
Train data shape:  torch.Size([100427, 4])
Test data shape:  torch.Size([25107, 4])


In [7]:
user_idxs = np.unique(test_data[:, 0].numpy())
np.random.shuffle(user_idxs)

In [255]:
history_buffer = HistoryBuffer(history_buffer_size)
eps_eval = 0.1
T = 5

steps = 0
rec = set()
for index, e in enumerate(np.unique([r[0] for r in rew])):
    # extract user reviews and positive user reviews for user id
    user_reviews = test_data[test_data[:, 0] == e]
    pos_user_reviews = user_reviews[user_reviews[:, 2] > 0]

    # history should be sufficient
    if pos_user_reviews.shape[0] < history_buffer_size:
        continue
        
    # print(pos_user_reviews)

    steps += 1   
    
    # sort user history by timestamp
    user_reviews = user_reviews[user_reviews[:, 3].sort(descending=True)[1]]
    pos_user_reviews = pos_user_reviews[pos_user_reviews[:, 3].sort(descending=True)[1]]

    # canditate items embedding for recommendation
    candidate_items = item_embeddings.detach().clone().to(device)

    # get user embedding
    user_emb = user_embeddings[e]

    history_buffer.clear()

    # fill up history buffer
    for i in range(history_buffer_size):
        emb = candidate_items[pos_user_reviews[i, 1]]
        history_buffer.push(emb.detach().clone())

    # get action
    with torch.no_grad():
        # use state rep net weights to get state (input args is user embedding and history buffer)
        state = state_rep_net(user_emb, torch.stack(history_buffer.to_list()))

        if np.random.uniform(0, 1) < eps_eval:
            action = torch.from_numpy(0.1 * np.random.rand(100)).float().to(device)
        else:
            action = actor_net(state.detach())


    # matmul for ranking scores
    ranking_scores = candidate_items @ action

    # get top T item indices
    _, indices = torch.topk(ranking_scores, T)
    rec.update(indices.numpy())
    
    rewards = [reward_function(torch.tensor(e).to(device) ,rec_item_index) for rec_item_index in indices]
    rewards1 = [discretize_reward_(reward).item() for reward in rewards]
    
    print(e, indices, rewards)



37 tensor([545, 626, 528, 334, 535]) [tensor(3.1862), tensor(3.1389), tensor(3.2359), tensor(3.2405), tensor(3.0577)]
38 tensor([545, 626, 528, 334, 535]) [tensor(2.8958), tensor(2.8319), tensor(2.9466), tensor(2.9437), tensor(2.7609)]
132 tensor([545, 626, 528, 334, 535]) [tensor(2.7012), tensor(2.6269), tensor(2.7647), tensor(2.7617), tensor(2.5690)]
139 tensor([545, 626, 528, 334, 535]) [tensor(2.9286), tensor(2.8612), tensor(2.9875), tensor(2.9942), tensor(2.8043)]
141 tensor([545, 626, 528, 334, 535]) [tensor(2.5496), tensor(2.4637), tensor(2.6108), tensor(2.6000), tensor(2.4204)]
176 tensor([545, 626, 528, 334, 535]) [tensor(2.4726), tensor(2.3803), tensor(2.5323), tensor(2.5174), tensor(2.3438)]
256 tensor([545, 626, 528, 334, 675]) [tensor(2.6528), tensor(2.6036), tensor(2.7079), tensor(2.7253), tensor(2.4620)]
257 tensor([545, 626, 528, 334, 535]) [tensor(2.9151), tensor(2.8429), tensor(2.9702), tensor(2.9740), tensor(2.7772)]
258 tensor([545, 626, 528, 334, 535]) [tensor(2.66

In [256]:
len(rec)

13

In [257]:
rec

{18, 98, 334, 528, 535, 545, 592, 613, 623, 626, 675, 797, 842}

In [64]:
steps

1777

In [67]:
data_df[data_df.product_id_num.isin(rec)].cat_2.value_counts()

smartphone    17263
Name: cat_2, dtype: int64

In [70]:
data_df.cat_1.value_counts()

electronics     82871
appliances      16940
computers       15720
apparel          3014
construction     2146
auto             1691
accessories       948
kids              884
furniture         806
sport             431
medicine           49
country_yard       17
stationery         17
Name: cat_1, dtype: int64

In [18]:
data_df[(data_df.user_id_num.isin(user_idxs)) & (data_df.cat_1=='furniture')].user_id_num.unique()

array([1579, 2144, 3952, 2410, 2204, 4842,  543, 4893,  677,  393, 1642,
       1551, 1639, 1572,  460, 2836, 1677, 1379, 3124, 2982,  316, 3399,
        817, 2037, 2210, 5021, 3576, 4794, 2232, 3659, 3762, 4611, 1445,
       1689, 1275, 5186,  596, 1659, 2544, 5251,  152, 1940, 1727, 4055,
       3087, 1789, 3983, 3684, 4599, 3109,  322, 5055, 1646, 3728, 3524,
         41, 2562, 4829, 1498,  446, 3920, 4082, 3162,  650, 1964, 3932,
       2251, 3113, 2171, 1635, 3591, 5169, 4410,  382,   82, 1007, 1157,
       3108, 1430, 3389, 1922, 1850, 3955,  483,  587, 3449, 1530,  750,
        730, 3117, 2406, 1338, 1191, 1978,  844, 1751, 1798,  552, 2992,
       4518, 1238,  657, 2978, 1467, 4641, 1316,  754,  835,  566, 3103,
       1353, 3785, 3778, 3505, 3863, 1698,  149, 1028,  862,   81, 1648,
       3605, 1005, 1182,  530,  102, 2573, 2059, 1526,  857, 2274, 3387,
       3290, 1138, 2517, 1156, 3396,  579, 2706, 4104, 4311, 5141, 1669,
       1297, 4342, 4777, 1454, 2946, 1695, 3709,  8

In [14]:
calc_cos_similarity([437, 5551], [545])

tensor([[0.9305],
        [0.9254]])

In [33]:
data_df.cat_1.value_counts(normalize=True)

electronics     0.660148
appliances      0.134944
computers       0.125225
apparel         0.024009
construction    0.017095
auto            0.013470
accessories     0.007552
kids            0.007042
furniture       0.006421
sport           0.003433
medicine        0.000390
country_yard    0.000135
stationery      0.000135
Name: cat_1, dtype: float64

In [42]:
data_df['non_popular_products'] = data_df.cat_1.apply(lambda x : 1 if x not in ['electronics', 'appliances', 'computers'] else 0)

In [43]:
data_df.head(2)

Unnamed: 0,event_time,event_type,product_id,user_id,cat_1,cat_2,user_id_num,product_id_num,behavior,non_popular_products
0,1569888134000000000,purchase,1004856,543272936,electronics,smartphone,3796,613,3,0
1,1569896511000000000,cart,1005135,515384420,electronics,smartphone,1811,817,2,0


In [48]:
temp = data_df.groupby(by=['user_id_num']).agg({'non_popular_products': 'mean'})

In [52]:
temp.shape

(5309, 1)

In [56]:
temp[temp['non_popular_products']>0.8].index

Int64Index([   4,   26,  103,  221,  250,  259,  263,  301,  313,  318,
            ...
            5205, 5230, 5234, 5239, 5251, 5257, 5274, 5276, 5288, 5296],
           dtype='int64', name='user_id_num', length=251)

In [61]:
r = candidate_items @ user_embeddings[4]

In [64]:
_, rec_items = r.topk(20)

In [65]:
rec_items

tensor([626, 545, 797, 334, 842, 623, 613, 592, 481,  98, 675,  18, 595, 535,
         87,  28, 530, 676, 528,  97])

In [76]:
data_df[data_df.product_id_num.isin(rec_items.numpy())].cat_1.value_counts()

electronics    21274
Name: cat_1, dtype: int64

In [103]:
data_df.groupby('cat_1')['product_id_num'].nunique()

cat_1
accessories      535
apparel         1605
appliances      3663
auto             390
computers       2878
construction     830
country_yard      14
electronics     4166
furniture        432
kids             431
medicine          10
sport            217
stationery        13
Name: product_id_num, dtype: int64

In [182]:
other_product_ids = data_df[data_df.non_popular_products==1].product_id_num.unique()

In [183]:
other_product_ids

array([13032,  6805, 12706, ...,  9805, 14729, 13707])

In [210]:
user_idx = data_df.user_id_num.unique()

In [211]:
user_idx

array([3796, 1811, 3804, ..., 2578, 4171,  187])

In [212]:
user_h1 = user_embeddings[user_idx]

In [213]:
item_h1 = candidate_items[other_product_ids]

In [214]:
R_h = (user_h1 @ item_h1.T) + reward_function.ub(torch.tensor(user_idx)) + reward_function.ib(torch.tensor(other_product_ids)).T

In [216]:
R_h.max()

tensor(2.9125)

In [217]:
R_h.min()

tensor(-0.0597)

In [266]:
user_product_pairs = []
rew = []

In [268]:
for i, uid in enumerate(user_idx):
    for j, iid in enumerate(other_product_ids):
        if R_h[i][j] > 2.0:
            user_product_pairs.append([uid, iid])
            rew.append(R_h[i][j].item)

In [269]:
user_product_pairs = np.array(user_product_pairs)

In [270]:
len(np.unique(user_product_pairs[:, 0]))

230

In [275]:
R_h1 = (user_embeddings[np.unique(user_product_pairs[:, 0])] @ item_embeddings.T) + reward_function.ub(torch.tensor(np.unique(user_product_pairs[:, 0]))) + reward_function.ib.weight.T

In [276]:
R_h1.shape

torch.Size([230, 15184])

In [280]:
_, indices = R_h1.topk(10)

In [281]:
indices.shape

torch.Size([230, 10])

In [299]:
user_profile = data_df.groupby(by=['user_id_num'])['behavior'].sum().reset_index(name='behavior_sum')

In [300]:
x = data_df[data_df.non_popular_products==1].groupby(by=['user_id_num'])['behavior'].sum().reset_index(name='other_sum')

In [306]:
user_profile = pd.merge(user_profile, x, on='user_id_num', how='left')
user_profile.fillna(0, inplace=True)
user_profile

Unnamed: 0,user_id_num,behavior_sum,other_sum
0,0,11,0.0
1,1,25,0.0
2,2,9,0.0
3,3,9,0.0
4,4,19,18.0
...,...,...,...
5304,5304,10,0.0
5305,5305,13,0.0
5306,5306,133,5.0
5307,5307,5,0.0


In [307]:
user_profile['behavior_ratio'] = user_profile['other_sum']/user_profile['behavior_sum']

In [315]:
sample_users1 = user_profile.sort_values(by=['behavior_ratio'], ascending=False)[:50].user_id_num.to_list()

In [321]:
R_h2 = (user_embeddings[sample_users1] @ item_embeddings.T) + reward_function.ub(torch.tensor(sample_users1)) + reward_function.ib.weight.T

In [322]:
R_h2.shape

torch.Size([50, 15184])

In [329]:
v, indices2 = R_h2.topk(10)

In [337]:
uids = np.unique(user_product_pairs[:, 0])

In [342]:
R_h3 = (user_embeddings[sample_users1] @ item_embeddings[other_product_ids].T) + \
        reward_function.ub(torch.tensor(sample_users1)) + \
        reward_function.ib(torch.tensor(other_product_ids)).T

In [343]:
R_h3.shape

torch.Size([50, 4477])

In [344]:
val, indices3 = R_h3.topk(10)

In [336]:
for i in range(50):
    print(data_df[data_df.product_id_num.isin(indices2[i].numpy())].cat_2.unique())

['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphone' 'audio']
['smartphon