In [1]:
import pickle
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from model import Actor, Critic, DRRAveStateRepresentation, PMF
from utils.history_buffer import HistoryBuffer

In [2]:
# Load the raw interaction log and encode every event type as an integer
# behavior code (view=1, cart=2, purchase=3). An event type that is missing
# from the mapping raises KeyError.
data_df = pd.read_csv('dataset/eComm-sample-data2.csv')
event_type_to_num = {'view': 1, 'cart': 2, 'purchase': 3}
data_df['behavior'] = [event_type_to_num[event] for event in data_df['event_type']]

# Keep only the columns used downstream, as a plain NumPy array laid out as
# [user_id_num, product_id_num, behavior, event_time].
feature_columns = ['user_id_num', 'product_id_num', 'behavior', 'event_time']
data = data_df[feature_columns].values

In [3]:
# Build the two networks whose trained weights are restored below.
# NOTE(review): n_items=5 presumably has to match the HistoryBuffer size used
# later in this notebook -- confirm against model.py.
state_rep_net = DRRAveStateRepresentation(
    n_items=5,
    item_features=100,
    user_features=100,
)
# NOTE(review): in_features=300 looks like 3 x 100 embedding features, and
# out_features=100 matches the item-embedding width the action is multiplied
# with -- confirm against model.py.
actor_net = Actor(
    in_features=300,
    out_features=100,
)

In [4]:
# Restore the trained state-representation weights. map_location remaps every
# stored tensor to the CPU, so the checkpoint still deserializes on a machine
# without CUDA in case it was written from a GPU run.
state_rep_net.load_state_dict(
    torch.load('results/220621-083226/state_rep_net.weights', map_location='cpu')
)

<All keys matched successfully>

In [5]:
# Restore the trained actor weights. map_location remaps every stored tensor
# to the CPU, so the checkpoint still deserializes on a machine without CUDA
# in case it was written from a GPU run.
actor_net.load_state_dict(
    torch.load('results/220621-083226/actor_net.weights', map_location='cpu')
)

<All keys matched successfully>

In [6]:
# Sizes of the PMF embedding tables (number of users / number of items).
NUM_USERS = 5309
NUM_ITEMS = 15184

# Width of every user and item embedding vector.
embedding_feature_size = 100

# Checkpoint of the pre-trained PMF model used for rewards and embeddings.
path_to_trained_pmf = 'trained/eComm_ratio_0.800000_bs_256_e_25_wd_0.100000_lr_0.000100_trained_pmf.pt'

# All tensors in this notebook are placed on the CPU.
device = torch.device('cpu')

In [7]:
# Create and load PMF function for rewards and embeddings
reward_function = PMF(NUM_USERS, NUM_ITEMS, embedding_feature_size, is_sparse=False, no_cuda=True)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written from a GPU run still deserializes when CUDA is unavailable.
reward_function.load_state_dict(torch.load(path_to_trained_pmf, map_location=device))

# Freeze all the parameters in the network
for param in reward_function.parameters():
    param.requires_grad = False
print("Initialized PMF, imported weights, created reward_function")

# Extract embeddings (the raw weight tensors of the two embedding tables)
user_embeddings = reward_function.user_embeddings.weight.data
item_embeddings = reward_function.item_embeddings.weight.data
print("Extracted user and item embeddings from PMF")
print("User embeddings shape: ", user_embeddings.shape)
print("Item embeddings shape: ", item_embeddings.shape)

Initialized PMF, imported weights, created reward_function
Extracted user and item embeddings from PMF
User embeddings shape:  torch.Size([5309, 100])
Item embeddings shape:  torch.Size([15184, 100])


In [8]:
# One int64 index per row of the item-embedding table, created on `device`.
candidate_item_idxs = torch.arange(item_embeddings.size(0), dtype=torch.long, device=device)

In [9]:
# Inspect the interaction array; its columns are
# [user_id_num, product_id_num, behavior, event_time].
data

array([[               3796,                 613,                   3,
        1569888134000000000],
       [               1811,                 817,                   2,
        1569896511000000000],
       [               1811,                 817,                   2,
        1569896520000000000],
       ...,
       [                638,                 513,                   1,
        1571330757000000000],
       [               1194,               13707,                   1,
        1571547129000000000],
       [               3827,                 168,                   1,
        1570462592000000000]])

In [10]:
# User whose recommendations are inspected in the following cells.
e = 121
# Buffer that will hold the item embeddings fed to the state representation.
history_buffer = HistoryBuffer(5)

# Rows of `data` whose first column (user_id_num) equals the selected user.
is_selected_user = data[:, 0] == e
user_reviews = data[is_selected_user]

# Keep interactions with a positive behavior code. Every code produced by
# event_type_to_num is >= 1, so this filter currently keeps all rows.
has_positive_behavior = user_reviews[:, 2] > 0
pos_user_reviews = user_reviews[has_positive_behavior]

In [11]:
# Order both interaction arrays by their event_time column (column 3).
chronological = np.argsort(user_reviews[:, 3])
user_reviews = user_reviews[chronological]
pos_chronological = np.argsort(pos_user_reviews[:, 3])
pos_user_reviews = pos_user_reviews[pos_chronological]

# Independent copy of the full item-embedding table on `device`.
candidate_items = item_embeddings.detach().clone().to(device)
# Embeddings of the items this user interacted with, in chronological order.
interacted_item_ids = user_reviews[:, 1]
user_candidate_items = item_embeddings[interacted_item_ids].detach().clone().to(device)

In [12]:
# Fill the history buffer with the embeddings of the first five rows of
# pos_user_reviews (column 1 holds the item index).
for i in range(5):
    emb = candidate_items[pos_user_reviews[i, 1]]
    history_copy = emb.detach().clone()
    history_buffer.push(history_copy)

# Build the state from the user's embedding and the stacked history items.
user_embed = user_embeddings[e]
history_items = torch.stack(history_buffer.to_list())
state = state_rep_net(user_embed, history_items)

# The actor maps the (detached) state to an action vector; each candidate item
# is scored by its dot product with that action.
action = actor_net(state.detach())
ranking_scores = torch.matmul(candidate_items, action)

# Ten highest-scoring items and their indices into candidate_items.
values, indices = ranking_scores.topk(10)



In [13]:
values

tensor([3.2030, 2.8095, 2.7721, 2.7265, 2.7142, 2.6937, 2.6326, 2.6016, 2.5881,
        2.5559], grad_fn=<TopkBackward0>)

In [14]:
indices

tensor([626, 592, 797,  87, 842,  98, 481, 545, 613, 676])

In [15]:
# Print the PMF reward predicted for user `e` on each recommended item.
# The user-index tensor is identical on every iteration, so it is built once
# outside the loop; the unused per-item embedding lookup was dropped.
user_idx = torch.tensor(e).to(device)
for i in indices:
    reward = reward_function(user_idx, i)
    print(reward)

tensor(1.5852)
tensor(1.7352)
tensor(1.6997)
tensor(1.6991)
tensor(1.5558)
tensor(1.6422)
tensor(1.5555)
tensor(1.7030)
tensor(1.7252)
tensor(1.7119)


torch.Size([5, 100])