In [1]:
import pickle
import random
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from model import Actor, Critic, DRRAveStateRepresentation, PMF
from utils.history_buffer import HistoryBuffer

In [2]:
# Load the raw interaction log and encode every event type as an ordinal
# behaviour score (view=1 < cart=2 < purchase=3).
# NOTE(review): a later cell in this notebook repeats this load and rebuilds
# `data_df` / `data`, so the objects created here are overwritten before the
# evaluation loop runs.
event_type_to_num = {'view': 1, 'cart': 2, 'purchase': 3}
data_df = pd.read_csv('dataset/eComm-sample-data2.csv')
# Plain dict lookup per row: an event type missing from the mapping raises KeyError.
data_df['behavior'] = data_df['event_type'].apply(event_type_to_num.__getitem__)
# Column order of the resulting array: user id, item id, behaviour score, event time.
data = data_df.loc[
    :,
    ['user_id_num', 'product_id_num', 'behavior', 'event_time'],
].values

In [3]:
# Build the state-representation and actor networks. The constructor arguments
# must agree with the checkpoints loaded in the following cells.
state_rep_net = DRRAveStateRepresentation(n_items=5, item_features=100, user_features=100)
actor_net = Actor(in_features=300, out_features=100)

# This notebook only runs inference, so put both networks in evaluation mode.
# eval() only changes behaviour for train/eval-dependent layers (dropout,
# batch norm); the model definitions are not visible here, so this is a
# safeguard and a no-op if no such layers exist.
for net in (state_rep_net, actor_net):
    net.eval()

In [4]:
# Restore the trained state-representation weights. map_location='cpu' keeps the
# load working on CPU-only machines even when the checkpoint holds CUDA tensors.
state_rep_net.load_state_dict(
    torch.load('results/220623-075948/state_rep_net.weights', map_location='cpu')
)

<All keys matched successfully>

In [5]:
# Restore the trained actor weights. map_location='cpu' keeps the load working on
# CPU-only machines even when the checkpoint holds CUDA tensors.
actor_net.load_state_dict(
    torch.load('results/220623-075948/actor_net.weights', map_location='cpu')
)

<All keys matched successfully>

In [6]:
# Sizes handed to the PMF constructor: number of users and number of items.
NUM_USERS = 5309
NUM_ITEMS = 15184
# Width of each user / item embedding vector.
embedding_feature_size = 100
# Checkpoint file of the trained PMF model.
path_to_trained_pmf = 'trained/eComm_ratio_0.800000_bs_256_e_25_wd_0.100000_lr_0.000100_trained_pmf.pt'
# All tensors in this notebook are placed on the CPU.
device = torch.device('cpu')

In [7]:
# Create and load PMF function for rewards and embeddings
reward_function = PMF(NUM_USERS, NUM_ITEMS, embedding_feature_size, is_sparse=False, no_cuda=True)
# map_location pins the checkpoint tensors to `device`, so the load also works on
# a CPU-only machine when the checkpoint was written from a GPU run.
reward_function.load_state_dict(torch.load(path_to_trained_pmf, map_location=device))

# Freeze all the parameters in the network: it is only used for inference here
for param in reward_function.parameters():
    param.requires_grad = False
print("Initialized PMF, imported weights, created reward_function")

# Extract embeddings (the raw weight tensors of the two embedding tables)
user_embeddings = reward_function.user_embeddings.weight.data
item_embeddings = reward_function.item_embeddings.weight.data
print("Extracted user and item embeddings from PMF")
print("User embeddings shape: ", user_embeddings.shape)
print("Item embeddings shape: ", item_embeddings.shape)

Initialized PMF, imported weights, created reward_function
Extracted user and item embeddings from PMF
User embeddings shape:  torch.Size([5309, 100])
Item embeddings shape:  torch.Size([15184, 100])


In [8]:
cuda = False


def seed_all(cuda, seed):
    """Seed every random number generator this notebook relies on.

    Args:
        cuda: when truthy, additionally empty the CUDA cache and seed the
            CUDA generator via ``torch.cuda.manual_seed``.
        seed: seed value passed to ``random``, NumPy and PyTorch.
    """
    # Same order as before: Python's RNG, then NumPy's, then PyTorch's.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if not cuda:
        return
    torch.cuda.empty_cache()
    torch.cuda.manual_seed(seed=seed)


seed_all(cuda, 0)

In [9]:
# Reload the interaction log and encode the event types (view=1, cart=2, purchase=3).
# NOTE(review): this repeats the load done in an earlier cell; keeping it here
# means the in-place shuffle below always starts from freshly read data.
data_df = pd.read_csv('dataset/eComm-sample-data2.csv')
event_type_to_num = {'view': 1, 'cart': 2, 'purchase': 3}
data_df['behavior'] = data_df['event_type'].apply(lambda x : event_type_to_num[x])

# Raw id -> numeric id lookups; their sizes give the number of distinct users / items.
items = dict(zip(data_df['product_id'], data_df['product_id_num']))
users = dict(zip(data_df['user_id'], data_df['user_id_num']))

NUM_USERS, NUM_ITEMS = len(users), len(items)
print(NUM_USERS, NUM_ITEMS)

# Column order: user id, item id, behaviour score, event time.
data = data_df.loc[:, ['user_id_num', 'product_id_num', 'behavior', 'event_time']].values

# Fraction of the shuffled rows assigned to the training split. Defined once so
# the two slices and the log message below cannot drift apart.
TRAIN_RATIO = 0.8

# Row-wise, in-place shuffle driven by the global NumPy RNG.
# NOTE(review): whether this split matches the one the PMF checkpoint was
# trained on cannot be confirmed from this notebook.
np.random.shuffle(data)
split_idx = int(TRAIN_RATIO * data.shape[0])
train_data = torch.from_numpy(data[:split_idx])
test_data = torch.from_numpy(data[split_idx:])
print("Data imported, shuffled, and split into Train/Test, ratio=", TRAIN_RATIO)
print("Train data shape: ", train_data.shape)
print("Test data shape: ", test_data.shape)


5309 15184
Data imported, shuffled, and split into Train/Test, ratio= 0.8
Train data shape:  torch.Size([100427, 4])
Test data shape:  torch.Size([25107, 4])


In [10]:
# Device on which the evaluation tensors are placed.
# NOTE(review): `device` is already assigned the same CPU device in an earlier cell.
device = torch.device('cpu')

In [11]:
# Place the held-out interactions on the evaluation device.
test_data = test_data.to(device)

In [12]:
# Distinct user ids (column 0) that occur in the test split. `.cpu()` is a no-op
# for CPU tensors and keeps the NumPy conversion valid if `device` is ever
# changed to a GPU, where `.numpy()` alone would raise.
user_idxs = np.unique(test_data[:, 0].cpu().numpy())

In [13]:
# Randomise the order in which users are evaluated. The shuffle is in place and
# draws from the global NumPy RNG, so re-running this cell changes the order again.
np.random.shuffle(user_idxs)

In [14]:
# One int64 index per row of the item-embedding table, created directly on `device`.
# NOTE(review): this tensor is not referenced by the evaluation loop visible in
# this notebook.
candidate_item_idxs = torch.arange(item_embeddings.shape[0], dtype=torch.long, device=device)

In [15]:
def discretize_reward_(x):
    """Snap a scalar reward onto one of the discrete levels 1.0, 2.0 or 3.0.

    Args:
        x: single-element tensor (any object exposing ``.item()``).

    Returns:
        A 0-dim float64 tensor: 1.0 when the value is <= 1, 2.0 when it is
        <= 2, and 3.0 in every other case.
    """
    value = x.item()
    # Upper bounds are inclusive; whatever passes neither test gets the top level.
    for level in (1, 2):
        if value <= level:
            return torch.tensor(float(level), dtype=torch.float64)
    return torch.tensor(3.0, dtype=torch.float64)

In [16]:
def calculate_ndcg(rel, irel):
    """Accumulate the discounted cumulative gain of a ranking and of a reference ranking.

    The item at 0-based position ``i`` is discounted by ``log2(i + 2)``. Despite
    the name, no ratio is taken here; the two sums are returned separately.

    Args:
        rel: relevance values of the produced ranking, best position first.
        irel: relevance values of the reference ("ideal") ranking.

    Returns:
        ``(dcg, idcg)``. Only as many positions as the shorter input are used;
        two empty inputs yield ``(0, 0)``.
    """
    dcg, idcg = 0, 0
    # Start counting at 2 so that `position` is already the log argument i + 2.
    for position, (gain, ideal_gain) in enumerate(zip(rel, irel), start=2):
        discount = np.log2(position)
        dcg += gain / discount
        idcg += ideal_gain / discount
    return dcg, idcg

In [17]:
# Probability with which the evaluation loop replaces the actor's action by a random one.
eps_eval = 0.1
# Number of item embeddings pushed into the history buffer for each user.
history_buffer_size = 5
# Size the buffer from the constant instead of repeating the literal, so the two
# cannot get out of sync.
history_buffer = HistoryBuffer(history_buffer_size)

In [18]:
# Offline evaluation of the trained actor for several recommendation list sizes T.
# For every test user with enough history, the actor proposes an action vector,
# all items are ranked by their dot product with it, and the top T items are
# scored by the PMF reward model. An item counts as "correct" when its
# discretized predicted reward is 3.0.

# canditate items embedding for recommendation. The matrix is never modified
# below, so it is copied once instead of once per user per T.
candidate_items = item_embeddings.detach().clone().to(device)

for T in [5, 10, 15, 20, 25]:
    # running sums over the evaluated users; divided by `steps` when reporting
    mean_ndcg, mean_precision, steps = 0, 0, 0

    for e in user_idxs:

        # extract user reviews and positive user reviews for user id
        user_reviews = test_data[test_data[:, 0] == e]
        pos_user_reviews = user_reviews[user_reviews[:, 2] > 0]

        # history should be sufficient
        if pos_user_reviews.shape[0] < history_buffer_size:
            continue

        steps += 1

        # sort positive user history by timestamp (column 3)
        pos_user_reviews = pos_user_reviews[pos_user_reviews[:, 3].argsort()]

        # get user embedding and the user index tensor passed to the reward model
        user_emb = user_embeddings[e]
        user_idx = torch.tensor(e).to(device)

        # fill up history buffer with the user's earliest positive items
        # NOTE(review): the buffer is shared across users; this assumes that
        # pushing `history_buffer_size` items evicts everything stored for the
        # previous user -- confirm against HistoryBuffer.
        for i in range(history_buffer_size):
            emb = candidate_items[pos_user_reviews[i, 1]]
            history_buffer.push(emb.detach().clone())

        with torch.no_grad():
            # use state rep net weights to get state (input args is user embedding and history buffer)
            state = state_rep_net(user_emb, torch.stack(history_buffer.to_list()))

            # epsilon-greedy: with probability eps_eval a random action is used
            # instead of the actor's; its width follows the item embedding size
            if np.random.uniform(0, 1) < eps_eval:
                action = torch.from_numpy(0.1 * np.random.rand(candidate_items.shape[1])).float().to(device)
            else:
                action = actor_net(state.detach())

            # matmul for ranking scores, then keep the indices of the top T items
            ranking_scores = candidate_items @ action
            indices = torch.topk(ranking_scores, T).indices

            # calculate reward for predictions
            rewards = [reward_function(user_idx, rec_item_index) for rec_item_index in indices]

        rewards = [discretize_reward_(reward).item() for reward in rewards]

        # the reference ranking is "every recommended item is correct"
        correct_list = [1 if r == 3.0 else 0 for r in rewards]
        dcg, idcg = calculate_ndcg(correct_list, [1] * len(correct_list))
        mean_ndcg += dcg/idcg
        mean_precision += sum(correct_list)/T

    # no user had enough history: the averages are undefined, so report that
    # instead of dividing by zero
    if steps == 0:
        print(f"T {T} no test user has at least {history_buffer_size} interactions, metrics undefined")
        continue

    print(f"T {T} mean precision {(mean_precision/steps):.2f}, mean_ndcg {(mean_ndcg/steps):.2f}, total steps/users {steps}")



T 5 mean precision 0.40, mean_ndcg 0.39, total steps/users 1791
T 10 mean precision 0.39, mean_ndcg 0.39, total steps/users 1791
T 15 mean precision 0.38, mean_ndcg 0.38, total steps/users 1791
T 20 mean precision 0.37, mean_ndcg 0.38, total steps/users 1791
T 25 mean precision 0.37, mean_ndcg 0.37, total steps/users 1791
