In [6]:
from model import Actor, Critic, DRRAveStateRepresentation, PMF
from learn import DRRTrainer
from utils.general import csv_plot
import torch
import pickle
import numpy as np
import pandas as pd
import random
import os
import datetime

import matplotlib.pyplot as plt
 
%matplotlib inline

In [7]:
 class config():
    """Static configuration namespace for the DRR training notebook.

    All values are class attributes; the class itself (not an instance) is
    handed to DRRTrainer. Evaluating the class body has a side effect: it
    creates a fresh, timestamped output directory under 'results/'.
    """

    # Per-run output directory, named after the time the class body is evaluated.
    output_path = 'results/' + datetime.datetime.now().strftime('%y%m%d-%H%M%S') + '/'
    # exist_ok avoids the check-then-create race (and the error when the cell
    # is re-run within the same second).
    os.makedirs(output_path, exist_ok=True)
    plot_dir = output_path + 'rewards.pdf'

    # Raw training curves saved as .npy files.
    train_actor_loss_data_dir = output_path + 'train_actor_loss_data.npy'
    train_critic_loss_data_dir = output_path + 'train_critic_loss_data.npy'
    train_mean_reward_data_dir = output_path + 'train_mean_reward_data.npy'

    # Rendered training-curve images.
    train_actor_loss_plot_dir = output_path + 'train_actor_loss.png'
    train_critic_loss_plot_dir = output_path + 'train_critic_loss.png'
    train_mean_reward_plot_dir = output_path + 'train_mean_reward.png'

    # Location of previously trained weights.
    trained_models_dir = 'trained/'

    actor_model_trained = trained_models_dir + 'actor_net.weights'
    critic_model_trained = trained_models_dir + 'critic_net.weights'
    state_rep_model_trained = trained_models_dir + 'state_rep_net.weights'

    # Weight files belonging to the current run.
    actor_model_dir = output_path + 'actor_net.weights'
    critic_model_dir = output_path + 'critic_net.weights'
    state_rep_model_dir = output_path + 'state_rep_net.weights'

    csv_dir = output_path + 'log.csv'

    # Pre-trained PMF checkpoint; loaded later to build the reward function.
    path_to_trained_pmf = trained_models_dir + 'eComm_ratio_0.800000_bs_256_e_25_wd_0.100000_lr_0.000100_trained_pmf.pt'

    # hyperparams (apart from the few read directly in this notebook, these are
    # consumed by DRRTrainer; see that class for their exact meaning)
    batch_size = 128
    gamma = 0.9
    replay_buffer_size = 100000
    history_buffer_size = 5
    learning_start = 1000 #500
    learning_freq = 1
    lr_state_rep = 0.001
    lr_actor = 0.0001
    lr_critic = 0.001
    eps_start = 1
    eps = 0.1
    eps_steps = 10000
    eps_eval = 0.1
    tau = 0.01 # initial 0.001
    beta = 0.4
    prob_alpha = 0.3
    max_timesteps_train = 53090
    max_epochs_offline = 500
    max_timesteps_online = 1000
    embedding_feature_size = 100  # also the PMF embedding width used in this notebook
    episode_length = 10
    train_ratio = 0.8  # fraction of rows placed in the training split
    weight_decay = 0.01
    clip_val = 1.0
    log_freq = 1000
    saving_freq = 1000
    zero_reward = False

    # When True the notebook never selects CUDA, even if it is available.
    no_cuda = True

    logs_dir = 'runs/training2'

def seed_all(cuda, seed):
    """Seed every random number generator the notebook relies on.

    Args:
        cuda: truthy when CUDA is in use; additionally empties the CUDA cache
            and seeds the CUDA generator.
        seed: integer seed applied to `random`, NumPy and PyTorch.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not cuda:
        return

    torch.cuda.empty_cache()
    torch.cuda.manual_seed(seed=seed)

In [8]:
print("Initializing DRR Framework ----------------------------------------------------------------------------")

# CUDA is used only when it is both permitted by the config and available.
cuda = (not config.no_cuda) and torch.cuda.is_available()
print("Using CUDA" if cuda else "Using CPU")

# Seed all RNGs with a fixed seed.
seed_all(cuda, 0)
print("Seeds initialized")

# Model classes (not instances) that are handed to the trainer.
actor_function, critic_function, state_rep_function = Actor, Critic, DRRAveStateRepresentation

Initializing DRR Framework ----------------------------------------------------------------------------
Using CPU
Seeds initialized


In [9]:
 
# Import data: read the e-commerce interaction sample into a DataFrame.
# Later cells read the columns event_type, product_id, product_id_num,
# user_id, user_id_num and event_time from it.

data_df = pd.read_csv('dataset/eComm-sample-data2.csv')

In [10]:
# Numeric code for each event type; an event type missing from this table
# raises KeyError during the lookup below.
event_type_to_num = {'view': 1, 'cart': 2, 'purchase': 3}
data_df['behavior'] = data_df['event_type'].apply(event_type_to_num.__getitem__)

# Lookup tables from raw ids to their numeric ids (later rows overwrite earlier ones).
users = dict(zip(data_df['user_id'], data_df['user_id_num']))
items = dict(zip(data_df['product_id'], data_df['product_id_num']))

NUM_USERS = len(users)
NUM_ITEMS = len(items)
print(NUM_USERS, NUM_ITEMS)

# Interaction matrix with columns: user, item, behavior code, event time.
data = data_df[['user_id_num', 'product_id_num', 'behavior', 'event_time']].values

5309 15184


In [11]:
# Sanity check: distinct values in column 2 of `data`, which is the 'behavior'
# column given the column order used when `data` was built.
np.unique(data[:, 2])

array([1, 2, 3])

In [12]:
# Shuffle the rows in place, then cut once at the train/test boundary.
np.random.shuffle(data)
train_data, test_data = (
    torch.from_numpy(part)
    for part in np.split(data, [int(config.train_ratio * data.shape[0])])
)
print("Data imported, shuffled, and split into Train/Test, ratio=", config.train_ratio)
for split_name, split in (("Train", train_data), ("Test", test_data)):
    print(split_name + " data shape: ", split.shape)
 

Data imported, shuffled, and split into Train/Test, ratio= 0.8
Train data shape:  torch.Size([100427, 4])
Test data shape:  torch.Size([25107, 4])


In [13]:
# Create and load PMF function for rewards and embeddings.
# `no_cuda` must be a real boolean. The previous `~cuda` applied bitwise NOT to
# a bool (~True == -2, ~False == -1), which is truthy in both cases, so the flag
# never reflected the actual device choice.
reward_function = PMF(NUM_USERS, NUM_ITEMS, config.embedding_feature_size, is_sparse=False, no_cuda=not cuda)
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location='cpu' lets a checkpoint that was saved on a GPU be read on a
# CPU-only machine; load_state_dict then copies the values into the model's
# existing parameters.
reward_function.load_state_dict(torch.load(config.path_to_trained_pmf, map_location='cpu'))

# Freeze all the parameters in the network
for param in reward_function.parameters():
    param.requires_grad = False
print("Initialized PMF, imported weights, created reward_function")

# Extract the embedding tables stored in the PMF model
user_embeddings = reward_function.user_embeddings.weight.data
item_embeddings = reward_function.item_embeddings.weight.data
print("Extracted user and item embeddings from PMF")
print("User embeddings shape: ", user_embeddings.shape)
print("Item embeddings shape: ", item_embeddings.shape)
 

Initialized PMF, imported weights, created reward_function
Extracted user and item embeddings from PMF
User embeddings shape:  torch.Size([5309, 100])
Item embeddings shape:  torch.Size([15184, 100])


In [14]:
# Build the trainer from the config, model classes, reward model and data.
print("Initializing DRRTrainer -------------------------------------------------------------------------------")
trainer = DRRTrainer(
    config,
    actor_function, critic_function, state_rep_function,  # model classes
    reward_function,                                      # frozen PMF model
    users, items,                                         # id lookup tables
    train_data, test_data,                                # interaction tensors
    user_embeddings, item_embeddings,                     # PMF embedding tables
    cuda,
)

Initializing DRRTrainer -------------------------------------------------------------------------------
Current PyTorch Device:  cpu
Data dimensions extracted
Models initialized
Model weights initialized, copied to target
Optimizers initialized


In [15]:
# Echo the configured `learning_start` value as a quick check before training.
config.learning_start

1000

In [16]:
# Train: run the trainer and unpack the three values returned by learn().
# NOTE(review): the saved output shows this cell ended in a KeyboardInterrupt,
# so in that run the three names below were never assigned by this cell.
print("Starting DRRTrainer.learn() ---------------------------------------------------------------------------")
actor_losses, critic_losses, epi_avg_rewards = trainer.learn()

Starting DRRTrainer.learn() ---------------------------------------------------------------------------


  ignored_items.append(torch.tensor(rec_item_idx).to(self.device))


Timestep 1000 | Episode 99 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 3.8388 | Actor Loss -17.1838 | Critic Loss 0.0847 | 
Timestep 2000 | Episode 199 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 5.6057 | Actor Loss -23.8594 | Critic Loss 0.1027 | 
Timestep 3000 | Episode 299 | Mean Ep R 3.0000 | Max R 3.0000 | Critic Params Norm 46.5860 | Actor Loss -27.7754 | Critic Loss 1.1238 | 
Timestep 4000 | Episode 399 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 17.5064 | Actor Loss -29.1093 | Critic Loss 0.2372 | 
Timestep 5000 | Episode 499 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 13.9061 | Actor Loss -26.7860 | Critic Loss 0.1545 | 
Timestep 6000 | Episode 599 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 1.0089 | Actor Loss -28.2015 | Critic Loss 0.0602 | 
Timestep 7000 | Episode 699 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 30.4927 | Actor Loss -25.6073 | Critic Loss 0.3476 | 
Timestep 8000 | Episode 799 | Mean Ep R 2.000

KeyboardInterrupt: 

In [11]:
# Re-point the notebook at the artifacts of an earlier run.
# NOTE(review): the run directory is hard-coded and this cell's execution count
# is out of sequence with the cells above; it shadows nothing in `config` except
# the `*_trained` paths, while `output_path` here is a separate module-level name.
output_path = "results/220621-192625/"
config.trained_models_dir = "results/220621-192625/"

# Trained weights are now read from that run's directory.
config.actor_model_trained = config.trained_models_dir + 'actor_net.weights'
config.critic_model_trained = config.trained_models_dir + 'critic_net.weights'
config.state_rep_model_trained = config.trained_models_dir + 'state_rep_net.weights'

# Saved training curves of that run.
train_actor_loss_data_dir = output_path + 'train_actor_loss_data.npy'
train_critic_loss_data_dir = output_path + 'train_critic_loss_data.npy'
train_mean_reward_data_dir = output_path + 'train_mean_reward_data.npy'

In [12]:
import matplotlib.pyplot as plt
from tsmoothie.smoother import ConvolutionSmoother

In [13]:
def noiseless_plot(y, title, ylabel, save_loc, window_len=1000, n_sigma=3):
    """Save a figure showing a raw series together with its smoothed version.

    The series is smoothed with a ConvolutionSmoother using a 'ones' window,
    and the interval returned by `get_intervals('sigma_interval', ...)` is
    shaded on the same axes. Nothing is returned; the figure is written to
    disk and then closed.

    Args:
        y: the series to smooth and plot.
        title: figure title.
        ylabel: label for the y axis.
        save_loc: path passed to `plt.savefig`.
        window_len: smoothing window length (defaults to the previously
            hard-coded 1000).
        n_sigma: width of the shaded interval in sigmas (defaults to the
            previously hard-coded 3).
    """
    smoother = ConvolutionSmoother(window_len=window_len, window_type='ones')
    smoother.smooth(y)

    # generate intervals
    low, up = smoother.get_intervals('sigma_interval', n_sigma=n_sigma)

    # Index 0 selects the first series held by the smoother.
    raw = smoother.data[0]
    smoothed = smoother.smooth_data[0]

    # plot the smoothed timeseries with intervals
    plt.close()  # discard whichever figure was current before starting
    fig = plt.figure(figsize=(11, 6))
    try:
        plt.xlabel("Epoch")
        plt.ylabel(ylabel)
        plt.title(title)
        plt.plot(raw, color='orange')
        plt.plot(smoothed, linewidth=3, color='blue')
        plt.fill_between(range(len(raw)), low[0], up[0], alpha=0.3)
        plt.savefig(save_loc)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [14]:
# Load the saved training curves of the selected run.
actor_losses = np.load(train_actor_loss_data_dir)
critic_losses = np.load(train_critic_loss_data_dir)
epi_avg_rewards = np.load(train_mean_reward_data_dir)

# One smoothed plot per curve; the same text serves as title and y-axis label.
for _series, _label, _filename in (
    (actor_losses, "Actor Loss (Train)", "train_actor_loss_smooth.png"),
    (critic_losses, "Critic Loss (Train)", "train_critic_loss_smooth.png"),
    (epi_avg_rewards, "Mean Reward (Train)", "train_mean_reward_smooth.png"),
):
    noiseless_plot(_series, _label, _label, output_path + _filename)