In [1]:
import sys
sys.path.insert(0, '../')

In [2]:
from copyreg import pickle
import os
import random
import pickle
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
import torch
from datetime import datetime
from model.model import Actor, Critic, DRRAveStateRepresentation, PMF
from learn.learn import DRRTrainer

import matplotlib.pyplot as plt
from tsmoothie.smoother import ConvolutionSmoother

In [3]:
class config:
    date_time = datetime.now().strftime('%y%m%d-%H%M%S')
    output_path = '../results/' + date_time + '/'

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    plot_dir = output_path + 'rewards.pdf'
 
    train_actor_loss_data_dir = output_path + 'train_actor_loss_data.npy'
    train_critic_loss_data_dir = output_path + 'train_critic_loss_data.npy'
    train_mean_reward_data_dir = output_path + 'train_mean_reward_data.npy'
 
    train_actor_loss_plot_dir = output_path + 'train_actor_loss.png'
    train_critic_loss_plot_dir = output_path + 'train_critic_loss.png'
    train_mean_reward_plot_dir = output_path + 'train_mean_reward.png'
 
    trained_models_dir = '../trained/' + date_time + '/'
 
    actor_model_trained = trained_models_dir + 'actor_net.weights'
    critic_model_trained = trained_models_dir + 'critic_net.weights'
    state_rep_model_trained = trained_models_dir + 'state_rep_net.weights'
 
    actor_model_dir = output_path + 'actor_net.weights'
    critic_model_dir = output_path + 'critic_net.weights'
    state_rep_model_dir = output_path + 'state_rep_net.weights'
 
    csv_dir = output_path + 'log.csv'
 
    path_to_trained_pmf = '../trained/trained_pmf.pt'
 
    # hyperparams
    batch_size = 128
    gamma = 0.9
    replay_buffer_size = 100000
    history_buffer_size = 5
    learning_start = 1000 #5000
    learning_freq = 1
    lr_state_rep = 0.001
    lr_actor = 0.0001
    lr_critic = 0.001
    eps_start = 1
    eps = 0.1
    eps_steps = 10000
    eps_eval = 0.1
    tau = 0.01 # inital 0.001
    beta = 0.4
    prob_alpha = 0.3
    max_timesteps_train = 15000
    max_epochs_offline = 500
    max_timesteps_online = 2000
    embedding_feature_size = 100
    episode_length = 10
    train_ratio = 0.8
    weight_decay = 0.01
    clip_val = 1.0
    log_freq = 500
    saving_freq = 100
    zero_reward = False
 
    no_cuda = True
    
    logs_dir = '../runs/' + date_time

In [4]:
def seed_all(cuda, seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if cuda:
        torch.cuda.empty_cache()
        torch.cuda.manual_seed(seed=seed)

In [5]:
print("Initializing DRR Framework ----------------------------------------------------------------------------")
 
# Get CUDA device if available
cuda = True if not config.no_cuda and torch.cuda.is_available() else False
print("Using CUDA") if cuda else print("Using CPU")
 
# Init seeds
seed_all(cuda, 0)
print("Seeds initialized")
 

Initializing DRR Framework ----------------------------------------------------------------------------
Using CPU
Seeds initialized


In [6]:
# Grab models
actor_function = Actor
critic_function = Critic
state_rep_function = DRRAveStateRepresentation

In [7]:
CSV_PATH = '../dataset/sample_data.csv'

In [8]:
data_df = pd.read_csv(CSV_PATH)
reward_map = {'view': 1, 'cart': 2, 'purchase': 3}
data_df['behavior'] = data_df['event_type'].apply(lambda x : reward_map[x])

with open('../dataset/user_num_to_id.pkl', 'rb') as f:
    users = pickle.load(f)

with open('../dataset/item_num_to_id.pkl', 'rb') as f:
    items = pickle.load(f)

NUM_USERS, NUM_ITEMS = len(users), len(items)

data = data_df.loc[:, ['user_id_num', 'product_id_num', 'behavior', 'event_time']].values

In [9]:
shuffle(data, random_state=1)
train_data = torch.from_numpy(data[:int(config.train_ratio * data.shape[0])])
test_data = torch.from_numpy(data[int(config.train_ratio * data.shape[0]):])
print("Data imported, shuffled, and split into Train/Test, ratio=", config.train_ratio)
print("Train data shape: ", train_data.shape)
print("Test data shape: ", test_data.shape)
 

Data imported, shuffled, and split into Train/Test, ratio= 0.8
Train data shape:  torch.Size([101065, 4])
Test data shape:  torch.Size([25267, 4])


In [13]:
np.save('../dataset/test_data.npy', test_data)
np.save('../dataset/train_data.npy', train_data)

In [10]:
# Create and load PMF function for rewards and embeddings
reward_function = PMF(NUM_USERS, NUM_ITEMS, config.embedding_feature_size, is_sparse=False, no_cuda=~cuda)
reward_function.load_state_dict(torch.load(config.path_to_trained_pmf))
 
# Freeze all the parameters in the network
for param in reward_function.parameters():
    param.requires_grad = False
print("Initialized PMF, imported weights, created reward_function")
 
# Extract embeddings
user_embeddings = reward_function.user_embeddings.weight.data
item_embeddings = reward_function.item_embeddings.weight.data
print("Extracted user and item embeddings from PMF")
print("User embeddings shape: ", user_embeddings.shape)
print("Item embeddings shape: ", item_embeddings.shape)
 

Initialized PMF, imported weights, created reward_function
Extracted user and item embeddings from PMF
User embeddings shape:  torch.Size([5380, 100])
Item embeddings shape:  torch.Size([15286, 100])


In [11]:
# Init trainer
print("Initializing DRRTrainer -------------------------------------------------------------------------------")
trainer = DRRTrainer(config,
                      actor_function,
                      critic_function,
                      state_rep_function,
                      reward_function,
                      users,
                      items,
                      train_data,
                      test_data,
                      user_embeddings,
                      item_embeddings,
                      cuda
                      )

Initializing DRRTrainer -------------------------------------------------------------------------------
Current PyTorch Device:  cpu
Data dimensions extracted
Models initialized
Model weights initialized, copied to target
Optimizers initialized


In [12]:
# Train
print("Starting DRRTrainer.learn() ---------------------------------------------------------------------------")
actor_losses, critic_losses, epi_avg_rewards = trainer.learn()

Starting DRRTrainer.learn() ---------------------------------------------------------------------------
User id 3838, Episode 0, step 1, timestamp 181 rec item 647, reward 2.0

  ignored_items.append(torch.tensor(rec_item_idx).to(self.device))


Timestep 500 | Episode 49 | Mean Ep R 3.0000 | Max R 3.0000 | Critic Params Norm 1.3940 | Actor Loss -11.4000 | Critic Loss 0.0355 | 
Timestep 1000 | Episode 99 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 4.0355 | Actor Loss -17.6124 | Critic Loss 0.0774 | 
Timestep 1500 | Episode 149 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 6.7441 | Actor Loss -21.0875 | Critic Loss 0.1110 | 
Timestep 2000 | Episode 199 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 3.7369 | Actor Loss -24.3092 | Critic Loss 0.0597 | 
Timestep 2500 | Episode 249 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 22.8974 | Actor Loss -24.2616 | Critic Loss 0.4277 | 
Timestep 3000 | Episode 299 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 3.7331 | Actor Loss -26.1402 | Critic Loss 0.0749 | 
Timestep 3500 | Episode 349 | Mean Ep R 2.0000 | Max R 2.0000 | Critic Params Norm 11.3472 | Actor Loss -27.0521 | Critic Loss 0.1299 | 
Timestep 4000 | Episode 399 | Mean Ep R 3.0000 | 

In [14]:
def noiseless_plot(y, title, ylabel, save_loc):
    smoother = ConvolutionSmoother(window_len=1000, window_type='ones')
    smoother.smooth(y)

    # generate intervals
    low, up = smoother.get_intervals('sigma_interval', n_sigma=3)

    # plot the smoothed timeseries with intervals
    plt.close()
    plt.figure(figsize=(11,6))
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.plot(smoother.data[0], color='orange')
    plt.plot(smoother.smooth_data[0], linewidth=3, color='blue')
    plt.fill_between(range(len(smoother.data[0])), low[0], up[0], alpha=0.3)
    plt.savefig(save_loc)
    plt.close()

In [15]:
actor_losses = np.load(config.train_actor_loss_data_dir)
critic_losses = np.load(config.train_critic_loss_data_dir)
epi_avg_rewards = np.load(config.train_mean_reward_data_dir)

noiseless_plot(actor_losses, 
               "Actor Loss (Train)", 
               "Actor Loss (Train)", 
               config.output_path + "train_actor_loss_smooth.png")
               
noiseless_plot(critic_losses, 
               "Critic Loss (Train)", 
               "Critic Loss (Train)", 
               config.output_path + "train_critic_loss_smooth.png")

noiseless_plot(epi_avg_rewards, 
               "Mean Reward (Train)", 
               "Mean Reward (Train)", 
               config.output_path + "train_mean_reward_smooth.png")

In [16]:
sourceFile = open(config.output_path + "hyperparams.txt", 'w')
print(config.__dict__, file = sourceFile)
sourceFile.close()