In [3]:
import os
import torch

from src import constants
from src.rl.trainers.trainer_dqn import TrainerDQN
from src.rl.trainers.trainer_c51 import TrainerC51
from src.rl.trainers.trainer_qr import TrainerQR
from src.rl.trainers.trainer_iqn import TrainerIQN
from src.rl.trainers.trainer_fqf import TrainerFQF
from src.rl.trainers.trainer_ddpg import TrainerDDPG
from src.rl.trainers.trainer_td3 import TrainerTD3
from src.rl.trainers.trainer_reinforce import TrainerREINFORCE

In [4]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and code edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [5]:
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Replay-memory files located under constants.TRAIN_PATH.
pos_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "positive_samples.ftr")
neg_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "negative_samples.ftr")
ep_rm_path = os.path.join(constants.TRAIN_PATH, "replay_memory_episodic", "replay_memory_episodic.ftr")

# Every available embedding-map file, keyed by the news element it covers.
# All files live directly under constants.BASE_EMB_PATH.
embedding_map_paths = {
    element: os.path.join(constants.BASE_EMB_PATH, file_name)
    for element, file_name in (
        ("title", "title_emb_map.pt"),
        ("abstract", "abstract_emb_map.pt"),
        ("title_and_abstract", "title_and_abstract_emb_map.pt"),
        ("category", "category_1hot_map.pt"),
        ("sub_category", "sub_category_emb_map.pt"),
        ("all", "all_emb_map.pt"),
        ("features", "no_ts_features_map.pt"),
    )
}

# News elements selected for the encoder; only their map paths are forwarded.
news_enc_elements = ["title_and_abstract"]

# Encoder configuration handed to the trainers in the cells below.
encoder_params = {
    "embeddings_map_paths": dict(
        (element, embedding_map_paths[element]) for element in news_enc_elements
    ),
    "news_enc_elements": news_enc_elements,
    "news_embedding_size": 768,
    "history_enc_method": "mean",
    "weighted": True,
    "alpha": 0.999,  # Ignored, if weighted == False
    "history_max_len": None,
}

In [6]:
# Run identifier; the recorded output shows it used as the model directory name.
model_name = "SAC"

# Training hyper-parameters handed to the trainer.
# Group labels are inferred from the key names only; confirm against the trainer.
learning_params = {
    # optimisation
    "batch_size": 64, "learning_rate": 1e-4, "learning_decay_rate": 0.7, "gamma": 0.65,
    # replay sampling and run length
    "pos_mem_pref": 0.3, "n_steps": 200_000,
    # periodic events
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 1_000_000,
    # positive-memory preference adaptation
    "pos_mem_pref_adapt": False, "freq_pos_mem_pref_adapt": 6_000_000, "pos_mem_pref_adapt_step": 0.04,
    "progress_saves": [10_000, 100_000, 200_000],
    # target network
    "freq_target_update": 5_000, "soft_target_update": True, "tau": 0.01,
}

# Network configuration handed to the trainer.
model_params = {
    "type": "default",
    "double_learning": False,
    "net_params": {"hidden_size": 4096, "state_item_join_size": 1536},
}

In [7]:
# TrainerSAC is not among the trainers imported in the notebook's import cell,
# so on a fresh kernel ("Restart & Run All") this cell raised a NameError.
# Import it here so the cell is self-sufficient.
# NOTE(review): module path assumed from the sibling trainer modules
# (trainer_dqn, trainer_td3, trainer_reinforce, ...) — confirm it matches the
# repository, and consider moving this line into the top import cell.
from src.rl.trainers.trainer_sac import TrainerSAC

# Fixed seed, forwarded to the trainer (the recorded output logs "setting seed: 7").
seed = 7

# Build the SAC trainer from the shared setup (device, replay-memory paths,
# encoder_params) and the SAC-specific learning/model parameters defined above.
trainer = TrainerSAC(
    model_name, device,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params,
    seed=seed
)

[INFO] setting seed: 7
[INFO] device: cuda
[INFO] preparing directory c:\workbench\developer\drlnrs\models\SAC
[INFO] writing config files to directory
[INFO] preparing data and samplers


KeyboardInterrupt: 

In [20]:
# Prepare the trainee, then start training. In the recorded output these calls
# log the trainable parameter counts and the initial learning rate, followed by
# a progress bar.
trainer.set_trainee()
trainer.train()

[INFO] number of trainable actor parameters: 31992578
[INFO] number of trainable critic (x2) parameters: 31992578
[INFO] initial learning rate: 0.000100


  0%|          | 0/7812 [00:00<?, ?it/s]

# REINFORCE

In [16]:
# Run identifier; the recorded output shows it used as the model directory name.
model_name = "REINFORCE-n-m"

# Training hyper-parameters handed to the trainer.
# Group labels are inferred from the key names only; confirm against the trainer.
learning_params = {
    # optimisation
    "batch_size": 64, "learning_rate": 1e-4, "learning_decay_rate": 0.7, "gamma": 0.65,
    # replay sampling and run length
    "pos_mem_pref": 0.3, "n_steps": 50_000,
    # periodic events
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 1_000_000,
    # positive-memory preference adaptation
    "pos_mem_pref_adapt": False, "freq_pos_mem_pref_adapt": 6_000_000, "pos_mem_pref_adapt_step": 0.04,
    # NOTE(review): two of these values are larger than n_steps (50_000) —
    # presumably they are step counts and would never be reached; confirm.
    "progress_saves": [10_000, 100_000, 200_000],
    # target network
    "freq_target_update": 5_000, "soft_target_update": True, "tau": 0.01,
}

# Network configuration handed to the trainer.
model_params = {
    "type": "default",
    "double_learning": False,
    "net_params": {"item_size": 768, "hidden_size": 4096, "state_size": 768},
}

In [17]:
# Fixed seed, forwarded to the trainer (the recorded output logs "setting seed: 7").
seed = 7

# Build the REINFORCE trainer; in addition to the usual arguments it receives
# the episodic replay-memory path via the ep_rm_path keyword.
trainer = TrainerREINFORCE(
    model_name,
    device,
    pos_replay_memory_path,
    neg_replay_memory_path,
    encoder_params,
    learning_params,
    model_params,
    seed=seed,
    ep_rm_path=ep_rm_path,
)

[INFO] setting seed: 7
[INFO] device: cuda
[INFO] preparing directory c:\workbench\developer\drlnrs\models\REINFORCE-n-m
[INFO] writing config files to directory
[INFO] preparing episodic data and sampler
[DONE] trainer initialized


In [18]:
# Prepare the trainee; the recorded output reports the number of trainable
# actor parameters.
trainer.set_trainee()

[INFO] number of trainable actor parameters: 48252930


In [19]:
# Start REINFORCE training; the recorded output logs the initial learning rate,
# a progress bar over 50000 steps, and running rec/hit-rate diagnostics.
trainer.train_REINFORCE()

[INFO] initial learning rate: 0.000100


  0%|          | 0/50000 [00:00<?, ?it/s]

running rec rate: 0.0
running hit rate: 0.0
102
items recommended 0
items actually clicked 2
clicked items recommended 0
tensor([[0.5070, 0.4930],
        [0.5070, 0.4930],
        [0.5072, 0.4928],
        [0.5064, 0.4936],
        [0.5067, 0.4933],
        [0.5066, 0.4934],
        [0.5064, 0.4936],
        [0.5063, 0.4937],
        [0.5065, 0.4935],
        [0.5066, 0.4934],
        [0.5065, 0.4935],
        [0.5067, 0.4933],
        [0.5071, 0.4929],
        [0.5071, 0.4929],
        [0.5072, 0.4928],
        [0.5070, 0.4930],
        [0.5070, 0.4930],
        [0.5063, 0.4937],
        [0.5068, 0.4932],
        [0.5067, 0.4933],
        [0.5068, 0.4932],
        [0.5071, 0.4929],
        [0.5068, 0.4932],
        [0.5068, 0.4932],
        [0.5072, 0.4928],
        [0.5066, 0.4934],
        [0.5068, 0.4932],
        [0.5073, 0.4927],
        [0.5061, 0.4939],
        [0.5064, 0.4936],
        [0.5068, 0.4932],
        [0.5064, 0.4936],
        [0.5069, 0.4931],
        [0.5062, 0.49

# C51

In [None]:
# Run identifier for the C51 experiment.
model_name = "c51"
# Use CUDA when torch reports it as available, otherwise the CPU; echo the choice.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Passed positionally to the trainer constructor in the following cell.
to_embed = "title_and_abstract"
# Positive / negative replay-memory files under constants.TRAIN_PATH.
pos_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "positive_samples.ftr")
neg_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "negative_samples.ftr")

# NOTE(review): these keys ("method", "embedding_size") differ from the
# encoder_params built in the notebook's setup cell ("history_enc_method",
# "news_embedding_size", ...) — confirm which schema the trainer expects.
encoder_params = {
    "method": "stack", "weighted": True,
    "alpha": 0.999,  # Ignored, if weighted == False
    "history_max_len": 15, "embedding_size": 768,
}
# Training hyper-parameters handed to the trainer.
learning_params = {
    "learning_rate": 1e-4, "learning_decay_rate": 0.65, "gamma": 0.8,
    "pos_mem_pref": 0.5, "n_steps": 1_000_000,
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 500_000,
    "freq_target_update": 500, "soft_target_update": False, "tau": 0.005,
}
# Network sizes handed to the trainer.
model_params = {"state_size": 12288, "item_size": 768, "hidden_size": 2048}

In [None]:
# Build the trainer for the C51 run from the configuration cell above.
# NOTE(review): `Trainer` is not imported in the notebook's import cell (only
# the per-algorithm Trainer* classes are), and this call passes an extra
# positional `to_embed` argument that the SAC/REINFORCE cells do not — this
# looks like an older API; confirm the intended class and signature.
trainer = Trainer(
    model_name,
    device,
    to_embed,
    pos_replay_memory_path,
    neg_replay_memory_path,
    encoder_params,
    learning_params,
    model_params,
)

In [None]:
# Prepare the C51 trainee on the trainer built in the previous cell.
trainer.set_trainee_C51()

In [None]:
# Start C51 training.
# NOTE(review): the positional arguments are presumably a batch size (64) and a
# boolean option — confirm against the trainer's signature.
trainer.train_C51(64, True)

# QR-DQN

In [None]:
# Run identifier for the QR-DQN experiment.
model_name = "qr-dqn-test"
# Use CUDA when torch reports it as available, otherwise the CPU; echo the choice.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Passed positionally to the trainer constructor in the following cell.
to_embed = "title_and_abstract"
# Positive / negative replay-memory files under constants.TRAIN_PATH.
pos_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "positive_samples.ftr")
neg_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "negative_samples.ftr")

# NOTE(review): these keys ("method", "embedding_size") differ from the
# encoder_params built in the notebook's setup cell ("history_enc_method",
# "news_embedding_size", ...) — confirm which schema the trainer expects.
encoder_params = {
    "method": "mean", "weighted": True,
    "alpha": 0.999,  # Ignored, if weighted == False
    "history_max_len": 20, "embedding_size": 768,
}
# Training hyper-parameters handed to the trainer.
learning_params = {
    "learning_rate": 1e-4, "learning_decay_rate": 0.65, "gamma": 0.8,
    "pos_mem_pref": 0.5, "n_steps": 4_000_000,
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 1_000_000,
    "freq_target_update": 500, "soft_target_update": False, "tau": 0.005,
}
# Network sizes handed to the trainer.
model_params = {"state_size": 768, "item_size": 768, "hidden_size": 2048}

In [None]:
# Build the trainer for the QR-DQN run from the configuration cell above.
# NOTE(review): `Trainer` is not imported in the notebook's import cell (only
# the per-algorithm Trainer* classes are), and this call passes an extra
# positional `to_embed` argument that the SAC/REINFORCE cells do not — this
# looks like an older API; confirm the intended class and signature.
trainer = Trainer(
    model_name,
    device,
    to_embed,
    pos_replay_memory_path,
    neg_replay_memory_path,
    encoder_params,
    learning_params,
    model_params,
)

In [None]:
# Prepare the QR-DQN trainee on the trainer built in the previous cell.
trainer.set_trainee_QRDQN()

In [None]:
# Start QR-DQN training.
# NOTE(review): the positional arguments are presumably a batch size (128) and
# a boolean option — confirm against the trainer's signature.
trainer.train_QRDQN(128, True)

# IQN

In [None]:
# Run identifier for the IQN experiment.
model_name = "iqn"
# Use CUDA when torch reports it as available, otherwise the CPU; echo the choice.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Passed positionally to the trainer constructor in the following cell.
to_embed = "title_and_abstract"
# Positive / negative replay-memory files under constants.TRAIN_PATH.
pos_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "positive_samples.ftr")
neg_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "negative_samples.ftr")

# NOTE(review): these keys ("method", "embedding_size") differ from the
# encoder_params built in the notebook's setup cell ("history_enc_method",
# "news_embedding_size", ...) — confirm which schema the trainer expects.
encoder_params = {
    "method": "mean", "weighted": True,
    "alpha": 0.999,  # Ignored, if weighted == False
    "history_max_len": 20, "embedding_size": 768,
}
# Training hyper-parameters handed to the trainer.
learning_params = {
    "learning_rate": 1e-4, "learning_decay_rate": 0.75, "gamma": 0.8,
    "pos_mem_pref": 0.5, "n_steps": 8_000_000,
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 1_000_000,
    "freq_target_update": 500, "soft_target_update": False, "tau": 0.005,
}
# Network sizes handed to the trainer.
model_params = {"state_size": 768, "item_size": 768, "hidden_size": 2048}

In [None]:
# Build the trainer for the IQN run from the configuration cell above.
# NOTE(review): `Trainer` is not imported in the notebook's import cell (only
# the per-algorithm Trainer* classes are), and this call passes an extra
# positional `to_embed` argument that the SAC/REINFORCE cells do not — this
# looks like an older API; confirm the intended class and signature.
trainer = Trainer(
    model_name,
    device,
    to_embed,
    pos_replay_memory_path,
    neg_replay_memory_path,
    encoder_params,
    learning_params,
    model_params,
)

In [None]:
# Prepare the IQN trainee on the trainer built in the previous cell.
trainer.set_trainee_IQN()

In [None]:
# Start IQN training.
# NOTE(review): the positional arguments are presumably a batch size (64) and a
# boolean option — confirm against the trainer's signature.
trainer.train_IQN(64, True)

# FQF

In [None]:
# Run identifier for this experiment.
# NOTE(review): the import cell names the matching trainer `TrainerFQF`, so
# "fpf" presumably refers to FQF — confirm before renaming any saved-model paths.
model_name = "fpf"
# Use CUDA when torch reports it as available, otherwise the CPU; echo the choice.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Passed positionally to the trainer constructor in the following cell.
to_embed = "title_and_abstract"
# Positive / negative replay-memory files under constants.TRAIN_PATH.
pos_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "positive_samples.ftr")
neg_replay_memory_path = os.path.join(constants.TRAIN_PATH, "replay_memory", "negative_samples.ftr")

# NOTE(review): these keys ("method", "embedding_size") differ from the
# encoder_params built in the notebook's setup cell ("history_enc_method",
# "news_embedding_size", ...) — confirm which schema the trainer expects.
encoder_params = {
    "method": "mean", "weighted": True,
    "alpha": 0.999,  # Ignored, if weighted == False
    "history_max_len": 20, "embedding_size": 768,
}
# Training hyper-parameters handed to the trainer.
learning_params = {
    "learning_rate": 1e-4, "learning_decay_rate": 0.65, "gamma": 0.8,
    "pos_mem_pref": 0.5, "n_steps": 2_000_000,
    "freq_lr_schedule": 1_000_000, "freq_checkpoint_save": 1_000_000,
    "freq_target_update": 500, "soft_target_update": False, "tau": 0.005,
}
# Network sizes handed to the trainer.
model_params = {"state_size": 768, "item_size": 768, "hidden_size": 2048}

In [None]:
# Build the trainer for the "fpf" run from the configuration cell above.
# NOTE(review): `Trainer` is not imported in the notebook's import cell (only
# the per-algorithm Trainer* classes are), and this call passes an extra
# positional `to_embed` argument that the SAC/REINFORCE cells do not — this
# looks like an older API; confirm the intended class and signature.
trainer = Trainer(
    model_name,
    device,
    to_embed,
    pos_replay_memory_path,
    neg_replay_memory_path,
    encoder_params,
    learning_params,
    model_params,
)

In [None]:
# Prepare the trainee for the "fpf" run on the trainer built in the previous cell.
trainer.set_trainee_FPF()

In [None]:
# Start training for the "fpf" run.
# NOTE(review): the positional arguments are presumably a batch size (32) and a
# boolean option — confirm against the trainer's signature.
trainer.train_FPF(32, True)