In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import torch

from src import constants
from src.rl.trainers.trainer_dqn import TrainerDQN
from src.rl.trainers.trainer_c51 import TrainerC51
from src.rl.trainers.trainer_qr import TrainerQR
from src.rl.trainers.trainer_iqn import TrainerIQN
from src.rl.trainers.trainer_fqf import TrainerFQF

In [2]:
# Enable IPython's autoreload extension in mode 2, so edited modules (e.g. the
# `src.rl.trainers` classes) are re-imported before each cell executes.
%load_ext autoreload
%autoreload 2

In [4]:
# Shared run setup: compute device, replay-memory files and encoder configuration.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Positive and negative samples are stored side by side in the replay-memory folder.
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

# Every selectable news-encoding element mapped to its embedding-map file.
embedding_map_paths = {
    element: os.path.join(constants.BASE_EMB_PATH, file_name)
    for element, file_name in (
        ("title", "title_emb_map.pt"),
        ("abstract", "abstract_emb_map.pt"),
        ("title_and_abstract", "title_and_abstract_emb_map.pt"),
        ("category", "category_1hot_map.pt"),
        ("sub_category", "sub_category_emb_map.pt"),
        ("all", "all_emb_map.pt"),
        ("features", "no_ts_features_map.pt"),
    )
}

# Only the elements listed here are handed to the encoder.
news_enc_elements = ["title_and_abstract"]
encoder_params = dict(
    embeddings_map_paths=dict(
        (element, embedding_map_paths[element]) for element in news_enc_elements
    ),
    news_enc_elements=news_enc_elements,
    news_embedding_size=768,
    history_enc_method="mean",
    weighted=True,
    alpha=0.99,  # Ignored, if weighted == False
    history_max_len=None,
)

In [5]:
# Run name plus learning and network settings for the "DQN-n-m-99w" experiment.
model_name = "DQN-n-m-99w"

learning_params = dict(
    batch_size=64,
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.65,
    pos_mem_pref=0.3,
    n_steps=6_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    pos_mem_pref_adapt=False,
    freq_pos_mem_pref_adapt=6_000_000,
    pos_mem_pref_adapt_step=0.04,
    progress_saves=[10_000, 100_000, 200_000],
    freq_target_update=5_000,
    soft_target_update=False,
    tau=0.01,
)

model_params = dict(
    type="default",
    double_learning=False,
    net_params=dict(
        news_emb_layers=False,
        norm=False,
        item_size=768,
        hidden_size=4096,
        state_item_join_size=1536,
    ),
)

In [6]:
# Build the DQN trainer for the configured run with a fixed seed. According to this
# cell's log output, construction sets the seed, reports the device, prepares the
# model directory named after `model_name`, writes config files and prepares the
# data and samplers.
seed = 7
trainer = TrainerDQN(
    model_name, device,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params,
    seed=seed
)

[INFO] setting seed: 7
[INFO] device: cuda
[INFO] preparing directory c:\workbench\developer\drlnrs\models\DQN-n-m-99w
[INFO] writing config files to directory
[INFO] preparing data and samplers


In [6]:
# Set up the trainee and start training. The captured log shows the trainable
# parameter count and initial learning rate, followed by checkpoint saves with
# example Q values.
trainer.set_trainee()
trainer.train()

[INFO] number of trainable DQN parameters: 31992321
[INFO] initial learning rate: 0.000100


  0%|          | 0/93750 [00:00<?, ?it/s]

[INFO] saving model checkpoint 1
[INFO] example Q values: 
tensor([-0.5408, -0.5742, -0.3827, -0.2989, -0.3819, -0.3104, -0.3998, -0.4309,
        -0.5216, -0.5583, -0.3476, -0.6435, -0.5549, -0.4170, -0.6313, -0.3208,
        -0.6606, -0.4331, -0.3294, -0.4411, -0.3937, -0.3033, -0.3954, -0.5241,
        -0.5324, -0.4486, -0.3906, -0.3982, -0.5798, -0.5457, -0.3273, -0.6060,
        -0.5211, -0.3531, -0.4378, -0.3169, -0.3353, -0.3851, -0.6677, -0.4151,
        -0.3415, -0.5415, -0.5735, -0.5324, -0.5582, -0.2292, -0.3489, -0.3001,
        -0.4246, -0.5395, -0.3994, -0.7239, -0.6472, -0.4442, -0.3387, -0.5758,
        -0.2581, -0.4851, -0.4196, -0.4571, -0.3442, -0.6308, -0.2390, -0.4362],
       device='cuda:0', grad_fn=<SqueezeBackward1>)
[INFO] saving model checkpoint 2
[INFO] example Q values: 
tensor([-0.1409, -0.1676, -0.3412, -0.5495,  0.0119, -0.1155, -0.5147, -0.7255,
        -0.2593, -0.0627,  1.4096, -0.5600, -0.3133,  0.9325, -0.5591, -0.3196,
        -0.2651, -0.1227, -0.

In [None]:
# Build a QR trainer with seed 42 from whatever configuration is currently in the kernel.
# NOTE(review): if run after the DQN configuration cell, this reuses the DQN run's
# `model_name` ("DQN-n-m-99w") and `model_params`. The DQN trainer's log shows a
# directory named after `model_name` being prepared and config files written there,
# so this may overwrite that run's files — confirm and give the QR run its own
# name and parameters.
seed = 42
trainer = TrainerQR(
    model_name, device,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params,
    seed=seed
)

In [None]:
# Set up the trainee on the current `trainer` and start training.
trainer.set_trainee()
trainer.train()

# SAC

In [None]:
# SAC experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "sac"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=2_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=True,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the SAC run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell (only the per-algorithm Trainer* classes are), so this raises NameError on a
# fresh kernel — confirm which class/module is intended.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the SAC trainee on the current `trainer` (what gets built is defined in `set_trainee_SAC`, not visible here).
trainer.set_trainee_SAC()

In [None]:
# Run SAC training. NOTE(review): the positional 128 is presumably the batch size — confirm against `train_SAC`.
trainer.train_SAC(128, neg_rewards=True)

# TD3


In [None]:
# TD3 experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "td3-real"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=4_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=True,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    action_size=768,
    a_hidden_size=2048,
    c_hidden_size=2048,
    tanh=True,
)

In [None]:
# Build the trainer for the TD3 run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell, so this raises NameError on a fresh kernel — confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the TD3 trainee on the current `trainer` (details live in `set_trainee_TD3`, not visible here).
trainer.set_trainee_TD3()

In [None]:
# Run TD3 training without the LSTM option. NOTE(review): the positional 16 is presumably the batch size — confirm against `train_td3`.
trainer.train_td3(16, neg_rewards=True, lstm=False)

# DQN Dueling

In [None]:
# Run name plus learning and network settings for the dueling-DQN experiment.
model_name = "dqn-dueling-emb-with-feat-1M"

learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.8,
    pos_mem_pref=0.7,
    n_steps=1_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    action_size=768,
    hidden_size=4096,
)

In [None]:
# Build the trainer for the dueling-DQN run.
# NOTE(review): `Trainer_DQN` is not imported in this notebook (the import cell
# provides `TrainerDQN`), so this raises NameError on a fresh kernel — confirm
# whether `TrainerDQN` or an older class is intended.
trainer = Trainer_DQN(
    model_name, device,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare a trainee of type "dueling" with n_actions=2 on the current `trainer`.
trainer.set_trainee(type="dueling", n_actions=2)

In [None]:
# Start training with a batch size of 32.
trainer.train(batch_size=32)

In [None]:
# Flatten the values held in `a` into a one-column frame ("point") for the histogram cells.
# NOTE(review): `a` is never assigned in this notebook, so this cell only works with
# leftover kernel state; other cells call `a.mean(dim=1)`, so it is presumably a
# torch tensor — confirm and define it explicitly.
# Tensors are detached and moved to the CPU first, because NumPy cannot convert
# tensors that require grad or live on the GPU; anything else is converted
# element-wise exactly as before.
samples = np.array(a.detach().cpu() if torch.is_tensor(a) else list(a))
points = pd.DataFrame(samples.reshape(-1, 1), columns=["point"])

In [None]:
# Probability-normalised histogram of the flattened values in `points["point"]`,
# restricted to [-0.5, 0.5] with an x tick every 0.1.
# `px` comes from the notebook's import cell instead of a per-cell import.
fig = px.histogram(
    points,
    x="point",
    range_x=[-0.5, 0.5],
    nbins=200,
    histnorm='probability',
    title="Embeddings Distribution",
    width=750, height=500,
    # TODO template
)
fig.update_xaxes(dtick=0.1)
fig.show()

In [None]:
# Probability-normalised histogram of `points["point"]` over [-0.5, 0.5].
# NOTE(review): this cell is an exact copy of the preceding histogram cell —
# consider deleting one of them.
# `px` comes from the notebook's import cell instead of a per-cell import.
fig = px.histogram(
    points,
    x="point",
    range_x=[-0.5, 0.5],
    nbins=200,
    histnorm='probability',
    title="Embeddings Distribution",
    width=750, height=500,
    # TODO template
)
fig.update_xaxes(dtick=0.1)
fig.show()

In [None]:
# Same histogram of `points["point"]`, but over the wider x range [-2, 2].
# `px` comes from the notebook's import cell instead of a per-cell import.
fig = px.histogram(
    points,
    x="point",
    range_x=[-2, 2],
    nbins=200,
    histnorm='probability',
    title="Embeddings Distribution",
    width=750, height=500,
    # TODO template
)
fig.update_xaxes(dtick=0.1)
fig.show()

# DQN

In [None]:
# Run name plus learning and network settings for the soft-target-update DQN experiment.
model_name = "DQN-n-m-nonorm-xxlh-pmp30-g65-stu1"

learning_params = dict(
    batch_size=64,
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.65,
    pos_mem_pref=0.3,
    n_steps=6_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    progress_saves=[10000, 100000, 200000],
    freq_target_update=64,
    soft_target_update=True,
    tau=0.01,
)

model_params = dict(
    type="default",
    n_actions=1,
    double_learning=False,
    net_params=dict(
        news_emb_layers=False,
        norm=False,
        item_size=768,
        hidden_size=4096,
        state_item_join_size=1536,
    ),
)

In [None]:
# Train the current DQN configuration twice, once per random seed. Afterwards `seed`
# and `trainer` refer to the last run (seed 42).
# NOTE(review): both runs share the same `model_name` — confirm the trainer keeps
# their output files apart.
for seed in (7, 42):
    trainer = TrainerDQN(
        model_name, device,
        pos_replay_memory_path, neg_replay_memory_path,
        encoder_params, learning_params, model_params,
        seed=seed
    )
    trainer.set_trainee()
    trainer.train()

In [None]:
# Set up the trainee again on the current `trainer` and train it.
# NOTE(review): the preceding cell already calls set_trainee()/train() for each seed,
# so in a top-to-bottom run this repeats the seed-42 training — confirm it is intended.
trainer.set_trainee()
trainer.train()

In [None]:
# Run name plus learning and network settings for the double-learning DQN experiment.
model_name = "DDQN-n-stu1"

learning_params = dict(
    batch_size=64,
    learning_rate=1e-4,
    learning_decay_rate=0.7,
    gamma=0.65,
    pos_mem_pref=0.3,
    n_steps=6_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    progress_saves=[10000, 100000, 200000],
    freq_target_update=64,
    soft_target_update=True,
    tau=0.01,
)

model_params = dict(
    type="default",
    n_actions=1,
    double_learning=True,
    net_params=dict(
        news_emb_layers=False,
        norm=False,
        item_size=768,
        hidden_size=4096,
        state_item_join_size=1536,
    ),
)

In [None]:
# Build a DQN trainer with seed 42 from the current configuration, set up the
# trainee and train it in one go.
seed = 42
trainer = TrainerDQN(
    model_name, device,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params,
    seed=seed
)
trainer.set_trainee()
trainer.train()

## Number of Trainable Model Parameters
- DQN: 31992321
- DQN Dueling: 40908035

# DQN-1M-product

In [None]:
# DQN experiment configuration with a stacked, weighted history encoding.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "dqn-stack-weighted"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="stack",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=15,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=1,
    gamma=0.7,
    pos_mem_pref=0.75,
    n_steps=1_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=250_000,
    freq_target_update=300,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=12288,
    action_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the stacked-history DQN run and prepare its DQN trainee.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell, so this raises NameError on a fresh kernel — confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)
trainer.set_trainee_dqn()

In [None]:
# Run DQN training on the current `trainer`.
trainer.train_dqn()

# DDPG

In [None]:
# DDPG (LSTM) experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "ddpg-LSTM-4M"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="stack",
    weighted=False,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=2,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=5e-4,
    learning_decay_rate=0.65,
    gamma=0.7,
    pos_mem_pref=0.8,
    n_steps=4_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=300,
    soft_target_update=True,
    tau=0.005,
)
model_params = dict(
    state_size=2304,
    action_size=768,
    a_hidden_size=2048,
    c_hidden_size=2048,
)

In [None]:
# Build the trainer for the DDPG run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell, so this raises NameError on a fresh kernel — confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the DDPG trainee with the LSTM option enabled.
trainer.set_trainee_ddpg(lstm=True)

In [None]:
# Run DDPG-LSTM training. NOTE(review): the positional 128 is presumably the batch size — confirm against `train_ddpg_lstm`.
trainer.train_ddpg_lstm(128)

In [None]:
# Summary statistics of `a`: the mean of its dim=1 means, the mean of its dim=1
# standard deviations, and its overall maximum and minimum.
# NOTE(review): `a` is not assigned anywhere in this notebook — it must come from
# leftover kernel state; define it explicitly before this cell.
print(a.mean(dim=1).mean())
print(a.std(dim=1).mean())
print(a.max())
print(a.min())

In [None]:
# Same summary statistics for `pa`: mean of dim=1 means, mean of dim=1 standard
# deviations, overall maximum and minimum.
# NOTE(review): `pa` is not assigned anywhere in this notebook — it must come from
# leftover kernel state; define it explicitly before this cell.
print(pa.mean(dim=1).mean())
print(pa.std(dim=1).mean())
print(pa.max())
print(pa.min())

In [None]:
# Flatten the tensor `a` (detached and copied to the CPU) into a single "point" column
# and show its descriptive statistics.
# pandas and plotly are provided by the notebook's import cell; the per-cell imports
# (including an unused numpy import) were dropped.
# NOTE(review): `a` is not assigned anywhere in this notebook — define it explicitly.
points = pd.DataFrame(a.detach().cpu().numpy().reshape(-1, 1), columns=["point"])
points.describe()

In [None]:
# Probability-normalised histogram of `points["point"]` over [-0.5, 0.5],
# with an x tick every 0.1. `update_xaxes` returns the figure, so it is chained.
fig = px.histogram(
    points,
    x="point",
    title="Embeddings Distribution",
    histnorm="probability",
    nbins=200,
    range_x=[-0.5, 0.5],
    width=750,
    height=500,
    # TODO template
).update_xaxes(dtick=0.1)
fig.show()

# REINFORCE

In [None]:
# REINFORCE experiment configuration, including the episodic replay-memory file.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "reinforce-batched"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)
rm_episodic_path = os.path.join(
    constants.TRAIN_PATH, "replay_memory_episodic", "replay_memory_episodic.ftr"
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-6,
    learning_decay_rate=0.6,
    gamma=0.9,
    pos_mem_pref=0.5,
    n_steps=1_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=250_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the REINFORCE run; the episodic replay-memory path is passed
# as an extra trailing argument.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell, so this raises NameError on a fresh kernel — confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params, rm_episodic_path
)

In [None]:
# Prepare the REINFORCE trainee on the current `trainer`.
trainer.set_trainee_REINFORCE()

In [None]:
# Run REINFORCE training. NOTE(review): the positional 32 is presumably the batch size — confirm against `train_REINFORCE`.
trainer.train_REINFORCE(32)

# C51

In [None]:
# C51 experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "c51"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="stack",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=15,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.65,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=1_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=500_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=12288,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the C51 run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell (which does provide `TrainerC51`), so this raises NameError on a fresh kernel —
# confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the C51 trainee on the current `trainer`.
trainer.set_trainee_C51()

In [None]:
# Run C51 training. NOTE(review): the positional arguments are presumably batch size (64) and a negative-rewards flag (True) — confirm against `train_C51`.
trainer.train_C51(64, True)

# QR-DQN

In [None]:
# QR-DQN experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "qr-dqn-test"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.65,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=4_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the QR-DQN run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell (which does provide `TrainerQR`), so this raises NameError on a fresh kernel —
# confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the QR-DQN trainee on the current `trainer`.
trainer.set_trainee_QRDQN()

In [None]:
# Run QR-DQN training. NOTE(review): the positional arguments are presumably batch size (128) and a negative-rewards flag (True) — confirm against `train_QRDQN`.
trainer.train_QRDQN(128, True)

# IQN

In [None]:
# IQN experiment configuration.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "iqn"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.75,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=8_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the IQN run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell (which does provide `TrainerIQN`), so this raises NameError on a fresh kernel —
# confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the IQN trainee on the current `trainer`.
trainer.set_trainee_IQN()

In [None]:
# Run IQN training. NOTE(review): the positional arguments are presumably batch size (64) and a negative-rewards flag (True) — confirm against `train_IQN`.
trainer.train_IQN(64, True)

# FQF (spelled "FPF" in the code below)

In [None]:
# Configuration for the "fpf" experiment.
# Reassigns the notebook-wide `device`, replay-memory paths and `*_params` names.
model_name = "fpf"
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

to_embed = "title_and_abstract"
pos_replay_memory_path, neg_replay_memory_path = (
    os.path.join(constants.TRAIN_PATH, "replay_memory", file_name)
    for file_name in ("positive_samples.ftr", "negative_samples.ftr")
)

encoder_params = dict(
    method="mean",
    weighted=True,
    alpha=0.999,  # Ignored, if weighted == False
    history_max_len=20,
    embedding_size=768,
)
learning_params = dict(
    learning_rate=1e-4,
    learning_decay_rate=0.65,
    gamma=0.8,
    pos_mem_pref=0.5,
    n_steps=2_000_000,
    freq_lr_schedule=1_000_000,
    freq_checkpoint_save=1_000_000,
    freq_target_update=500,
    soft_target_update=False,
    tau=0.005,
)
model_params = dict(
    state_size=768,
    item_size=768,
    hidden_size=2048,
)

In [None]:
# Build the trainer for the "fpf" run from the configuration cell above.
# NOTE(review): `Trainer` is not among the names imported in this notebook's import
# cell (which provides `TrainerFQF`), so this raises NameError on a fresh kernel —
# confirm the intended class/module.
trainer = Trainer(
    model_name, device, to_embed,
    pos_replay_memory_path, neg_replay_memory_path,
    encoder_params, learning_params, model_params
)

In [None]:
# Prepare the FPF trainee on the current `trainer`.
trainer.set_trainee_FPF()

In [None]:
# Run FPF training. NOTE(review): the positional arguments are presumably batch size (32) and a negative-rewards flag (True) — confirm against `train_FPF`.
trainer.train_FPF(32, True)