In [1]:
import random
import sys
from pathlib import Path

import gymnasium as gym
import numpy as np
import torch
import torch.optim as optim
from stable_baselines3.common.buffers import ReplayBuffer

# The project-local `src` package is imported relative to '../..', so that
# directory has to be on sys.path before the imports below run.
sys.path.append('../..')

from src.rlmcmc.agent import Actor, QNetwork
from src.rlmcmc.env import RLMHEnvV33
from src.rlmcmc.utils import Args, MCMCAnimation, Toolbox
from src.rlmcmc.learning import LearningDDPGRandomCountClipping

In [2]:
# Build the log target density used by the environment and the animation.
# NOTE(review): the two arguments look like a posterior name and the path to a
# local posteriordb database — confirm against Toolbox.make_log_target_pdf.
posterior_name = "test-multivariant_normal-test-multivariant_normal"
posterior_db_path = "../../posteriordb/posterior_database"
log_p = Toolbox.make_log_target_pdf(posterior_name, posterior_db_path)

In [3]:
# Experiment configuration: start from Args() and override the fields below.
# Fields that are read later but not listed here (e.g. buffer_size, tau, cuda,
# sample_dim, torch_deterministic) keep whatever value Args() gave them.
args = Args()
arg_overrides = {
    "env_id": 'RLMHEnv-v3.3',
    "seed": 1234,
    "log_target_pdf": log_p,
    "total_timesteps": 10_000,
    "exploration_noise": 0.1,
    "batch_size": 128,
    "learning_starts": 1_000,
    "gamma": 0.99,
    "learning_rate": 1e-5,
    "policy_frequency": 2,
}
# Applied in the listed order, one attribute assignment per entry.
for arg_name, arg_value in arg_overrides.items():
    setattr(args, arg_name, arg_value)

# Seed Python's, NumPy's and PyTorch's global RNGs from the one configured seed.
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic

# Run on the GPU only when one is available and the configuration asks for it.
cuda_requested = torch.cuda.is_available() and args.cuda
device = torch.device("cuda" if cuda_requested else "cpu")

def make_vector_env() -> gym.vector.SyncVectorEnv:
    """Build a one-environment ``SyncVectorEnv`` from the current ``args``.

    Two environment instances are built from identical settings in this cell.
    Constructing both through this helper replaces two copy-pasted blocks
    that could silently drift apart.

    Returns:
        A ``gym.vector.SyncVectorEnv`` wrapping the single entry produced by
        ``Toolbox.make_env`` for the configured env id, seed, log target
        density, sample dimension and number of timesteps.
    """
    return gym.vector.SyncVectorEnv(
        [
            Toolbox.make_env(
                env_id=args.env_id,
                seed=args.seed,
                log_target_pdf=args.log_target_pdf,
                sample_dim=args.sample_dim,
                total_timesteps=args.total_timesteps,
            )
        ]
    )


# Two separate instances with the same configuration (including the same seed).
envs = make_vector_env()
predicted_envs = make_vector_env()
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

# Online and target networks, moved to `device` and cast to double precision.
# The construction order (actor, critic, target critic, target actor) is kept
# as-is: weight initialisation presumably draws from the seeded torch RNG, so
# reordering could change the run — TODO confirm.
actor = Actor(envs, device).to(device).double()
qf1 = QNetwork(envs).to(device).double()
qf1_target = QNetwork(envs).to(device).double()
target_actor = Actor(envs, device).to(device).double()

# Each target network starts as an exact copy of its online counterpart.
qf1_target.load_state_dict(qf1.state_dict())
target_actor.load_state_dict(actor.state_dict())

# One Adam optimizer per online network, both at the configured learning rate.
actor_optimizer = optim.Adam(actor.parameters(), lr=args.learning_rate)
q_optimizer = optim.Adam(qf1.parameters(), lr=args.learning_rate)

# Override the observation space's dtype in place before the buffer is built
# from it (presumably so stored observations match the double-precision
# networks — confirm).
envs.single_observation_space.dtype = np.float64
rb = ReplayBuffer(
    buffer_size=args.buffer_size,
    observation_space=envs.single_observation_space,
    action_space=envs.single_action_space,
    device=device,
    handle_timeout_termination=False,
)

In [4]:
# Assemble the learner from the pieces built above; every argument is passed
# by keyword, grouped here by role.
learner_networks = dict(
    actor=actor,
    target_actor=target_actor,
    critic=qf1,
    target_critic=qf1_target,
)
learner_optimizers = dict(
    actor_optimizer=actor_optimizer,
    critic_optimizer=q_optimizer,
)
learner_hyperparameters = dict(
    total_timesteps=args.total_timesteps,
    learning_starts=args.learning_starts,
    batch_size=args.batch_size,
    exploration_noise=args.exploration_noise,
    gamma=args.gamma,
    policy_frequency=args.policy_frequency,
    tau=args.tau,
)
learning = LearningDDPGRandomCountClipping(
    env=envs,
    replay_buffer=rb,
    seed=args.seed,
    device=device,
    **learner_networks,
    **learner_optimizers,
    **learner_hyperparameters,
)

In [5]:
# Run training with gradient clipping switched on and keep the returned object,
# which later cells use for .plot() and .dataframe().
# NOTE(review): the output recorded for this cell ends in
#   AttributeError: 'Adam' object has no attribute 'grad'
# right after a `torch.nn.utils.clip_grad_norm(self.critic.parameters(), 1.0)`
# line, so the run stopped at 0/10000 steps. The failure appears to come from
# the gradient-clipping path inside the learning class, not from this cell —
# confirm and fix it there. The `clip_grad_norm` line may also be PyTorch's
# deprecation warning for the non-underscore function — confirm.
training_func = learning.train(gradient_clipping=True)
# Alternative call without the gradient_clipping argument:
# training_func = learning.train()

  0%|          | 0/10000 [00:00<?, ?it/s]

  torch.nn.utils.clip_grad_norm(self.critic.parameters(), 1.0)


AttributeError: 'Adam' object has no attribute 'grad'

In [None]:
# Display the learner's `count_num_gradient_clipping_actor` attribute
# (presumably the number of times actor gradients were clipped — confirm in the
# learning class).
learning.count_num_gradient_clipping_actor

In [None]:
# Display the learner's `count_num_gradient_clipping_critic` attribute
# (presumably the number of times critic gradients were clipped — confirm in
# the learning class).
learning.count_num_gradient_clipping_critic

In [None]:
# Plot the training run; critic_loss=True presumably adds the critic loss to
# the figure — confirm against the plot() implementation.
training_func.plot(critic_loss=True)

In [None]:
# Collect the training results as a table (used below via .tail() and .to_csv(),
# so presumably a pandas DataFrame — confirm).
df_training = training_func.dataframe()

In [None]:
# Show the final rows of the training table as a quick sanity check.
df_training.tail()

In [None]:
# Persist the training table as CSV without the index column.
training_csv_path = Path('save/data/training.csv')
# to_csv does not create missing parent directories (pandas raises OSError),
# so make sure the output folder exists; this is a no-op when it already does.
training_csv_path.parent.mkdir(parents=True, exist_ok=True)
df_training.to_csv(training_csv_path, index=False)

In [None]:
# Run the trained learner on the separate `predicted_envs` instance and plot
# the result.
# NOTE(review): 5_000 is presumably the number of prediction steps — confirm
# against learning.predict's signature.
predict_func = learning.predict(predicted_envs, 5_000)
predict_func.plot()

In [None]:
# Collect the prediction results as a table; it is written to CSV and passed to
# MCMCAnimation in later cells.
df_predict = predict_func.dataframe()

In [None]:
# Persist the prediction table as CSV without the index column.
predict_csv_path = Path('save/data/predict.csv')
# to_csv does not create missing parent directories (pandas raises OSError),
# so make sure the output folder exists; this is a no-op when it already does.
predict_csv_path.parent.mkdir(parents=True, exist_ok=True)
df_predict.to_csv(predict_csv_path, index=False)

In [None]:
# Set up the animation of the prediction table against the log target density,
# using the same viewing range on both axes.
axis_range = (-5, 5)
mcmc_animation = MCMCAnimation(
    log_target_pdf=log_p,
    dataframe=df_predict,
    xlim=axis_range,
    ylim=axis_range,
)

In [None]:
# Render the animation and write it to disk as an MP4 via the 'ffmpeg' writer.
anim_file_path = "./save/data/MVN01_train.mp4"
# Create the output folder up front so the save does not fail on a fresh
# checkout where it is missing; this is a no-op when it already exists.
Path(anim_file_path).parent.mkdir(parents=True, exist_ok=True)
rendered_animation = mcmc_animation.make()
rendered_animation.save(anim_file_path, writer='ffmpeg')

In [None]:
# Persist the trained learner under "save/model".
# NOTE(review): whether "save/model" is treated as a directory or a file prefix,
# and whether save() creates missing folders, is not visible here — confirm.
learning.save("save/model")