In [None]:
from ppo_agent import PPOAgent
import gymnasium as gym
import torch
from agent_configs import PPOConfig, PPOActorConfig, PPOCriticConfig
from game_configs import CartPoleConfig

# Environment the agent interacts with (rendering mode: rgb_array).
env = gym.make('CartPole-v1', render_mode='rgb_array')

# Top-level PPO hyperparameters; passed to PPOConfig below.
# NOTE(review): no random seed is set anywhere in this cell — confirm whether
# PPOAgent / PPOConfig seed internally before relying on reproducible runs.
config_dict = dict(
    activation='tanh',
    clip_param=0.2,
    kernel_initializer='orthogonal',
    # TODO: NORMALIZATION?
    discount_factor=0.99,
    gae_lambda=0.95,
    critic_dense_layers=[64],
    actor_dense_layers=[64],
    # TODO: REWARD CLIPPING
    steps_per_epoch=512,
    train_policy_iterations=4,
    train_value_iterations=4,
    target_kl=0.02,
    entropy_coefficient=0.01,
    num_minibatches=4,
    loss_function=None,
)

# Optimizer settings for the actor network. Kept as its own dict (separate
# from the critic's) so either side can be tuned independently.
actor_config_dict = dict(
    optimizer=torch.optim.Adam,
    learning_rate=2.5e-4,
    adam_epsilon=1e-7,
    clipnorm=0.5,
    loss_function=None,
)

# Optimizer settings for the critic network (currently the same values as
# the actor's).
critic_config_dict = dict(
    optimizer=torch.optim.Adam,
    learning_rate=2.5e-4,
    adam_epsilon=1e-7,
    clipnorm=0.5,
    loss_function=None,
)

# Each banner is printed immediately before the matching config object is
# constructed, so anything the constructor prints appears under its banner.
print("Actor Config")
actor_config = PPOActorConfig(actor_config_dict)
print("Critic Config")
critic_config = PPOCriticConfig(critic_config_dict)

print("PPO Config")
config = PPOConfig(
    config_dict,
    CartPoleConfig(),
    actor_config=actor_config,
    critic_config=critic_config,
)

agent = PPOAgent(env, config=config, name='PPOAgent')


In [None]:
# Debug dump: for every parameter of the actor, then of the critic, print the
# parameter itself, its .grad, its .grad_fn and its .requires_grad flag.
# NOTE(review): .grad is presumably None until a backward pass has run —
# confirm against when this cell is executed relative to training.
for network_name in ("actor", "critic"):
    # Look the sub-network up lazily so the actor is fully printed before the
    # critic attribute is touched, matching a plain actor-then-critic dump.
    network = getattr(agent.model, network_name)
    for param in network.parameters():
        print(param)
        print(param.grad)
        print(param.grad_fn)
        print(param.requires_grad)

In [None]:
# Optional smoke test, left disabled: reset the environment, print the
# initial state, then print the first element of what
# agent.select_action(state) returns.
# state, _ = env.reset()
# print(state)
# print(agent.select_action(state)[0])

# Call the agent's train() method; its behavior is defined in ppo_agent
# (not shown in this notebook).
agent.train()