In [1]:
import random

import gymnasium as gym
import numpy as np
import torch

from agent_configs import PPOConfig, PPOActorConfig, PPOCriticConfig
from game_configs import CartPoleConfig
from ppo_agent import PPOAgent

# Fix every RNG up front so that "Restart Kernel -> Run All" gives a comparable
# run: network initialisation and policy sampling go through torch, and the
# recorded warnings show numpy is used during training as well.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

env = gym.make('CartPole-v1', render_mode='rgb_array')
# Seed the environment's own RNG once. Gymnasium keeps using that generator on
# later reset() calls that do not pass a seed, so the discarded reset here is
# only for seeding; the agent resets the environment again when it needs to.
env.reset(seed=SEED)

# Shared PPO hyperparameters. Keys that are not listed here are filled in by
# PPOConfig (reported as "Using default ..." in the cell output).
config_dict = {
    'activation': 'tanh',
    'clip_param': 0.2,
    'kernel_initializer': 'orthogonal',
    # TODO(review): normalization is not configured here -- open question
    # left by the original author.
    'discount_factor': 0.99,
    'gae_lambda': 0.95,
    'critic_dense_layers': [64],
    'actor_dense_layers': [64],
    # TODO(review): reward clipping is not configured here.
    'steps_per_epoch': 512,
    'train_policy_iterations': 4,
    'train_value_iterations': 4,
    'target_kl': 0.02,
    'entropy_coefficient': 0.01,
    'num_minibatches': 4,
    'loss_function': None,
}

# The actor and the critic are optimised with identical settings. Both dicts
# are built from one template so the two cannot drift apart by accident; each
# network still receives its own independent copy.
_network_optimizer_settings = dict(
    optimizer=torch.optim.Adam,
    learning_rate=2.5e-4,
    adam_epsilon=1e-7,
    clipnorm=0.5,
    loss_function=None,
)

actor_config_dict = dict(_network_optimizer_settings)
critic_config_dict = dict(_network_optimizer_settings)

# Build the per-network configs first. Each label is printed immediately before
# its constructor call because the constructors themselves print the settings
# they pick up (the "Using ..." lines in the cell output), so the statement
# order here determines how the output is grouped.
print("Actor Config")
actor_config = PPOActorConfig(actor_config_dict)
print("Critic Config")
critic_config = PPOCriticConfig(critic_config_dict)

# Combine the shared PPO settings, a CartPole game config and the two network
# configs. Settings absent from config_dict show up as "Using default ..." in
# the output.
print("PPO Config")
config = PPOConfig(config_dict, CartPoleConfig(), actor_config=actor_config, critic_config=critic_config)

# Create the agent on the environment built above.
# NOTE(review): the name presumably selects the checkpoint directory -- the
# recorded video paths start with checkpoints/PPOAgent/ -- confirm in PPOAgent.
agent = PPOAgent(env, config=config, name='PPOAgent')


Actor Config
Using         adam_epsilon                  : 1e-07
Using         learning_rate                 : 0.00025
Using         clipnorm                      : 0.5
Using         optimizer                     : <class 'torch.optim.adam.Adam'>
Critic Config
Using         adam_epsilon                  : 1e-07
Using         learning_rate                 : 0.00025
Using         clipnorm                      : 0.5
Using         optimizer                     : <class 'torch.optim.adam.Adam'>
PPO Config
Using default save_intermediate_weights     : False
Using default training_steps                : 10000
Using default adam_epsilon                  : 1e-06
Using default learning_rate                 : 0.001
Using default clipnorm                      : 0
Using default optimizer                     : <class 'torch.optim.adam.Adam'>
Using         loss_function                 : None
Using         activation                    : tanh
Using         kernel_initializer            : orthogonal
U

  logger.warn(


In [2]:
# Debug dump of both networks before training: for every parameter tensor,
# print the tensor itself, its gradient, its grad_fn and its requires_grad
# flag -- actor first, then critic.
for network_name in ("actor", "critic"):
    network = getattr(agent.model, network_name)
    for param in network.parameters():
        for shown in (param, param.grad, param.grad_fn, param.requires_grad):
            print(shown)

Parameter containing:
tensor([[-0.1283,  0.4498, -0.4212, -0.2205],
        [-0.2200,  0.1541,  0.4448, -0.3712],
        [-0.0764, -0.1693, -0.0078,  0.2326],
        [ 0.2199, -0.4295, -0.3075, -0.4266],
        [-0.1808, -0.4838,  0.2228,  0.4602],
        [-0.1234, -0.3796, -0.4958, -0.2272],
        [-0.2951,  0.2854,  0.1650, -0.4327],
        [-0.1554, -0.1519,  0.1491,  0.3750],
        [ 0.4047, -0.0234,  0.2969,  0.0974],
        [ 0.4054,  0.3630, -0.3681, -0.4211],
        [ 0.3755,  0.3261,  0.1887, -0.1894],
        [ 0.4952,  0.1453,  0.2273,  0.0869],
        [ 0.0439, -0.2210, -0.4143, -0.4081],
        [-0.3972,  0.3859,  0.4173,  0.4554],
        [ 0.1466,  0.0462, -0.2971,  0.4586],
        [-0.2947, -0.4808,  0.1965, -0.1010],
        [ 0.0928,  0.3903,  0.1589, -0.1112],
        [-0.0450, -0.0193,  0.3997, -0.3397],
        [ 0.0360, -0.4342,  0.1875,  0.1877],
        [-0.1962, -0.0089, -0.0884, -0.1251],
        [-0.0569, -0.1631, -0.3724,  0.0545],
        [ 0.

In [3]:
# Optional sanity check, left disabled: reset the environment, print the
# initial state, then print the first element of agent.select_action's result.
# state, _ = env.reset()
# print(state)
# print(agent.select_action(state)[0])
# Run the agent's training loop. In the recorded output this call prints
# "Training Step" markers, KL-divergence diagnostics and critic losses, writes
# videos under checkpoints/PPOAgent/step_<N>/videos/ followed by a score, and
# was stopped manually (KeyboardInterrupt) rather than running to completion.
agent.train()

Training Step:  0
Open AI Spinning Up KL Divergence tensor(1.2885e-06)
37 Implimentation Details KL Divergence tensor(1.4901e-08)
Open AI Spinning Up KL Divergence tensor(-0.0002)
37 Implimentation Details KL Divergence tensor(4.6142e-06)
Open AI Spinning Up KL Divergence tensor(0.0006)
37 Implimentation Details KL Divergence tensor(1.6058e-05)
Open AI Spinning Up KL Divergence tensor(0.0008)
37 Implimentation Details KL Divergence tensor(2.9630e-05)
Open AI Spinning Up KL Divergence tensor(-0.0002)
37 Implimentation Details KL Divergence tensor(5.0755e-05)
Open AI Spinning Up KL Divergence tensor(-0.0011)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(0.0010)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(0.0018)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(-0.0005)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning U

  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Open AI Spinning Up KL Divergence tensor(9.9150e-06)
37 Implimentation Details KL Divergence tensor(1.9238e-08)
Open AI Spinning Up KL Divergence tensor(-0.0006)
37 Implimentation Details KL Divergence tensor(5.0408e-06)
Open AI Spinning Up KL Divergence tensor(0.0007)
37 Implimentation Details KL Divergence tensor(2.1114e-05)
Open AI Spinning Up KL Divergence tensor(0.0002)
37 Implimentation Details KL Divergence tensor(4.0910e-05)
Open AI Spinning Up KL Divergence tensor(-0.0008)
37 Implimentation Details KL Divergence tensor(6.2382e-05)
Open AI Spinning Up KL Divergence tensor(-0.0024)
37 Implimentation Details KL Divergence tensor(8.4252e-05)
Open AI Spinning Up KL Divergence tensor(0.0017)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(0.0006)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(-0.0013)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergenc

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/PPOAgent/step_333/videos/PPOAgent/333/PPOAgent-episode-4.mp4
score:  276.0


  axs[row][col].set_xlim(1, len(values))


Training Step:  334
Open AI Spinning Up KL Divergence tensor(1.0242e-05)
37 Implimentation Details KL Divergence tensor(1.2165e-08)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(3.6285e-07)
Open AI Spinning Up KL Divergence tensor(-0.0002)
37 Implimentation Details KL Divergence tensor(1.2661e-06)
Open AI Spinning Up KL Divergence tensor(-2.3317e-06)
37 Implimentation Details KL Divergence tensor(1.5803e-06)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(1.6463e-06)
Open AI Spinning Up KL Divergence tensor(0.0002)
37 Implimentation Details KL Divergence tensor(1.5456e-06)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(2.5876e-06)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(2.9949e-06)
Open AI Spinning Up KL Divergence tensor(0.0002)
37 Implimentation Details KL Divergence tensor(6.1953e-06

  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Open AI Spinning Up KL Divergence tensor(-0.0029)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(0.0014)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(-0.0049)
37 Implimentation Details KL Divergence tensor(0.0003)
Open AI Spinning Up KL Divergence tensor(0.0003)
37 Implimentation Details KL Divergence tensor(0.0002)
critic loss tensor(299.6158, grad_fn=<MeanBackward0>)
True
critic loss tensor(322.1039, grad_fn=<MeanBackward0>)
True
critic loss tensor(296.3108, grad_fn=<MeanBackward0>)
True
critic loss tensor(304.2780, grad_fn=<MeanBackward0>)
True
critic loss tensor(299.9787, grad_fn=<MeanBackward0>)
True
critic loss tensor(322.3073, grad_fn=<MeanBackward0>)
True
critic loss tensor(296.4745, grad_fn=<MeanBackward0>)
True
critic loss tensor(304.3424, grad_fn=<MeanBackward0>)
True
critic loss tensor(299.9640, grad_fn=<MeanBackward0>)
True
critic loss tensor(322.2390, grad_fn=<MeanBackward0

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/PPOAgent/step_666/videos/PPOAgent/666/PPOAgent-episode-9.mp4
score:  271.0
Training Step:  667
Open AI Spinning Up KL Divergence tensor(-4.7967e-06)
37 Implimentation Details KL Divergence tensor(1.2224e-08)
Open AI Spinning Up KL Divergence tensor(-5.3537e-05)
37 Implimentation Details KL Divergence tensor(4.2462e-08)
Open AI Spinning Up KL Divergence tensor(-4.3931e-05)
37 Implimentation Details KL Divergence tensor(5.5827e-07)
Open AI Spinning Up KL Divergence tensor(-0.0001)
37 Implimentation Details KL Divergence tensor(1.5585e-06)
Open AI Spinning Up KL Divergence tensor(-0.0002)
37 Implimentation Details KL Divergence tensor(1.7165e-06)
Open AI Spinning Up KL Divergence tensor(-0.0001)
37 Implimentation Details KL Divergence tensor(1.6628e-06)
Open AI Spinning Up KL Divergence tensor(-0.0001)
37 Implimentation Details KL Divergence tensor(3.3572e-06)
Open AI Spinning Up KL Divergence tensor(-0.0003)
37 Implimentation Details KL 

  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Open AI Spinning Up KL Divergence tensor(3.2596e-06)
37 Implimentation Details KL Divergence tensor(1.5323e-08)
Open AI Spinning Up KL Divergence tensor(-0.0005)
37 Implimentation Details KL Divergence tensor(4.0566e-06)
Open AI Spinning Up KL Divergence tensor(-0.0004)
37 Implimentation Details KL Divergence tensor(2.3852e-05)
Open AI Spinning Up KL Divergence tensor(0.0008)
37 Implimentation Details KL Divergence tensor(4.0637e-05)
Open AI Spinning Up KL Divergence tensor(7.9257e-05)
37 Implimentation Details KL Divergence tensor(5.5713e-05)
Open AI Spinning Up KL Divergence tensor(-0.0021)
37 Implimentation Details KL Divergence tensor(6.8415e-05)
Open AI Spinning Up KL Divergence tensor(-0.0009)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(0.0015)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(0.0002)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Diver

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/PPOAgent/step_999/videos/PPOAgent/999/PPOAgent-episode-14.mp4
score:  224.0
Training Step:  1000
Open AI Spinning Up KL Divergence tensor(-2.5847e-05)
37 Implimentation Details KL Divergence tensor(1.8656e-08)
Open AI Spinning Up KL Divergence tensor(-0.0003)
37 Implimentation Details KL Divergence tensor(8.9458e-06)
Open AI Spinning Up KL Divergence tensor(-0.0024)
37 Implimentation Details KL Divergence tensor(4.5896e-05)
Open AI Spinning Up KL Divergence tensor(-0.0003)
37 Implimentation Details KL Divergence tensor(6.2329e-05)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(9.0633e-05)
Open AI Spinning Up KL Divergence tensor(-0.0011)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(-0.0050)
37 Implimentation Details KL Divergence tensor(0.0002)
Open AI Spinning Up KL Divergence tensor(-0.0003)
37 Implimentation Details KL Divergence tens

  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Open AI Spinning Up KL Divergence tensor(0.0006)
37 Implimentation Details KL Divergence tensor(8.3198e-06)
Open AI Spinning Up KL Divergence tensor(0.0004)
37 Implimentation Details KL Divergence tensor(2.0913e-05)
Open AI Spinning Up KL Divergence tensor(-0.0005)
37 Implimentation Details KL Divergence tensor(1.7944e-05)
Open AI Spinning Up KL Divergence tensor(-0.0006)
37 Implimentation Details KL Divergence tensor(2.5864e-05)
Open AI Spinning Up KL Divergence tensor(0.0011)
37 Implimentation Details KL Divergence tensor(2.3018e-05)
Open AI Spinning Up KL Divergence tensor(0.0006)
37 Implimentation Details KL Divergence tensor(4.7210e-05)
Open AI Spinning Up KL Divergence tensor(-0.0007)
37 Implimentation Details KL Divergence tensor(3.4840e-05)
Open AI Spinning Up KL Divergence tensor(-0.0008)
37 Implimentation Details KL Divergence tensor(4.5055e-05)
Open AI Spinning Up KL Divergence tensor(0.0014)
37 Implimentation Details KL Divergence tensor(3.6877e-05)
Open AI Spinning Up KL D

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/PPOAgent/step_1332/videos/PPOAgent/1332/PPOAgent-episode-19.mp4
score:  257.0
Training Step:  1333
Open AI Spinning Up KL Divergence tensor(-1.7664e-05)
37 Implimentation Details KL Divergence tensor(1.6706e-08)
Open AI Spinning Up KL Divergence tensor(1.5781e-05)
37 Implimentation Details KL Divergence tensor(6.9372e-07)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(1.8960e-06)
Open AI Spinning Up KL Divergence tensor(-0.0003)
37 Implimentation Details KL Divergence tensor(5.4390e-06)
Open AI Spinning Up KL Divergence tensor(-0.0007)
37 Implimentation Details KL Divergence tensor(1.0628e-05)
Open AI Spinning Up KL Divergence tensor(9.3207e-05)
37 Implimentation Details KL Divergence tensor(1.6823e-05)
Open AI Spinning Up KL Divergence tensor(0.0003)
37 Implimentation Details KL Divergence tensor(1.7278e-05)
Open AI Spinning Up KL Divergence tensor(-0.0007)
37 Implimentation Details KL 

                                                               

Moviepy - Done !
Moviepy - video ready checkpoints/PPOAgent/step_1665/videos/PPOAgent/1665/PPOAgent-episode-24.mp4
score:  500.0
Training Step:  1666
Open AI Spinning Up KL Divergence tensor(3.2731e-06)
37 Implimentation Details KL Divergence tensor(1.6284e-08)
Open AI Spinning Up KL Divergence tensor(0.0001)
37 Implimentation Details KL Divergence tensor(3.5525e-06)
Open AI Spinning Up KL Divergence tensor(0.0002)
37 Implimentation Details KL Divergence tensor(1.7510e-05)
Open AI Spinning Up KL Divergence tensor(-5.8461e-05)
37 Implimentation Details KL Divergence tensor(2.4468e-05)
Open AI Spinning Up KL Divergence tensor(-0.0010)
37 Implimentation Details KL Divergence tensor(4.5425e-05)
Open AI Spinning Up KL Divergence tensor(0.0007)
37 Implimentation Details KL Divergence tensor(7.1442e-05)
Open AI Spinning Up KL Divergence tensor(0.0007)
37 Implimentation Details KL Divergence tensor(0.0001)
Open AI Spinning Up KL Divergence tensor(-4.6275e-05)
37 Implimentation Details KL Diver

KeyboardInterrupt: 