In [1]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys

# Add the 'nets' folder to the system path to import the necessary modules
sys.path.append('./nets')

from DDPGAgent import DDPGAgent, DDPGAgentConfig
from Memory import Memory, MemoryConfig

2024-07-11 06:03:27.747152: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-11 06:03:27.755732: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-11 06:03:27.766485: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-11 06:03:27.766510: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-11 06:03:27.773863: I tensorflow/core/platform/cpu_feature_gua

In [2]:
def save_frames_as_gif(frames, path='./', filename='gym_animation.gif'):
    import imageio
    imageio.mimsave(os.path.join(path, filename), frames, fps=30)

def plot_learning_curve(scores, filename):
    x = [i+1 for i in range(len(scores))]
    plt.plot(x, scores)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig(filename)
    plt.close()

In [3]:
def train_agent(agent, env, num_games, scores_plot_file):
    scores = []
    best_avg_score = -np.inf
    
    for t in range(num_games):
        state, _ = env.reset()
        terminated, truncated = False, False
        score = 0
        step = 0
        agent.noise.reset()
        while not (terminated or truncated):
            step += 1
            action = agent.act(state)
            action = np.argmax(action)  # Convert continuous action to discrete action
            next_state, reward, terminated, truncated, info = env.step(action)
            terminal = terminated or truncated
            agent.memory.store(state, action, reward, next_state, terminal)
            agent.learn()
            score += reward 
            state = next_state
        scores.append(score)
        
        avg_score = np.mean(scores[-100:])
        print(f"game {t}, steps {step}, score {score:.2f}, avg_score {avg_score:.2f}")
        if avg_score > best_avg_score:
            agent.save_models()
            best_avg_score = avg_score

    plot_learning_curve(scores, scores_plot_file)

def test_agent(agent, env, final_landing_file):
    agent.load_models()
    
    frames = []
    terminated, truncated = False, False
    score = 0
    state, _ = env.reset()
    step = 0
    agent.noise.reset()
    while not (terminated or truncated):
        step += 1
        action = agent.act(state)
        action = np.argmax(action)  # Convert continuous action to discrete action
        next_state, reward, terminated, truncated, info = env.step(action)
        terminal = terminated or truncated
        score += reward 
        state = next_state
        frames.append(env.render())

    print(f"score {score:.2f}")      
    save_frames_as_gif(frames, final_landing_file)

In [4]:
def main(mode="train"):
    env_name = "Acrobot-v1"
    env = gym.make(env_name)
    num_games = 1000
    a_lr = 0.0001
    c_lr = 0.001
    gamma = 0.99
    tau = 0.001
    input_size = env.observation_space.shape[0]
    fcl1_size = 400
    fcl2_size = 300
    actions_num = env.action_space.n  # Note the change to .n for discrete actions
    memory_size = 1000000
    batch_size = 64
    
    file_name = f"DDPG_{env_name}_{a_lr}_{c_lr}_{num_games}"
    scores_plot_file = f"./plots/{file_name}.png"
    final_landing_file = f"./plots/{file_name}.gif"
    actor_file_name = f"Actor_DDPG_{env_name}_{a_lr}_{num_games}"
    critic_file_name = f"Critic_DDPG_{env_name}_{c_lr}_{num_games}"
    oa_mf = f"./models/Online_{actor_file_name}.h5"
    oc_mf = f"./models/Online_{critic_file_name}.h5"
    ta_mf = f"./models/Target_{actor_file_name}.h5"
    tc_mf = f"./models/Target_{critic_file_name}.h5"
    
    agent_config = DDPGAgentConfig(
        actor_lr=a_lr,
        critic_lr=c_lr,
        gamma=gamma,
        tau=tau,
        input_dim=input_size,
        fc1_units=fcl1_size,
        fc2_units=fcl2_size,
        action_dim=actions_num,
        memory_size=memory_size,
        batch_size=batch_size,
        actor_model_file=oa_mf,
        critic_model_file=oc_mf,
        target_actor_model_file=ta_mf,
        target_critic_model_file=tc_mf
    )
    
    agent = DDPGAgent(agent_config)
    
    if mode == "train":
        train_agent(agent, env, num_games, scores_plot_file)
    elif mode == "test":
        test_agent(agent, env, final_landing_file)

# Run the main function
main(mode="train")  # Change to "test" for testing

  if not isinstance(terminated, (bool, np.bool8)):


states: (64, 6), actions: (64, 3, 3), rewards: (64,), next_states: (64, 6), dones: (64,)


1. The `call()` method of your layer may be crashing. Try to `__call__()` the layer eagerly on some test input first to see if it works. E.g. `x = np.random.random((3, 4)); y = layer(x)`
2. If the `call()` method is correct, then you may need to implement the `def build(self, input_shape)` method on your layer. It should create all variables used by the layer (e.g. by calling `layer.build()` on all its children layers).
Exception encountered: ''Dimensions must be equal, but are 64 and 3 for '{{node Add}} = AddV2[T=DT_FLOAT](layer_normalization_5_1/add_2, dense_8_1/Add)' with input shapes: [64,300], [64,3,300].''
2024-07-11 06:03:31.041692: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes
2024-07-11 06:03:31.041729: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: Exception encountered when calling CriticNet.call().

[1m{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:AddV2] name: [0m

Arguments received by CriticNet.call():
  • inputs=['tf.Tensor(shape=(64, 6), dtype=float32)', 'tf.Tensor(shape=(64, 3, 3), dtype=float32)']

In [1]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys

# Add the 'nets' folder to the system path to import the necessary modules
sys.path.append('./nets')

from DDPGAgent import DDPGAgent, DDPGAgentConfig
from Memory import Memory, MemoryConfig

# Utility functions
def save_frames_as_gif(frames, path='./', filename='gym_animation.gif'):
    import imageio
    imageio.mimsave(os.path.join(path, filename), frames, fps=30)

def plot_learning_curve(scores, filename):
    x = [i+1 for i in range(len(scores))]
    plt.plot(x, scores)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig(filename)
    plt.close()

def train_agent(agent, env, num_games, scores_plot_file):
    scores = []
    best_avg_score = -np.inf
    
    for t in range(num_games):
        state, _ = env.reset()
        terminated, truncated = False, False
        score = 0
        step = 0
        agent.noise.reset()
        while not (terminated or truncated):
            step += 1
            action = agent.act(state)
            next_state, reward, terminated, truncated, info = env.step(action)
            terminal = terminated or truncated
            agent.memory.store(state, action, reward, next_state, terminal)
            agent.learn()
            score += reward 
            state = next_state
        scores.append(score)
        
        avg_score = np.mean(scores[-100:])
        print(f"game {t}, steps {step}, score {score:.2f}, avg_score {avg_score:.2f}")
        if avg_score > best_avg_score:
            agent.save_models()
            best_avg_score = avg_score

    plot_learning_curve(scores, scores_plot_file)

def test_agent(agent, env, final_landing_file):
    agent.load_models()
    
    frames = []
    terminated, truncated = False, False
    score = 0
    state, _ = env.reset()
    step = 0
    agent.noise.reset()
    while not (terminated or truncated):
        step += 1
        action = agent.act(state)
        next_state, reward, terminated, truncated, info = env.step(action)
        terminal = terminated or truncated
        score += reward 
        state = next_state
        frames.append(env.render())

    print(f"score {score:.2f}")      
    save_frames_as_gif(frames, final_landing_file)            

def main(mode="train"):
    env_name = "Acrobot-v1"
    env = gym.make(env_name)
    num_games = 1000
    a_lr = 0.0001
    c_lr = 0.001
    gamma = 0.99
    tau = 0.001
    input_size = env.observation_space.shape[0]
    fcl1_size = 400
    fcl2_size = 300
    actions_num = env.action_space.n  # Note the change to .n for discrete actions
    memory_size = 1000000
    batch_size = 64
    
    file_name = f"DDPG_{env_name}_{a_lr}_{c_lr}_{num_games}"
    scores_plot_file = f"./plots/{file_name}.png"
    final_landing_file = f"./plots/{file_name}.gif"
    actor_file_name = f"Actor_DDPG_{env_name}_{a_lr}_{num_games}"
    critic_file_name = f"Critic_DDPG_{env_name}_{c_lr}_{num_games}"
    oa_mf = f"./models/Online_{actor_file_name}.h5"
    oc_mf = f"./models/Online_{critic_file_name}.h5"
    ta_mf = f"./models/Target_{actor_file_name}.h5"
    tc_mf = f"./models/Target_{critic_file_name}.h5"
    
    agent_config = DDPGAgentConfig(
        actor_lr=a_lr,
        critic_lr=c_lr,
        gamma=gamma,
        tau=tau,
        input_dim=input_size,
        fc1_units=fcl1_size,
        fc2_units=fcl2_size,
        action_dim=actions_num,
        memory_size=memory_size,
        batch_size=batch_size,
        actor_model_file=oa_mf,
        critic_model_file=oc_mf,
        target_actor_model_file=ta_mf,
        target_critic_model_file=tc_mf
    )
    
    agent = DDPGAgent(agent_config)
    
    if mode == "train":
        train_agent(agent, env, num_games, scores_plot_file)
    elif mode == "test":
        test_agent(agent, env, final_landing_file)

# Run the main function
main(mode="train")  # Change to "test" for testing


2024-07-11 06:07:09.068508: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-11 06:07:09.075921: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-11 06:07:09.087823: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-11 06:07:09.087850: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-11 06:07:09.094876: I tensorflow/core/platform/cpu_feature_gua

TypeError: only integer scalar arrays can be converted to a scalar index

In [1]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys

# Add the 'nets' folder to the system path to import the necessary modules
sys.path.append('./nets')

from DDPGAgent import DDPGAgent, DDPGAgentConfig
from Memory import Memory, MemoryConfig

# Utility functions
def save_frames_as_gif(frames, path='./', filename='gym_animation.gif'):
    import imageio
    imageio.mimsave(os.path.join(path, filename), frames, fps=30)

def plot_learning_curve(scores, filename):
    x = [i+1 for i in range(len(scores))]
    plt.plot(x, scores)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig(filename)
    plt.close()

def train_agent(agent, env, num_games, scores_plot_file):
    scores = []
    best_avg_score = -np.inf
    
    for t in range(num_games):
        state, _ = env.reset()
        terminated, truncated = False, False
        score = 0
        step = 0
        agent.noise.reset()
        while not (terminated or truncated):
            step += 1
            action = agent.act(state)
            next_state, reward, terminated, truncated, info = env.step(action)
            terminal = terminated or truncated
            agent.memory.store(state, action, reward, next_state, terminal)
            agent.learn()
            score += reward 
            state = next_state
        scores.append(score)
        
        avg_score = np.mean(scores[-100:])
        print(f"game {t}, steps {step}, score {score:.2f}, avg_score {avg_score:.2f}")
        if avg_score > best_avg_score:
            agent.save_models()
            best_avg_score = avg_score

    plot_learning_curve(scores, scores_plot_file)

def test_agent(agent, env, final_landing_file):
    agent.load_models()
    
    frames = []
    terminated, truncated = False, False
    score = 0
    state, _ = env.reset()
    step = 0
    agent.noise.reset()
    while not (terminated or truncated):
        step += 1
        action = agent.act(state)
        next_state, reward, terminated, truncated, info = env.step(action)
        terminal = terminated or truncated
        score += reward 
        state = next_state
        frames.append(env.render())

    print(f"score {score:.2f}")      
    save_frames_as_gif(frames, final_landing_file)            

def main(mode="train"):
    env_name = "Acrobot-v1"
    env = gym.make(env_name)
    num_games = 1000
    a_lr = 0.0001
    c_lr = 0.001
    gamma = 0.99
    tau = 0.001
    input_size = env.observation_space.shape[0]
    fcl1_size = 400
    fcl2_size = 300
    actions_num = env.action_space.n  # Note the change to .n for discrete actions
    memory_size = 1000000
    batch_size = 64
    
    file_name = f"DDPG_{env_name}_{a_lr}_{c_lr}_{num_games}"
    scores_plot_file = f"./plots/{file_name}.png"
    final_landing_file = f"./plots/{file_name}.gif"
    actor_file_name = f"Actor_DDPG_{env_name}_{a_lr}_{num_games}"
    critic_file_name = f"Cric_DDPG_{env_name}_{c_lr}_{num_games}"
    oa_mf = f"./models/Online_{actor_file_name}.h5"
    oc_mf = f"./models/Online_{critic_file_name}.h5"
    ta_mf = f"./models/Target_{actor_file_name}.h5"
    tc_mf = f"./models/Target_{critic_file_name}.h5"
    
    agent_config = DDPGAgentConfig(
        actor_lr=a_lr,
        critic_lr=c_lr,
        gamma=gamma,
        tau=tau,
        input_dim=input_size,
        fc1_units=fcl1_size,
        fc2_units=fcl2_size,
        action_dim=actions_num,
        memory_size=memory_size,
        batch_size=batch_size,
        actor_model_file=oa_mf,
        critic_model_file=oc_mf,
        target_actor_model_file=ta_mf,
        target_critic_model_file=tc_mf
    )
    
    agent = DDPGAgent(agent_config)
    
    if mode == "train":
        train_agent(agent, env, num_games, scores_plot_file)
    elif mode == "test":
        test_agent(agent, env, final_landing_file)

# Run the main function
main(mode="train")  # Change to "test" for testing


2024-07-11 06:11:36.038474: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-11 06:11:36.047074: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-11 06:11:36.058576: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-11 06:11:36.058595: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-11 06:11:36.065493: I tensorflow/core/platform/cpu_feature_gua

states: (64, 6), actions: (64, 3), rewards: (64,), next_states: (64, 6), dones: (64,)
critic_grads: [None, None, None, None, None, None, None, None, None, None, None, None]


ValueError: No gradients provided for any variable.