In [1]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys

# Add the 'nets' folder to the system path to import the necessary modules
sys.path.append('./nets')

from DDPGAgent import DDPGAgent, DDPGAgentConfig
from Memory import Memory, MemoryConfig

2024-07-11 05:46:15.561139: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-11 05:46:15.569861: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-11 05:46:15.580742: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-11 05:46:15.580763: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-11 05:46:15.587570: I tensorflow/core/platform/cpu_feature_gua

In [2]:
def save_frames_as_gif(frames, path='./', filename='gym_animation.gif'):
    """Write a sequence of frames to an animated GIF at 30 fps.

    Args:
        frames: Frames to encode, handed to ``imageio.mimsave`` unchanged.
        path: Directory the GIF is written into.
        filename: Name of the GIF file inside ``path``.
    """
    # Imported lazily so the notebook can still train without imageio installed.
    from imageio import mimsave

    target = os.path.join(path, filename)
    mimsave(target, frames, fps=30)

def plot_learning_curve(scores, filename):
    """Plot per-episode scores against the 1-based episode number and save the figure.

    A dedicated figure is created for the plot, so nothing is drawn onto a
    figure that may still be open elsewhere in the kernel, and the figure is
    closed even if saving fails.

    Args:
        scores: Sequence of episode scores, in episode order.
        filename: Destination passed to ``Figure.savefig``.
    """
    episodes = list(range(1, len(scores) + 1))
    fig, ax = plt.subplots()
    try:
        ax.plot(episodes, scores)
        ax.set_xlabel('Episode')
        ax.set_ylabel('Score')
        fig.savefig(filename)
    finally:
        # Always release the figure; open figures accumulate in the kernel.
        plt.close(fig)

In [4]:
def train_agent(agent, env, num_games, scores_plot_file):
    """Train ``agent`` on ``env`` for ``num_games`` episodes, then plot the scores.

    After every environment step the transition is stored in ``agent.memory``
    and ``agent.learn()`` is called. At the end of each episode the mean score
    of the last (up to) 100 episodes is printed, and the models are saved
    whenever that mean exceeds the best mean seen so far.

    Args:
        agent: Object exposing ``act``, ``learn``, ``save_models``, ``noise.reset``
            and ``memory.store``.
        env: Environment whose ``reset`` returns ``(state, info)`` and whose
            ``step`` returns ``(state, reward, terminated, truncated, info)``.
        num_games: Number of episodes to run.
        scores_plot_file: Path the learning-curve image is written to.
    """
    episode_scores = []
    top_average = -np.inf

    for episode in range(num_games):
        observation, _ = env.reset()
        episode_return = 0
        steps_taken = 0
        agent.noise.reset()

        finished = False
        while not finished:
            steps_taken += 1
            raw_action = agent.act(observation)
            # Convert continuous action to discrete action
            # NOTE(review): the discrete index (not the actor's raw output) is
            # what gets stored in the replay memory below; confirm that this is
            # the action format Memory/DDPGAgent.learn expect.
            chosen_action = np.argmax(raw_action)
            next_observation, reward, terminated, truncated, _info = env.step(chosen_action)
            finished = terminated or truncated
            agent.memory.store(observation, chosen_action, reward, next_observation, finished)
            agent.learn()
            episode_return += reward
            observation = next_observation

        episode_scores.append(episode_return)

        # Running mean over the most recent 100 episodes (fewer early on).
        running_average = np.mean(episode_scores[-100:])
        print(f"game {episode}, steps {steps_taken}, score {episode_return:.2f}, avg_score {running_average:.2f}")
        if running_average > top_average:
            agent.save_models()
            top_average = running_average

    plot_learning_curve(episode_scores, scores_plot_file)

def test_agent(agent, env, final_landing_file):
    """Run one episode with the saved models and export the rendered frames as a GIF.

    Args:
        agent: Object exposing ``load_models``, ``act`` and ``noise.reset``.
        env: Environment whose ``reset`` returns ``(state, info)``, whose ``step``
            returns ``(state, reward, terminated, truncated, info)`` and whose
            ``render`` returns an image frame (or ``None`` when no frame is
            available).
        final_landing_file: Full path (directory + file name) of the GIF to write.
    """
    agent.load_models()

    frames = []
    terminated, truncated = False, False
    score = 0
    state, _ = env.reset()
    agent.noise.reset()
    while not (terminated or truncated):
        action = agent.act(state)
        action = np.argmax(action)  # Convert continuous action to discrete action
        state, reward, terminated, truncated, _ = env.step(action)
        score += reward
        frame = env.render()
        # render() yields None when the env was not created with an
        # array-producing render mode; such frames cannot be encoded.
        if frame is not None:
            frames.append(frame)

    print(f"score {score:.2f}")
    if not frames:
        print("no frames were rendered; create the environment with "
              "render_mode='rgb_array' to record a GIF")
        return

    # save_frames_as_gif takes the directory and the file name separately;
    # passing the full file path as `path` would append the default file name
    # to it and target '<file>.gif/gym_animation.gif'.
    gif_dir, gif_name = os.path.split(final_landing_file)
    save_frames_as_gif(frames, path=gif_dir, filename=gif_name)

In [6]:
def main(mode="train"):
    """Build the Acrobot-v1 environment and a DDPG agent, then train or test it.

    Args:
        mode: ``"train"`` to run ``train_agent`` or ``"test"`` to run
            ``test_agent``.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"test"``.
    """
    # Fail fast instead of building the env and agent and then doing nothing.
    if mode not in ("train", "test"):
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    env_name = "Acrobot-v1"
    # Frames are only returned by env.render() when the environment is created
    # with an array-producing render mode, which test mode needs for the GIF.
    make_kwargs = {"render_mode": "rgb_array"} if mode == "test" else {}
    env = gym.make(env_name, **make_kwargs)
    try:
        num_games = 1000
        a_lr = 0.0001
        c_lr = 0.001
        gamma = 0.99
        tau = 0.001
        input_size = env.observation_space.shape[0]
        fcl1_size = 400
        fcl2_size = 300
        actions_num = env.action_space.n  # Note the change to .n for discrete actions
        memory_size = 1000000
        batch_size = 64

        file_name = f"DDPG_{env_name}_{a_lr}_{c_lr}_{num_games}"
        scores_plot_file = f"./plots/{file_name}.png"
        final_landing_file = f"./plots/{file_name}.gif"
        actor_file_name = f"Actor_DDPG_{env_name}_{a_lr}_{num_games}"
        critic_file_name = f"Critic_DDPG_{env_name}_{c_lr}_{num_games}"
        oa_mf = f"./models/Online_{actor_file_name}.h5"
        oc_mf = f"./models/Online_{critic_file_name}.h5"
        ta_mf = f"./models/Target_{actor_file_name}.h5"
        tc_mf = f"./models/Target_{critic_file_name}.h5"

        # Make sure the output directories exist before anything is written.
        for output_file in (scores_plot_file, oa_mf):
            os.makedirs(os.path.dirname(output_file), exist_ok=True)

        agent_config = DDPGAgentConfig(
            actor_lr=a_lr,
            critic_lr=c_lr,
            gamma=gamma,
            tau=tau,
            input_dim=input_size,
            fc1_units=fcl1_size,
            fc2_units=fcl2_size,
            action_dim=actions_num,
            memory_size=memory_size,
            batch_size=batch_size,
            actor_model_file=oa_mf,
            critic_model_file=oc_mf,
            target_actor_model_file=ta_mf,
            target_critic_model_file=tc_mf
        )

        agent = DDPGAgent(agent_config)

        if mode == "train":
            train_agent(agent, env, num_games, scores_plot_file)
        else:
            test_agent(agent, env, final_landing_file)
    finally:
        # Release the environment's resources even if training/testing fails.
        env.close()

# Notebook entry point: train the agent. Pass mode="test" instead to load the
# saved models, run a single episode and export it as a GIF.
main(mode="train")  # Change to "test" for testing

  if not isinstance(terminated, (bool, np.bool8)):


states: (64, 6), actions: (64, 3, 3), rewards: (64,), next_states: (64, 6), dones: (64,)


TypeError: missing a required argument: 'action'