In [1]:
import gymnasium as gym
from tqdm import tqdm
from RL_practice_stuff import Agent

# Create the LunarLander-v3 environment that the setup and training cells below operate on.
env = gym.make("LunarLander-v3")

In [2]:
# Read the problem dimensions straight from the environment's spaces:
# the length of the observation bounds vector and the number of discrete actions.
state_space_size = env.observation_space.high.shape[0]
action_space_size = env.action_space.n

# First agent instance, built with explicit arguments.
# Note: a later cell rebinds `agent` using the `hyper_parameters` dict.
agent = Agent(
    state_space_size,
    action_space_size,
    batch_size=2,
    train_every_n_iters=2,
)

In [3]:
# Single source of truth for the run configuration. The whole dict is later
# unpacked into `Agent(...)`, and `total_episodes` is also read by the training loop.
hyper_parameters = {
    # Problem dimensions, taken from the environment's spaces.
    "state_space_size": env.observation_space.high.shape[0],
    "action_space_size": env.action_space.n,
    # Agent training settings.
    "batch_size": 2,
    "train_every_n_iters": 2,
    # Number of training episodes to run.
    "total_episodes": 500,
}

In [4]:
# Build the agent from the shared hyper-parameter dict; this rebinds any `agent` created earlier.
agent = Agent(**hyper_parameters)

In [5]:
# Display the keyword arguments retained on the agent as `kwargs`
# (presumably those its constructor does not consume itself — confirm in Agent).
agent.kwargs

{'total_episodes': 500}

In [None]:
# Train the agent for `hyper_parameters['total_episodes']` episodes.
# `reward_full` collects, for every episode, the list of per-step rewards.
reward_full = []
mode = 'train'
agent.start_clock(mode)
try:
    for ep in tqdm(range(hyper_parameters['total_episodes'])):
        state, _ = env.reset()
        terminated, truncated = False, False
        rewards_ep = []
        while not (terminated or truncated):
            action = agent.get_action(state)
            next_state, reward, terminated, truncated, info = env.step(action)
            # The agent is told the episode is done on either termination or truncation.
            agent.step(state, action, reward, next_state, (terminated or truncated))
            rewards_ep.append(reward)

            state = next_state
        # One extra training call at the end of every episode.
        # NOTE(review): `agent.step` presumably also trains every
        # `train_every_n_iters` steps — confirm this additional call is intended.
        agent.train_models()

        reward_full.append(rewards_ep)
finally:
    # Runs even when training raises or is interrupted, so the clock is always
    # stopped and the environment is always released. The clock is stopped
    # before closing the environment so teardown is not counted as training
    # time (same order as the evaluation cell).
    agent.stop_clock(mode)
    env.close()

# The first line scales the recorded total by 1000 and the second divides it by 60.
print(f"Training time (ms): {(agent.tot_time_train * 1000): .2f}")
print(f"Training time (min): {(agent.tot_time_train / 60): .2f}")


In [None]:
# Scratch check of the agent's clock helpers: time an empty 1000-iteration loop
# under the 'test' mode and display the recorded total.
# NOTE(review): the sizes are hard-coded here rather than read from `env`
# (cf. `env.action_space.n`) — confirm 8 and 3 are intended.
test = Agent(8, 3)
test.start_clock('test')
for i in range(1000):
    pass
test.stop_clock('test')
test.tot_time_test

In [None]:
# Display the agent's `training_iter` attribute
# (presumably its count of training iterations so far — confirm in Agent).
agent.training_iter

In [None]:
# Evaluate the agent for a fixed number of episodes, time the run, and save
# the models together with the collected test and training rewards.

# Set to True to create the environment with render_mode='human'.
want_visual = False
if want_visual:
    env = gym.make('LunarLander-v3', render_mode='human')
else:
    env = gym.make('LunarLander-v3')

test_episodes = 10
test_rewards = []
agent.start_clock('test')
try:
    for ep in range(test_episodes):
        state, _ = env.reset()
        terminated, truncated = False, False
        ep_rewards = []
        # The loop condition alone ends the episode; no explicit break is needed.
        while not (terminated or truncated):
            # NOTE(review): this is the same `get_action` call used during
            # training; whether it still explores cannot be told from here.
            action = agent.get_action(state)
            nxt_stp, rwd, terminated, truncated, info = env.step(action)
            ep_rewards.append(rwd)
            state = nxt_stp
        # NOTE(review): only the reward of the final step is recorded, not the
        # episode total — if the episode return is wanted, use sum(ep_rewards).
        test_rewards.append(ep_rewards[-1])
finally:
    # Runs even when evaluation raises or is interrupted, so the clock is
    # stopped and the environment is always released.
    agent.stop_clock('test')
    env.close()

# The recorded total is scaled by 1000, like the "(ms)" line printed after training.
print(f"Testing time: {(agent.tot_time_test * 1000): .2f}")
agent.save_models(
    test_scores=test_rewards,
    reward_full=reward_full
)