In [None]:
! pip install -r requirements.txt

In [None]:
import gymnasium as gym
import os
import matplotlib.pyplot as plt
import json # for dumping debug data
import time # for benchmarking 
from ddqn_tfkeras import SingleQAgent, DoubleQAgent
import numpy as np
import random

# Number of environment steps between calls to agent.learn() inside train_agent
# (the training loop skips step 0 and learns whenever steps % LEARN_EVERY == 0).
LEARN_EVERY = 4
def train_agent(atype='double', n_episodes=2000, load_latest_model=False):
    """Train a DQN TF-Keras agent on the LunarLander-v2 environment.

    Args:
        atype: Which agent class to build: 'double' creates a DoubleQAgent,
            'single' creates a SingleQAgent.
        n_episodes: Number of episodes to run.
        load_latest_model: When True, call agent.load_saved_model() on
            '<atype>_dqn_tfk_model.h5' before training starts.

    Returns:
        The agent instance after the training loop has finished.

    Raises:
        ValueError: If atype is neither 'double' nor 'single'.

    Side effects:
        * Prints a progress line every 5 episodes.
        * Every 25 episodes, saves the model to '<atype>_dqn_tfk_model.h5'
          (the same path load_latest_model reads) and dumps the per-episode
          scores and epsilon history to timestamped JSON files in the
          current working directory.
    """
    # Fail fast: an unknown type used to leave `agent` unbound and only crash
    # later with an unrelated error, after the environment had been created.
    if atype not in ('double', 'single'):
        raise ValueError(
            "Unknown agent type {!r}; expected 'double' or 'single'.".format(atype))

    print("Training a {} DQN TF-Keras agent on {} episodes. Pretrained model = {}".format(atype,n_episodes,load_latest_model))

    # One path for both loading and checkpointing, so "load latest" really
    # picks up the last checkpoint and the two agent types do not overwrite
    # each other's file.
    model_path = '{}_dqn_tfk_model.h5'.format(atype)

    env = gym.make("LunarLander-v2")
    try:
        if atype == 'double':
            agent = DoubleQAgent(gamma=0.99, epsilon=1.0, epsilon_dec=0.995, lr=0.001, mem_size=2000000, batch_size=128, epsilon_end=0.01)
        else:
            # NOTE(review): epsilon starts at 0.01, equal to epsilon_end, unlike
            # the double agent's 1.0 -- presumably tuned for resuming a
            # pretrained model; confirm this is intended for fresh training.
            agent = SingleQAgent(gamma=0.99, epsilon=0.01, epsilon_dec=0.996, lr=0.001, mem_size=2000000, batch_size=128, epsilon_end=0.01)

        if load_latest_model:
            agent.load_saved_model(model_path)
            print('Loaded most recent {} model.'.format(atype))

        scores = []
        eps_history = []
        start = time.time()
        for i in range(n_episodes):
            episodes_done = i + 1
            terminated = False
            truncated = False
            score = 0
            steps = 0
            state = env.reset()[0]
            while not (terminated or truncated):
                action = agent.choose_action(state)
                new_state, reward, terminated, truncated, _ = env.step(action)
                # Only `terminated` is handed to the agent as the done flag;
                # a truncated episode ends the loop but is not marked terminal.
                agent.save(state, action, reward, new_state, terminated)
                state = new_state
                if steps > 0 and steps % LEARN_EVERY == 0:
                    agent.learn()
                steps += 1
                score += reward
            eps_history.append(agent.epsilon)
            scores.append(score)
            # Moving average over the last 100 episodes (the old slice covered 101).
            avg_score = np.mean(scores[-100:])

            if episodes_done % 5 == 0:
                # Report expected time to finish the training, extrapolated
                # from the number of episodes actually completed so far.
                elapsed = time.time() - start
                print('Episode {} in {:.2f} min. Expected total time for {} episodes: {:.0f} min. [{:.2f}/{:.2f}]'.format(
                    episodes_done,
                    elapsed / 60,
                    n_episodes,
                    (elapsed / episodes_done) * n_episodes / 60,
                    score,
                    avg_score))

            if episodes_done % 25 == 0:
                # Save the model every N-th step just in case
                agent.save_model(model_path)
                # Single timestamp so the two dump files always share a suffix.
                stamp = int(time.time())
                with open("dqn_tfk_model_scores_{}.json".format(stamp), "w") as fp:
                    json.dump(scores, fp)
                with open("dqn_tfk_model_history_{}.json".format(stamp), "w") as fp:
                    json.dump(eps_history, fp)
    finally:
        # Release the environment even if training is interrupted or fails.
        env.close()

    return agent
    
# Run 76 episodes with the single-DQN agent, loading its saved model first.
train_agent(
    atype='single',
    n_episodes=76,
    load_latest_model=True,
)



Training a single DQN TF-Keras agent on 76 episodes. Pretrained model = True


2023-02-06 23:59:57.099806: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  super().__init__(name, **kwargs)


Loaded most recent single model.


In [3]:
# NOTE(review): this whole cell is disabled -- every line below is commented out.
# Issues to fix before re-enabling it:
#   * `steps` in the "Truncated game" print is never assigned in this cell, and
#     the "{}" placeholder is passed to print() without .format().
#   * `atype` is ignored: a SingleQAgent is always constructed, even though the
#     example call at the bottom passes atype='double'.
# # Visualize the model
# import gymnasium as gym
# import os
# import matplotlib.pyplot as plt
# os.environ["SDL_VIDEODRIVER"] = "dummy"
# from IPython.display import clear_output

# def visualize_model(name, atype='single'):
#     env = gym.make("LunarLander-v2", render_mode="rgb_array")
#     agent = SingleQAgent(gamma=0.99, epsilon=0.0, lr=0.0005, mem_size=1000000, batch_size=64, epsilon_end=0.01)
#     agent.load_saved_model(name)
#     state, info = env.reset(seed=42)
#     for _ in range(5):
#         terminated = False
#         truncated = False
#         while not (terminated or truncated):
#             action = agent.choose_action(state)
#             new_state, reward, terminated, truncated, info = env.step(action)
#             if truncated:
#                 print("Truncated game at {}", steps)
#             state = new_state
#             clear_output(wait=True)
#             plt.imshow( env.render() )
#             plt.show()
#         state = env.reset()[0]
#     env.close()

# visualize_model('double_dqn_model_epoch_50_fc256xfc256.h5', atype='double')

KeyboardInterrupt: 