# Training the DQN Model using CNN at 350,000 steps

In [10]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

GAME_NAME = "ALE/Pacman-v5"

env_id = f"{GAME_NAME}"
env = make_atari_env(env_id, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

model = DQN('CnnPolicy', env, verbose=1, tensorboard_log="./atari_dqn_tensorboard/")

TIMESTEPS = 350000
model.learn(total_timesteps=TIMESTEPS)

model_save_path = f"{GAME_NAME}_dqn_model"
model.save(model_save_path)

print("Model trained and saved successfully.")

Using cpu device
Wrapping the env in a VecTransposeImage.
ERROR! Session/line number was not unique in database. History logging moved to new session 747
Logging to ./atari_dqn_tensorboard/DQN_23




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 438      |
|    ep_rew_mean      | 18       |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 234      |
|    time_elapsed     | 0        |
|    total_timesteps  | 101      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 421      |
|    ep_rew_mean      | 17.5     |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 113      |
|    time_elapsed     | 1        |
|    total_timesteps  | 199      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0333   |
|    n_updates        | 24       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean    

# Evaluating the DQN Model
    1. Load in the model
    2. Evaluate the model over multiple episodes
    3. See how many rewards it collected

In [8]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
import matplotlib.pyplot as plt

GAME_NAME = "ALE/Pacman-v5"

env_id = f"{GAME_NAME}"
env = make_atari_env(env_id, n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

model = DQN.load(f"{GAME_NAME}_dqn_model.zip", env=env)

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward} +/- {std_reward}")

def evaluate_game(model, env):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        total_reward += reward
    return total_reward

num_episodes = 50
total_rewards = []

for _ in range(num_episodes):
    episode_reward = evaluate_game(model, env)
    total_rewards.append(episode_reward)

print(f"Average Reward per Game: {np.mean(total_rewards)}")

import numpy as np  

episode_list = np.array([i + 1 for i in range(num_episodes)], dtype='int32')
score = np.array([reward.item() for reward in total_rewards], dtype='int32')

summary = np.column_stack((episode_list,score))

print('evealuation done')
print(summary)

Wrapping the env in a VecTransposeImage.




Mean reward: 177.4 +/- 76.17899973089696
Average Reward per Game: 20.260000228881836
evealuation done
[[ 1 59]
 [ 2  7]
 [ 3 15]
 [ 4  0]
 [ 5 11]
 [ 6 38]
 [ 7 18]
 [ 8  2]
 [ 9 51]
 [10 27]
 [11  0]
 [12  3]
 [13 37]
 [14 35]
 [15  2]
 [16  3]
 [17 59]
 [18 14]
 [19 17]
 [20  0]
 [21 28]
 [22 56]
 [23  0]
 [24  1]
 [25 30]
 [26 37]
 [27  9]
 [28  0]
 [29 53]
 [30 22]
 [31  7]
 [32  1]
 [33 33]
 [34 44]
 [35 11]
 [36  0]
 [37 28]
 [38 27]
 [39  1]
 [40  0]
 [41 56]
 [42 12]
 [43 16]
 [44  0]
 [45 49]
 [46 12]
 [47 10]
 [48  0]
 [49 37]
 [50 35]]
