1. Import dependencies + Create required directories

In [16]:
import gymnasium as gym
from gymnasium.wrappers import GrayscaleObservation, ResizeObservation, FrameStackObservation
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import EvalCallback

In [18]:
import os
log_dir = os.path.join('training', 'logs')
save_dir = os.path.join('training', 'saved_models')

2. Create CarRacing-v3 Environment

In [22]:
ENV_ID = "CarRacing-v3"

def make_env(render_mode=None):
    """
    Input:
        render_mode: Default to None, render_mode="human" to visualize the driving
    Output:
        env (the created CarRacing-v3 environment) with modifications: greyscaled, and scaled down
            (to reduce computation load)
    """
    env = gym.make(ENV_ID, continuous=False, render_mode=render_mode)
    env = GreyscaleObservation(env)
    env = ResizeObservation(env, (84, 84))
    return env

3. Train Model

In [23]:
train_env = DummyVecEnv([lambda: make_env(render_mode=None)])
train_env = VecFrameStack(train_env, n_stack=4)

NameError: name 'GreyscaleObservation' is not defined

In [None]:
eval_env = DummyVecEnv([lambda: make_env(render_mode=None)])
eval_env = VecFrameStack(eval_env, n_stack=4)

In [None]:
# configure our DQN model with parameters
model = DQN(
    policy='CnnPolicy', 
    env=train_env, 
    learning_rate=1e-4, 
    buffer_size=100_000,
    learning_starts=10_000,
    batch_size=128,
    train_freq=4,
    target_update_interval=1_000,
    gamma=0.99,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.1,
    exploration_fraction=0.5,
    tensorboard_log=log_path,
    verbose=1,
)

In [None]:
# eval callback
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=os.path.join('training', 'best_model'),
    log_path=os.path.join('training', 'best_model_logs'),
    eval_freq=10_000,
    n_eval_episodes=5,
    deterministic=True,
    render="human"
)

In [None]:
# Train Model
model.learn(total_timesteps=200_000, callback=eval_callback)

4. Save Model

In [None]:
dqn_path = os.path.join(save_dir, "dqn_carracing")
model.save(dqn_path)

5. Evaluate Model

In [None]:
best_model_path = os.path.join("training", "best_model", "best_model.zip")
best_model = DQN.load(best_model_path)

mean_r, std_r = evaluate_policy(best_model, eval_env, n_eval_episodes=10, render=True)
print(f"Best Model:\nMean Reward: {mean_r:.2f}\tStandard Deviation: {std_r:.2f}")

eval_env.close()
train_env.close()