In [1]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import cv2
import random
import os
from collections import deque
import matplotlib.pyplot as plt
import gym_chrome_dino
from gym_chrome_dino.utils.wrappers import make_dino

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


# Preprocessing

In [2]:
def preprocess_observation(obs):
    """
    Turn one raw RGB frame into the single-channel format fed to the network.

    The frame is converted to grayscale, shrunk to 84x84 using area
    interpolation, cast to float32 and divided by 255 (which maps 8-bit pixel
    values into [0, 1]).

    Args:
        obs: RGB image array accepted by ``cv2.cvtColor`` with
            ``COLOR_RGB2GRAY``; the environment is expected to deliver frames
            of shape (150, 600, 3).

    Returns:
        np.ndarray: float32 array of shape (84, 84).
    """
    target_size = (84, 84)  # standard Atari-style input resolution
    luminance = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    shrunk = cv2.resize(luminance, target_size, interpolation=cv2.INTER_AREA)
    return shrunk.astype(np.float32) / 255.0

# Stack 4 frames
class FrameStack:
    """
    Rolling window over the ``k`` most recent preprocessed frames.

    Both ``reset`` and ``step`` run the raw observation through
    ``preprocess_observation`` and return the current window stacked along a
    new leading axis.
    """

    def __init__(self, k=4):
        """Create an empty window that holds at most ``k`` frames."""
        self.k = k
        self.frames = deque(maxlen=k)

    def _stacked(self):
        """Return the buffered frames as one array, oldest frame first."""
        return np.stack(self.frames, axis=0)

    def reset(self, obs):
        """Start a new episode: fill the whole window with the first frame."""
        first_frame = preprocess_observation(obs)
        self.frames = deque((first_frame for _ in range(self.k)), maxlen=self.k)
        return self._stacked()

    def step(self, obs):
        """Push the newest frame (the deque drops the oldest) and return the window."""
        self.frames.append(preprocess_observation(obs))
        return self._stacked()

# Custom CNN DQN Model (PyTorch)

In [3]:
class DQN(nn.Module):
    """
    Convolutional Q-network: three conv layers followed by a two-layer head.

    The first convolution takes 4 input channels (one per stacked frame). The
    first linear layer expects 64 * 7 * 7 features, which is what the conv
    stack produces for 84x84 inputs (84 -> 20 -> 9 -> 7 per spatial side).

    Args:
        action_dim: number of discrete actions, i.e. the size of the output
            vector of Q-values.
    """

    def __init__(self, action_dim):
        super().__init__()

        # Feature extractor; attribute name and layer order are kept so the
        # state_dict keys stay "conv.0", "conv.2", "conv.4".
        feature_layers = [
            nn.Conv2d(4, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Q-value head: flatten the feature maps, then 512 hidden units.
        flat_features = 64 * 7 * 7
        head_layers = [
            nn.Flatten(),
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, action_dim),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of stacked frames to one Q-value per action."""
        return self.fc(self.conv(x))

## DQN Agent

In [4]:
class DQNAgent:
    """
    Epsilon-greedy DQN agent with an online network and a target network.

    The online network (``q_net``) is trained with Adam; the target network
    (``target_net``) only changes when ``update_target`` copies the online
    weights into it. Exploration is controlled by ``epsilon``, which is
    multiplied by ``eps_decay`` after every ``train_step`` and never drops
    below ``eps_min``.

    Args:
        action_dim: number of discrete actions.
        lr: Adam learning rate for the online network.
        gamma: discount factor used in the TD target.
        epsilon: initial probability of taking a random action.
        eps_min: lower bound for ``epsilon``.
        eps_decay: multiplicative decay applied to ``epsilon`` per train step.
    """

    def __init__(self, action_dim, lr=1e-4, gamma=0.99, epsilon=1.0, eps_min=0.01, eps_decay=0.995):
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = epsilon
        self.eps_min = eps_min
        self.eps_decay = eps_decay
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.q_net = DQN(action_dim).to(self.device)
        self.target_net = DQN(action_dim).to(self.device)
        self.optimizer = optim.Adam(self.q_net.parameters(), lr=lr)
        # Start with both networks holding identical weights.
        self.update_target()

    def update_target(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.q_net.state_dict())

    def _to_tensor(self, values, dtype):
        """Convert a sequence (or array) to a tensor on the agent's device.

        Going through a single ``np.asarray`` call avoids the very slow path
        PyTorch takes when handed a tuple/list of separate ndarrays.
        """
        return torch.as_tensor(np.asarray(values, dtype=dtype), device=self.device)

    def act(self, state):
        """
        Pick an action for a single state using the epsilon-greedy rule.

        Args:
            state: one un-batched state array; a batch axis is added here.

        Returns:
            int: a random action with probability ``epsilon``, otherwise the
            action with the highest online Q-value.
        """
        # Strict '<' so that epsilon == 0 never explores (the draw lies in [0, 1)).
        if np.random.random() < self.epsilon:
            return random.randrange(self.action_dim)
        state = torch.as_tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
        # Inference only: no autograd graph is needed for action selection.
        with torch.no_grad():
            q_vals = self.q_net(state)
        return q_vals.argmax().item()

    def train_step(self, batch, double_dqn=True):
        """
        Run one gradient step on a batch of transitions and decay epsilon.

        Args:
            batch: ``(states, actions, rewards, next_states, dones)``, each a
                sequence with one entry per transition.
            double_dqn: if True, the online network chooses the next action
                and the target network evaluates it; otherwise the target
                network's maximum Q-value is used directly.

        Returns:
            float: the mean-squared TD error of this batch.
        """
        states, actions, rewards, next_states, dones = batch
        states = self._to_tensor(states, np.float32)
        actions = self._to_tensor(actions, np.int64)
        rewards = self._to_tensor(rewards, np.float32)
        next_states = self._to_tensor(next_states, np.float32)
        dones = self._to_tensor(dones, bool)

        # Q(s, a) for the actions that were actually taken.
        curr_q = self.q_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

        # The TD target is a constant with respect to the loss, so build it
        # without tracking gradients.
        with torch.no_grad():
            if double_dqn:
                next_actions = self.q_net(next_states).argmax(1, keepdim=True)
                next_q = self.target_net(next_states).gather(1, next_actions).squeeze(1)
            else:
                next_q = self.target_net(next_states).max(1)[0]
            # Finished transitions contribute no bootstrapped value.
            not_done = (~dones).to(next_q.dtype)
            target_q = rewards + self.gamma * next_q * not_done

        loss = nn.functional.mse_loss(curr_q, target_q)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay exploration, clamped so it cannot undershoot eps_min. An
        # epsilon already at or below eps_min is left untouched.
        if self.epsilon > self.eps_min:
            self.epsilon = max(self.eps_min, self.epsilon * self.eps_decay)

        return loss.item()

# Training loop

In [5]:
def _plot_training_metrics(scores, losses, out_path="dino_dqn_training.png"):
    """Plot episode scores and per-update losses side by side, save to ``out_path``, then show."""
    fig, (ax_scores, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

    ax_scores.plot(scores)
    ax_scores.set(title="Episode Scores", xlabel="Episode", ylabel="Score")

    ax_loss.plot(losses)
    ax_loss.set(title="Training Loss", xlabel="Training Step", ylabel="Loss")

    fig.tight_layout()
    fig.savefig(out_path)
    plt.show()


def train_dino_dqn(
    env_name="ChromeDino-v0",
    total_steps=50_000,
    batch_size=32,
    replay_size=10_000,
    update_freq=1000,
    save_path="dino_dqn.pth",
    render=False
):
    """
    Train a DQN agent on the given environment and return it.

    One environment step is taken per loop iteration; once the replay buffer
    holds at least ``batch_size`` transitions, one gradient step is taken per
    iteration as well. After the loop the online network's weights are saved
    to ``save_path`` and the score/loss curves are written to
    ``dino_dqn_training.png`` and shown.

    Args:
        env_name: environment id passed to ``gym.make``.
        total_steps: number of environment steps to run.
        batch_size: transitions sampled per gradient step.
        replay_size: maximum number of transitions kept in the replay buffer.
        update_freq: the target network is synced every this many steps.
        save_path: file the trained weights are written to.
        render: if True the environment is created with
            ``render_mode="human"``, otherwise with ``render_mode=None``.

    Returns:
        DQNAgent: the trained agent.
    """
    env = gym.make(env_name, render_mode="human" if render else None)
    # Everything after creation runs under try/finally so the environment is
    # released even if training raises or the kernel is interrupted.
    try:
        action_dim = env.action_space.n

        agent = DQNAgent(action_dim)
        replay_buffer = deque(maxlen=replay_size)
        frame_stack = FrameStack()

        scores = []
        losses = []
        score = 0

        obs, _ = env.reset()
        state = frame_stack.reset(obs)

        for step in range(total_steps):
            action = agent.act(state)
            next_obs, reward, terminated, truncated, info = env.step(action)

            next_state = frame_stack.step(next_obs)
            # Store only true termination as the "done" flag: a time-limit
            # truncation is not a terminal state, so the TD target should
            # still bootstrap from next_state in that case.
            replay_buffer.append((state, action, reward, next_state, terminated))
            state = next_state
            score += reward

            # The episode ends on either signal and the environment is reset.
            if terminated or truncated:
                scores.append(score)
                score = 0
                obs, _ = env.reset()
                state = frame_stack.reset(obs)

            # Train if enough samples
            if len(replay_buffer) >= batch_size:
                batch = random.sample(replay_buffer, batch_size)
                # Transpose a list of transitions into per-field tuples.
                states, actions, rewards, next_states, dones = zip(*batch)
                loss = agent.train_step((states, actions, rewards, next_states, dones))
                losses.append(loss)

            # Update target network
            if step % update_freq == 0:
                agent.update_target()

            if step % 1000 == 0:
                avg_score = np.mean(scores[-10:]) if scores else 0
                avg_loss = np.mean(losses[-100:]) if losses else 0
                # \u03b5 is the Greek letter epsilon, written as an escape so
                # the log line does not depend on the file's encoding.
                print(f"Step {step}/{total_steps} | Avg Score (last 10): {avg_score:.2f} | \u03b5: {agent.epsilon:.3f} | Loss: {avg_loss:.4f}")

        # Save model
        torch.save(agent.q_net.state_dict(), save_path)
        print(f"Model saved to {save_path}")

        # Plot metrics
        _plot_training_metrics(scores, losses)

        return agent
    finally:
        env.close()

In [6]:
# train_dino_dqn(total_steps=10_000, render=False)

In [7]:
# Short smoke run with the game window visible.
# NOTE(review): the recorded output of this cell is
# "NameNotFound: Environment `ChromeDinoNoBrowser` doesn't exist." -- this id
# is presumably not registered with Gymnasium; confirm before relying on it.
train_dino_dqn(
    env_name="ChromeDinoNoBrowser-v0",
    render=True,        # env is created with render_mode="human" (live view)
    total_steps=1_000,  # 1k environment steps, far below the 50k default
    batch_size=16,      # half the default batch of 32
    update_freq=500,    # sync the target net every 500 steps (default is 1000)
)

NameNotFound: Environment `ChromeDinoNoBrowser` doesn't exist.

In [8]:
# NOTE(review): this import rebinds the notebook-wide name `gym`, which the
# first cell bound to gymnasium; cells run after this one will see this module
# instead.
import gym
import gym_chrome_dino
from gym_chrome_dino.utils.wrappers import make_dino

# Upper bound on the random-agent demo so the cell finishes on its own instead
# of looping until the kernel is interrupted.
RANDOM_ROLLOUT_STEPS = 1_000

env = gym.make('ChromeDino-v0')
# try/finally guarantees the environment is released even if a step fails or
# the run is interrupted.
try:
    env = make_dino(env, timer=True, frame_stack=True)
    done = True  # forces a reset before the first step
    for _ in range(RANDOM_ROLLOUT_STEPS):
        if done:
            env.reset()
        action = env.action_space.sample()
        # This cell unpacks a 4-tuple from step(), unlike the 5-tuple
        # (terminated/truncated) form used by the training loop.
        observation, reward, done, info = env.step(action)
finally:
    env.close()

SessionNotCreatedException: Message: session not created: This version of ChromeDriver only supports Chrome version 91
Current browser version is 141.0.7390.122 with binary path /usr/sbin/chromium; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
#0 0x564e47af5ee9 <unknown>
