In [181]:
import gymnasium as gym
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
from collections import deque
from gymnasium.wrappers import FrameStack
import random
from tqdm import tqdm

In [182]:
# --- Run configuration -------------------------------------------------------
env_name = "ALE/Frogger-v5"
num_episodes = 100
max_steps_per_episode = 1_000

# --- Optimisation ------------------------------------------------------------
# Suggested learning rates: 0.0001 for 10000 episodes, 0.001 for 1000 episodes,
# 0.01 for 100 episodes.
learning_rate = 0.01
batch_size = 64
gamma = 0.99  # Discount factor applied to the bootstrapped next-state value

# --- Epsilon-greedy exploration ----------------------------------------------
epsilon = 1.0  # Initial exploration rate (probability of a random action)
epsilon_min = 0.01  # Exploration rate is not decayed further once it reaches this
# Multiplicative decay applied once per episode: 0.995 for 1000 episodes,
# 0.98 for 100 episodes.
epsilon_decay = 0.98

# --- Experience replay -------------------------------------------------------
# Bounded buffer of transitions; the oldest entries are evicted once it is full.
memory = deque(maxlen=10_000)

In [183]:
def build_model(input_shape, num_actions):
    """Build and compile the convolutional Q-network.

    Args:
        input_shape: Shape of a single observation without the batch axis,
            with the stacked frames on the first axis (channels first).
        num_actions: Number of discrete actions; the final layer emits one
            linear Q-value per action.

    Returns:
        A compiled ``Sequential`` model using Adam (with the module-level
        ``learning_rate``) and a mean-squared-error loss.
    """
    # Observations carry the stacked frames on axis 0, so every convolution
    # has to be told the channel axis comes first. Setting it only on the
    # first layer leaves the later layers on the Keras default
    # ("channels_last"), which makes them convolve across the feature-map
    # axis and treat a spatial axis as channels.
    data_format = "channels_first"
    model = Sequential([ # Each person should change the amount of Conv2D/Dense layers, as well as the filter amount and kernel_size/strides
        Conv2D(32, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=input_shape, data_format=data_format),
        Conv2D(64, kernel_size=(4, 4), strides=(2, 2), activation='relu', data_format=data_format),
        Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu', data_format=data_format),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(num_actions, activation='linear')  # Raw Q-values, hence no squashing
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

In [184]:
# Instantiate the environment, requesting grayscale observations
env = gym.make(id=env_name, obs_type="grayscale")
# Size of the discrete action set; passed to build_model as the Q-network output width
num_actions = env.action_space.n

In [185]:
# Wrap the environment so each observation is a stack of 4 frames.
# The isinstance guard keeps this cell idempotent: re-running it without
# re-creating `env` would otherwise wrap an already-stacked environment a
# second time, adding another leading axis and breaking the 3-way unpack below.
if not isinstance(env, FrameStack):
    env = FrameStack(env, 4)
# The stacked observation shape is (4, 210, 160), so `width` receives 210 and
# `height` receives 160. NOTE(review): if the frames are rows x columns these
# two names are swapped; they are kept because the model-building cell passes
# the tuple through in this same order, so the network input shape is correct.
frames, width, height = env.observation_space.shape

In [186]:
# Display the stacked observation shape; the same values are used as the Q-network input shape
env.observation_space.shape

(4, 210, 160)

In [187]:
# Instantiate the Q-network for the stacked-frame observation shape and the
# environment's action count
model = build_model(
    input_shape=(frames, width, height),
    num_actions=num_actions,
)

In [188]:
# Training loop
# Each episode: act epsilon-greedily for up to `max_steps_per_episode` steps
# while recording transitions, then run one experience-replay update on a
# random minibatch and decay epsilon.
# The whole loop sits in try/finally so the environment is closed even when
# the cell is interrupted (e.g. KeyboardInterrupt from the notebook UI).
try:
    for episode in tqdm(range(num_episodes), desc='Episode Progress', position=0):
        state, _ = env.reset()
        episode_reward = 0
        done = False

        for step in range(max_steps_per_episode):
            if np.random.rand() <= epsilon:
                action = env.action_space.sample()  # Exploration
            else:
                q_values = model.predict(np.array([state]), verbose=0)[0]
                action = int(np.argmax(q_values))  # Exploitation

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            episode_reward += reward

            # Store `terminated`, not `done`: a truncated (time-limited) episode
            # has not reached a terminal state, so its target must still
            # bootstrap from the next state's value.
            memory.append((state, action, reward, next_state, terminated))

            state = next_state

            if done:
                break

        # Experience replay
        if len(memory) >= batch_size:
            minibatch = random.sample(memory, batch_size)
            # Distinct names on purpose: unpacking into `state`/`done` here
            # would clobber the episode variables above.
            mb_states, mb_actions, mb_rewards, mb_next_states, mb_terminal = zip(*minibatch)

            # np.asarray materialises each stored observation before stacking
            # the minibatch along a new leading batch axis.
            states_arr = np.stack([np.asarray(s) for s in mb_states])
            next_states_arr = np.stack([np.asarray(s) for s in mb_next_states])
            actions_arr = np.asarray(mb_actions, dtype=np.int64)
            rewards_arr = np.asarray(mb_rewards, dtype=np.float32)
            terminal_arr = np.asarray(mb_terminal, dtype=np.float32)

            # Two batched forward passes and one fit replace the original
            # per-sample predict/predict/fit (3 * batch_size Keras calls).
            next_q = model.predict(next_states_arr, verbose=0)
            targets = model.predict(states_arr, verbose=0)

            # Bellman target for the action actually taken; the bootstrap term
            # is zeroed for terminal transitions. Other actions keep the
            # network's own prediction, so they contribute no loss.
            bootstrap = gamma * np.max(next_q, axis=1) * (1.0 - terminal_arr)
            targets[np.arange(batch_size), actions_arr] = rewards_arr + bootstrap

            model.fit(states_arr, targets, batch_size=batch_size, epochs=1, verbose=0)

        # Decay exploration rate, never dropping below the configured floor
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        print(f"\rPrevious episode: Episode: {episode + 1}/{num_episodes}, Total Reward: {episode_reward}, Epsilon: {epsilon:.4f}", end="")
finally:
    env.close()

Episode Progress:   1%|          | 1/100 [00:22<37:17, 22.60s/it]

Previous episode: Episode: 1/100, Total Reward: 10.0, Epsilon: 0.9800

Episode Progress:   2%|▏         | 2/100 [00:45<37:24, 22.90s/it]

Previous episode: Episode: 2/100, Total Reward: 12.0, Epsilon: 0.9604

Episode Progress:   3%|▎         | 3/100 [01:08<36:36, 22.64s/it]

Previous episode: Episode: 3/100, Total Reward: 8.0, Epsilon: 0.9412

Episode Progress:   4%|▍         | 4/100 [01:32<37:23, 23.37s/it]

Previous episode: Episode: 4/100, Total Reward: 11.0, Epsilon: 0.9224

Episode Progress:   5%|▌         | 5/100 [01:58<38:28, 24.30s/it]

Previous episode: Episode: 5/100, Total Reward: 9.0, Epsilon: 0.9039

Episode Progress:   6%|▌         | 6/100 [02:21<37:29, 23.93s/it]

Previous episode: Episode: 6/100, Total Reward: 8.0, Epsilon: 0.8858

Episode Progress:   7%|▋         | 7/100 [02:45<37:10, 23.99s/it]

Previous episode: Episode: 7/100, Total Reward: 9.0, Epsilon: 0.8681

Episode Progress:   8%|▊         | 8/100 [03:13<38:32, 25.14s/it]

Previous episode: Episode: 8/100, Total Reward: 11.0, Epsilon: 0.8508

Episode Progress:   9%|▉         | 9/100 [03:41<39:35, 26.11s/it]

Previous episode: Episode: 9/100, Total Reward: 11.0, Epsilon: 0.8337

Episode Progress:  10%|█         | 10/100 [04:09<40:05, 26.73s/it]

Previous episode: Episode: 10/100, Total Reward: 5.0, Epsilon: 0.8171

Episode Progress:  11%|█         | 11/100 [04:36<39:27, 26.60s/it]

Previous episode: Episode: 11/100, Total Reward: 9.0, Epsilon: 0.8007

Episode Progress:  11%|█         | 11/100 [04:40<37:53, 25.54s/it]


KeyboardInterrupt: 

In [None]:
# Persist the trained Q-network weights in HDF5 format.
# The previous name ended in ".hs", which is not a recognised weights extension:
# newer Keras releases reject it, and older ones fall back to a different
# on-disk format. A ".weights.h5" suffix is accepted by both.
# NOTE(review): any code that loads the old 'froggerweights.hs' path must be
# updated to this filename.
model.save_weights('froggerweights.weights.h5')