In [1]:
import gymnasium as gym
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
from collections import deque
from gymnasium.wrappers import FrameStack
import random
from tqdm import tqdm



In [2]:
# Define constants and hyperparameters
num_episodes = 100
max_steps_per_episode = 1000
learning_rate = 0.01 # Use 0.0001 for 10000 episodes, 0.001 for 1000 episodes, and 0.01 for 100 episodes
batch_size = 64
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
epsilon_decay = 0.98 # Use 0.995 for 1000 episodes, 0.98 for 100 episodes
memory = deque(maxlen=10000)  # Experience replay buffer
env_name = "ALE/Frogger-v5"

In [3]:
def build_model(input_shape, num_actions):
    model = Sequential([ # Each person should change the amount of Conv2D/Dense layers, as well as the filter amount and kernel_size/strides
        Conv2D(16, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=input_shape, data_format="channels_first"),
        Conv2D(32, kernel_size=(4, 4), strides=(2, 2), activation='relu'),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(num_actions, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

In [4]:
# Create the environment
env = gym.make(env_name, obs_type='grayscale')
num_actions = env.action_space.n

In [5]:
env = FrameStack(env, 4)
frames, width, height = env.observation_space.shape

In [6]:
env.observation_space.shape

(4, 210, 160)

In [7]:
# Build the DQN model
model = build_model((frames, width, height), num_actions)

  super().__init__(


In [None]:
# Training loop
for episode in tqdm(range(num_episodes), desc='Episode Progress', position=0):
    state, _ = env.reset()
    episode_reward = 0
    done = False

    for step in range(max_steps_per_episode):
        if np.random.rand() <= epsilon:
            action = env.action_space.sample()  # Exploration
        else:
            q_values = model.predict(np.array([state]), verbose=None)[0]
            action = np.argmax(q_values)  # Exploitation

        # Ensure action is within bounds
        action = np.clip(action, 0, num_actions - 1)

        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        episode_reward += reward

        memory.append((state, action, reward, next_state, done))

        state = next_state

        if done:
            break

    # Experience replay
    if len(memory) >= batch_size:
        minibatch = random.sample(memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + gamma * np.amax(model.predict(np.array([next_state]), verbose=None)[0])

            target_f = model.predict(np.array([state]), verbose=None)
            target_f[0][action] = target
            model.fit(np.array([state]), target_f, epochs=1, verbose=None)

    # Decay exploration rate
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

    print(f"\rPrevious episode: Episode: {episode + 1}/{num_episodes}, Total Reward: {episode_reward}, Epsilon: {epsilon:.4f}", end="")

env.close()

Episode Progress:   1%|          | 1/100 [00:21<34:58, 21.20s/it]

Previous episode: Episode: 1/100, Total Reward: 11.0, Epsilon: 0.9800

Episode Progress:   2%|▏         | 2/100 [00:41<33:44, 20.66s/it]

Previous episode: Episode: 2/100, Total Reward: 9.0, Epsilon: 0.9604

Episode Progress:   3%|▎         | 3/100 [01:02<33:26, 20.69s/it]

Previous episode: Episode: 3/100, Total Reward: 6.0, Epsilon: 0.9412

Episode Progress:   4%|▍         | 4/100 [01:22<33:01, 20.64s/it]

Previous episode: Episode: 4/100, Total Reward: 9.0, Epsilon: 0.9224

Episode Progress:   5%|▌         | 5/100 [01:44<33:34, 21.21s/it]

Previous episode: Episode: 5/100, Total Reward: 10.0, Epsilon: 0.9039

Episode Progress:   6%|▌         | 6/100 [02:07<33:41, 21.51s/it]

Previous episode: Episode: 6/100, Total Reward: 10.0, Epsilon: 0.8858

Episode Progress:   7%|▋         | 7/100 [02:30<34:04, 21.98s/it]

Previous episode: Episode: 7/100, Total Reward: 9.0, Epsilon: 0.8681

Episode Progress:   8%|▊         | 8/100 [02:52<33:56, 22.13s/it]

Previous episode: Episode: 8/100, Total Reward: 8.0, Epsilon: 0.8508

Episode Progress:   9%|▉         | 9/100 [03:15<33:49, 22.30s/it]

Previous episode: Episode: 9/100, Total Reward: 11.0, Epsilon: 0.8337

Episode Progress:  10%|█         | 10/100 [03:37<33:36, 22.41s/it]

Previous episode: Episode: 10/100, Total Reward: 8.0, Epsilon: 0.8171

Episode Progress:  11%|█         | 11/100 [04:03<34:55, 23.55s/it]

Previous episode: Episode: 11/100, Total Reward: 10.0, Epsilon: 0.8007

Episode Progress:  12%|█▏        | 12/100 [04:27<34:40, 23.65s/it]

Previous episode: Episode: 12/100, Total Reward: 6.0, Epsilon: 0.7847

Episode Progress:  13%|█▎        | 13/100 [04:56<36:29, 25.17s/it]

Previous episode: Episode: 13/100, Total Reward: 13.0, Epsilon: 0.7690

Episode Progress:  14%|█▍        | 14/100 [05:23<36:54, 25.75s/it]

Previous episode: Episode: 14/100, Total Reward: 8.0, Epsilon: 0.7536

Episode Progress:  15%|█▌        | 15/100 [05:49<36:42, 25.91s/it]

Previous episode: Episode: 15/100, Total Reward: 9.0, Epsilon: 0.7386

Episode Progress:  16%|█▌        | 16/100 [06:16<36:37, 26.16s/it]

Previous episode: Episode: 16/100, Total Reward: 10.0, Epsilon: 0.7238

Episode Progress:  17%|█▋        | 17/100 [06:45<37:30, 27.11s/it]

Previous episode: Episode: 17/100, Total Reward: 9.0, Epsilon: 0.7093

Episode Progress:  18%|█▊        | 18/100 [07:16<38:40, 28.30s/it]

Previous episode: Episode: 18/100, Total Reward: 14.0, Epsilon: 0.6951

Episode Progress:  19%|█▉        | 19/100 [07:48<39:27, 29.23s/it]

Previous episode: Episode: 19/100, Total Reward: 15.0, Epsilon: 0.6812

Episode Progress:  20%|██        | 20/100 [08:18<39:25, 29.57s/it]

Previous episode: Episode: 20/100, Total Reward: 8.0, Epsilon: 0.6676

Episode Progress:  21%|██        | 21/100 [08:46<38:07, 28.96s/it]

Previous episode: Episode: 21/100, Total Reward: 7.0, Epsilon: 0.6543

Episode Progress:  22%|██▏       | 22/100 [09:15<37:42, 29.00s/it]

Previous episode: Episode: 22/100, Total Reward: 9.0, Epsilon: 0.6412

Episode Progress:  23%|██▎       | 23/100 [09:43<36:56, 28.79s/it]

Previous episode: Episode: 23/100, Total Reward: 14.0, Epsilon: 0.6283

Episode Progress:  24%|██▍       | 24/100 [10:16<37:49, 29.86s/it]

Previous episode: Episode: 24/100, Total Reward: 8.0, Epsilon: 0.6158

Episode Progress:  25%|██▌       | 25/100 [10:47<37:49, 30.25s/it]

Previous episode: Episode: 25/100, Total Reward: 11.0, Epsilon: 0.6035

Episode Progress:  26%|██▌       | 26/100 [11:15<36:35, 29.67s/it]

Previous episode: Episode: 26/100, Total Reward: 11.0, Epsilon: 0.5914

Episode Progress:  27%|██▋       | 27/100 [11:46<36:43, 30.19s/it]

Previous episode: Episode: 27/100, Total Reward: 7.0, Epsilon: 0.5796

Episode Progress:  28%|██▊       | 28/100 [12:16<35:58, 29.98s/it]

Previous episode: Episode: 28/100, Total Reward: 6.0, Epsilon: 0.5680

Episode Progress:  29%|██▉       | 29/100 [12:47<35:46, 30.23s/it]

Previous episode: Episode: 29/100, Total Reward: 10.0, Epsilon: 0.5566

Episode Progress:  30%|███       | 30/100 [13:18<35:38, 30.54s/it]

Previous episode: Episode: 30/100, Total Reward: 8.0, Epsilon: 0.5455

Episode Progress:  31%|███       | 31/100 [13:51<35:59, 31.30s/it]

Previous episode: Episode: 31/100, Total Reward: 9.0, Epsilon: 0.5346

Episode Progress:  32%|███▏      | 32/100 [14:24<36:04, 31.84s/it]

Previous episode: Episode: 32/100, Total Reward: 8.0, Epsilon: 0.5239

Episode Progress:  33%|███▎      | 33/100 [14:58<36:18, 32.52s/it]

Previous episode: Episode: 33/100, Total Reward: 16.0, Epsilon: 0.5134

Episode Progress:  34%|███▍      | 34/100 [15:33<36:26, 33.13s/it]

Previous episode: Episode: 34/100, Total Reward: 8.0, Epsilon: 0.5031

Episode Progress:  35%|███▌      | 35/100 [16:06<36:03, 33.29s/it]

Previous episode: Episode: 35/100, Total Reward: 10.0, Epsilon: 0.4931

Episode Progress:  36%|███▌      | 36/100 [16:43<36:29, 34.22s/it]

Previous episode: Episode: 36/100, Total Reward: 8.0, Epsilon: 0.4832

Episode Progress:  37%|███▋      | 37/100 [17:16<35:31, 33.84s/it]

Previous episode: Episode: 37/100, Total Reward: 10.0, Epsilon: 0.4735

Episode Progress:  38%|███▊      | 38/100 [17:54<36:24, 35.23s/it]

Previous episode: Episode: 38/100, Total Reward: 11.0, Epsilon: 0.4641

Episode Progress:  39%|███▉      | 39/100 [18:37<38:07, 37.50s/it]

Previous episode: Episode: 39/100, Total Reward: 12.0, Epsilon: 0.4548

Episode Progress:  40%|████      | 40/100 [19:11<36:28, 36.48s/it]

Previous episode: Episode: 40/100, Total Reward: 7.0, Epsilon: 0.4457

Episode Progress:  41%|████      | 41/100 [19:47<35:48, 36.42s/it]

Previous episode: Episode: 41/100, Total Reward: 7.0, Epsilon: 0.4368

Episode Progress:  42%|████▏     | 42/100 [20:25<35:35, 36.82s/it]

Previous episode: Episode: 42/100, Total Reward: 10.0, Epsilon: 0.4281

Episode Progress:  43%|████▎     | 43/100 [20:59<34:11, 36.00s/it]

Previous episode: Episode: 43/100, Total Reward: 9.0, Epsilon: 0.4195

Episode Progress:  44%|████▍     | 44/100 [21:33<32:59, 35.35s/it]

Previous episode: Episode: 44/100, Total Reward: 12.0, Epsilon: 0.4111

Episode Progress:  45%|████▌     | 45/100 [22:13<33:40, 36.73s/it]

Previous episode: Episode: 45/100, Total Reward: 7.0, Epsilon: 0.4029

Episode Progress:  46%|████▌     | 46/100 [22:50<33:03, 36.73s/it]

Previous episode: Episode: 46/100, Total Reward: 12.0, Epsilon: 0.3948

Episode Progress:  47%|████▋     | 47/100 [23:31<33:36, 38.06s/it]

Previous episode: Episode: 47/100, Total Reward: 6.0, Epsilon: 0.3869

Episode Progress:  48%|████▊     | 48/100 [24:10<33:17, 38.40s/it]

Previous episode: Episode: 48/100, Total Reward: 10.0, Epsilon: 0.3792

Episode Progress:  49%|████▉     | 49/100 [24:55<34:15, 40.31s/it]

Previous episode: Episode: 49/100, Total Reward: 11.0, Epsilon: 0.3716

Episode Progress:  50%|█████     | 50/100 [25:30<32:20, 38.80s/it]

Previous episode: Episode: 50/100, Total Reward: 9.0, Epsilon: 0.3642

Episode Progress:  51%|█████     | 51/100 [26:12<32:32, 39.85s/it]

Previous episode: Episode: 51/100, Total Reward: 8.0, Epsilon: 0.3569

Episode Progress:  52%|█████▏    | 52/100 [26:57<32:58, 41.22s/it]

Previous episode: Episode: 52/100, Total Reward: 8.0, Epsilon: 0.3497

Episode Progress:  53%|█████▎    | 53/100 [27:43<33:29, 42.74s/it]

Previous episode: Episode: 53/100, Total Reward: 10.0, Epsilon: 0.3428

Episode Progress:  54%|█████▍    | 54/100 [28:13<29:49, 38.89s/it]

Previous episode: Episode: 54/100, Total Reward: 6.0, Epsilon: 0.3359

In [None]:
model.save_weights("gio.weights.h5")