In [1]:
import gymnasium as gym
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
from collections import deque
from gymnasium.wrappers import FrameStack
import random
from tqdm import tqdm



In [2]:
# Define constants and hyperparameters
num_episodes = 100
max_steps_per_episode = 1000
learning_rate = 0.01 # Use 0.0001 for 10000 episodes, 0.001 for 1000 episodes, and 0.01 for 100 episodes
batch_size = 64
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
epsilon_decay = 0.98 # Use 0.995 for 1000 episodes, 0.98 for 100 episodes
memory = deque(maxlen=10000)  # Experience replay buffer
env_name = "ALE/Frogger-v5"

In [3]:
def build_model(input_shape, num_actions):
    model = Sequential([ # Each person should change the amount of Conv2D/Dense layers, as well as the filter amount and kernel_size/strides
        Conv2D(16, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=input_shape, data_format="channels_first"),
        Conv2D(32, kernel_size=(4, 4), strides=(2, 2), activation='relu'),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(num_actions, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

In [4]:
# Create the environment
env = gym.make(env_name, obs_type='grayscale')
num_actions = env.action_space.n

In [5]:
env = FrameStack(env, 4)
frames, width, height = env.observation_space.shape

In [6]:
env.observation_space.shape

(4, 210, 160)

In [7]:
# Build the DQN model
model = build_model((frames, width, height), num_actions)

  super().__init__(


In [12]:
# Training loop
for episode in tqdm(range(num_episodes), desc='Episode Progress', position=0):
    state, _ = env.reset()
    episode_reward = 0
    done = False

    for step in range(max_steps_per_episode):
        if np.random.rand() <= epsilon:
            action = env.action_space.sample()  # Exploration
        else:
            q_values = model.predict(np.array([state]), verbose=None)[0]
            action = np.argmax(q_values)  # Exploitation

        # Ensure action is within bounds
        action = np.clip(action, 0, num_actions - 1)

        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        episode_reward += reward

        memory.append((state, action, reward, next_state, done))

        state = next_state

        if done:
            break

    # Experience replay
    if len(memory) >= batch_size:
        minibatch = random.sample(memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + gamma * np.amax(model.predict(np.array([next_state]), verbose=None)[0])

            target_f = model.predict(np.array([state]), verbose=None)
            target_f[0][action] = target
            model.fit(np.array([state]), target_f, epochs=1, verbose=None)

    # Decay exploration rate
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

    print(f"\rPrevious episode: Episode: {episode + 1}/{num_episodes}, Total Reward: {episode_reward}, Epsilon: {epsilon:.4f}", end="")

env.close()

Episode Progress:   1%|          | 1/100 [00:25<41:50, 25.36s/it]

Previous episode: Episode: 1/100, Total Reward: 12.0, Epsilon: 0.4195

Episode Progress:   2%|▏         | 2/100 [00:48<38:51, 23.79s/it]

Previous episode: Episode: 2/100, Total Reward: 9.0, Epsilon: 0.4111

Episode Progress:   3%|▎         | 3/100 [01:11<38:03, 23.54s/it]

Previous episode: Episode: 3/100, Total Reward: 8.0, Epsilon: 0.4029

Episode Progress:   4%|▍         | 4/100 [01:32<36:08, 22.59s/it]

Previous episode: Episode: 4/100, Total Reward: 6.0, Epsilon: 0.3948

Episode Progress:   5%|▌         | 5/100 [01:53<35:08, 22.20s/it]

Previous episode: Episode: 5/100, Total Reward: 6.0, Epsilon: 0.3869

Episode Progress:   6%|▌         | 6/100 [02:18<36:15, 23.15s/it]

Previous episode: Episode: 6/100, Total Reward: 11.0, Epsilon: 0.3792

Episode Progress:   7%|▋         | 7/100 [02:41<35:44, 23.06s/it]

Previous episode: Episode: 7/100, Total Reward: 8.0, Epsilon: 0.3716

Episode Progress:   8%|▊         | 8/100 [03:08<37:01, 24.15s/it]

Previous episode: Episode: 8/100, Total Reward: 8.0, Epsilon: 0.3642

Episode Progress:   9%|▉         | 9/100 [03:33<37:20, 24.62s/it]

Previous episode: Episode: 9/100, Total Reward: 8.0, Epsilon: 0.3569

Episode Progress:  10%|█         | 10/100 [03:58<36:57, 24.64s/it]

Previous episode: Episode: 10/100, Total Reward: 7.0, Epsilon: 0.3497

Episode Progress:  11%|█         | 11/100 [04:20<35:06, 23.67s/it]

Previous episode: Episode: 11/100, Total Reward: 7.0, Epsilon: 0.3428

Episode Progress:  12%|█▏        | 12/100 [04:45<35:29, 24.20s/it]

Previous episode: Episode: 12/100, Total Reward: 9.0, Epsilon: 0.3359

Episode Progress:  13%|█▎        | 13/100 [05:09<35:11, 24.27s/it]

Previous episode: Episode: 13/100, Total Reward: 7.0, Epsilon: 0.3292

Episode Progress:  14%|█▍        | 14/100 [05:38<36:31, 25.48s/it]

Previous episode: Episode: 14/100, Total Reward: 10.0, Epsilon: 0.3226

Episode Progress:  15%|█▌        | 15/100 [06:06<37:12, 26.26s/it]

Previous episode: Episode: 15/100, Total Reward: 7.0, Epsilon: 0.3161

Episode Progress:  16%|█▌        | 16/100 [06:35<37:49, 27.02s/it]

Previous episode: Episode: 16/100, Total Reward: 9.0, Epsilon: 0.3098

Episode Progress:  17%|█▋        | 17/100 [06:59<36:19, 26.26s/it]

Previous episode: Episode: 17/100, Total Reward: 7.0, Epsilon: 0.3036

Episode Progress:  18%|█▊        | 18/100 [07:27<36:40, 26.83s/it]

Previous episode: Episode: 18/100, Total Reward: 14.0, Epsilon: 0.2976

Episode Progress:  19%|█▉        | 19/100 [07:56<37:12, 27.56s/it]

Previous episode: Episode: 19/100, Total Reward: 11.0, Epsilon: 0.2916

Episode Progress:  20%|██        | 20/100 [08:18<34:29, 25.87s/it]

Previous episode: Episode: 20/100, Total Reward: 5.0, Epsilon: 0.2858

Episode Progress:  21%|██        | 21/100 [08:42<32:58, 25.04s/it]

Previous episode: Episode: 21/100, Total Reward: 7.0, Epsilon: 0.2801

Episode Progress:  22%|██▏       | 22/100 [09:14<35:36, 27.39s/it]

Previous episode: Episode: 22/100, Total Reward: 10.0, Epsilon: 0.2745

Episode Progress:  23%|██▎       | 23/100 [09:37<33:07, 25.82s/it]

Previous episode: Episode: 23/100, Total Reward: 9.0, Epsilon: 0.2690

Episode Progress:  24%|██▍       | 24/100 [10:08<34:50, 27.51s/it]

Previous episode: Episode: 24/100, Total Reward: 7.0, Epsilon: 0.2636

Episode Progress:  25%|██▌       | 25/100 [10:40<36:09, 28.92s/it]

Previous episode: Episode: 25/100, Total Reward: 11.0, Epsilon: 0.2583

Episode Progress:  26%|██▌       | 26/100 [11:09<35:37, 28.88s/it]

Previous episode: Episode: 26/100, Total Reward: 8.0, Epsilon: 0.2531

Episode Progress:  27%|██▋       | 27/100 [11:36<34:25, 28.29s/it]

Previous episode: Episode: 27/100, Total Reward: 8.0, Epsilon: 0.2481

Episode Progress:  28%|██▊       | 28/100 [12:02<33:17, 27.75s/it]

Previous episode: Episode: 28/100, Total Reward: 10.0, Epsilon: 0.2431

Episode Progress:  29%|██▉       | 29/100 [12:33<33:49, 28.58s/it]

Previous episode: Episode: 29/100, Total Reward: 8.0, Epsilon: 0.2383

Episode Progress:  30%|███       | 30/100 [13:10<36:20, 31.15s/it]

Previous episode: Episode: 30/100, Total Reward: 10.0, Epsilon: 0.2335

Episode Progress:  31%|███       | 31/100 [13:43<36:25, 31.67s/it]

Previous episode: Episode: 31/100, Total Reward: 9.0, Epsilon: 0.2288

Episode Progress:  32%|███▏      | 32/100 [14:12<35:03, 30.94s/it]

Previous episode: Episode: 32/100, Total Reward: 16.0, Epsilon: 0.2242

Episode Progress:  33%|███▎      | 33/100 [14:47<35:51, 32.11s/it]

Previous episode: Episode: 33/100, Total Reward: 8.0, Epsilon: 0.2198

Episode Progress:  34%|███▍      | 34/100 [15:22<36:07, 32.83s/it]

Previous episode: Episode: 34/100, Total Reward: 8.0, Epsilon: 0.2154

Episode Progress:  35%|███▌      | 35/100 [15:57<36:24, 33.61s/it]

Previous episode: Episode: 35/100, Total Reward: 9.0, Epsilon: 0.2111

Episode Progress:  36%|███▌      | 36/100 [16:25<34:10, 32.05s/it]

Previous episode: Episode: 36/100, Total Reward: 8.0, Epsilon: 0.2068

Episode Progress:  37%|███▋      | 37/100 [16:55<32:57, 31.38s/it]

Previous episode: Episode: 37/100, Total Reward: 10.0, Epsilon: 0.2027

Episode Progress:  38%|███▊      | 38/100 [17:25<31:47, 30.77s/it]

Previous episode: Episode: 38/100, Total Reward: 8.0, Epsilon: 0.1986

Episode Progress:  39%|███▉      | 39/100 [17:59<32:32, 32.00s/it]

Previous episode: Episode: 39/100, Total Reward: 9.0, Epsilon: 0.1947

Episode Progress:  40%|████      | 40/100 [18:39<34:07, 34.13s/it]

Previous episode: Episode: 40/100, Total Reward: 8.0, Epsilon: 0.1908

Episode Progress:  41%|████      | 41/100 [19:14<34:02, 34.62s/it]

Previous episode: Episode: 41/100, Total Reward: 6.0, Epsilon: 0.1870

Episode Progress:  42%|████▏     | 42/100 [19:50<33:49, 34.99s/it]

Previous episode: Episode: 42/100, Total Reward: 8.0, Epsilon: 0.1832

Episode Progress:  43%|████▎     | 43/100 [20:23<32:34, 34.28s/it]

Previous episode: Episode: 43/100, Total Reward: 7.0, Epsilon: 0.1796

Episode Progress:  44%|████▍     | 44/100 [20:54<31:06, 33.33s/it]

Previous episode: Episode: 44/100, Total Reward: 7.0, Epsilon: 0.1760

Episode Progress:  45%|████▌     | 45/100 [21:30<31:27, 34.31s/it]

Previous episode: Episode: 45/100, Total Reward: 8.0, Epsilon: 0.1725

Episode Progress:  46%|████▌     | 46/100 [22:07<31:22, 34.86s/it]

Previous episode: Episode: 46/100, Total Reward: 9.0, Epsilon: 0.1690

Episode Progress:  47%|████▋     | 47/100 [22:45<31:40, 35.87s/it]

Previous episode: Episode: 47/100, Total Reward: 7.0, Epsilon: 0.1656

Episode Progress:  48%|████▊     | 48/100 [23:19<30:44, 35.47s/it]

Previous episode: Episode: 48/100, Total Reward: 6.0, Epsilon: 0.1623

Episode Progress:  49%|████▉     | 49/100 [23:56<30:29, 35.87s/it]

Previous episode: Episode: 49/100, Total Reward: 6.0, Epsilon: 0.1591

Episode Progress:  50%|█████     | 50/100 [24:41<32:07, 38.55s/it]

Previous episode: Episode: 50/100, Total Reward: 9.0, Epsilon: 0.1559

Episode Progress:  51%|█████     | 51/100 [25:14<30:01, 36.76s/it]

Previous episode: Episode: 51/100, Total Reward: 10.0, Epsilon: 0.1528

Episode Progress:  52%|█████▏    | 52/100 [25:52<29:44, 37.17s/it]

Previous episode: Episode: 52/100, Total Reward: 6.0, Epsilon: 0.1497

Episode Progress:  53%|█████▎    | 53/100 [26:35<30:29, 38.92s/it]

Previous episode: Episode: 53/100, Total Reward: 8.0, Epsilon: 0.1467

Episode Progress:  54%|█████▍    | 54/100 [27:10<29:05, 37.94s/it]

Previous episode: Episode: 54/100, Total Reward: 7.0, Epsilon: 0.1438

Episode Progress:  55%|█████▌    | 55/100 [28:02<31:33, 42.07s/it]

Previous episode: Episode: 55/100, Total Reward: 14.0, Epsilon: 0.1409

Episode Progress:  56%|█████▌    | 56/100 [28:40<29:57, 40.85s/it]

Previous episode: Episode: 56/100, Total Reward: 9.0, Epsilon: 0.1381

Episode Progress:  57%|█████▋    | 57/100 [29:06<26:00, 36.30s/it]

Previous episode: Episode: 57/100, Total Reward: 6.0, Epsilon: 0.1353

Episode Progress:  58%|█████▊    | 58/100 [29:34<23:47, 33.98s/it]

Previous episode: Episode: 58/100, Total Reward: 7.0, Epsilon: 0.1326

Episode Progress:  59%|█████▉    | 59/100 [30:10<23:38, 34.61s/it]

Previous episode: Episode: 59/100, Total Reward: 8.0, Epsilon: 0.1300

Episode Progress:  60%|██████    | 60/100 [30:55<25:06, 37.66s/it]

Previous episode: Episode: 60/100, Total Reward: 13.0, Epsilon: 0.1274

Episode Progress:  61%|██████    | 61/100 [31:36<25:02, 38.54s/it]

Previous episode: Episode: 61/100, Total Reward: 11.0, Epsilon: 0.1248

Episode Progress:  62%|██████▏   | 62/100 [32:19<25:21, 40.04s/it]

Previous episode: Episode: 62/100, Total Reward: 9.0, Epsilon: 0.1223

Episode Progress:  63%|██████▎   | 63/100 [33:08<26:13, 42.52s/it]

Previous episode: Episode: 63/100, Total Reward: 10.0, Epsilon: 0.1199

Episode Progress:  64%|██████▍   | 64/100 [33:49<25:17, 42.17s/it]

Previous episode: Episode: 64/100, Total Reward: 8.0, Epsilon: 0.1175

Episode Progress:  65%|██████▌   | 65/100 [34:19<22:29, 38.57s/it]

Previous episode: Episode: 65/100, Total Reward: 7.0, Epsilon: 0.1151

Episode Progress:  66%|██████▌   | 66/100 [35:15<24:50, 43.83s/it]

Previous episode: Episode: 66/100, Total Reward: 7.0, Epsilon: 0.1128

Episode Progress:  67%|██████▋   | 67/100 [35:48<22:15, 40.46s/it]

Previous episode: Episode: 67/100, Total Reward: 7.0, Epsilon: 0.1106

Episode Progress:  68%|██████▊   | 68/100 [36:21<20:22, 38.20s/it]

Previous episode: Episode: 68/100, Total Reward: 7.0, Epsilon: 0.1084

Episode Progress:  69%|██████▉   | 69/100 [37:07<20:55, 40.50s/it]

Previous episode: Episode: 69/100, Total Reward: 7.0, Epsilon: 0.1062

Episode Progress:  70%|███████   | 70/100 [37:47<20:13, 40.46s/it]

Previous episode: Episode: 70/100, Total Reward: 7.0, Epsilon: 0.1041

Episode Progress:  71%|███████   | 71/100 [38:17<18:00, 37.26s/it]

Previous episode: Episode: 71/100, Total Reward: 5.0, Epsilon: 0.1020

Episode Progress:  72%|███████▏  | 72/100 [38:55<17:33, 37.63s/it]

Previous episode: Episode: 72/100, Total Reward: 6.0, Epsilon: 0.0999

Episode Progress:  73%|███████▎  | 73/100 [39:46<18:42, 41.56s/it]

Previous episode: Episode: 73/100, Total Reward: 5.0, Epsilon: 0.0979

Episode Progress:  74%|███████▍  | 74/100 [40:39<19:28, 44.94s/it]

Previous episode: Episode: 74/100, Total Reward: 5.0, Epsilon: 0.0960

Episode Progress:  75%|███████▌  | 75/100 [41:38<20:29, 49.19s/it]

Previous episode: Episode: 75/100, Total Reward: 8.0, Epsilon: 0.0941

Episode Progress:  76%|███████▌  | 76/100 [42:30<19:59, 49.99s/it]

Previous episode: Episode: 76/100, Total Reward: 6.0, Epsilon: 0.0922

Episode Progress:  77%|███████▋  | 77/100 [43:11<18:06, 47.24s/it]

Previous episode: Episode: 77/100, Total Reward: 5.0, Epsilon: 0.0903

Episode Progress:  78%|███████▊  | 78/100 [49:27<53:32, 146.04s/it]

Previous episode: Episode: 78/100, Total Reward: 6.0, Epsilon: 0.0885

Episode Progress:  79%|███████▉  | 79/100 [50:07<39:57, 114.17s/it]

Previous episode: Episode: 79/100, Total Reward: 5.0, Epsilon: 0.0868

Episode Progress:  80%|████████  | 80/100 [51:01<32:01, 96.08s/it] 

Previous episode: Episode: 80/100, Total Reward: 7.0, Epsilon: 0.0850

Episode Progress:  81%|████████  | 81/100 [51:38<24:50, 78.47s/it]

Previous episode: Episode: 81/100, Total Reward: 8.0, Epsilon: 0.0833

Episode Progress:  82%|████████▏ | 82/100 [52:15<19:47, 65.97s/it]

Previous episode: Episode: 82/100, Total Reward: 9.0, Epsilon: 0.0817

Episode Progress:  83%|████████▎ | 83/100 [53:17<18:18, 64.61s/it]

Previous episode: Episode: 83/100, Total Reward: 7.0, Epsilon: 0.0800

Episode Progress:  84%|████████▍ | 84/100 [53:57<15:20, 57.51s/it]

Previous episode: Episode: 84/100, Total Reward: 6.0, Epsilon: 0.0784

Episode Progress:  85%|████████▌ | 85/100 [54:36<12:57, 51.84s/it]

Previous episode: Episode: 85/100, Total Reward: 7.0, Epsilon: 0.0769

Episode Progress:  86%|████████▌ | 86/100 [55:19<11:28, 49.19s/it]

Previous episode: Episode: 86/100, Total Reward: 9.0, Epsilon: 0.0753

Episode Progress:  87%|████████▋ | 87/100 [56:04<10:21, 47.77s/it]

Previous episode: Episode: 87/100, Total Reward: 6.0, Epsilon: 0.0738

Episode Progress:  88%|████████▊ | 88/100 [57:09<10:37, 53.12s/it]

Previous episode: Episode: 88/100, Total Reward: 4.0, Epsilon: 0.0723

Episode Progress:  89%|████████▉ | 89/100 [58:06<09:55, 54.09s/it]

Previous episode: Episode: 89/100, Total Reward: 8.0, Epsilon: 0.0709

Episode Progress:  90%|█████████ | 90/100 [59:11<09:35, 57.56s/it]

Previous episode: Episode: 90/100, Total Reward: 6.0, Epsilon: 0.0695

Episode Progress:  91%|█████████ | 91/100 [1:00:10<08:40, 57.83s/it]

Previous episode: Episode: 91/100, Total Reward: 5.0, Epsilon: 0.0681

Episode Progress:  91%|█████████ | 91/100 [1:00:28<05:58, 39.87s/it]


KeyboardInterrupt: 

In [13]:
model.save_weights('gio_191episodes.weights.h5')