In [8]:
import random
from collections import deque
from pathlib import Path

import gymnasium as gym
import keras
import numpy as np
from gymnasium.wrappers import FrameStack
from keras.layers import Conv2D, Flatten, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from tqdm import tqdm

In [9]:
# Define constants and hyperparameters
# NOTE: num_episodes, learning_rate and epsilon_decay are tuned together. When you change
# num_episodes, also switch the other two to the matching values listed in their comments.
num_episodes = 1000 # Make sure you change this to 10000 for the final training sequence, and either 1000 or 100 for smaller testing
max_steps_per_episode = 1000
learning_rate = 0.001 # Use 0.0001 for 10000 episodes, 0.001 for 1000 episodes, and 0.01 for 100 episodes
batch_size = 64
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
# Was 0.995 (the 10000-episode value) while num_episodes is 1000; use the 1000-episode value.
epsilon_decay = 0.985 # Use 0.995 for 10000 episodes, 0.985 for 1000 episodes, 0.975 for 100 episodes
memory = deque(maxlen=10000)  # Experience replay buffer
env_name = "ALE/Frogger-v5"

In [10]:
def build_model(input_shape, num_actions):
    """Build and compile the convolutional Q-network.

    Args:
        input_shape: Shape of a single observation in channels-first layout
            (channels, dim1, dim2), as consumed by the first Conv2D layer.
        num_actions: Number of discrete actions; sets the width of the linear output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer using the notebook-level
        ``learning_rate``, MSE loss) that outputs one value per action.
    """
    # Every Conv2D must share the same data_format. Previously only the first layer was
    # channels-first; the later ones used the Keras default layout and therefore treated
    # the (filters, H, W) feature maps of the previous layer as (H, W, channels).
    layout = "channels_first"
    model = Sequential([ # Each person should change the amount of Conv2D/Dense layers, as well as the filter amount and kernel_size/strides
        Conv2D(32, kernel_size=(8, 8), strides=(4, 4), activation='relu', input_shape=input_shape, data_format=layout),
        Conv2D(64, kernel_size=(4, 4), strides=(2, 2), activation='relu', data_format=layout),
        Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu', data_format=layout),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(num_actions, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

In [11]:
# Instantiate the environment named by env_name, requesting grayscale observations
env = gym.make(id=env_name, obs_type="grayscale")
num_actions = env.action_space.n  # size of the discrete action space

In [12]:
# Stack the 4 most recent frames into a single observation.
# Guarded so that re-running this cell does not wrap the environment a second time
# (which would add another stacking axis and break the 3-way unpack below).
if not isinstance(env, FrameStack):
    env = FrameStack(env, 4)
# NOTE(review): ALE grayscale frames are documented as (210, 160), i.e. (height, width), so
# the stacked shape is presumably (frames, height, width) and the names `width`/`height`
# look swapped. Harmless because the tuple is forwarded positionally to the model; the
# names are kept because the model-building cell references them -- confirm before renaming.
frames, width, height = env.observation_space.shape

In [13]:
# Construct the DQN network for the stacked-frame observation shape
model = build_model(
    input_shape=(frames, width, height),
    num_actions=num_actions,
)

In [14]:
# Training loop
def _as_batch(observation):
    """Wrap one stacked observation as a float32 batch of size 1 for the network."""
    return np.array([observation], dtype=np.float32)


try:
    for episode in tqdm(range(num_episodes), desc='Episode Progress', position=0):
        state, _ = env.reset()
        episode_reward = 0

        for step in range(max_steps_per_episode):
            if np.random.rand() <= epsilon:
                action = int(env.action_space.sample())  # Exploration
            else:
                # predict_on_batch avoids the per-call dataset/progress-bar setup of predict(),
                # which is expensive when called once per environment step.
                q_values = model.predict_on_batch(_as_batch(state))[0]
                action = int(np.argmax(q_values))  # Exploitation (argmax is always a valid action index)

            next_state, reward, terminated, truncated, _ = env.step(action)
            episode_reward += reward

            # Store `terminated` rather than `terminated or truncated`: a truncated episode was
            # cut off by a time limit, not ended by the game, so its target must still bootstrap.
            memory.append((state, action, reward, next_state, terminated))

            state = next_state

            if terminated or truncated:
                break

        # Experience replay: one single-sample gradient step per sampled transition.
        # Distinct loop names keep the episode variables above from being overwritten.
        if len(memory) >= batch_size:
            for s, a, r, s_next, is_terminal in random.sample(memory, batch_size):
                target = r
                if not is_terminal:
                    target = r + gamma * np.amax(model.predict_on_batch(_as_batch(s_next))[0])

                s_batch = _as_batch(s)
                # Copy so the array is guaranteed writable before patching the taken action.
                target_f = np.array(model.predict_on_batch(s_batch))
                target_f[0][a] = target
                model.train_on_batch(s_batch, target_f)

        # Decay exploration rate, never dropping below the configured floor
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        print(f"\rPrevious episode: Episode: {episode + 1}/{num_episodes}, Total Reward: {episode_reward}, Epsilon: {epsilon:.4f}", end="")
finally:
    # Release the emulator even if training is interrupted (e.g. KeyboardInterrupt).
    env.close()

Episode Progress:   0%|          | 1/1000 [00:22<6:20:34, 22.86s/it]

Previous episode: Episode: 1/1000, Total Reward: 9.0, Epsilon: 0.9850

Episode Progress:   0%|          | 2/1000 [00:44<6:03:49, 21.87s/it]

Previous episode: Episode: 2/1000, Total Reward: 10.0, Epsilon: 0.9702

Episode Progress:   0%|          | 3/1000 [01:05<5:58:11, 21.56s/it]

Previous episode: Episode: 3/1000, Total Reward: 12.0, Epsilon: 0.9557

Episode Progress:   0%|          | 4/1000 [01:27<6:01:54, 21.80s/it]

Previous episode: Episode: 4/1000, Total Reward: 7.0, Epsilon: 0.9413

Episode Progress:   0%|          | 5/1000 [01:52<6:21:08, 22.98s/it]

Previous episode: Episode: 5/1000, Total Reward: 9.0, Epsilon: 0.9272

Episode Progress:   1%|          | 6/1000 [02:16<6:26:29, 23.33s/it]

Previous episode: Episode: 6/1000, Total Reward: 10.0, Epsilon: 0.9133

Episode Progress:   1%|          | 7/1000 [02:39<6:24:17, 23.22s/it]

Previous episode: Episode: 7/1000, Total Reward: 10.0, Epsilon: 0.8996

Episode Progress:   1%|          | 8/1000 [03:03<6:27:23, 23.43s/it]

Previous episode: Episode: 8/1000, Total Reward: 8.0, Epsilon: 0.8861

Episode Progress:   1%|          | 9/1000 [03:40<7:38:12, 27.74s/it]

Previous episode: Episode: 9/1000, Total Reward: 9.0, Epsilon: 0.8728

Episode Progress:   1%|          | 10/1000 [04:19<8:32:46, 31.08s/it]

Previous episode: Episode: 10/1000, Total Reward: 10.0, Epsilon: 0.8597

Episode Progress:   1%|          | 11/1000 [04:57<9:09:20, 33.33s/it]

Previous episode: Episode: 11/1000, Total Reward: 9.0, Epsilon: 0.8468

Episode Progress:   1%|          | 12/1000 [05:38<9:46:15, 35.60s/it]

Previous episode: Episode: 12/1000, Total Reward: 7.0, Epsilon: 0.8341

Episode Progress:   1%|▏         | 13/1000 [06:16<9:56:48, 36.28s/it]

Previous episode: Episode: 13/1000, Total Reward: 6.0, Epsilon: 0.8216

Episode Progress:   1%|▏         | 14/1000 [06:58<10:25:27, 38.06s/it]

Previous episode: Episode: 14/1000, Total Reward: 14.0, Epsilon: 0.8093

Episode Progress:   2%|▏         | 15/1000 [07:41<10:50:59, 39.65s/it]

Previous episode: Episode: 15/1000, Total Reward: 12.0, Epsilon: 0.7972

Episode Progress:   2%|▏         | 16/1000 [08:21<10:52:41, 39.80s/it]

Previous episode: Episode: 16/1000, Total Reward: 10.0, Epsilon: 0.7852

Episode Progress:   2%|▏         | 17/1000 [09:00<10:45:57, 39.43s/it]

Previous episode: Episode: 17/1000, Total Reward: 12.0, Epsilon: 0.7734

Episode Progress:   2%|▏         | 18/1000 [09:31<10:01:59, 36.78s/it]

Previous episode: Episode: 18/1000, Total Reward: 8.0, Epsilon: 0.7618

Episode Progress:   2%|▏         | 19/1000 [10:02<9:34:58, 35.17s/it] 

Previous episode: Episode: 19/1000, Total Reward: 13.0, Epsilon: 0.7504

Episode Progress:   2%|▏         | 20/1000 [10:29<8:54:44, 32.74s/it]

Previous episode: Episode: 20/1000, Total Reward: 12.0, Epsilon: 0.7391

Episode Progress:   2%|▏         | 21/1000 [10:59<8:41:22, 31.95s/it]

Previous episode: Episode: 21/1000, Total Reward: 13.0, Epsilon: 0.7280

Episode Progress:   2%|▏         | 22/1000 [11:32<8:46:16, 32.29s/it]

Previous episode: Episode: 22/1000, Total Reward: 8.0, Epsilon: 0.7171

Episode Progress:   2%|▏         | 23/1000 [12:04<8:41:48, 32.05s/it]

Previous episode: Episode: 23/1000, Total Reward: 10.0, Epsilon: 0.7064

Episode Progress:   2%|▏         | 24/1000 [12:34<8:30:42, 31.40s/it]

Previous episode: Episode: 24/1000, Total Reward: 13.0, Epsilon: 0.6958

Episode Progress:   2%|▎         | 25/1000 [13:07<8:40:17, 32.02s/it]

Previous episode: Episode: 25/1000, Total Reward: 7.0, Epsilon: 0.6853

Episode Progress:   3%|▎         | 26/1000 [13:42<8:54:26, 32.92s/it]

Previous episode: Episode: 26/1000, Total Reward: 9.0, Epsilon: 0.6751

Episode Progress:   3%|▎         | 27/1000 [14:16<9:00:57, 33.36s/it]

Previous episode: Episode: 27/1000, Total Reward: 10.0, Epsilon: 0.6649

Episode Progress:   3%|▎         | 28/1000 [14:48<8:51:17, 32.80s/it]

Previous episode: Episode: 28/1000, Total Reward: 10.0, Epsilon: 0.6550

Episode Progress:   3%|▎         | 29/1000 [15:23<9:03:05, 33.56s/it]

Previous episode: Episode: 29/1000, Total Reward: 9.0, Epsilon: 0.6451

Episode Progress:   3%|▎         | 30/1000 [15:57<9:04:20, 33.67s/it]

Previous episode: Episode: 30/1000, Total Reward: 10.0, Epsilon: 0.6355

Episode Progress:   3%|▎         | 31/1000 [16:35<9:25:50, 35.04s/it]

Previous episode: Episode: 31/1000, Total Reward: 11.0, Epsilon: 0.6259

Episode Progress:   3%|▎         | 32/1000 [17:07<9:10:21, 34.11s/it]

Previous episode: Episode: 32/1000, Total Reward: 9.0, Epsilon: 0.6165

Episode Progress:   3%|▎         | 33/1000 [17:40<9:01:37, 33.61s/it]

Previous episode: Episode: 33/1000, Total Reward: 7.0, Epsilon: 0.6073

Episode Progress:   3%|▎         | 34/1000 [18:22<9:41:10, 36.10s/it]

Previous episode: Episode: 34/1000, Total Reward: 8.0, Epsilon: 0.5982

Episode Progress:   4%|▎         | 35/1000 [18:57<9:38:45, 35.98s/it]

Previous episode: Episode: 35/1000, Total Reward: 8.0, Epsilon: 0.5892

Episode Progress:   4%|▎         | 36/1000 [19:33<9:35:03, 35.79s/it]

Previous episode: Episode: 36/1000, Total Reward: 8.0, Epsilon: 0.5804

Episode Progress:   4%|▎         | 37/1000 [20:08<9:30:03, 35.52s/it]

Previous episode: Episode: 37/1000, Total Reward: 7.0, Epsilon: 0.5717

Episode Progress:   4%|▍         | 38/1000 [20:47<9:46:34, 36.58s/it]

Previous episode: Episode: 38/1000, Total Reward: 9.0, Epsilon: 0.5631

Episode Progress:   4%|▍         | 39/1000 [21:22<9:40:19, 36.23s/it]

Previous episode: Episode: 39/1000, Total Reward: 7.0, Epsilon: 0.5546

Episode Progress:   4%|▍         | 40/1000 [22:01<9:53:09, 37.07s/it]

Previous episode: Episode: 40/1000, Total Reward: 11.0, Epsilon: 0.5463

Episode Progress:   4%|▍         | 41/1000 [22:40<9:59:24, 37.50s/it]

Previous episode: Episode: 41/1000, Total Reward: 8.0, Epsilon: 0.5381

Episode Progress:   4%|▍         | 42/1000 [23:12<9:34:11, 35.96s/it]

Previous episode: Episode: 42/1000, Total Reward: 10.0, Epsilon: 0.5301

Episode Progress:   4%|▍         | 43/1000 [23:46<9:26:15, 35.50s/it]

Previous episode: Episode: 43/1000, Total Reward: 7.0, Epsilon: 0.5221

Episode Progress:   4%|▍         | 44/1000 [24:19<9:10:28, 34.55s/it]

Previous episode: Episode: 44/1000, Total Reward: 8.0, Epsilon: 0.5143

Episode Progress:   4%|▍         | 45/1000 [24:55<9:18:59, 35.12s/it]

Previous episode: Episode: 45/1000, Total Reward: 13.0, Epsilon: 0.5066

Episode Progress:   5%|▍         | 46/1000 [25:39<9:59:19, 37.69s/it]

Previous episode: Episode: 46/1000, Total Reward: 11.0, Epsilon: 0.4990

Episode Progress:   5%|▍         | 47/1000 [26:14<9:47:28, 36.99s/it]

Previous episode: Episode: 47/1000, Total Reward: 10.0, Epsilon: 0.4915

Episode Progress:   5%|▍         | 48/1000 [26:57<10:13:00, 38.63s/it]

Previous episode: Episode: 48/1000, Total Reward: 13.0, Epsilon: 0.4841

Episode Progress:   5%|▍         | 49/1000 [27:32<9:58:27, 37.76s/it] 

Previous episode: Episode: 49/1000, Total Reward: 10.0, Epsilon: 0.4768

Episode Progress:   5%|▌         | 50/1000 [28:09<9:51:43, 37.37s/it]

Previous episode: Episode: 50/1000, Total Reward: 10.0, Epsilon: 0.4697

Episode Progress:   5%|▌         | 51/1000 [28:50<10:08:09, 38.45s/it]

Previous episode: Episode: 51/1000, Total Reward: 10.0, Epsilon: 0.4626

Episode Progress:   5%|▌         | 52/1000 [29:29<10:08:51, 38.53s/it]

Previous episode: Episode: 52/1000, Total Reward: 8.0, Epsilon: 0.4557

Episode Progress:   5%|▌         | 53/1000 [30:11<10:27:05, 39.73s/it]

Previous episode: Episode: 53/1000, Total Reward: 10.0, Epsilon: 0.4489

Episode Progress:   5%|▌         | 54/1000 [30:49<10:18:30, 39.23s/it]

Previous episode: Episode: 54/1000, Total Reward: 6.0, Epsilon: 0.4421

Episode Progress:   6%|▌         | 55/1000 [31:30<10:24:52, 39.67s/it]

Previous episode: Episode: 55/1000, Total Reward: 13.0, Epsilon: 0.4355

Episode Progress:   6%|▌         | 56/1000 [32:11<10:31:11, 40.12s/it]

Previous episode: Episode: 56/1000, Total Reward: 7.0, Epsilon: 0.4290

Episode Progress:   6%|▌         | 57/1000 [33:06<11:41:05, 44.61s/it]

Previous episode: Episode: 57/1000, Total Reward: 9.0, Epsilon: 0.4225

Episode Progress:   6%|▌         | 58/1000 [34:11<13:16:31, 50.73s/it]

Previous episode: Episode: 58/1000, Total Reward: 11.0, Epsilon: 0.4162

Episode Progress:   6%|▌         | 59/1000 [35:14<14:11:08, 54.27s/it]

Previous episode: Episode: 59/1000, Total Reward: 7.0, Epsilon: 0.4100

Episode Progress:   6%|▌         | 60/1000 [36:18<14:58:28, 57.35s/it]

Previous episode: Episode: 60/1000, Total Reward: 9.0, Epsilon: 0.4038

Episode Progress:   6%|▌         | 61/1000 [37:15<14:55:01, 57.19s/it]

Previous episode: Episode: 61/1000, Total Reward: 8.0, Epsilon: 0.3977

Episode Progress:   6%|▌         | 62/1000 [38:06<14:26:42, 55.44s/it]

Previous episode: Episode: 62/1000, Total Reward: 8.0, Epsilon: 0.3918

Episode Progress:   6%|▋         | 63/1000 [39:01<14:20:45, 55.12s/it]

Previous episode: Episode: 63/1000, Total Reward: 6.0, Epsilon: 0.3859

Episode Progress:   6%|▋         | 64/1000 [39:52<14:01:39, 53.95s/it]

Previous episode: Episode: 64/1000, Total Reward: 6.0, Epsilon: 0.3801

Episode Progress:   6%|▋         | 65/1000 [40:51<14:25:10, 55.52s/it]

Previous episode: Episode: 65/1000, Total Reward: 7.0, Epsilon: 0.3744

Episode Progress:   7%|▋         | 66/1000 [41:43<14:06:37, 54.39s/it]

Previous episode: Episode: 66/1000, Total Reward: 10.0, Epsilon: 0.3688

Episode Progress:   7%|▋         | 67/1000 [42:49<15:01:55, 58.00s/it]

Previous episode: Episode: 67/1000, Total Reward: 15.0, Epsilon: 0.3633

Episode Progress:   7%|▋         | 68/1000 [43:56<15:41:11, 60.59s/it]

Previous episode: Episode: 68/1000, Total Reward: 10.0, Epsilon: 0.3578

Episode Progress:   7%|▋         | 69/1000 [44:49<15:03:27, 58.22s/it]

Previous episode: Episode: 69/1000, Total Reward: 7.0, Epsilon: 0.3525

Episode Progress:   7%|▋         | 70/1000 [45:39<14:26:35, 55.91s/it]

Previous episode: Episode: 70/1000, Total Reward: 11.0, Epsilon: 0.3472

Episode Progress:   7%|▋         | 71/1000 [46:36<14:29:59, 56.19s/it]

Previous episode: Episode: 71/1000, Total Reward: 8.0, Epsilon: 0.3420

Episode Progress:   7%|▋         | 72/1000 [47:40<15:07:11, 58.65s/it]

Previous episode: Episode: 72/1000, Total Reward: 10.0, Epsilon: 0.3368

Episode Progress:   7%|▋         | 73/1000 [48:33<14:35:48, 56.69s/it]

Previous episode: Episode: 73/1000, Total Reward: 7.0, Epsilon: 0.3318

Episode Progress:   7%|▋         | 74/1000 [49:29<14:33:47, 56.62s/it]

Previous episode: Episode: 74/1000, Total Reward: 8.0, Epsilon: 0.3268

Episode Progress:   8%|▊         | 75/1000 [50:19<14:04:00, 54.75s/it]

Previous episode: Episode: 75/1000, Total Reward: 8.0, Epsilon: 0.3219

Episode Progress:   8%|▊         | 76/1000 [51:14<14:04:14, 54.82s/it]

Previous episode: Episode: 76/1000, Total Reward: 9.0, Epsilon: 0.3171

Episode Progress:   8%|▊         | 77/1000 [52:38<16:16:14, 63.46s/it]

Previous episode: Episode: 77/1000, Total Reward: 8.0, Epsilon: 0.3123

Episode Progress:   8%|▊         | 78/1000 [53:54<17:13:13, 67.24s/it]

Previous episode: Episode: 78/1000, Total Reward: 9.0, Epsilon: 0.3076

Episode Progress:   8%|▊         | 79/1000 [54:58<16:55:34, 66.16s/it]

Previous episode: Episode: 79/1000, Total Reward: 9.0, Epsilon: 0.3030

Episode Progress:   8%|▊         | 80/1000 [55:56<16:19:10, 63.86s/it]

Previous episode: Episode: 80/1000, Total Reward: 9.0, Epsilon: 0.2985

Episode Progress:   8%|▊         | 81/1000 [56:53<15:45:59, 61.76s/it]

Previous episode: Episode: 81/1000, Total Reward: 8.0, Epsilon: 0.2940

Episode Progress:   8%|▊         | 82/1000 [57:58<15:57:05, 62.56s/it]

Previous episode: Episode: 82/1000, Total Reward: 8.0, Epsilon: 0.2896

Episode Progress:   8%|▊         | 83/1000 [59:34<18:29:37, 72.60s/it]

Previous episode: Episode: 83/1000, Total Reward: 12.0, Epsilon: 0.2852

Episode Progress:   8%|▊         | 84/1000 [1:00:33<17:30:17, 68.80s/it]

Previous episode: Episode: 84/1000, Total Reward: 11.0, Epsilon: 0.2810

Episode Progress:   8%|▊         | 85/1000 [1:01:34<16:50:01, 66.23s/it]

Previous episode: Episode: 85/1000, Total Reward: 9.0, Epsilon: 0.2767

Episode Progress:   9%|▊         | 86/1000 [1:02:40<16:49:25, 66.26s/it]

Previous episode: Episode: 86/1000, Total Reward: 10.0, Epsilon: 0.2726

Episode Progress:   9%|▊         | 87/1000 [1:03:39<16:16:45, 64.19s/it]

Previous episode: Episode: 87/1000, Total Reward: 6.0, Epsilon: 0.2685

Episode Progress:   9%|▉         | 88/1000 [1:04:57<17:15:36, 68.13s/it]

Previous episode: Episode: 88/1000, Total Reward: 13.0, Epsilon: 0.2645

Episode Progress:   9%|▉         | 89/1000 [1:06:10<17:36:06, 69.56s/it]

Previous episode: Episode: 89/1000, Total Reward: 8.0, Epsilon: 0.2605

Episode Progress:   9%|▉         | 90/1000 [1:07:25<18:00:10, 71.22s/it]

Previous episode: Episode: 90/1000, Total Reward: 11.0, Epsilon: 0.2566

Episode Progress:   9%|▉         | 91/1000 [1:08:38<18:10:29, 71.98s/it]

Previous episode: Episode: 91/1000, Total Reward: 9.0, Epsilon: 0.2528

Episode Progress:   9%|▉         | 92/1000 [1:10:03<19:07:15, 75.81s/it]

Previous episode: Episode: 92/1000, Total Reward: 11.0, Epsilon: 0.2490

Episode Progress:   9%|▉         | 93/1000 [1:11:13<18:38:19, 73.98s/it]

Previous episode: Episode: 93/1000, Total Reward: 7.0, Epsilon: 0.2452

Episode Progress:   9%|▉         | 94/1000 [1:12:25<18:29:22, 73.47s/it]

Previous episode: Episode: 94/1000, Total Reward: 9.0, Epsilon: 0.2415

Episode Progress:  10%|▉         | 95/1000 [1:13:28<17:37:50, 70.13s/it]

Previous episode: Episode: 95/1000, Total Reward: 8.0, Epsilon: 0.2379

Episode Progress:  10%|▉         | 96/1000 [1:14:52<18:41:45, 74.45s/it]

Previous episode: Episode: 96/1000, Total Reward: 9.0, Epsilon: 0.2344

Episode Progress:  10%|▉         | 97/1000 [1:16:37<20:58:57, 83.65s/it]

Previous episode: Episode: 97/1000, Total Reward: 8.0, Epsilon: 0.2308

Episode Progress:  10%|▉         | 98/1000 [1:17:55<20:31:37, 81.93s/it]

Previous episode: Episode: 98/1000, Total Reward: 11.0, Epsilon: 0.2274

Episode Progress:  10%|▉         | 99/1000 [1:19:05<19:33:58, 78.18s/it]

Previous episode: Episode: 99/1000, Total Reward: 10.0, Epsilon: 0.2240

Episode Progress:  10%|█         | 100/1000 [1:20:33<20:20:02, 81.34s/it]

Previous episode: Episode: 100/1000, Total Reward: 9.0, Epsilon: 0.2206

Episode Progress:  10%|█         | 101/1000 [1:21:46<19:41:12, 78.83s/it]

Previous episode: Episode: 101/1000, Total Reward: 6.0, Epsilon: 0.2173

Episode Progress:  10%|█         | 102/1000 [1:23:03<19:30:20, 78.20s/it]

Previous episode: Episode: 102/1000, Total Reward: 10.0, Epsilon: 0.2140

Episode Progress:  10%|█         | 103/1000 [1:24:45<21:15:00, 85.28s/it]

Previous episode: Episode: 103/1000, Total Reward: 8.0, Epsilon: 0.2108

Episode Progress:  10%|█         | 104/1000 [1:25:59<20:23:51, 81.95s/it]

Previous episode: Episode: 104/1000, Total Reward: 7.0, Epsilon: 0.2077

Episode Progress:  10%|█         | 105/1000 [1:27:23<20:30:06, 82.47s/it]

Previous episode: Episode: 105/1000, Total Reward: 8.0, Epsilon: 0.2046

Episode Progress:  11%|█         | 106/1000 [1:28:41<20:10:04, 81.21s/it]

Previous episode: Episode: 106/1000, Total Reward: 7.0, Epsilon: 0.2015

Episode Progress:  11%|█         | 107/1000 [1:30:11<20:47:17, 83.80s/it]

Previous episode: Episode: 107/1000, Total Reward: 11.0, Epsilon: 0.1985

Episode Progress:  11%|█         | 108/1000 [1:31:25<20:02:38, 80.90s/it]

Previous episode: Episode: 108/1000, Total Reward: 8.0, Epsilon: 0.1955

Episode Progress:  11%|█         | 109/1000 [1:33:02<21:12:53, 85.72s/it]

Previous episode: Episode: 109/1000, Total Reward: 12.0, Epsilon: 0.1926

Episode Progress:  11%|█         | 110/1000 [1:34:25<20:59:41, 84.92s/it]

Previous episode: Episode: 110/1000, Total Reward: 8.0, Epsilon: 0.1897

Episode Progress:  11%|█         | 111/1000 [1:35:51<21:04:28, 85.34s/it]

Previous episode: Episode: 111/1000, Total Reward: 9.0, Epsilon: 0.1868

Episode Progress:  11%|█         | 112/1000 [1:37:23<21:30:39, 87.21s/it]

Previous episode: Episode: 112/1000, Total Reward: 11.0, Epsilon: 0.1840

Episode Progress:  11%|█▏        | 113/1000 [1:39:01<22:19:28, 90.61s/it]

Previous episode: Episode: 113/1000, Total Reward: 8.0, Epsilon: 0.1813

Episode Progress:  11%|█▏        | 114/1000 [1:40:53<23:51:21, 96.93s/it]

Previous episode: Episode: 114/1000, Total Reward: 8.0, Epsilon: 0.1785

Episode Progress:  12%|█▏        | 115/1000 [1:42:47<25:06:46, 102.15s/it]

Previous episode: Episode: 115/1000, Total Reward: 11.0, Epsilon: 0.1759

Episode Progress:  12%|█▏        | 116/1000 [1:44:32<25:17:15, 102.98s/it]

Previous episode: Episode: 116/1000, Total Reward: 5.0, Epsilon: 0.1732

Episode Progress:  12%|█▏        | 117/1000 [1:45:55<23:45:53, 96.89s/it] 

Previous episode: Episode: 117/1000, Total Reward: 9.0, Epsilon: 0.1706

Episode Progress:  12%|█▏        | 118/1000 [1:48:22<27:24:36, 111.88s/it]

Previous episode: Episode: 118/1000, Total Reward: 7.0, Epsilon: 0.1681

Episode Progress:  12%|█▏        | 119/1000 [1:49:39<24:50:31, 101.51s/it]

Previous episode: Episode: 119/1000, Total Reward: 6.0, Epsilon: 0.1655

Episode Progress:  12%|█▏        | 120/1000 [1:50:54<22:51:17, 93.50s/it] 

Previous episode: Episode: 120/1000, Total Reward: 10.0, Epsilon: 0.1631

Episode Progress:  12%|█▏        | 121/1000 [1:52:08<21:25:06, 87.72s/it]

Previous episode: Episode: 121/1000, Total Reward: 6.0, Epsilon: 0.1606

Episode Progress:  12%|█▏        | 122/1000 [1:53:36<21:25:49, 87.87s/it]

Previous episode: Episode: 122/1000, Total Reward: 8.0, Epsilon: 0.1582

Episode Progress:  12%|█▏        | 123/1000 [1:54:52<20:32:25, 84.32s/it]

Previous episode: Episode: 123/1000, Total Reward: 5.0, Epsilon: 0.1558

Episode Progress:  12%|█▏        | 124/1000 [1:56:22<20:55:12, 85.97s/it]

Previous episode: Episode: 124/1000, Total Reward: 7.0, Epsilon: 0.1535

Episode Progress:  12%|█▎        | 125/1000 [1:57:52<21:10:51, 87.14s/it]

Previous episode: Episode: 125/1000, Total Reward: 12.0, Epsilon: 0.1512

Episode Progress:  13%|█▎        | 126/1000 [1:59:11<20:32:06, 84.58s/it]

Previous episode: Episode: 126/1000, Total Reward: 10.0, Epsilon: 0.1489

Episode Progress:  13%|█▎        | 127/1000 [2:00:31<20:12:34, 83.34s/it]

Previous episode: Episode: 127/1000, Total Reward: 6.0, Epsilon: 0.1467

Episode Progress:  13%|█▎        | 128/1000 [2:01:48<19:44:55, 81.53s/it]

Previous episode: Episode: 128/1000, Total Reward: 9.0, Epsilon: 0.1445

Episode Progress:  13%|█▎        | 129/1000 [2:03:19<20:21:32, 84.15s/it]

Previous episode: Episode: 129/1000, Total Reward: 7.0, Epsilon: 0.1423

Episode Progress:  13%|█▎        | 130/1000 [2:05:20<23:02:59, 95.38s/it]

Previous episode: Episode: 130/1000, Total Reward: 8.0, Epsilon: 0.1402

Episode Progress:  13%|█▎        | 131/1000 [2:06:40<21:55:15, 90.81s/it]

Previous episode: Episode: 131/1000, Total Reward: 8.0, Epsilon: 0.1381

Episode Progress:  13%|█▎        | 132/1000 [2:07:58<20:54:14, 86.70s/it]

Previous episode: Episode: 132/1000, Total Reward: 6.0, Epsilon: 0.1360

Episode Progress:  13%|█▎        | 133/1000 [2:09:32<21:24:53, 88.92s/it]

Previous episode: Episode: 133/1000, Total Reward: 9.0, Epsilon: 0.1340

Episode Progress:  13%|█▎        | 134/1000 [2:11:00<21:22:36, 88.86s/it]

Previous episode: Episode: 134/1000, Total Reward: 9.0, Epsilon: 0.1320

Episode Progress:  14%|█▎        | 135/1000 [2:12:18<20:33:19, 85.55s/it]

Previous episode: Episode: 135/1000, Total Reward: 7.0, Epsilon: 0.1300

Episode Progress:  14%|█▎        | 136/1000 [2:14:18<22:57:53, 95.69s/it]

Previous episode: Episode: 136/1000, Total Reward: 7.0, Epsilon: 0.1280

Episode Progress:  14%|█▎        | 137/1000 [2:15:57<23:14:32, 96.96s/it]

Previous episode: Episode: 137/1000, Total Reward: 8.0, Epsilon: 0.1261

Episode Progress:  14%|█▍        | 138/1000 [2:17:33<23:08:46, 96.67s/it]

Previous episode: Episode: 138/1000, Total Reward: 6.0, Epsilon: 0.1242

Episode Progress:  14%|█▍        | 139/1000 [2:18:57<22:10:59, 92.75s/it]

Previous episode: Episode: 139/1000, Total Reward: 8.0, Epsilon: 0.1224

Episode Progress:  14%|█▍        | 140/1000 [2:20:37<22:40:24, 94.91s/it]

Previous episode: Episode: 140/1000, Total Reward: 5.0, Epsilon: 0.1205

Episode Progress:  14%|█▍        | 141/1000 [2:22:12<22:40:56, 95.06s/it]

Previous episode: Episode: 141/1000, Total Reward: 7.0, Epsilon: 0.1187

Episode Progress:  14%|█▍        | 142/1000 [2:23:38<21:58:44, 92.22s/it]

Previous episode: Episode: 142/1000, Total Reward: 7.0, Epsilon: 0.1169

Episode Progress:  14%|█▍        | 143/1000 [2:25:06<21:38:39, 90.92s/it]

Previous episode: Episode: 143/1000, Total Reward: 6.0, Epsilon: 0.1152

Episode Progress:  14%|█▍        | 144/1000 [2:27:11<24:01:44, 101.06s/it]

Previous episode: Episode: 144/1000, Total Reward: 8.0, Epsilon: 0.1135

Episode Progress:  14%|█▍        | 145/1000 [2:28:59<24:31:44, 103.28s/it]

Previous episode: Episode: 145/1000, Total Reward: 8.0, Epsilon: 0.1118

Episode Progress:  15%|█▍        | 146/1000 [2:30:32<23:44:49, 100.11s/it]

Previous episode: Episode: 146/1000, Total Reward: 7.0, Epsilon: 0.1101

Episode Progress:  15%|█▍        | 147/1000 [2:32:50<26:23:37, 111.39s/it]

Previous episode: Episode: 147/1000, Total Reward: 5.0, Epsilon: 0.1084

Episode Progress:  15%|█▍        | 148/1000 [2:35:18<28:59:17, 122.49s/it]

Previous episode: Episode: 148/1000, Total Reward: 8.0, Epsilon: 0.1068

Episode Progress:  15%|█▍        | 148/1000 [2:36:07<14:58:45, 63.29s/it] 


KeyboardInterrupt: 

In [15]:
# Persist the trained weights. The target directory is created first because saving
# fails when `weights/` does not exist yet (e.g. on a fresh checkout).
weights_path = Path('weights/brennan.weights.h5') # Change this to 'weights/[yourname].weights.h5'
weights_path.parent.mkdir(parents=True, exist_ok=True)
model.save_weights(str(weights_path))