Dependencies

In [None]:
!pip install gymnasium[box2d]
!pip install swig
!pip install gymnasium[box2d] imageio

Collecting box2d==2.3.10 (from gymnasium[box2d])
  Downloading Box2D-2.3.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (573 bytes)
Collecting swig==4.* (from gymnasium[box2d])
  Downloading swig-4.4.1-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (3.5 kB)
Downloading Box2D-2.3.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading swig-4.4.1-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m103.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: swig, box2d
Successfully installed box2d-2.3.10 swig-4.4.1


Exploring environment

In [None]:
import gymnasium as gym

# Create the LunarLander-v3 environment (no render_mode is requested here) so
# its spec and spaces can be inspected in the cells below.
env = gym.make("LunarLander-v3")

In [None]:

# Display the environment's registration spec (id, reward threshold, step limit, ...).
env.spec

EnvSpec(id='LunarLander-v3', entry_point='gymnasium.envs.box2d.lunar_lander:LunarLander', reward_threshold=200, nondeterministic=False, max_episode_steps=1000, order_enforce=True, disable_env_checker=False, kwargs={}, namespace=None, name='LunarLander', version=3, additional_wrappers=(), vector_entry_point=None)

In [None]:
# Per-component lower bounds of the observation vector.
print(env.observation_space.low)

[ -2.5        -2.5       -10.        -10.         -6.2831855 -10.
  -0.         -0.       ]


In [None]:
# Full observation space (bounds, shape, dtype) ...
print(env.observation_space)
# ... and its dimensionality, which is later used as the Q-network input size.
print(env.observation_space.shape[0])

Box([ -2.5        -2.5       -10.        -10.         -6.2831855 -10.
  -0.         -0.       ], [ 2.5        2.5       10.        10.         6.2831855 10.
  1.         1.       ], (8,), float32)
8


In [None]:
# Discrete action space ...
print(env.action_space)
# ... and the number of actions, which is later used as the Q-network output size.
print(env.action_space.n)

Discrete(3)
3


In [None]:
# Draw one random action and one random observation from the spaces.
# The sampled observation is drawn from the space itself, not produced by
# stepping the environment.
print(env.action_space.sample())
print(env.observation_space.sample())

3
[ 1.3810347   0.6009971  -1.8855329  -7.364891   -1.457443    4.1657453
  0.8680399   0.07304133]


In [None]:
# Start a new episode; reset() returns the initial observation and an info dict.
obs,info = env.reset()

In [None]:
# Display the info dict returned by the reset above.
info

{}

In [None]:
# Baseline: play a single episode with uniformly random actions, logging every
# transition, and report the undiscounted return at the end.
obs, info = env.reset()
total_reward = 0
for step in range(1000):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    print(obs, reward, terminated, truncated, step)
    # Stop as soon as the episode ends, either by termination or by truncation.
    if terminated or truncated:
        break
print(total_reward)

[ 0.01496038  1.3951428   0.75660574 -0.36347583 -0.01714099 -0.16962102
  0.          0.        ] -1.021309789015703 False False 0
[ 0.02234554  1.3874038   0.7476031  -0.34408885 -0.02613104 -0.17981772
  0.          0.        ] 1.2046999860138612 False False 1
[ 0.02988405  1.3799744   0.76227057 -0.3303698  -0.03446109 -0.16661623
  0.          0.        ] -1.1840282487433342 False False 2
[ 0.03742275  1.3719456   0.76229453 -0.3570464  -0.0427905  -0.16660377
  0.          0.        ] -1.1473324621094037 False False 3
[ 0.04505224  1.363301    0.77367574 -0.38453802 -0.05340695 -0.21234863
  0.          0.        ] -2.4706446441021215 False False 4
[ 0.05260887  1.3540562   0.764525   -0.41121688 -0.06218233 -0.17552371
  0.          0.        ] -0.42383831431436536 False False 5
[ 0.06016579  1.3442124   0.7645488  -0.43789157 -0.07095759 -0.17552157
  0.          0.        ] -1.222530915300382 False False 6
[ 0.06786213  1.3349495   0.7780915  -0.4121105  -0.07933521 -0.1675678

Training DQN

In [None]:
import gymnasium as gym
import numpy as np
import random
import copy
import imageio

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from collections import deque
import os


# Output locations: evaluation videos go to ./results, checkpoints to
# ./results/models. Creating the nested directory also creates ./results.
MODEL_DIR = "./results/models"
os.makedirs(MODEL_DIR, exist_ok=True)
MODEL_PATH = os.path.join(MODEL_DIR, "dqn_lunarlander.pt")

# Seed every source of randomness used by the training cell so a
# Restart & Run All reproduces the same run: Python's `random` (epsilon-greedy
# draws and replay sampling), NumPy, and torch (network initialisation).
# NOTE: some CUDA kernels may still be non-deterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# rgb_array rendering lets env.render() return frames for the evaluation videos.
env = gym.make("LunarLander-v3", render_mode="rgb_array")
# Seeding once is enough: later env.reset() calls without a seed keep drawing
# from the generator seeded here. The action space has its own sampler and is
# seeded separately.
env.reset(seed=SEED)
env.action_space.seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# Q Network
class Q_Network(nn.Module):
    """Three-layer fully connected network producing one value per output unit.

    Args:
        input_size: Number of input features.
        hidden_size: Width of both hidden layers.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(Q_Network, self).__init__()
        # Layers are created in fc1 -> fc2 -> fc3 order; the attribute names
        # define the state_dict keys used by saved checkpoints.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU activations, then the linear output layer fc3."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

# Replay Buffer
class ReplayBuffer:
    """Fixed-capacity FIFO store of (state, action, reward, next_state, done) transitions.

    Once `capacity` transitions are stored, adding a new one discards the oldest.

    Args:
        capacity: Maximum number of transitions kept.
        device: torch device on which sampled batches are created.
    """

    def __init__(self, capacity, device):
        self.buffer = deque(maxlen=capacity)
        self.device = device

    def add(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest when the buffer is full."""
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        """Draw `batch_size` distinct transitions uniformly at random.

        Returns:
            Tuple (states, actions, rewards, next_states, dones) of tensors on
            `self.device`. `actions` is int64; the others are float32. Each
            tensor has `batch_size` as its leading dimension.

        Raises:
            ValueError: If `batch_size` exceeds the number of stored
                transitions (raised by `random.sample`).
        """
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        def to_tensor(values, dtype):
            # Stack into one ndarray first: handing torch.tensor a tuple of
            # ndarrays converts element by element, which is very slow and
            # triggers a UserWarning on every call.
            return torch.tensor(np.asarray(values), dtype=dtype, device=self.device)

        return (
            to_tensor(states, torch.float32),
            to_tensor(actions, torch.long),
            to_tensor(rewards, torch.float32),
            to_tensor(next_states, torch.float32),
            to_tensor(dones, torch.float32),
        )

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)


# Evaluate function
def evaluate_and_record(env, q_net, device, max_steps=1000):
    """Run one greedy (argmax-Q) episode and capture a rendered frame per step.

    Args:
        env: Environment exposing reset(), step(action) and render(); render()
            is expected to return an image array (e.g. render_mode="rgb_array").
        q_net: Network mapping a batch of states to per-action values.
        device: torch device on which the state tensor is created.
        max_steps: Upper bound on the number of steps taken.

    Returns:
        Tuple (total_reward, frames): the summed reward of the episode and an
        np.ndarray stacking the frames rendered before each step.
    """
    # Remember the caller's mode so it can be restored afterwards, instead of
    # unconditionally forcing the network back into train mode.
    was_training = q_net.training
    q_net.eval()

    frames = []
    total_reward = 0

    try:
        state, _ = env.reset()

        for _ in range(max_steps):
            # The frame is captured before acting, so it shows the state the
            # action is chosen from.
            frames.append(env.render())

            with torch.no_grad():
                q_vals = q_net(
                    torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
                )
                action = torch.argmax(q_vals).item()

            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward

            if terminated or truncated:
                break
    finally:
        # Restore the original mode even if the rollout raised.
        q_net.train(was_training)

    return total_reward, np.array(frames)


# Online network: input width is the observation dimension, output width is
# the number of discrete actions, with 64 units in each hidden layer.
q_net = Q_Network(
    env.observation_space.shape[0],
    64,
    env.action_space.n,
).to(device)

# Target network: starts as an exact copy of the online network and is kept in
# eval mode; the training loop refreshes its weights periodically.
target_q_net = copy.deepcopy(q_net).to(device).eval()

optimizer = optim.Adam(params=q_net.parameters(), lr=0.001)
loss_fn = nn.SmoothL1Loss()

buffer = ReplayBuffer(50_000, device)


# Hyper parameters
num_episodes, batch_size, gamma = 200, 32, 0.98

# Exploration schedule: epsilon is multiplied by epsilon_decay after every
# episode and never drops below epsilon_min.
epsilon, epsilon_min, epsilon_decay = 1.0, 0.05, 0.995

# Both periods are measured in episodes.
target_update_every, eval_every = 20, 100

# Per-episode training history and per-evaluation results.
reward_list, epsilon_list = [], []
eval_rewards, eval_videos = [], []


# Train loop
for eps in range(num_episodes):
    state, _ = env.reset()
    total_reward = 0

    for step in range(1000):
        # Epsilon-greedy exploration: a random action with probability epsilon,
        # otherwise the action with the highest predicted Q-value.
        if random.random() < epsilon:
            action = env.action_space.sample()
        else:
            with torch.no_grad():
                q_vals = q_net(
                    torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
                )
                action = torch.argmax(q_vals).item()

        next_state, reward, terminated, truncated, _ = env.step(action)

        # Store only true termination as the done flag. A time-limit truncation
        # is not a terminal state, so its target must still bootstrap from
        # next_state; storing `terminated or truncated` would zero that term.
        buffer.add(state, action, reward, next_state, terminated)
        state = next_state
        total_reward += reward

        # One gradient step per environment step once a full batch is available.
        if len(buffer) >= batch_size:
            states, actions, rewards, next_states, dones = buffer.sample(batch_size)

            # TD target: r + gamma * max_a' Q_target(s', a'), with the bootstrap
            # term masked out for terminal transitions.
            with torch.no_grad():
                next_q = target_q_net(next_states).max(dim=1).values
                target_q = rewards + gamma * next_q * (1 - dones)

            # Q-value of the action that was actually taken in each transition.
            current_q = q_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
            loss = loss_fn(current_q, target_q)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if terminated or truncated:
            break

    # Target network update (hard copy every `target_update_every` episodes)
    if eps % target_update_every == 0:
        target_q_net.load_state_dict(q_net.state_dict())

    # Epsilon decay
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    reward_list.append(total_reward)
    epsilon_list.append(epsilon)

    print(f"Episode {eps:4d} | Reward: {total_reward:7.2f} | Epsilon: {epsilon:.3f}")

    # Evaluation: every `eval_every` episodes, and also after the last episode.
    # The last index (num_episodes - 1) is never a multiple of eval_every, so
    # without this the final weights would never be evaluated or checkpointed.
    is_last_episode = eps == num_episodes - 1
    if eps % eval_every == 0 or is_last_episode:
        eval_reward, frames = evaluate_and_record(env, q_net, device)
        eval_rewards.append(eval_reward)
        # NOTE(review): this keeps every evaluation's raw frames in memory for
        # the whole run; drop this list if the saved videos are sufficient.
        eval_videos.append(frames)

        print(f"[EVAL] Episode {eps} | Eval reward: {eval_reward:.2f}")

        video_path = f"./results/lander_eval_ep{eps}.mp4"
        imageio.mimsave(video_path, frames, fps=30)
        print(f"[VIDEO SAVED] {video_path}")

        # The checkpoint at MODEL_PATH is overwritten on every evaluation.
        torch.save(
            {
                "episode": eps,
                "model_state_dict": q_net.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "epsilon": epsilon,
                "eval_reward": eval_reward,
            },
            MODEL_PATH,
        )

env.close()


Device: cuda


  torch.tensor(states, dtype=torch.float32, device=self.device),


Episode    0 | Reward: -241.18 | Epsilon: 0.995
[EVAL] Episode 0 | Eval reward: -669.25




[VIDEO SAVED] ./results/lander_eval_ep0.mp4
Episode    1 | Reward:  -79.37 | Epsilon: 0.990
Episode    2 | Reward: -203.36 | Epsilon: 0.985
Episode    3 | Reward: -100.23 | Epsilon: 0.980
Episode    4 | Reward: -108.37 | Epsilon: 0.975
Episode    5 | Reward: -268.84 | Epsilon: 0.970
Episode    6 | Reward: -249.35 | Epsilon: 0.966
Episode    7 | Reward: -108.52 | Epsilon: 0.961
Episode    8 | Reward: -416.53 | Epsilon: 0.956
Episode    9 | Reward: -160.31 | Epsilon: 0.951
Episode   10 | Reward: -105.42 | Epsilon: 0.946
Episode   11 | Reward: -214.12 | Epsilon: 0.942
Episode   12 | Reward: -101.53 | Epsilon: 0.937
Episode   13 | Reward: -113.20 | Epsilon: 0.932
Episode   14 | Reward: -205.45 | Epsilon: 0.928
Episode   15 | Reward: -205.11 | Epsilon: 0.923
Episode   16 | Reward: -172.56 | Epsilon: 0.918
Episode   17 | Reward: -304.20 | Epsilon: 0.914
Episode   18 | Reward: -120.18 | Epsilon: 0.909
Episode   19 | Reward: -101.73 | Epsilon: 0.905
Episode   20 | Reward: -118.28 | Epsilon: 0.



[EVAL] Episode 100 | Eval reward: -208.30
[VIDEO SAVED] ./results/lander_eval_ep100.mp4
Episode  101 | Reward:  -25.66 | Epsilon: 0.600
Episode  102 | Reward: -161.24 | Epsilon: 0.597
Episode  103 | Reward:  -22.47 | Epsilon: 0.594
Episode  104 | Reward:  -38.06 | Epsilon: 0.591
Episode  105 | Reward:  -86.07 | Epsilon: 0.588
Episode  106 | Reward:  -69.64 | Epsilon: 0.585
Episode  107 | Reward: -101.36 | Epsilon: 0.582
Episode  108 | Reward:  -11.95 | Epsilon: 0.579
Episode  109 | Reward: -105.43 | Epsilon: 0.576
Episode  110 | Reward:   33.06 | Epsilon: 0.573
Episode  111 | Reward:  -54.53 | Epsilon: 0.570
Episode  112 | Reward: -104.86 | Epsilon: 0.568
Episode  113 | Reward: -113.93 | Epsilon: 0.565
Episode  114 | Reward:   60.38 | Epsilon: 0.562
Episode  115 | Reward:  -46.40 | Epsilon: 0.559
Episode  116 | Reward: -210.62 | Epsilon: 0.556
Episode  117 | Reward:  -28.70 | Epsilon: 0.554
Episode  118 | Reward:  -53.29 | Epsilon: 0.551
Episode  119 | Reward:  -53.94 | Epsilon: 0.548




[EVAL] Episode 200 | Eval reward: -202.19
[VIDEO SAVED] ./results/lander_eval_ep200.mp4
Episode  201 | Reward: -128.37 | Epsilon: 0.363
Episode  202 | Reward:   -7.52 | Epsilon: 0.361
Episode  203 | Reward:   -4.51 | Epsilon: 0.360
Episode  204 | Reward: -115.46 | Epsilon: 0.358
Episode  205 | Reward:  -30.02 | Epsilon: 0.356
Episode  206 | Reward:  -85.42 | Epsilon: 0.354
Episode  207 | Reward: -155.59 | Epsilon: 0.353
Episode  208 | Reward:  -11.06 | Epsilon: 0.351
Episode  209 | Reward:   43.03 | Epsilon: 0.349
Episode  210 | Reward:  -34.70 | Epsilon: 0.347
Episode  211 | Reward:   54.31 | Epsilon: 0.346
Episode  212 | Reward: -136.73 | Epsilon: 0.344
Episode  213 | Reward:   34.71 | Epsilon: 0.342
Episode  214 | Reward:  -51.30 | Epsilon: 0.340
Episode  215 | Reward:   59.02 | Epsilon: 0.339
Episode  216 | Reward: -165.15 | Epsilon: 0.337
Episode  217 | Reward:  -77.06 | Epsilon: 0.335
Episode  218 | Reward: -113.10 | Epsilon: 0.334
Episode  219 | Reward:  -35.63 | Epsilon: 0.332




[EVAL] Episode 300 | Eval reward: -119.26
[VIDEO SAVED] ./results/lander_eval_ep300.mp4
Episode  301 | Reward:  134.34 | Epsilon: 0.220
Episode  302 | Reward: -102.46 | Epsilon: 0.219
Episode  303 | Reward:   22.63 | Epsilon: 0.218
Episode  304 | Reward:  -77.90 | Epsilon: 0.217
Episode  305 | Reward: -214.16 | Epsilon: 0.216
Episode  306 | Reward: -113.72 | Epsilon: 0.215
Episode  307 | Reward: -129.09 | Epsilon: 0.214
Episode  308 | Reward:  -76.62 | Epsilon: 0.212
Episode  309 | Reward: -172.66 | Epsilon: 0.211
Episode  310 | Reward:  -90.53 | Epsilon: 0.210
Episode  311 | Reward:  -79.48 | Epsilon: 0.209
Episode  312 | Reward:  -67.59 | Epsilon: 0.208
Episode  313 | Reward:    3.38 | Epsilon: 0.207
Episode  314 | Reward: -110.35 | Epsilon: 0.206
Episode  315 | Reward: -134.82 | Epsilon: 0.205
Episode  316 | Reward: -122.11 | Epsilon: 0.204
Episode  317 | Reward: -142.98 | Epsilon: 0.203
Episode  318 | Reward: -142.05 | Epsilon: 0.202
Episode  319 | Reward: -130.03 | Epsilon: 0.201




[EVAL] Episode 400 | Eval reward: 171.82
[VIDEO SAVED] ./results/lander_eval_ep400.mp4
Episode  401 | Reward:  -80.11 | Epsilon: 0.133
Episode  402 | Reward:   57.87 | Epsilon: 0.133
Episode  403 | Reward: -108.41 | Epsilon: 0.132
Episode  404 | Reward:  124.32 | Epsilon: 0.131
Episode  405 | Reward: -172.12 | Epsilon: 0.131
Episode  406 | Reward: -102.49 | Epsilon: 0.130
Episode  407 | Reward:  -98.78 | Epsilon: 0.129
Episode  408 | Reward:    7.84 | Epsilon: 0.129
Episode  409 | Reward:    3.28 | Epsilon: 0.128
Episode  410 | Reward: -127.65 | Epsilon: 0.127
Episode  411 | Reward:  -90.50 | Epsilon: 0.127
Episode  412 | Reward:   96.22 | Epsilon: 0.126
Episode  413 | Reward: -122.33 | Epsilon: 0.126
Episode  414 | Reward:  174.85 | Epsilon: 0.125
Episode  415 | Reward: -134.23 | Epsilon: 0.124
Episode  416 | Reward: -119.86 | Epsilon: 0.124
Episode  417 | Reward:  270.11 | Epsilon: 0.123
Episode  418 | Reward: -147.00 | Epsilon: 0.122
Episode  419 | Reward: -112.31 | Epsilon: 0.122
E



[EVAL] Episode 500 | Eval reward: -83.37
[VIDEO SAVED] ./results/lander_eval_ep500.mp4
Episode  501 | Reward:  176.17 | Epsilon: 0.081
Episode  502 | Reward:  -46.33 | Epsilon: 0.080
Episode  503 | Reward: -108.77 | Epsilon: 0.080
Episode  504 | Reward:  -80.34 | Epsilon: 0.080
Episode  505 | Reward:  141.81 | Epsilon: 0.079
Episode  506 | Reward:  189.24 | Epsilon: 0.079
Episode  507 | Reward:  -19.06 | Epsilon: 0.078
Episode  508 | Reward: -158.34 | Epsilon: 0.078
Episode  509 | Reward:  171.20 | Epsilon: 0.078
Episode  510 | Reward:  -71.63 | Epsilon: 0.077
Episode  511 | Reward: -119.58 | Epsilon: 0.077
Episode  512 | Reward:  173.02 | Epsilon: 0.076
Episode  513 | Reward: -115.03 | Epsilon: 0.076
Episode  514 | Reward:  -64.39 | Epsilon: 0.076
Episode  515 | Reward: -123.29 | Epsilon: 0.075
Episode  516 | Reward:  -95.49 | Epsilon: 0.075
Episode  517 | Reward: -149.41 | Epsilon: 0.075
Episode  518 | Reward:  166.99 | Epsilon: 0.074
Episode  519 | Reward:  -60.20 | Epsilon: 0.074
E



[EVAL] Episode 600 | Eval reward: 143.25
[VIDEO SAVED] ./results/lander_eval_ep600.mp4
Episode  601 | Reward:  -86.39 | Epsilon: 0.050
Episode  602 | Reward:   -7.40 | Epsilon: 0.050
Episode  603 | Reward: -126.81 | Epsilon: 0.050
Episode  604 | Reward:   99.49 | Epsilon: 0.050
Episode  605 | Reward:  207.99 | Epsilon: 0.050
Episode  606 | Reward:  191.70 | Epsilon: 0.050
Episode  607 | Reward:  167.71 | Epsilon: 0.050
Episode  608 | Reward: -123.45 | Epsilon: 0.050
Episode  609 | Reward:  -75.87 | Epsilon: 0.050
Episode  610 | Reward:  157.22 | Epsilon: 0.050
Episode  611 | Reward:  235.49 | Epsilon: 0.050
Episode  612 | Reward:  204.99 | Epsilon: 0.050
Episode  613 | Reward:  267.48 | Epsilon: 0.050
Episode  614 | Reward:  203.16 | Epsilon: 0.050
Episode  615 | Reward:  219.82 | Epsilon: 0.050
Episode  616 | Reward:  253.50 | Epsilon: 0.050
Episode  617 | Reward:  149.80 | Epsilon: 0.050
Episode  618 | Reward:   45.04 | Epsilon: 0.050
Episode  619 | Reward:  177.17 | Epsilon: 0.050
E



[EVAL] Episode 700 | Eval reward: 225.85
[VIDEO SAVED] ./results/lander_eval_ep700.mp4
Episode  701 | Reward: -110.43 | Epsilon: 0.050
Episode  702 | Reward:  137.15 | Epsilon: 0.050
Episode  703 | Reward:  189.19 | Epsilon: 0.050
Episode  704 | Reward:  146.64 | Epsilon: 0.050
Episode  705 | Reward:  219.74 | Epsilon: 0.050
Episode  706 | Reward:  169.69 | Epsilon: 0.050
Episode  707 | Reward:  204.65 | Epsilon: 0.050
Episode  708 | Reward:  135.22 | Epsilon: 0.050
Episode  709 | Reward:  -93.87 | Epsilon: 0.050
Episode  710 | Reward:  202.83 | Epsilon: 0.050
Episode  711 | Reward:   79.55 | Epsilon: 0.050
Episode  712 | Reward:   79.97 | Epsilon: 0.050
Episode  713 | Reward:   66.98 | Epsilon: 0.050
Episode  714 | Reward:  175.82 | Epsilon: 0.050
Episode  715 | Reward:  231.42 | Epsilon: 0.050
Episode  716 | Reward:  -61.35 | Epsilon: 0.050
Episode  717 | Reward:  171.69 | Epsilon: 0.050
Episode  718 | Reward:  162.99 | Epsilon: 0.050
Episode  719 | Reward:  124.35 | Epsilon: 0.050
E



[EVAL] Episode 800 | Eval reward: 13.36
[VIDEO SAVED] ./results/lander_eval_ep800.mp4
Episode  801 | Reward:  140.36 | Epsilon: 0.050
Episode  802 | Reward:  184.96 | Epsilon: 0.050
Episode  803 | Reward:  188.68 | Epsilon: 0.050
Episode  804 | Reward:  266.38 | Epsilon: 0.050
Episode  805 | Reward:  206.91 | Epsilon: 0.050
Episode  806 | Reward:  240.23 | Epsilon: 0.050
Episode  807 | Reward:  126.65 | Epsilon: 0.050
Episode  808 | Reward:  219.54 | Epsilon: 0.050
Episode  809 | Reward:  211.66 | Epsilon: 0.050
Episode  810 | Reward: -199.25 | Epsilon: 0.050
Episode  811 | Reward:   -7.43 | Epsilon: 0.050
Episode  812 | Reward:  161.16 | Epsilon: 0.050
Episode  813 | Reward:  121.71 | Epsilon: 0.050
Episode  814 | Reward:    1.88 | Epsilon: 0.050
Episode  815 | Reward:  182.20 | Epsilon: 0.050
Episode  816 | Reward:  196.86 | Epsilon: 0.050
Episode  817 | Reward:  184.06 | Epsilon: 0.050
Episode  818 | Reward:  190.54 | Epsilon: 0.050
Episode  819 | Reward:  -96.25 | Epsilon: 0.050
Ep



[EVAL] Episode 900 | Eval reward: 133.54
[VIDEO SAVED] ./results/lander_eval_ep900.mp4
Episode  901 | Reward:  282.54 | Epsilon: 0.050
Episode  902 | Reward:  300.66 | Epsilon: 0.050
Episode  903 | Reward:  148.45 | Epsilon: 0.050
Episode  904 | Reward:  132.23 | Epsilon: 0.050
Episode  905 | Reward:  145.60 | Epsilon: 0.050
Episode  906 | Reward:  146.70 | Epsilon: 0.050
Episode  907 | Reward:  146.03 | Epsilon: 0.050
Episode  908 | Reward:  200.35 | Epsilon: 0.050
Episode  909 | Reward:  141.44 | Epsilon: 0.050
Episode  910 | Reward:   68.33 | Epsilon: 0.050
Episode  911 | Reward:  229.59 | Epsilon: 0.050
Episode  912 | Reward:  140.35 | Epsilon: 0.050
Episode  913 | Reward:  187.45 | Epsilon: 0.050
Episode  914 | Reward:   28.70 | Epsilon: 0.050
Episode  915 | Reward:   81.07 | Epsilon: 0.050
Episode  916 | Reward:  184.39 | Epsilon: 0.050
Episode  917 | Reward:  203.62 | Epsilon: 0.050
Episode  918 | Reward:  189.99 | Epsilon: 0.050
Episode  919 | Reward:  172.15 | Epsilon: 0.050
E



[EVAL] Episode 1000 | Eval reward: -3.52
[VIDEO SAVED] ./results/lander_eval_ep1000.mp4
Episode 1001 | Reward:  151.46 | Epsilon: 0.050
Episode 1002 | Reward:  221.81 | Epsilon: 0.050
Episode 1003 | Reward:  194.68 | Epsilon: 0.050
Episode 1004 | Reward:  202.80 | Epsilon: 0.050
Episode 1005 | Reward:  202.09 | Epsilon: 0.050
Episode 1006 | Reward:  -29.99 | Epsilon: 0.050
Episode 1007 | Reward:   -7.36 | Epsilon: 0.050
Episode 1008 | Reward:  226.61 | Epsilon: 0.050
Episode 1009 | Reward:  114.55 | Epsilon: 0.050
Episode 1010 | Reward:  211.70 | Epsilon: 0.050
Episode 1011 | Reward:  225.00 | Epsilon: 0.050
Episode 1012 | Reward:  169.00 | Epsilon: 0.050
Episode 1013 | Reward:  171.00 | Epsilon: 0.050
Episode 1014 | Reward:  158.31 | Epsilon: 0.050
Episode 1015 | Reward:   65.19 | Epsilon: 0.050
Episode 1016 | Reward:   80.78 | Epsilon: 0.050
Episode 1017 | Reward:  194.60 | Epsilon: 0.050
Episode 1018 | Reward:  192.47 | Epsilon: 0.050
Episode 1019 | Reward:  246.24 | Epsilon: 0.050




[EVAL] Episode 1100 | Eval reward: 265.26
[VIDEO SAVED] ./results/lander_eval_ep1100.mp4
Episode 1101 | Reward:  189.30 | Epsilon: 0.050
Episode 1102 | Reward:   -8.03 | Epsilon: 0.050
Episode 1103 | Reward:  173.43 | Epsilon: 0.050
Episode 1104 | Reward:  212.39 | Epsilon: 0.050
Episode 1105 | Reward:  165.38 | Epsilon: 0.050
Episode 1106 | Reward:  125.99 | Epsilon: 0.050
Episode 1107 | Reward:  -26.28 | Epsilon: 0.050
Episode 1108 | Reward:   21.69 | Epsilon: 0.050
Episode 1109 | Reward:  193.19 | Epsilon: 0.050
Episode 1110 | Reward:  145.53 | Epsilon: 0.050
Episode 1111 | Reward:  202.91 | Epsilon: 0.050
Episode 1112 | Reward:  212.44 | Epsilon: 0.050
Episode 1113 | Reward:  221.85 | Epsilon: 0.050
Episode 1114 | Reward:  172.40 | Epsilon: 0.050
Episode 1115 | Reward:  140.97 | Epsilon: 0.050
Episode 1116 | Reward:  179.63 | Epsilon: 0.050
Episode 1117 | Reward:   21.52 | Epsilon: 0.050
Episode 1118 | Reward:  185.88 | Epsilon: 0.050
Episode 1119 | Reward:  -84.22 | Epsilon: 0.050



[EVAL] Episode 1200 | Eval reward: 235.12
[VIDEO SAVED] ./results/lander_eval_ep1200.mp4
Episode 1201 | Reward:  154.03 | Epsilon: 0.050
Episode 1202 | Reward:  145.96 | Epsilon: 0.050
Episode 1203 | Reward:  217.09 | Epsilon: 0.050
Episode 1204 | Reward:  283.14 | Epsilon: 0.050
Episode 1205 | Reward:  235.35 | Epsilon: 0.050
Episode 1206 | Reward:  269.71 | Epsilon: 0.050
Episode 1207 | Reward:   74.77 | Epsilon: 0.050
Episode 1208 | Reward:  183.34 | Epsilon: 0.050
Episode 1209 | Reward:  255.93 | Epsilon: 0.050
Episode 1210 | Reward:   81.08 | Epsilon: 0.050
Episode 1211 | Reward:  282.33 | Epsilon: 0.050
Episode 1212 | Reward:  192.68 | Epsilon: 0.050
Episode 1213 | Reward:  195.05 | Epsilon: 0.050
Episode 1214 | Reward:   69.25 | Epsilon: 0.050
Episode 1215 | Reward:  219.57 | Epsilon: 0.050
Episode 1216 | Reward:   44.55 | Epsilon: 0.050
Episode 1217 | Reward:  297.49 | Epsilon: 0.050
Episode 1218 | Reward:  294.15 | Epsilon: 0.050
Episode 1219 | Reward:  261.68 | Epsilon: 0.050



[EVAL] Episode 1300 | Eval reward: 117.38
[VIDEO SAVED] ./results/lander_eval_ep1300.mp4
Episode 1301 | Reward:  256.41 | Epsilon: 0.050
Episode 1302 | Reward:  154.80 | Epsilon: 0.050
Episode 1303 | Reward:  260.97 | Epsilon: 0.050
Episode 1304 | Reward:  218.76 | Epsilon: 0.050
Episode 1305 | Reward:  201.08 | Epsilon: 0.050
Episode 1306 | Reward:  246.92 | Epsilon: 0.050
Episode 1307 | Reward:  228.06 | Epsilon: 0.050
Episode 1308 | Reward:  247.62 | Epsilon: 0.050
Episode 1309 | Reward:  241.18 | Epsilon: 0.050
Episode 1310 | Reward:  -77.36 | Epsilon: 0.050
Episode 1311 | Reward:  269.37 | Epsilon: 0.050
Episode 1312 | Reward:  242.05 | Epsilon: 0.050
Episode 1313 | Reward:  189.78 | Epsilon: 0.050
Episode 1314 | Reward:   68.51 | Epsilon: 0.050
Episode 1315 | Reward:   49.41 | Epsilon: 0.050
Episode 1316 | Reward:  261.07 | Epsilon: 0.050
Episode 1317 | Reward:  220.73 | Epsilon: 0.050
Episode 1318 | Reward:  215.58 | Epsilon: 0.050
Episode 1319 | Reward:  223.57 | Epsilon: 0.050



[EVAL] Episode 1400 | Eval reward: 242.31
[VIDEO SAVED] ./results/lander_eval_ep1400.mp4
Episode 1401 | Reward:  226.81 | Epsilon: 0.050
Episode 1402 | Reward:  193.93 | Epsilon: 0.050
Episode 1403 | Reward:  217.73 | Epsilon: 0.050
Episode 1404 | Reward:  222.47 | Epsilon: 0.050
Episode 1405 | Reward:   22.13 | Epsilon: 0.050
Episode 1406 | Reward:  195.24 | Epsilon: 0.050
Episode 1407 | Reward:  223.57 | Epsilon: 0.050
Episode 1408 | Reward:  199.39 | Epsilon: 0.050
Episode 1409 | Reward:  250.74 | Epsilon: 0.050
Episode 1410 | Reward:  265.07 | Epsilon: 0.050
Episode 1411 | Reward:  272.51 | Epsilon: 0.050
Episode 1412 | Reward:  272.26 | Epsilon: 0.050
Episode 1413 | Reward:  244.12 | Epsilon: 0.050
Episode 1414 | Reward:  224.94 | Epsilon: 0.050
Episode 1415 | Reward:  242.96 | Epsilon: 0.050
Episode 1416 | Reward:  266.16 | Epsilon: 0.050
Episode 1417 | Reward:  239.05 | Epsilon: 0.050
Episode 1418 | Reward:  233.94 | Epsilon: 0.050
Episode 1419 | Reward:  218.24 | Epsilon: 0.050



[EVAL] Episode 1500 | Eval reward: 266.92
[VIDEO SAVED] ./results/lander_eval_ep1500.mp4
Episode 1501 | Reward:  232.53 | Epsilon: 0.050
Episode 1502 | Reward:  183.74 | Epsilon: 0.050
Episode 1503 | Reward:  142.73 | Epsilon: 0.050
Episode 1504 | Reward:   21.66 | Epsilon: 0.050
Episode 1505 | Reward:  274.48 | Epsilon: 0.050
Episode 1506 | Reward:  -24.00 | Epsilon: 0.050
Episode 1507 | Reward:    4.81 | Epsilon: 0.050
Episode 1508 | Reward:    5.19 | Epsilon: 0.050
Episode 1509 | Reward:  246.60 | Epsilon: 0.050
Episode 1510 | Reward:  270.60 | Epsilon: 0.050
Episode 1511 | Reward:  211.65 | Epsilon: 0.050
Episode 1512 | Reward:   14.89 | Epsilon: 0.050
Episode 1513 | Reward:   65.96 | Epsilon: 0.050
Episode 1514 | Reward:  -12.22 | Epsilon: 0.050
Episode 1515 | Reward:   13.65 | Epsilon: 0.050
Episode 1516 | Reward:  -35.51 | Epsilon: 0.050
Episode 1517 | Reward:   -3.36 | Epsilon: 0.050
Episode 1518 | Reward:  124.60 | Epsilon: 0.050
Episode 1519 | Reward:  270.42 | Epsilon: 0.050



[EVAL] Episode 1600 | Eval reward: -39.52
[VIDEO SAVED] ./results/lander_eval_ep1600.mp4
Episode 1601 | Reward:   38.15 | Epsilon: 0.050
Episode 1602 | Reward:   20.84 | Epsilon: 0.050
Episode 1603 | Reward:  -34.96 | Epsilon: 0.050
Episode 1604 | Reward:   12.32 | Epsilon: 0.050
Episode 1605 | Reward: -148.78 | Epsilon: 0.050
Episode 1606 | Reward:   31.44 | Epsilon: 0.050
Episode 1607 | Reward:  -17.07 | Epsilon: 0.050
Episode 1608 | Reward:  -70.55 | Epsilon: 0.050
Episode 1609 | Reward:  272.57 | Epsilon: 0.050
Episode 1610 | Reward:  296.28 | Epsilon: 0.050
Episode 1611 | Reward:   24.97 | Epsilon: 0.050
Episode 1612 | Reward:  219.63 | Epsilon: 0.050
Episode 1613 | Reward:   -4.95 | Epsilon: 0.050
Episode 1614 | Reward:  214.94 | Epsilon: 0.050
Episode 1615 | Reward:  217.99 | Epsilon: 0.050
Episode 1616 | Reward:  -50.33 | Epsilon: 0.050
Episode 1617 | Reward:   13.19 | Epsilon: 0.050
Episode 1618 | Reward:   35.32 | Epsilon: 0.050
Episode 1619 | Reward:   50.40 | Epsilon: 0.050



[EVAL] Episode 1700 | Eval reward: 225.59
[VIDEO SAVED] ./results/lander_eval_ep1700.mp4
Episode 1701 | Reward:  218.32 | Epsilon: 0.050
Episode 1702 | Reward:  -39.02 | Epsilon: 0.050
Episode 1703 | Reward:  -10.82 | Epsilon: 0.050
Episode 1704 | Reward: -154.11 | Epsilon: 0.050
Episode 1705 | Reward:   -7.70 | Epsilon: 0.050
Episode 1706 | Reward:  -31.38 | Epsilon: 0.050
Episode 1707 | Reward:    2.18 | Epsilon: 0.050
Episode 1708 | Reward:   16.06 | Epsilon: 0.050
Episode 1709 | Reward:   32.77 | Epsilon: 0.050
Episode 1710 | Reward:  227.30 | Epsilon: 0.050
Episode 1711 | Reward:  211.77 | Epsilon: 0.050
Episode 1712 | Reward:   35.58 | Epsilon: 0.050
Episode 1713 | Reward:   39.34 | Epsilon: 0.050
Episode 1714 | Reward:  239.97 | Epsilon: 0.050
Episode 1715 | Reward:  252.65 | Epsilon: 0.050
Episode 1716 | Reward:   27.80 | Epsilon: 0.050
Episode 1717 | Reward:  264.32 | Epsilon: 0.050
Episode 1718 | Reward:  226.10 | Epsilon: 0.050
Episode 1719 | Reward:   -9.35 | Epsilon: 0.050



[EVAL] Episode 1800 | Eval reward: -36.23
[VIDEO SAVED] ./results/lander_eval_ep1800.mp4
Episode 1801 | Reward:   39.48 | Epsilon: 0.050
Episode 1802 | Reward:  257.05 | Epsilon: 0.050
Episode 1803 | Reward:  254.15 | Epsilon: 0.050
Episode 1804 | Reward:  176.87 | Epsilon: 0.050
Episode 1805 | Reward:  -11.39 | Epsilon: 0.050
Episode 1806 | Reward:   -3.59 | Epsilon: 0.050
Episode 1807 | Reward:   10.94 | Epsilon: 0.050
Episode 1808 | Reward:  -11.37 | Epsilon: 0.050
Episode 1809 | Reward:  224.22 | Epsilon: 0.050
Episode 1810 | Reward:  223.10 | Epsilon: 0.050
Episode 1811 | Reward:  253.46 | Epsilon: 0.050
Episode 1812 | Reward:  273.93 | Epsilon: 0.050
Episode 1813 | Reward:  -52.26 | Epsilon: 0.050
Episode 1814 | Reward:   24.78 | Epsilon: 0.050
Episode 1815 | Reward:    9.84 | Epsilon: 0.050
Episode 1816 | Reward:  247.56 | Epsilon: 0.050
Episode 1817 | Reward:   -1.76 | Epsilon: 0.050
Episode 1818 | Reward:  -19.94 | Epsilon: 0.050
Episode 1819 | Reward:  -48.55 | Epsilon: 0.050



[EVAL] Episode 1900 | Eval reward: 115.66
[VIDEO SAVED] ./results/lander_eval_ep1900.mp4
Episode 1901 | Reward:  279.36 | Epsilon: 0.050
Episode 1902 | Reward:  260.59 | Epsilon: 0.050
Episode 1903 | Reward:   17.37 | Epsilon: 0.050
Episode 1904 | Reward: -135.44 | Epsilon: 0.050
Episode 1905 | Reward:  236.43 | Epsilon: 0.050
Episode 1906 | Reward:   -6.40 | Epsilon: 0.050
Episode 1907 | Reward:  231.13 | Epsilon: 0.050
Episode 1908 | Reward:  181.04 | Epsilon: 0.050
Episode 1909 | Reward:  263.50 | Epsilon: 0.050
Episode 1910 | Reward:   19.41 | Epsilon: 0.050
Episode 1911 | Reward:  -38.18 | Epsilon: 0.050
Episode 1912 | Reward:  236.75 | Epsilon: 0.050
Episode 1913 | Reward:   31.45 | Epsilon: 0.050
Episode 1914 | Reward:  252.92 | Epsilon: 0.050
Episode 1915 | Reward:   -6.06 | Epsilon: 0.050
Episode 1916 | Reward:   40.03 | Epsilon: 0.050
Episode 1917 | Reward:  221.37 | Epsilon: 0.050
Episode 1918 | Reward:  189.81 | Epsilon: 0.050
Episode 1919 | Reward:  245.41 | Epsilon: 0.050

In [None]:
# Bundle ./results into results.zip and, when running on Google Colab, offer it
# as a browser download.
import shutil

# make_archive rebuilds the archive from scratch, so files deleted from
# ./results do not linger in an older results.zip, and no external `zip`
# binary is needed.
archive_path = shutil.make_archive("results", "zip", root_dir=".", base_dir="results")

try:
    from google.colab import files
except ImportError:
    # Outside Colab there is no browser-download helper; just report the path.
    print(f"Archive written to {archive_path}")
else:
    files.download("results.zip")

  adding: results/ (stored 0%)
  adding: results/lander_eval_ep1300.mp4 (deflated 15%)
  adding: results/lander_eval_ep200.mp4 (deflated 7%)
  adding: results/lander_eval_ep600.mp4 (deflated 9%)
  adding: results/lander_eval_ep1400.mp4 (deflated 12%)
  adding: results/lander_eval_ep1700.mp4 (deflated 10%)
  adding: results/lander_eval_ep0.mp4 (deflated 6%)
  adding: results/lander_eval_ep300.mp4 (deflated 7%)
  adding: results/lander_eval_ep1100.mp4 (deflated 13%)
  adding: results/lander_eval_ep900.mp4 (deflated 11%)
  adding: results/lander_eval_ep800.mp4 (deflated 11%)
  adding: results/lander_eval_ep400.mp4 (deflated 10%)
  adding: results/lander_eval_ep700.mp4 (deflated 11%)
  adding: results/lander_eval_ep1600.mp4 (deflated 10%)
  adding: results/models/ (stored 0%)
  adding: results/models/dqn_lunarlander.pt (deflated 21%)
  adding: results/lander_eval_ep1500.mp4 (deflated 12%)
  adding: results/lander_eval_ep1800.mp4 (deflated 12%)
  adding: results/lander_eval_ep100.mp4 (defla

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>