In [None]:
%pip install -q swig
%pip install -q "gymnasium[box2d]"

import numpy as np
from itertools import product

from DQN import DQNConfig
from train import train_loop
from evaluation import evaluate_policy

import warnings
warnings.filterwarnings("ignore", message="pkg_resources is deprecated")

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Hyperparameter grid for the sweep labelled "Double": every combination of
# learning rate, hidden-layer sizes and target-update frequency gets one config.
LR_list = [5e-4, 3e-4, 1e-4]
HIDDEN_SIZES_list = [(32, 32), (64, 64)]
TARGET_UPDATE_FREQ_list = [500, 1_000]

# One DQNConfig per grid point, in itertools.product order
# (LR varies slowest, TARGET_UPDATE_FREQ fastest).
cfg_list = []

for lr, hidden_sizes, target_update_freq in product(LR_list, HIDDEN_SIZES_list, TARGET_UPDATE_FREQ_list):
    cfg = DQNConfig(
        LR=lr,
        HIDDEN_SIZES=hidden_sizes,
        TARGET_UPDATE_FREQ=target_update_freq,
    )
    # Run label: each hyperparameter is wrapped in its own balanced "(...)" group.
    # The previous format closed HIDDEN_SIZES' parenthesis at the very end of the
    # string, giving "HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(500))".
    # NOTE(review): anything stored under the old label will not match the new
    # one - confirm whether train_loop uses the label for file names.
    cfg.variant = f"Double_LR({lr})_HIDDEN_SIZES({hidden_sizes})_TARGET_UPDATE_FREQ({target_update_freq})"
    cfg_list.append(cfg)

# Sanity check: show the label of the last generated configuration.
print(cfg_list[-1].variant)

In [4]:
# Train one agent per configuration in cfg_list, then evaluate its deterministic
# policy over 100 episodes with a fixed evaluation seed and print the summary.
for i, cfg in enumerate(cfg_list):
    print(f"Running experiment {i+1}: {cfg.variant}")
    # train_loop returns a pair; only the second element (the trained agent) is
    # used in this cell.
    result, agent_double = train_loop(cfg, variant=cfg.variant, output_mode="progress_bar")
    metrics = evaluate_policy(agent_double.select_deterministic_action, num_episodes=100, seed=0)
    # Single-quoted keys inside the double-quoted f-string: reusing the same quote
    # type inside the replacement field is a SyntaxError before Python 3.12.
    print(f"mean_return: {metrics['mean_return']:6.1f} solved_rate: {metrics['solved_rate']}")

Running experiment 1: Double_LR(0.0005)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:15<00:00, 2551.25it/s]


mean_return:   63.4 solved_rate: 0.20
Running experiment 2: Double_LR(0.0005)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:44<00:00, 1757.35it/s]


mean_return:  100.3 solved_rate: 0.38
Running experiment 3: Double_LR(0.0005)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:34<00:00, 2331.00it/s]


mean_return:  101.4 solved_rate: 0.13
Running experiment 4: Double_LR(0.0005)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [03:31<00:00, 2367.96it/s]


mean_return:  157.2 solved_rate: 0.38
Running experiment 5: Double_LR(0.0003)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [27:06<00:00, 307.32it/s]  


mean_return:  246.7 solved_rate: 0.94
Running experiment 6: Double_LR(0.0003)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:14<00:00, 1964.98it/s]


mean_return:  191.1 solved_rate: 0.59
Running experiment 7: Double_LR(0.0003)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:26<00:00, 2416.38it/s]


mean_return:  122.7 solved_rate: 0.46
Running experiment 8: Double_LR(0.0003)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [03:28<00:00, 2403.23it/s]


mean_return:   90.1 solved_rate: 0.50
Running experiment 9: Double_LR(0.0001)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:13<00:00, 2583.11it/s]


mean_return:   -8.4 solved_rate: 0.17
Running experiment 10: Double_LR(0.0001)_HIDDEN_SIZES((32, 32)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [03:14<00:00, 2577.11it/s]


mean_return:   16.7 solved_rate: 0.08
Running experiment 11: Double_LR(0.0001)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:26<00:00, 2423.35it/s]


mean_return:  160.5 solved_rate: 0.43
Running experiment 12: Double_LR(0.0001)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [03:23<00:00, 2456.65it/s]


mean_return:   95.5 solved_rate: 0.07


In [5]:
# Hyperparameter grid for the sweep labelled "Dueling": every combination of
# learning rate, hidden-layer sizes and target-update frequency gets one config.
# Rebinds the grid lists and cfg_list used by the previous sweep.
LR_list = [5e-4, 3e-4, 1e-4]
HIDDEN_SIZES_list = [(64, 64), (128, 128)]
TARGET_UPDATE_FREQ_list = [500, 1_000]

# One DQNConfig per grid point, in itertools.product order
# (LR varies slowest, TARGET_UPDATE_FREQ fastest).
cfg_list = []

for lr, hidden_sizes, target_update_freq in product(LR_list, HIDDEN_SIZES_list, TARGET_UPDATE_FREQ_list):
    cfg = DQNConfig(
        TRAIN_START_SIZE = 10_000,
        LR=lr,
        HIDDEN_SIZES=hidden_sizes,
        TARGET_UPDATE_FREQ=target_update_freq,
    )
    # Run label: each hyperparameter is wrapped in its own balanced "(...)" group.
    # The previous format closed HIDDEN_SIZES' parenthesis at the very end of the
    # string, giving "HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))".
    # NOTE(review): anything stored under the old label will not match the new
    # one - confirm whether train_loop uses the label for file names.
    cfg.variant = f"Dueling_LR({lr})_HIDDEN_SIZES({hidden_sizes})_TARGET_UPDATE_FREQ({target_update_freq})"
    cfg_list.append(cfg)

# Sanity check: show the label of the last generated configuration.
print(cfg_list[-1].variant)

Dueling_LR(0.0001)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(1000))


In [6]:
# Train one agent per configuration in cfg_list, then evaluate its deterministic
# policy over 100 episodes with a fixed evaluation seed and print the summary.
for i, cfg in enumerate(cfg_list):
    print(f"Running experiment {i+1}: {cfg.variant}")
    # train_loop returns a pair; only the second element (the trained agent) is
    # used in this cell.
    # NOTE(review): the name `agent_double` is kept so any later cell that reads
    # it keeps working, although this sweep's labels say "Dueling".
    result, agent_double = train_loop(cfg, variant=cfg.variant, output_mode="progress_bar")
    metrics = evaluate_policy(agent_double.select_deterministic_action, num_episodes=100, seed=0)
    # Single-quoted keys inside the double-quoted f-string: reusing the same quote
    # type inside the replacement field is a SyntaxError before Python 3.12.
    print(f"mean_return: {metrics['mean_return']:6.1f} solved_rate: {metrics['solved_rate']}")

Running experiment 1: Dueling_LR(0.0005)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [04:00<00:00, 2080.19it/s]


mean_return: -684.8 solved_rate: 0.0
Running experiment 2: Dueling_LR(0.0005)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:05<00:00, 2038.14it/s]


mean_return:  190.3 solved_rate: 0.6
Running experiment 3: Dueling_LR(0.0005)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [28:31<00:00, 292.07it/s]  


mean_return:  174.7 solved_rate: 0.45
Running experiment 4: Dueling_LR(0.0005)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:37<00:00, 1803.51it/s]


mean_return:  274.9 solved_rate: 0.95
Running experiment 5: Dueling_LR(0.0003)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:59<00:00, 2086.13it/s]


mean_return:  267.8 solved_rate: 0.96
Running experiment 6: Dueling_LR(0.0003)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:02<00:00, 2064.64it/s]


mean_return:  131.1 solved_rate: 0.27
Running experiment 7: Dueling_LR(0.0003)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [04:32<00:00, 1833.70it/s]


mean_return:   96.2 solved_rate: 0.27
Running experiment 8: Dueling_LR(0.0003)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:30<00:00, 1850.27it/s]


mean_return:  164.1 solved_rate: 0.67
Running experiment 9: Dueling_LR(0.0001)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [03:59<00:00, 2084.88it/s]


mean_return:  106.9 solved_rate: 0.39
Running experiment 10: Dueling_LR(0.0001)_HIDDEN_SIZES((64, 64)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [14:47<00:00, 563.47it/s]  


mean_return:   30.8 solved_rate: 0.09
Running experiment 11: Dueling_LR(0.0001)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(500))


100%|██████████| 500000/500000 [04:37<00:00, 1799.15it/s]


mean_return:  244.0 solved_rate: 0.88
Running experiment 12: Dueling_LR(0.0001)_HIDDEN_SIZES((128, 128)_TARGET_UPDATE_FREQ(1000))


100%|██████████| 500000/500000 [04:35<00:00, 1814.89it/s]


mean_return:  266.9 solved_rate: 0.96
