**Imports**

In [None]:
import os
import torch
import numpy as np
import random
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torchinfo import summary
import gymnasium
import ale_py
from utils import make_env, process_state
from IPython.display import Video
from dqn_agent import DQNAgent
from dqn_cnn_model import DQN_CNN_Model
from IPython.display import clear_output
import matplotlib.pyplot as plt
from double_dqn_agent import DoubleDQNAgent


In [2]:
# Single seed shared by every random number generator used in this notebook.
SEED = 23

# Seed Python's, NumPy's and PyTorch's global RNGs so repeated runs draw the
# same random numbers.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Restrict cuDNN to deterministic algorithms.
# https://discuss.pytorch.org/t/what-is-the-differenc-between-cudnn-deterministic-and-cudnn-benchmark/38054
torch.backends.cudnn.deterministic = True

# Benchmark mode lets cuDNN time several convolution algorithms and keep the
# fastest one; that choice can change between runs, which would undo the
# reproducibility set up above, so it is switched off here.
# https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/4
torch.backends.cudnn.benchmark = False

In [3]:
# Choose the compute backend in order of preference: CUDA first, then Apple's
# Metal backend (MPS), falling back to the CPU when neither is available.
# The MPS check is only evaluated when CUDA is unavailable.
DEVICE = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

In [4]:
# Environment configuration shared by both training cells; every value below
# is forwarded to make_env().

# Gymnasium environment id, passed as make_env's first positional argument.
ENV_NAME = "ALE/Breakout-v5"

# Observation preprocessing options.
GRAY_SCALE = True        # passed as grayscale=
SCREEN_SIZE = 84         # passed as screen_size=
NUM_STACKED_FRAMES = 4   # passed as stack_frames=
SKIP_FRAMES = 4          # passed as skip_frames=

## Entrenamiento

In [5]:
# Training hyperparameters for the agents (used by both the DQN and the
# Double DQN training cells).

# --- Training budget: the three arguments handed to agent.train() ---
TOTAL_STEPS = 10_000_000      # third argument to train()
EPISODES = 10_000             # first argument to train(); the progress bar counts episodes
STEPS_PER_EPISODE = 20_000    # second argument to train()

# --- Exploration schedule (epsilon_i / epsilon_f / epsilon_anneal_steps) ---
EPSILON_INI = 1
EPSILON_MIN = 0.05
EPSILON_ANNEAL_STEPS = 1_000_000

# Passed to the agents as episode_block.
# NOTE(review): presumably the number of episodes per reporting window - confirm in the agent code.
EPISODE_BLOCK = 100

# --- Replay buffer and minibatch sizes ---
BATCH_SIZE = 32
BUFFER_SIZE = 50_000

# --- Optimisation ---
GAMMA = 0.995
LEARNING_RATE = 1e-5

In [8]:
# --- DQN training run ---
# Build the environment with the shared preprocessing settings; make_env is
# given a video folder, a file-name prefix and a record_every interval.
env = make_env(
    ENV_NAME,
    video_folder='./videos/dqn_training',
    name_prefix="breakout",
    record_every=500,
    grayscale=GRAY_SCALE,
    screen_size=SCREEN_SIZE,
    stack_frames=NUM_STACKED_FRAMES,
    skip_frames=SKIP_FRAMES,
)

# Everything after the environment exists runs inside try/finally so the
# environment is closed even when model construction or training raises
# (previously env.close() was skipped on any exception, leaking the env).
try:
    # Q-network sized from the environment's observation shape and action count.
    net = DQN_CNN_Model(env.observation_space.shape, env.action_space.n).to(DEVICE)

    dqn_agent = DQNAgent(
        env, net, process_state, BUFFER_SIZE, BATCH_SIZE, LEARNING_RATE, GAMMA,
        epsilon_i=EPSILON_INI,
        epsilon_f=EPSILON_MIN,
        epsilon_anneal_steps=EPSILON_ANNEAL_STEPS,
        episode_block=EPISODE_BLOCK,
        device=DEVICE,
    )

    # Whatever train() returns is kept in rewards_dqn for later cells.
    rewards_dqn = dqn_agent.train(EPISODES, STEPS_PER_EPISODE, TOTAL_STEPS)
finally:
    env.close()

  logger.warn(
Training:   0%|          | 0/10000 [00:00<?, ?episode/s]


AssertionError: Torch not compiled with CUDA enabled

In [7]:
# --- Double DQN training run ---
# Build the environment with the shared preprocessing settings; make_env is
# given a video folder, a file-name prefix and a record_every interval.
env = make_env(
    ENV_NAME,
    video_folder='./videos/ddqn_training',
    name_prefix="breakout",
    record_every=500,
    grayscale=GRAY_SCALE,
    screen_size=SCREEN_SIZE,
    stack_frames=NUM_STACKED_FRAMES,
    skip_frames=SKIP_FRAMES,
)

# The environment was never closed in this cell; try/finally releases it
# whether training finishes or raises, matching the DQN training cell.
try:
    # Two networks with identical architecture, both handed to the agent.
    modelo_a = DQN_CNN_Model(env.observation_space.shape, env.action_space.n).to(DEVICE)
    modelo_b = DQN_CNN_Model(env.observation_space.shape, env.action_space.n).to(DEVICE)

    ddqn_agent = DoubleDQNAgent(
        env, modelo_a, modelo_b, process_state,
        BUFFER_SIZE, BATCH_SIZE, LEARNING_RATE, GAMMA,
        epsilon_i=EPSILON_INI,
        epsilon_f=EPSILON_MIN,
        epsilon_anneal_steps=EPSILON_ANNEAL_STEPS,
        episode_block=EPISODE_BLOCK,
        device=DEVICE,
    )

    # Whatever train() returns is kept in ddqn_rewards for later cells.
    ddqn_rewards = ddqn_agent.train(EPISODES, STEPS_PER_EPISODE, TOTAL_STEPS)

    # Save a checkpoint only after training completed without error.
    ddqn_agent.save_checkpoint("DDQN.pth")
finally:
    env.close()

  logger.warn(
Training:   0%|          | 0/10000 [00:00<?, ?episode/s]


AssertionError: Torch not compiled with CUDA enabled