# Notebook de Prueba de algoritmos DQN y DDQN

In [1]:
import numpy as np
import gym
import random
import torch
import mario_utils

In [2]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')
print(f"Running on {DEVICE}")

Running on cuda:0


In [3]:
import torch.nn as nn
import torch.nn.functional as F

class DQN_Linear_Model(nn.Module):
    """Fully connected network: n_features -> 512 -> 256 -> n_actions.

    The two hidden layers use ReLU activations; the output layer is linear and
    produces one value per action.
    """

    def __init__(self,  n_features, n_actions):
        """Create the three linear layers.

        Args:
            n_features: size of each flattened input sample.
            n_actions: number of values produced by the output layer.
        """
        super().__init__()
        wide, narrow = 512, 256
        self.fc1 = nn.Linear(n_features, wide)
        self.fc2 = nn.Linear(wide, narrow)
        self.output = nn.Linear(narrow, n_actions)

    def forward(self, env_input):
        """Return a tensor of shape (batch, n_actions) for ``env_input``.

        The input is reshaped to (-1, n_features) first, so a single unbatched
        sample is treated as a batch of one.
        """
        hidden = env_input.reshape(-1, self.fc1.in_features)
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.output(hidden)


In [4]:
# Create the Gym environment and read the network input/output sizes from its spaces.
ENV_NAME = 'MountainCar-v0'
env = gym.make(ENV_NAME)
n_features = env.observation_space.shape[0]  # length of the observation vector
n_actions = env.action_space.n  # number of discrete actions

In [5]:
# Show the lower and upper bounds of the observation space.
print(env.observation_space.low)
print(env.observation_space.high)

[-1.2  -0.07]
[0.6  0.07]


In [6]:
import numpy as np


def discretization(obs, n_bins=40, env_low=(-1.2, -0.07), env_high=(0.6, 0.07)):
    """Map a continuous (position, velocity) observation to integer bin indices.

    Each dimension of the range [env_low, env_high] is split into ``n_bins``
    equal-width bins. The defaults are the MountainCar-v0 bounds reported by
    ``env.observation_space.low`` / ``.high``.

    Args:
        obs: indexable pair ``(position, velocity)``.
        n_bins: number of bins per dimension (must be >= 1).
        env_low: lower bound of each dimension.
        env_high: upper bound of each dimension.

    Returns:
        Tuple ``(pos_bin, vel_bin)`` of Python ints, each in ``[0, n_bins - 1]``.

    Raises:
        ValueError: if ``n_bins`` is smaller than 1.
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be >= 1, got {n_bins}")

    low = np.asarray(env_low, dtype=float)
    high = np.asarray(env_high, dtype=float)
    bin_width = (high - low) / n_bins

    pos_scaled = int((obs[0] - low[0]) / bin_width[0])
    vel_scaled = int((obs[1] - low[1]) / bin_width[1])

    # A value sitting exactly on the upper bound would land in bin n_bins (or in
    # n_bins - 1, depending on float rounding); clamp so the result is always a valid bin.
    last_bin = n_bins - 1
    pos_scaled = min(max(pos_scaled, 0), last_bin)
    vel_scaled = min(max(vel_scaled, 0), last_bin)

    return pos_scaled, vel_scaled

In [7]:
def process_state(observation):
    """Discretise ``observation`` and return the result as a float32 tensor."""
    bin_indices = discretization(observation)
    return torch.tensor(bin_indices).float()

In [8]:
from SDFA import Agent
# Reset every random number generator before each experiment so runs are reproducible.
SEED = 42
env.seed(SEED)
# Gym spaces carry their own RNG that env.seed() does not touch; seed it too so
# env.action_space.sample() is reproducible (relevant if the agent samples random actions from it).
env.action_space.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
# cuDNN autotuning can select different algorithms between runs; disable it as well.
torch.backends.cudnn.benchmark = False


In [21]:
# Training hyperparameters used by the agents in this notebook.

# Training length: handed positionally to agent.train(EPISODES, STEPS, TOTAL_STEPS, ...).
TOTAL_STEPS = 10_000_000
EPISODES = 2_000
STEPS = 10_000_000

# Epsilon schedule and reporting settings forwarded to the agent constructor.
EPSILON_INI = 1
EPSILON_MIN = 0.1
EPSILON_DECAY = 0.99998599985
EPSILON_TIME = EPISODES  # passed as epsilon_anneal_time
EPISODE_BLOCK = 100
SAVE_EVERY = 200  # NOTE(review): not referenced by any cell visible in this notebook

# Batch/buffer sizes, discount factor and learning rate forwarded to the agent constructor.
BATCH_SIZE = 64
BUFFER_SIZE = 300
GAMMA = 0.99
LEARNING_RATE = 0.0001


### Entrenamiento DQN

In [22]:
# Instantiate the Q-network and move its parameters to DEVICE.
net = DQN_Linear_Model(n_features,n_actions).to(DEVICE)

In [23]:
from dqn_agent import DQNAgent
# Build the agent from the environment, the Q-network, the state pre-processing
# function and the hyperparameters defined earlier in the notebook.
# NOTE(review): this cell imports DQNAgent but instantiates `Agent` (imported from SDFA
# in an earlier cell) — confirm which class is intended.
agent = Agent(
    env,
    net,
    process_state,
    BUFFER_SIZE,
    BATCH_SIZE,
    LEARNING_RATE,
    GAMMA,
    epsilon_i=EPSILON_INI,
    epsilon_f=EPSILON_MIN,
    epsilon_anneal_time=EPSILON_TIME,
    epsilon_decay=EPSILON_DECAY,
    episode_block=EPISODE_BLOCK,
)

In [24]:
# Train the agent; the value returned by train() is kept in `rewards` for later inspection.
rewards = agent.train(EPISODES, STEPS, TOTAL_STEPS, writer_name = ENV_NAME)

  0%|          | 0/200000 [00:00<?, ? episodes/s]

  0%|          | 1/200000 [00:01<79:13:19,  1.43s/ episodes]

Episode 0 - Avg. Reward over the last 100 episodes -200.0 epsilon 0.9991045000000001 total steps 200


  0%|          | 27/200000 [01:09<160:06:37,  2.88s/ episodes]

In [20]:
# Display the list returned by agent.train.
rewards

[-200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0,
 -200.0]

### Video DQN

In [14]:
# Fresh environment for recording a test episode.
# NOTE(review): this rebinds the global `env`, so later cells that use `env` receive
# this new instance, which has not been seeded.
env = gym.make(ENV_NAME)
wrapped_env = mario_utils.wrap_env(env)
try:
    agent.record_test_episode(wrapped_env)
finally:
    # Close the wrapped environment even when recording raises.
    wrapped_env.close()

AttributeError: 'ImageData' object has no attribute 'data'

### Entrenamiento DDQN

In [None]:
# Two independently initialised networks with the same architecture, both passed to DoubleDQNAgent.
neta = DQN_Linear_Model(n_features,n_actions).to(DEVICE)
netb = DQN_Linear_Model(n_features,n_actions).to(DEVICE)

In [None]:
from double_dqn_agent import DoubleDQNAgent
# Build the Double DQN agent from both networks and the shared hyperparameters.
# This rebinds `agent`, replacing the agent created in the DQN section.
agent = DoubleDQNAgent(
    env,
    neta,
    netb,
    process_state,
    BUFFER_SIZE,
    BATCH_SIZE,
    LEARNING_RATE,
    GAMMA,
    epsilon_i=EPSILON_INI,
    epsilon_f=EPSILON_MIN,
    epsilon_anneal_time=EPSILON_TIME,
    epsilon_decay=EPSILON_DECAY,
    episode_block=EPISODE_BLOCK,
)

In [None]:
# Train the Double DQN agent; this overwrites the `rewards` obtained from the DQN run.
rewards = agent.train(EPISODES, STEPS, TOTAL_STEPS, writer_name = ENV_NAME)

In [None]:
# Windows: change this for Linux (copy it from the "obligatorio" assignment).
# NOTE(review): presumably the virtual display must be running before any video cell
# is executed — confirm and move this cell earlier if so.
from IPython.display import HTML
from pyvirtualdisplay import Display

# Start an off-screen 1400x900 display with 16-bit colour depth.
display = Display(visible=0, size=(1400, 900),color_depth=16)
display.start()

### Video DDQN

In [None]:
# Fresh environment for recording a test episode of the current `agent`.
env = gym.make(ENV_NAME)
wrapped_env = mario_utils.wrap_env(env)
try:
    agent.record_test_episode(wrapped_env)
finally:
    # Close the wrapped environment even when recording raises.
    wrapped_env.close()

### Carga de datos y continuación entrenamiento (Ej DQN)

In [None]:
# Path to the saved model weights (.dat file).
# NOTE(review): hard-coded absolute path — adjust it to where the checkpoint actually lives.
weights_path = '/content/GenericDQNAgent(-139.51).dat'

# Create the network directly on the active device.
model = DQN_Linear_Model(n_features,n_actions).to(DEVICE)

# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on a GPU
# also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load(weights_path, map_location=DEVICE))

# Agent that continues from the loaded weights, using the shared hyperparameters.
agent3 = DQNAgent(
    env,
    model,
    process_state,
    BUFFER_SIZE,
    BATCH_SIZE,
    LEARNING_RATE,
    GAMMA,
    epsilon_i=EPSILON_INI,
    epsilon_f=EPSILON_MIN,
    epsilon_anneal_time=EPSILON_TIME,
    epsilon_decay=EPSILON_DECAY,
    episode_block=EPISODE_BLOCK,
    save_every=1000,
)

In [None]:
# Fresh environment for recording a test episode of the agent built from the loaded weights.
env = gym.make(ENV_NAME)
wrapped_env = mario_utils.wrap_env(env)
try:
    agent3.record_test_episode(wrapped_env)
finally:
    # Close the wrapped environment even when recording raises.
    wrapped_env.close()