# DQN Flappy Bird y Cargar Modelo entrenado

In [1]:
import time
import flappy_bird_gymnasium
import gymnasium
env = gymnasium.make("FlappyBird-v0")

In [2]:
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space", env.observation_space)
print("Sample observation", env.observation_space.sample()) # Get a random observation

_____OBSERVATION SPACE_____ 

Observation Space Box(-inf, inf, (12,), float64)
Sample observation [ 0.32662476 -0.64422425  0.28158268  0.98390701  0.43359274 -1.10963593
 -0.4105028  -0.04454359  0.89713509  0.8021827  -0.35538383 -2.49038922]


In [3]:
print("\n _____ACTION SPACE_____ \n")
print("Action Space Shape", env.action_space.n)
print("Action Space Sample", env.action_space.sample()) # Take a random action


 _____ACTION SPACE_____ 

Action Space Shape 2
Action Space Sample 1


In [4]:
# Modulos
import torch
import gymnasium as gym
from stable_baselines3 import DQN
import os

In [5]:
models_dir="models/Dqn"
logdir="logs_flappy"

if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
if not os.path.exists(logdir):
    os.makedirs(logdir)

In [16]:
#!pip install tensorboard

## Entrenar al Agente

In [6]:
from torch.utils.tensorboard import SummaryWriter

In [7]:
#Modelo
model = DQN("MlpPolicy", env, verbose=1, tensorboard_log=logdir, buffer_size=10_000) #device="cuda"

#Entrenamiento
TIMESTEPS=4_000_000

model.learn(total_timesteps=TIMESTEPS, tb_log_name="DQN_flappybird")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs_flappy\DQN_flappybird_5


<stable_baselines3.dqn.dqn.DQN at 0x1e5bf93d460>

In [23]:
#!pip install tensorboard
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [24]:
%tensorboard --logdir logs_flappy

ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.

In [None]:
# Para guardar varias fases del entrenamiento
# Para no tener que entrenar desde 0 el modelo: reset_num_timesteps=False

#for i in range(1,100):
#    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name="DQN_flappybird")
#    model.save(f"{models_dir}/{TIMESTEPS*i}")

### Evaluar al Agente

In [None]:
import time
import flappy_bird_gymnasium
import gymnasium
env = gymnasium.make("FlappyBird-v0")

obs, _ = env.reset()
while True:
    # Next action:
    # (feed the observation to your agent here)
    action = model.predict(obs, deterministic=False)

    # Processing:
    obs, reward, terminated, _, info = env.step(action)
    
    # Rendering the game:
    # (remove this two lines during training)
    env.render()
    time.sleep(1 / 30)  # FPS
    
    # Checking if the player is still alive
    if terminated:
        break

env.close()

## Cargar Modelo ya entrenado

In [9]:
# Cargar Modelo de Colab
model = DQN.load("dqn_flappybird.zip")

Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from 'C:\\Users\\driao\\AppData\\Roaming\\Python\\Python39\\site-packages\\cloudpickle\\cloudpickle.py'>
Exception: Can't get attribute '_make_function' on <module 'cloudpickle.cloudpickle' from 'C:\\Users\\driao\\AppData\\Roaming\\Python\\Python39\\site-packages\\cloudpickle\\cloudpickle.py'>


## Probar el Agente entrenado

In [None]:
import gymnasium
import time

import gymnasium
import numpy as np
import pygame
import tensorflow as tf

import flappy_bird_gymnasium

def play(epoch=10, audio_on=True, render=True):
    env = gymnasium.make("FlappyBird-v0", audio_on=audio_on)

    # init models
    #model = DQN("MlpPolicy", env, verbose=1, tensorboard_log=logdir, buffer_size=10_000)
    model = DQN.load("dqn_flappybird.zip")

    # run
    for _ in range(epoch):
        clock = pygame.time.Clock()
        score = 0

        state, _ = env.reset(seed=123)
        state = np.expand_dims(state, axis=0)
        while True:
            if render:
                env.render()

            # Getting action
            action, _ = model.predict(state, deterministic=False)
            action = np.array(action, copy=False, dtype=env.env.action_space.dtype)

            if render:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()

            # Processing action
            next_state, reward, done, _, info = env.step(action)

            state = np.expand_dims(next_state, axis=0)
            score += reward
            print(f"Obs: {state}\n" f"Action: {action}\n" f"Score: {score}\n")

            if render:
                clock.tick(30)

            if done:
                if render:
                    env.render()
                    time.sleep(0.6)
                break

    env.close()
    assert state.shape == (1, 12)
    assert info["score"] > 0
    assert score > 10.999999999999977


def test_play():
    play(epoch=1, audio_on=False, render=False)


if __name__ == "__main__":
    play()