In [1]:
# ---------  Algoritmo A2C aplicado a alunizaje   -------- #
#
# ------  UBA - Aprendizaje por Refuerzo II - 2025  ------ #
#


!apt-get update
!apt-get install -y xvfb python-opengl ffmpeg swig build-essential > /dev/null 2>&1
!pip install swig
!pip install gymnasium[box2d]
!pip install stable-baselines3[extra] moviepy
!pip install pyvirtualdisplay > /dev/null 2>&1
print("--- Instalación de dependencias completada (o intentada). ---")

import os
import gymnasium
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecVideoRecorder, VecEnv
print("Stable Baselines3 importado correctamente.")

# --- Experiment configuration ---
env_id = 'LunarLander-v3'
num_cpu = 4                    # number of parallel environments used for training
video_length = 2000            # length of the recorded video, in environment steps
video_folder = 'logs/videos/'  # relative output folder for the recorded video
log_dir = "/tmp/gym/"

# Create both output folders up front; folders that already exist are accepted.
for _folder in (log_dir, video_folder):
    os.makedirs(_folder, exist_ok=True)

# Vectorized environment (num_cpu copies) that the agent is trained on.
train_env = make_vec_env(env_id, n_envs=num_cpu)

# Separate single environment created in 'rgb_array' render mode; it is the one
# wrapped by the video recorder after training.
record_env_raw = gymnasium.make(env_id, render_mode='rgb_array')

# Build the A2C agent. The algorithm hyperparameters are gathered in one dict so
# they can be inspected or logged as a unit, then unpacked into the constructor.
a2c_hyperparams = dict(
    n_steps=5,
    gamma=0.99,
    gae_lambda=1.0,
    ent_coef=0.0,
    vf_coef=0.5,
    max_grad_norm=0.5,
    learning_rate=7e-4,
    use_rms_prop=True,
)

model = A2C(
    policy='MlpPolicy',   # MLP-based policy, selected by its registered name
    env=train_env,        # the vectorized training environment
    verbose=1,            # print the periodic training tables
    seed=42,              # fixed seed for reproducibility
    **a2c_hyperparams,
)

# Train the agent for a fixed budget of environment steps.
total_timesteps = 300_000  # alternative budget to try: 200_000

print("Iniciando entrenamiento...")
model.learn(total_timesteps=total_timesteps)
print("Entrenamiento completado.")

# Record a video of the trained agent acting deterministically.
print("Grabando video del alunizaje...")
record_env = VecVideoRecorder(
    # Wrap the already-built rendering environment as a 1-env vectorized env.
    make_vec_env(lambda: record_env_raw, n_envs=1),
    video_folder,
    # Trigger recording only at step 0, i.e. a single clip from the start.
    record_video_trigger=lambda step: step == 0,
    video_length=video_length,
    name_prefix=f"a2c-{env_id}",
)

try:
    obs = record_env.reset()  # a VecEnv must be reset to obtain the first observation
    # Steps one more time than `video_length`; presumably so the recorder sees
    # the final step of the clip and finalizes the file — confirm against the
    # VecVideoRecorder documentation.
    for _ in range(video_length + 1):
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = record_env.step(action)
finally:
    # Always close the recorder, even if prediction or stepping raises, so the
    # recorder and the wrapped environment are released instead of left open.
    record_env.close()

print(f"Video guardado en la carpeta: {video_folder}")

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,659 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:8 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [77.5 kB]
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,715 kB]
Hit:13 https:

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


Using cpu device
Iniciando entrenamiento...
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 116      |
|    ep_rew_mean        | -229     |
| time/                 |          |
|    fps                | 2224     |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.04    |
|    explained_variance | 0.12     |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -7.72    |
|    value_loss         | 104      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 145      |
|    ep_rew_mean        | -258     |
| time/                 |          |
|    fps                | 2149     |
|    iterations         | 200      |
|    time_elapsed       | 1        |
|    total_timesteps    | 4000 

  """


Moviepy - Building video /content/logs/videos/a2c-LunarLander-v3-step-0-to-step-2000.mp4.
Moviepy - Writing video /content/logs/videos/a2c-LunarLander-v3-step-0-to-step-2000.mp4





Moviepy - Done !
Moviepy - video ready /content/logs/videos/a2c-LunarLander-v3-step-0-to-step-2000.mp4
Video guardado en la carpeta: logs/videos/
