# **<p style="text-align: center;">Aprendizaje por refuerzo - Ingeniería en Inteligencia Artificial</p>**
## **<p style="text-align: center;">Trabajo práctico Final - SAC</p>**

#### <p style="text-align: center;">Pettinari Fausto, Schuemer Ignacio, Torres Santiago </p>
#### <p style="text-align: center;">Profesores: Claudio Pose, Gabriel Torre, Nicolás Romero, Tomás Chimenti</p>

# **Donkey Car VAE Environment**

In [1]:
import sys
from pathlib import Path

# Make the repository root and the vendored "learning-to-drive-in-5-minutes"
# project importable from this notebook. The root is resolved relative to the
# current working directory (one level up).
REPO_ROOT = Path("..").resolve()
sys.path.extend(
    [
        str(REPO_ROOT),
        str(REPO_ROOT / "learning-to-drive-in-5-minutes"),
    ]
)


In [2]:
import sys
import numpy as np
import gymnasium as gym
import torch

# donkey_gym expects the old ``gym`` API, so expose gymnasium (and the
# submodules listed below) under the legacy module names. ``setdefault`` keeps
# any module that is already registered under one of those names.
for _legacy_name, _module in (
    ("gym", gym),
    ("gym.spaces", gym.spaces),
    ("gym.envs", gym.envs),
    ("gym.envs.registration", gym.envs.registration),
    ("gym.utils", gym.utils),
):
    sys.modules.setdefault(_legacy_name, _module)
del _legacy_name, _module  # do not leave loop temporaries in the notebook namespace

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
import os
from pathlib import Path
import numpy as np
import gymnasium as gym
from gymnasium.envs.registration import EnvSpec
from typing import Any
from donkey_gym.envs.vae_env import DonkeyVAEEnv
from config import LEVEL, FRAME_SKIP, MIN_THROTTLE, MAX_THROTTLE, MAX_CTE_ERROR
from aae_train_donkeycar.ae.autoencoder import load_ae

# --- Simulator connection settings ---
# The path and port are force-assigned, so they override any value already
# present in the process environment.
# Alternative build kept for reference:
#   '/home/san/Documents/Ingenieria UdeSA/RL/DonkeySimLinux/donkey_sim.x86_64'
os.environ.update(
    {
        'DONKEY_SIM_PATH': '/home/san/Documents/Ingenieria UdeSA/RL/donkey_simulator/build_sdsandbox.x86_64',
        # Default Unity build listens on 9090; change if you launch with --port X
        'DONKEY_SIM_PORT': '9090',
    }
)

# The two settings below are only filled in when they are not already set.
# Optional: fail fast instead of waiting forever
os.environ.setdefault("DONKEY_WAIT_TIMEOUT", "30")
os.environ.setdefault("DONKEY_SKIP_WAIT", "1")
# Optional (currently disabled): os.environ['DONKEY_SIM_HEADLESS'] = '1'

# Fallback horizon for wrappers expecting a finite episode length
MAX_EPISODE_STEPS = 1000

# Location of the pretrained autoencoder weights inside the vendored project.
# (An earlier revision pointed at "ae-32_avc/archive/data.pkl" instead.)
vae_archive_dir = REPO_ROOT.joinpath(
    "learning-to-drive-in-5-minutes", "ae-32_avc", "vae.pth"
)
print(f"Loading VAE weights from extracted archive dir: {vae_archive_dir}")
# Stop here if the weights are missing
assert vae_archive_dir.exists(), "VAE archive folder not found."

class GymnasiumDonkeyWrapper(gym.Env):
    """Expose a ``DonkeyVAEEnv`` through the Gymnasium ``Env`` interface.

    The wrapped environment returns the 4-tuple ``(obs, reward, done, info)``
    from ``step`` and a bare observation from ``reset``. This wrapper converts
    those into the 5-tuple / ``(obs, info)`` forms, flattens every observation
    to a 1-D ``float32`` array and truncates episodes after
    ``MAX_EPISODE_STEPS`` steps.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, donkey_env: DonkeyVAEEnv):
        """Store the wrapped env and derive flat observation/action spaces from it.

        Args:
            donkey_env: environment to wrap; its ``observation_space.shape`` and
                ``action_space.low`` / ``action_space.high`` are read here.
        """
        self.donkey_env = donkey_env
        self._step_count = 0  # steps taken since the last reset
        obs_dim = int(np.prod(donkey_env.observation_space.shape))
        # Observations are exposed as an unbounded flat float32 vector
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32,
        )
        # Same bounds as the wrapped action space, cast to float32
        self.action_space = gym.spaces.Box(
            low=donkey_env.action_space.low.astype(np.float32),
            high=donkey_env.action_space.high.astype(np.float32),
            dtype=np.float32,
        )
        self.spec = EnvSpec(id="DonkeyVae-v0", max_episode_steps=MAX_EPISODE_STEPS)
        self.render_mode = "human"
        self.max_episode_steps = MAX_EPISODE_STEPS

    @staticmethod
    def _flatten_obs(obs: Any) -> np.ndarray:
        """Return ``obs`` as a 1-D ``float32`` NumPy array."""
        return np.asarray(obs, dtype=np.float32).reshape(-1)

    def reset(self, seed=None, options=None):
        """Reset the wrapped env and return ``(flat_observation, info)``.

        Args:
            seed: optional seed; when given it seeds this env's ``np_random``
                (through ``gym.Env.reset``) and is forwarded to the wrapped env.
            options: accepted for Gymnasium compatibility; not used.

        Returns:
            The flattened initial observation and an empty info dict.
        """
        # Gymnasium expects subclasses to call this so that self.np_random is seeded
        super().reset(seed=seed)
        self._step_count = 0
        if seed is not None:
            self.donkey_env.seed(seed)
        obs = self.donkey_env.reset()
        return self._flatten_obs(obs), {}

    def step(self, action):
        """Advance the wrapped env by one step.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``terminated``
            mirrors the wrapped env's ``done`` flag and ``truncated`` becomes
            true once ``max_episode_steps`` steps have been taken since reset.
        """
        obs, reward, done, info = self.donkey_env.step(action)
        self._step_count += 1
        terminated = bool(done)
        truncated = self._step_count >= self.max_episode_steps
        obs = self._flatten_obs(obs)
        # Work on a copy so the wrapped env's own info dict is never mutated
        info = dict(info) if info else {}
        if truncated:
            info.setdefault("TimeLimit.truncated", True)
        return obs, reward, terminated, truncated, info

    def render(self, mode="human"):
        """Delegate rendering to the wrapped env using ``mode``."""
        return self.donkey_env.render(mode=mode)

    def close(self):
        """Close the wrapped env on a best-effort basis.

        Errors raised while closing are reported as a ``RuntimeWarning``
        instead of being silently discarded; this method itself never raises.
        """
        import warnings

        try:
            self.donkey_env.close()
        except Exception as exc:
            warnings.warn(
                f"Ignoring error while closing the Donkey env: {exc!r}",
                RuntimeWarning,
            )


def make_donkey_vae_env(device: torch.device):
    """Create the Donkey VAE environment wrapped for the Gymnasium API.

    The autoencoder is loaded from ``vae_archive_dir`` with ``z_size=32`` and
    handed to ``DonkeyVAEEnv`` together with the settings imported from
    ``config``.

    Args:
        device: not referenced anywhere in this function's body.

    Returns:
        A ``GymnasiumDonkeyWrapper`` around the newly created ``DonkeyVAEEnv``.
    """
    autoencoder = load_ae(vae_archive_dir, z_size=32)
    env_kwargs = dict(
        level=LEVEL,
        frame_skip=FRAME_SKIP,
        vae=autoencoder,
        const_throttle=None,
        min_throttle=MIN_THROTTLE,
        max_throttle=MAX_THROTTLE,
        max_cte_error=MAX_CTE_ERROR,
        n_command_history=0,
    )
    return GymnasiumDonkeyWrapper(DonkeyVAEEnv(**env_kwargs))


# Build the wrapped environment used by every cell below and report the shapes
# of its observation and action spaces. The recorded output shows that this
# starts the Donkey simulator subprocess.
env = make_donkey_vae_env(device=device)
print(f"Env ready with obs shape {env.observation_space.shape} and action shape {env.action_space.shape}")


Loading VAE weights from extracted archive dir: /home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/ae-32_avc/vae.pth
Dim AE = 32
PyTorch 2.7.1+cu126
Starting DonkeyGym env
Donkey subprocess started
Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Binding to ('0.0.0.0', 9090)


Gtk-Message: 11:14:08.139: Failed to load module "gail"
Gtk-Message: 11:14:08.139: Failed to load module "atk-bridge"

























GTK+ 2.x symbols detected. Using GTK+ 2.x and GTK+ 3 in the same process is not supported.
Gtk-Message: 11:14:08.174: Failed to load module "canberra-gtk-module"


Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Got a new client ('127.0.0.1', 52582)
Scene Selection Ready
Available scene names: ['generated_road', 'warehouse', 'sparkfun_avc', 'generated_track']


Unhandled exception in client_connected_cb
transport: <_SelectorSocketTransport fd=129 read=idle write=<idle, bufsize=0>>
Traceback (most recent call last):
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 75, in handle_client
    await self.sim_handler.run()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 149, in run
    self.close()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 173, in close
    self.msg_handler.on_disconnect()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/envs/donkey_sim.py", line 147, in on_disconnect
    self.sock.close()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 173, in close
    self.msg_handler.on_disconnect()

Got a new client ('127.0.0.1', 52588)
Env ready with obs shape (32,) and action shape (2,)


Unhandled exception in client_connected_cb
transport: <_SelectorSocketTransport fd=137 read=idle write=<idle, bufsize=0>>
Traceback (most recent call last):
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 75, in handle_client
    await self.sim_handler.run()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 149, in run
    self.close()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 173, in close
    self.msg_handler.on_disconnect()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/envs/donkey_sim.py", line 147, in on_disconnect
    self.sock.close()
  File "/home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/learning-to-drive-in-5-minutes/donkey_gym/core/tcp_server.py", line 173, in close
    self.msg_handler.on_disconnect()

#### **Logging & Debugging (Random Agent)**

In [4]:
from torch.utils.tensorboard import SummaryWriter
from sac.random_agent import random_agent_loop

2025-11-29 11:14:22.469446: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-29 11:14:22.640979: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-29 11:14:27.001888: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [5]:
# Experiment settings shared by the cells below
seed = 42
num_episodes = 1000

# Apply the seed: previously it was defined but never used, so the global
# NumPy and PyTorch random generators were left unseeded.
np.random.seed(seed)
torch.manual_seed(seed)


## **SAC Agent**

In [6]:
import yaml
from sac.agent import SAC


In [7]:
# Read the SAC hyper-parameters from the YAML file (path is relative to the
# notebook's working directory)
with open('configs/donkey_car.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Log using the Donkey Car VAE environment name
config['logger'].update(env_name='DonkeyVae-v0')


### **TRAIN**

In [8]:
import torch

# Report the installed PyTorch build and whether it can see a CUDA device
print(f"PyTorch version: {torch.__version__}")
print(f"Is CUDA available? {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    print("CUDA not available. PyTorch is running on CPU.")
else:
    # Details of the CUDA runtime and the currently selected GPU
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU index: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")


PyTorch version: 2.7.1+cu126
Is CUDA available? True
CUDA version: 12.6
Number of GPUs: 1
Current GPU index: 0
Device name: NVIDIA GeForce RTX 4050 Laptop GPU


In [9]:
# Create a fresh SAC agent on the Donkey env with the loaded config, then train
# it for `num_episodes` episodes. tqdm_disable=False presumably keeps the
# progress bar visible (the recorded output shows one); per-episode reward
# printing is turned off.
SAC_agent = SAC(env=env, config=config)
SAC_agent.run_training_loop(num_episodes=num_episodes, tqdm_disable=False, print_rewards=False)

  3%|▎         | 29/1000 [01:54<1:03:39,  3.93s/it]


Preloaded type GtkListStore
Preloaded type GtkWindow
Preloaded type GtkVBox
Preloaded type GtkImage
Preloaded type GtkNotebook
Preloaded type GtkHBox
Preloaded type GtkFrame
Preloaded type GtkAlignment
Preloaded type GtkTreeView
Preloaded type GtkLabel
Preloaded type GtkCheckButton
Preloaded type GtkScrolledWindow
Preloaded type GtkComboBox


KeyboardInterrupt: 

In [10]:
# Build a second SAC agent on the same env and config, then load a saved agent
# file from an earlier run (presumably restoring its trained state).
# NOTE(review): the line below sets the logger's 'enabled' flag to True, i.e.
# logging is switched ON, although its comment used to read "Disable logging
# for loaded agent" — confirm which of the two is intended.
config['logger']['enabled'] = True  # logger flag set to True (see NOTE above)
SAC_agent_loaded = SAC(env=env, config=config)
SAC_agent_loaded.load_agent('../notebooks/runs/DonkeyVae-v0/SAC/sac-donkey-vae-2025_11_28-18_53_32/sac_agent.pth')

In [11]:
# Run the training loop again with the loaded agent (this calls
# run_training_loop, not an evaluation method) for `num_episodes` episodes.
SAC_agent_loaded.run_training_loop(num_episodes=num_episodes, tqdm_disable=False, print_rewards=False)

  0%|          | 0/1000 [00:00<?, ?it/s]socket.send() raised exception.
  0%|          | 0/1000 [00:05<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# SAC_agent_loaded.eval_agent(num_episodes=1)

100%|██████████| 1/1 [00:16<00:00, 16.69s/it]


329.24566867947584

Preloaded type GtkListStore
Preloaded type GtkWindow
Preloaded type GtkVBox
Preloaded type GtkImage
Preloaded type GtkNotebook
Preloaded type GtkHBox
Preloaded type GtkFrame
Preloaded type GtkAlignment
Preloaded type GtkTreeView
Preloaded type GtkLabel
Preloaded type GtkCheckButton
Preloaded type GtkScrolledWindow
Preloaded type GtkComboBox
