# **<p style="text-align: center;">Aprendizaje por refuerzo - Ingeniería en Inteligencia Artificial</p>**
## **<p style="text-align: center;">Trabajo práctico Final - SAC</p>**

#### <p style="text-align: center;">Pettinari Fausto, Schuemer Ignacio, Torres Santiago </p>
#### <p style="text-align: center;">Profesores: Claudio Pose, Gabriel Torre, Nicolás Romero, Tomás Chimenti</p>

# **Donkey Car VAE Environment**

In [1]:
import sys
from pathlib import Path

# Add the repository root (the parent of the current working directory) to the
# import path so that project packages can be imported from this notebook.
REPO_ROOT = Path("..").resolve()
# Guarded so that re-running this cell does not append duplicate entries.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))


In [2]:
import sys
import numpy as np
import gymnasium as gym
import torch

# donkey_gym imports the legacy `gym` package. Register gymnasium and the
# submodules listed here under the old names; `setdefault` leaves any module
# that is already registered under that name untouched.
for legacy_name, gymnasium_module in (
    ("gym", gym),
    ("gym.spaces", gym.spaces),
    ("gym.envs", gym.envs),
    ("gym.envs.registration", gym.envs.registration),
    ("gym.utils", gym.utils),
):
    sys.modules.setdefault(legacy_name, gymnasium_module)

# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
import os
from pathlib import Path
import numpy as np
import gymnasium as gym
from gymnasium.envs.registration import EnvSpec
from typing import Any
from DonkeyCarEnv.donkey_gym.envs.vae_env import DonkeyVAEEnv
from DonkeyCarEnv.config_env import LEVEL, FRAME_SKIP, MIN_THROTTLE, MAX_THROTTLE, MAX_CTE_ERROR, N_STACK, N_COMMAND_HISTORY
from DonkeyCarEnv.ae.autoencoder import load_ae
import gdown
import zipfile

In [4]:
# Check whether the simulator binary is present; if not, download the zip
# archive from Google Drive with gdown, extract it and make the binary executable.
SIM_PATH =  str( REPO_ROOT / "DonkeyCarEnv" / "donkey_simulator" / "build_sdsandbox.x86_64")

if not os.path.exists(SIM_PATH):
    print("Donkey simulator not found, downloading...")
    url = "https://drive.google.com/uc?id=1h2VfpGHlZetL5RAPZ79bhDRkvlfuB4Wb"
    output = str( REPO_ROOT / "DonkeyCarEnv" / "donkey_simulator.zip")
    # NOTE(review): some gdown releases signal failure by returning None rather
    # than raising; checking here gives a clear error instead of a confusing
    # "file not found" from zipfile below. Harmless if gdown raises instead.
    if gdown.download(url, output, quiet=False) is None:
        raise RuntimeError(f"Failed to download the Donkey simulator from {url}")
    try:
        # Unzip the file
        with zipfile.ZipFile(output, 'r') as zip_ref:
            zip_ref.extractall(str( REPO_ROOT / "DonkeyCarEnv" / "donkey_simulator"))
    finally:
        # Remove the archive even if extraction fails, so a corrupt download
        # is not left behind on disk.
        os.remove(output)
    # Fail with an explicit message if the archive layout did not produce the
    # expected binary, instead of letting chmod raise a bare OS error.
    if not os.path.exists(SIM_PATH):
        raise FileNotFoundError(
            f"Simulator binary not found at {SIM_PATH} after extracting the archive"
        )
    # Provide execute permissions to the simulator
    os.chmod(SIM_PATH, 0o755)
    print("Download complete.")


In [5]:

# Configure simulator path/port here
os.environ['DONKEY_SIM_PATH'] = SIM_PATH
# Default Unity build listens on 9090; change if you launch with --port X
os.environ['DONKEY_SIM_PORT'] = '9090'
# Optional: fail fast instead of waiting forever
# (setdefault keeps any value already exported in the environment)
os.environ.setdefault("DONKEY_WAIT_TIMEOUT", "30")
os.environ['DONKEY_SIM_HEADLESS'] = '0'
os.environ.setdefault("DONKEY_SKIP_WAIT", "1")

MAX_EPISODE_STEPS = 1000  # fallback horizon for wrappers expecting a finite episode length
# NOTE: despite its name, `vae_archive_dir` points at the `vae.pth` weights file,
# not at a directory; the name is kept because later cells reference it.
vae_archive_dir = REPO_ROOT / "DonkeyCarEnv" / "ae-32_avc" / "vae.pth"
print(f"Loading VAE weights from extracted archive dir: {vae_archive_dir}")
# Report the missing path explicitly; the checked path is a file, not a folder.
assert vae_archive_dir.exists(), f"VAE weights file not found: {vae_archive_dir}"

Loading VAE weights from extracted archive dir: /home/ignacio/Documents/Ignacio UdeSa/4to/2do semestre/RL/TPS/RL-SAC/DonkeyCarEnv/ae-32_avc/vae.pth


In [6]:
class GymnasiumDonkeyWrapper(gym.Env):
    """Gymnasium adapter around a ``DonkeyVAEEnv`` instance.

    The wrapped environment is driven through the legacy 4-tuple ``step`` API
    (``obs, reward, done, info``) and a ``reset`` that returns only the
    observation. This wrapper exposes the Gymnasium 5-tuple ``step`` and
    ``(obs, info)`` ``reset`` instead, flattens every observation to a 1-D
    ``float32`` array, and truncates episodes after ``MAX_EPISODE_STEPS`` steps.
    """
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 30}

    def __init__(self, donkey_env: DonkeyVAEEnv):
        """Wrap ``donkey_env`` and derive flat observation/action spaces from it.

        Args:
            donkey_env: The environment to adapt. Its ``observation_space.shape``
                and ``action_space.low``/``high`` are read to build the spaces.
        """
        self.donkey_env = donkey_env
        self._step_count = 0
        # Observations are flattened, so the space is 1-D with as many entries
        # as the wrapped observation has elements; bounds are left unbounded.
        obs_dim = int(np.prod(donkey_env.observation_space.shape))
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(obs_dim,),
            dtype=np.float32,
        )
        # Same bounds as the wrapped action space, cast to float32.
        self.action_space = gym.spaces.Box(
            low=donkey_env.action_space.low.astype(np.float32),
            high=donkey_env.action_space.high.astype(np.float32),
            dtype=np.float32,
        )
        self.spec = EnvSpec(id="DonkeyVae-v0", max_episode_steps=MAX_EPISODE_STEPS)
        self.render_mode = "human"
        self.max_episode_steps = MAX_EPISODE_STEPS

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed. When given, it seeds both Gymnasium's own RNG
                (via ``gym.Env.reset``) and the wrapped environment.
            options: Accepted for API compatibility; not used.

        Returns:
            A ``(observation, info)`` tuple with the flattened observation and
            an empty info dict.
        """
        # Gymnasium expects subclasses to forward the seed to the base class so
        # that ``self.np_random`` is seeded.
        super().reset(seed=seed)
        self._step_count = 0
        if seed is not None:
            self.donkey_env.seed(seed)
        obs = self.donkey_env.reset()
        return self._flatten_obs(obs), {}

    def step(self, action):
        """Advance the wrapped environment by one step.

        Args:
            action: Action forwarded unchanged to the wrapped environment.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` mirrors the wrapped env's ``done`` flag and
            ``truncated`` becomes True once the per-episode step counter
            reaches ``max_episode_steps``.
        """
        obs, reward, done, info = self.donkey_env.step(action)
        self._step_count += 1
        terminated = bool(done)
        truncated = self._step_count >= self.max_episode_steps
        obs = self._flatten_obs(obs)
        # Work on a copy so the wrapped env's own info dict is never mutated.
        info = dict(info) if info else {}
        if truncated:
            info.setdefault("TimeLimit.truncated", True)
        return obs, reward, terminated, truncated, info

    def render(self, mode="human"):
        """Delegate rendering to the wrapped environment using ``mode``."""
        return self.donkey_env.render(mode=mode)

    def close(self):
        """Close the wrapped environment on a best-effort basis.

        Errors raised while closing are reported as a ``RuntimeWarning``
        instead of being silently discarded; they are still not propagated.
        """
        import warnings

        try:
            self.donkey_env.close()
        except Exception as exc:
            warnings.warn(
                f"Ignoring error while closing the Donkey environment: {exc!r}",
                RuntimeWarning,
            )

    @staticmethod
    def _flatten_obs(obs: Any) -> np.ndarray:
        """Return ``obs`` as a 1-D ``float32`` NumPy array."""
        return np.asarray(obs, dtype=np.float32).reshape(-1)


def make_donkey_vae_env(device: torch.device):
    """Create the Donkey Car VAE environment wrapped for Gymnasium.

    The autoencoder is loaded from ``vae_archive_dir`` with ``z_size=32`` and
    handed to ``DonkeyVAEEnv`` together with the settings imported from
    ``DonkeyCarEnv.config_env``.

    Args:
        device: Not used inside this function; the parameter is part of the
            existing call signature.

    Returns:
        A ``GymnasiumDonkeyWrapper`` around the newly built ``DonkeyVAEEnv``.
    """
    autoencoder = load_ae(vae_archive_dir, z_size=32)
    # const_throttle=None is passed explicitly, exactly as the settings below.
    env_settings = {
        "level": LEVEL,
        "frame_skip": FRAME_SKIP,
        "vae": autoencoder,
        "const_throttle": None,
        "min_throttle": MIN_THROTTLE,
        "max_throttle": MAX_THROTTLE,
        "max_cte_error": MAX_CTE_ERROR,
        "n_command_history": N_COMMAND_HISTORY,
        "n_stack": N_STACK,
    }
    return GymnasiumDonkeyWrapper(DonkeyVAEEnv(**env_settings))


# Instantiate the wrapped environment once; later cells pass this `env` to the
# SAC agent. The shapes printed are those of the wrapper's flat spaces.
env = make_donkey_vae_env(device=device)
print(f"Env ready with obs shape {env.observation_space.shape} and action shape {env.action_space.shape}")

Dim AE = 32
PyTorch 2.9.1+cu128
Starting DonkeyGym env
Donkey subprocess started
Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Binding to ('0.0.0.0', 9090)


Gtk-Message: 15:33:20.313: Failed to load module "gail"
Gtk-Message: 15:33:20.313: Failed to load module "atk-bridge"

























GTK+ 2.x symbols detected. Using GTK+ 2.x and GTK+ 3 in the same process is not supported.
Gtk-Message: 15:33:20.322: Failed to load module "canberra-gtk-module"


Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Got a new client ('127.0.0.1', 54738)
Scene Selection Ready
Available scene names: ['generated_road', 'warehouse', 'sparkfun_avc', 'generated_track']
Connection closed
Connection dropped
Connection closed
Waiting for sim to start...if the simulation is running, press EXIT to go back to the menu
Got a new client ('127.0.0.1', 54744)
Env ready with obs shape (126,) and action shape (2,)


In [7]:
# Number of episodes passed to `run_training_loop` in the TRAIN section below.
num_episodes = 2000


## **SAC Agent**

In [8]:
import yaml
from sac.agent import SAC


In [9]:
# Read the SAC hyper-parameter file (path is relative to the working directory)
# and parse it with the safe YAML loader.
config_path = Path('configs/donkey_car_new.yaml')
config = yaml.safe_load(config_path.read_text())

# Log using the Donkey Car VAE environment name
config['logger']['env_name'] = 'DonkeyVae-v0'


In [10]:
import torch

# Report the PyTorch build and, when CUDA is usable, details of the active GPU.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"Is CUDA available? {cuda_ready}")

if not cuda_ready:
    print("CUDA not available. PyTorch is running on CPU.")
else:
    # Query the current device index once and reuse it for the name lookup.
    active_gpu = torch.cuda.current_device()
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU index: {active_gpu}")
    print(f"Device name: {torch.cuda.get_device_name(active_gpu)}")


PyTorch version: 2.9.1+cu128
Is CUDA available? True
CUDA version: 12.8
Number of GPUs: 1
Current GPU index: 0
Device name: NVIDIA GeForce RTX 4060 Laptop GPU


## **TRAIN**

In [12]:
# Build the SAC agent on the Donkey environment with the loaded YAML config,
# then call its helpers to show the configuration and the network architectures.
# NOTE(review): `print_net_arqhitectures` is spelled this way at the call site;
# presumably it matches the method name defined in sac.agent — confirm.
SAC_agent = SAC(env=env, config=config)
SAC_agent.show_config()
SAC_agent.print_net_arqhitectures()

{   'buffer': {'capacity': 50000},
    'logger': {   'agent_name': 'SAC',
                  'enabled': True,
                  'env_name': 'DonkeyVae-v0',
                  'flush_secs': 10,
                  'log_dir': 'runs',
                  'log_episode_stats': True,
                  'log_q_values': False,
                  'run_name': 'sac-donkey-vae',
                  'save_model': {'enabled': True, 'path': None},
                  'timestamp_format': '%Y_%m_%d-%H_%M_%S',
                  'use_timestamp': True},
    'policy_net': {   'action_scale': 1.0,
                      'hidden_layers_act': 'elu',
                      'hidden_sizes': [256, 256, 64],
                      'log_std_max': 2,
                      'log_std_min': -20,
                      'output_activation': 'identity'},
    'q_net': {   'hidden_layers_act': 'elu',
                 'hidden_sizes': [256, 256],
                 'output_activation': 'identity'},
    'sac': {   'actor_lr': 0.00075,
          

In [None]:
# Train for `num_episodes` episodes. Going by the keyword names, the tqdm
# progress bar stays enabled and per-episode reward printing is off — confirm
# against sac.agent.
SAC_agent.run_training_loop(num_episodes=num_episodes, tqdm_disable=False, print_rewards=False)

  0%|          | 3/2000 [00:19<3:31:39,  6.36s/it]


Connection closed
Connection dropped
Connection closed


KeyboardInterrupt: 

Preloaded type GtkListStore
Preloaded type GtkWindow
Preloaded type GtkVBox
Preloaded type GtkImage
Preloaded type GtkNotebook
Preloaded type GtkHBox
Preloaded type GtkFrame
Preloaded type GtkAlignment
Preloaded type GtkTreeView
Preloaded type GtkLabel
Preloaded type GtkCheckButton
Preloaded type GtkScrolledWindow
Preloaded type GtkComboBox


## **Load pre-trained agent**

In [None]:
# # Load trained agent and run in environment
# config['logger']['enabled'] = True  # Set to False to disable logging for the loaded agent
# SAC_agent_loaded = SAC(env=env, config=config)
# SAC_agent_loaded.load_agent('../notebooks/runs/DonkeyVae-v0/SAC/sac-donkey-vae-2025_11_28-18_53_32/sac_agent.pth')

### **Eval pre-trained agent**

In [None]:
# SAC_agent_loaded.eval_agent(num_episodes=1)

100%|██████████| 1/1 [00:16<00:00, 17.00s/it]


335.8437662820018

### **Re-Training the agent**

In [None]:
# SAC_agent_loaded.run_training_loop(num_episodes=num_episodes, tqdm_disable=False, print_rewards=False)

### **Evaluation of the agent**

In [None]:
# SAC_agent_loaded.eval_agent(num_episodes=1)

Preloaded type GtkListStore
Preloaded type GtkWindow
Preloaded type GtkVBox
Preloaded type GtkImage
Preloaded type GtkNotebook
Preloaded type GtkHBox
Preloaded type GtkFrame
Preloaded type GtkAlignment
Preloaded type GtkTreeView
Preloaded type GtkLabel
Preloaded type GtkCheckButton
Preloaded type GtkScrolledWindow
Preloaded type GtkComboBox
