# **<p style="text-align: center;">Aprendizaje por refuerzo - Ingeniería en Inteligencia Artificial</p>**
## **<p style="text-align: center;">Trabajo práctico Final - SAC</p>**

#### <p style="text-align: center;">Pettinari Fausto, Schuemer Ignacio, Torres Santiago </p>
#### <p style="text-align: center;">Profesores: Claudio Pose, Gabriel Torre, Nicolás Romero, Tomás Chimenti</p>

# **Donkey Car VAE Environment**

In [1]:
import sys
from pathlib import Path

# Repository root: the parent of the directory the notebook is run from.
REPO_ROOT = Path("..").resolve()

# Put the repository root on the import path so the project packages that
# live there can be imported from this notebook.
sys.path += [str(REPO_ROOT)]


In [2]:
import sys
import numpy as np
import gymnasium as gym
import torch

# donkey_gym expects the old `gym` API, so gymnasium (and the submodules listed
# here) are registered under the legacy module names. `setdefault` leaves any
# module that is already registered under one of these names untouched.
_legacy_gym_aliases = (
    ("gym", gym),
    ("gym.spaces", gym.spaces),
    ("gym.envs", gym.envs),
    ("gym.envs.registration", gym.envs.registration),
    ("gym.utils", gym.utils),
)
for _alias, _module in _legacy_gym_aliases:
    sys.modules.setdefault(_alias, _module)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

Using device: cuda


In [None]:
import os
from pathlib import Path
import numpy as np
import gymnasium as gym
import gdown
import zipfile
from DonkeyCarEnv.env_wrapper import make_donkey_vae_env

In [4]:
# Check whether the simulator is present; if not, download the zip from
# Google Drive using gdown and unpack it under DonkeyCarEnv/donkey_simulator.
SIM_DIR = REPO_ROOT / "DonkeyCarEnv" / "donkey_simulator"
SIM_PATH = str(SIM_DIR / "build_sdsandbox.x86_64")

if not os.path.exists(SIM_PATH):
    print("Donkey simulator not found, downloading...")
    url = "https://drive.google.com/uc?id=1h2VfpGHlZetL5RAPZ79bhDRkvlfuB4Wb"
    output = str(REPO_ROOT / "DonkeyCarEnv" / "donkey_simulator.zip")
    try:
        # NOTE(review): some gdown versions report a failed download by
        # returning None instead of raising - confirm against the installed
        # version. Failing here gives a clearer error than ZipFile(None).
        if gdown.download(url, output, quiet=False) is None:
            raise RuntimeError(f"Could not download the Donkey simulator from {url}")
        # Unzip the file
        with zipfile.ZipFile(output, 'r') as zip_ref:
            zip_ref.extractall(str(SIM_DIR))
    finally:
        # Remove the archive even when the download or extraction failed, so
        # no stale zip is left behind.
        if os.path.exists(output):
            os.remove(output)
    # Provide execute permissions to the simulator
    os.chmod(SIM_PATH, 0o755)
    print("Download complete.")

In [5]:
# Configure simulator path/port and timeout settings.
# These three values are always overwritten with the notebook's settings.
os.environ.update({
    'DONKEY_SIM_PATH': SIM_PATH,
    'DONKEY_SIM_PORT': '9090',
    'DONKEY_SIM_HEADLESS': '0',
})
# These two only get a value when the variable is not already set, so values
# exported in the shell before launching the notebook take precedence.
for _env_key, _env_default in (("DONKEY_WAIT_TIMEOUT", "30"), ("DONKEY_SKIP_WAIT", "1")):
    os.environ.setdefault(_env_key, _env_default)

# VAE paths - choose one of the following pretrained VAE models depending on the level
vae_archive_dir = REPO_ROOT / "DonkeyCarEnv" / "vae-level-0-dim-32.pkl" # Pretrained VAE for level 0
# vae_archive_dir = REPO_ROOT / "DonkeyCarEnv" / "ae-32_avc" / "vae.pth" # Pretrained VAE for level 2

os.environ["VAE_ARCHIVE_DIR"] = str(vae_archive_dir)

# NOTE(review): the messages below say "dir"/"folder" although the selected
# path is a single .pkl file; the check itself only requires the path to exist.
print(f"Loading VAE weights from extracted archive dir: {vae_archive_dir}")
assert vae_archive_dir.exists(), "VAE archive folder not found."

Loading VAE weights from extracted archive dir: /home/san/Documents/Ingenieria UdeSA/RL/RL-SAC/DonkeyCarEnv/vae-level-0-dim-32.pkl


In [None]:
# Build the Donkey Car environment from the selected VAE weights file and the
# torch device chosen earlier, then report its observation/action shapes.
env = make_donkey_vae_env(
    device=device,
    vae_path=str(vae_archive_dir),
)
print(f"Env ready with obs shape {env.observation_space.shape} and action shape {env.action_space.shape}")

In [7]:
# Number of episodes passed to every training run in this notebook.
num_episodes = 1_000

## **SAC Agent**

In [None]:
import yaml
from sac.agent import SAC

In [9]:
# Load the SAC configuration from YAML. The path is relative to the notebook's
# working directory. UTF-8 is requested explicitly so that parsing does not
# depend on the platform's default text encoding.
with open('configs/donkey_car_new.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Log using the Donkey Car VAE environment name
config['logger']['env_name'] = 'DonkeyVae-v0'


In [10]:
import torch

# Report the installed PyTorch build and whether CUDA can be used.
print(f"PyTorch version: {torch.__version__}")
print(f"Is CUDA available? {torch.cuda.is_available()}")

if not torch.cuda.is_available():
    print("CUDA not available. PyTorch is running on CPU.")
else:
    # GPU details: CUDA version of the build, device count, and the
    # index/name of the currently selected device.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"Current GPU index: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")


PyTorch version: 2.7.1+cu126
Is CUDA available? True
CUDA version: 12.6
Number of GPUs: 1
Current GPU index: 0
Device name: NVIDIA GeForce RTX 4050 Laptop GPU


## **TRAIN**

In [11]:
# Create the SAC agent from the environment and the loaded configuration, then
# show its configuration and network architectures before training starts.
SAC_agent = SAC(env=env, config=config)
SAC_agent.show_config()
SAC_agent.print_net_architectures()

{   'buffer': {'capacity': 50000},
    'logger': {   'agent_name': 'SAC',
                  'enabled': True,
                  'env_name': 'DonkeyVae-v0',
                  'flush_secs': 10,
                  'log_dir': 'runs',
                  'log_episode_stats': True,
                  'log_q_values': False,
                  'run_name': 'sac-donkey-vae',
                  'save_model': {'enabled': True, 'path': None},
                  'timestamp_format': '%Y_%m_%d-%H_%M_%S',
                  'use_timestamp': True},
    'policy_net': {   'action_scale': 1.0,
                      'hidden_layers_act': 'elu',
                      'hidden_sizes': [256, 256, 32],
                      'log_std_max': 2,
                      'log_std_min': -20,
                      'output_activation': 'identity'},
    'q_net': {   'hidden_layers_act': 'elu',
                 'hidden_sizes': [256, 256, 32],
                 'output_activation': 'identity'},
    'sac': {   'actor_lr': 0.0004,
       

In [None]:
# Train the agent for `num_episodes` episodes.
SAC_agent.run_training_loop(
    num_episodes=num_episodes,
    tqdm_disable=False,
    print_rewards=False,
)

In [None]:
# If you stopped the training and want to save the results.
# Only the two helpers used below are imported (instead of a wildcard import)
# so it is clear where these names come from.
from sac.utils.logger_utils import save_lengths, save_rewards

# NOTE(review): the run directory name is hard-coded (timestamped); update it
# to match the run whose results should be saved.
save_dir = REPO_ROOT / "notebooks" / "runs" / "DonkeyVae-v0" / "SAC" / "sac-donkey-vae-2025_11_30-14_11_59"

# Persist the agent checkpoint together with the per-episode rewards and
# lengths collected by the agent's logger.
SAC_agent.save_agent(filepath=save_dir / "sac_agent.pth")
save_rewards(run_dir=save_dir, rewards=SAC_agent.logger.episode_rewards)
save_lengths(run_dir=save_dir, lengths=SAC_agent.logger.episode_lengths)

## **Load pre-trained agent**

In [None]:
# Load trained agent and run in environment
import copy

# Run directory that holds the saved checkpoint. It is defined in this cell so
# the section also works on a fresh kernel, without having to execute the
# optional "save the results" cell first.
# NOTE(review): hard-coded, timestamped run name - point it at the run to load.
save_dir = REPO_ROOT / "notebooks" / "runs" / "DonkeyVae-v0" / "SAC" / "sac-donkey-vae-2025_11_30-14_11_59"

# Disable logging for the loaded agent only: the flag is flipped on a private
# copy so the shared `config` (and any agent built from it) is not modified.
loaded_config = copy.deepcopy(config)
loaded_config['logger']['enabled'] = False

SAC_agent_loaded = SAC(env=env, config=loaded_config)
SAC_agent_loaded.load_agent(save_dir / "sac_agent.pth")
SAC_agent_loaded.show_config()
SAC_agent_loaded.print_net_architectures()

### **Eval pre-trained agent**

In [None]:
# Evaluate the loaded agent for 10 episodes.
# The environment has to be created again before running this cell so that the
# simulator is launched.
SAC_agent_loaded.eval_agent(num_episodes=10)

### **Re-training of the pre-trained agent**

In [None]:
# Continue training the loaded agent for `num_episodes` more episodes.
# The environment has to be created again before running this cell so that the
# simulator is launched.
SAC_agent_loaded.run_training_loop(num_episodes=num_episodes, tqdm_disable=False, print_rewards=False)

In [None]:
# Save the re-trained agent and its episode statistics.
# Only the two helpers used below are imported (instead of a wildcard import)
# so it is clear where these names come from.
from sac.utils.logger_utils import save_lengths, save_rewards

# NOTE(review): the run directory name is hard-coded (timestamped); saving here
# writes to the same checkpoint path the agent was loaded from.
save_dir = REPO_ROOT / "notebooks" / "runs" / "DonkeyVae-v0" / "SAC" / "sac-donkey-vae-2025_11_30-14_11_59"

SAC_agent_loaded.save_agent(filepath=save_dir / "sac_agent.pth")
save_rewards(run_dir=save_dir, rewards=SAC_agent_loaded.logger.episode_rewards)
save_lengths(run_dir=save_dir, lengths=SAC_agent_loaded.logger.episode_lengths)

### **Evaluation of the re-trained agent**

In [None]:
# Evaluate the re-trained agent over 20 episodes.
SAC_agent_loaded.eval_agent(num_episodes=20)