# **<p style="text-align: center;">Aprendizaje por refuerzo - Ingeniería en Inteligencia Artificial</p>**
## **<p style="text-align: center;">Trabajo práctico Final - SAC</p>**

#### <p style="text-align: center;">Pettinari Fausto, Schuemer Ignacio, Torres Santiago </p>
#### <p style="text-align: center;">Profesores: Claudio Pose, Gabriel Torre, Nicolás Romero, Tomás Chimenti</p>

# **Bipedal Walker Environment**

In [14]:
import sys
# Add the parent directory to the import search path
# (presumably so the local `sac` package imported below resolves — confirm).
sys.path.append('..')

In [15]:
import gymnasium as gym
from gymnasium.wrappers import TimeLimit
from sac.utils.logger_utils import *

In [16]:
# Build the BipedalWalker-v3 environment and wrap it in a TimeLimit
# capped at 1600 steps per episode.
env = TimeLimit(gym.make("BipedalWalker-v3"), max_episode_steps=1600)

#### **Logging & Debugging (Random Agent)**

In [17]:
from torch.utils.tensorboard import SummaryWriter
from sac.random_agent import random_agent_loop

In [None]:
# Shared run settings: `seed` is handed to the SB3 parameter helper below and
# `num_episodes` is the episode budget used by both training cells.
seed, num_episodes = 42, 400

## **SAC Agent**

In [19]:
import yaml
from sac.agent import SAC

In [20]:
# Parse the BipedalWalker SAC settings from YAML into `config`
# (safe_load only builds plain Python objects, never arbitrary ones).
with open('configs/bipedal_walker.yaml', 'r') as f:
    config = yaml.safe_load(f)

### **TRAIN**

In [None]:
# Instantiate the SAC agent from the loaded config and train it for
# `num_episodes` episodes, with the tqdm progress bar left enabled and
# reward printing switched off.
SAC_agent = SAC(env=env, config=config)
SAC_agent.run_training_loop(
    num_episodes=num_episodes,
    tqdm_disable=False,
    print_rewards=False,
)

#### **Eval trained Agent**

In [None]:
# Evaluate the freshly trained agent for a single episode with on-screen
# rendering; the value returned by eval_agent is shown as the cell output.
SAC_agent.eval_agent(
    num_episodes=1,
    render_mode="human",
)

### **Load trained agent & Eval**

In [None]:
# Load a previously trained agent from its checkpoint so it can be evaluated.
#
# Logging is disabled for the loaded agent through a *separate* config dict:
# the shared `config` is left untouched, so re-running the training cell in the
# same kernel still uses the logger settings that were read from the YAML file.
# Only the top level and the 'logger' sub-dict are copied; every other section
# is shared with `config` (it is not modified here).
eval_config = {**config, 'logger': {**config['logger'], 'enabled': False}}
SAC_agent_loaded = SAC(env=env, config=eval_config)
SAC_agent_loaded.load_agent('../notebooks/runs/BipedalWalker-v3/SAC/sac-bipedal-walker-2025_11_25-16_41_19/sac_agent.pth')

In [None]:
# Evaluate the checkpoint-loaded agent for a single rendered episode; the
# value returned by eval_agent is shown as the cell output.
SAC_agent_loaded.eval_agent(
    num_episodes=1,
    render_mode="human",
)

Creating new environment for evaluation with render_mode='human'


100%|██████████| 1/1 [00:26<00:00, 26.69s/it]


np.float32(318.96457)

## **SB3 SAC Agent**

In [11]:
# Output directory of the Stable-Baselines3 run: used below as the TensorBoard
# log dir, as the callback's save_dir, and as the prefix of the saved model path.
dir_sb3 = "runs/BipedalWalker-v3/sac_sb3-3/"

In [None]:
from stable_baselines3 import SAC as SB3_SAC
from sac.utils.stable_baseline_params import get_sb3_sac_params
from sac.utils.stable_baseline_logger import RobustEpisodeLogger

# Train the Stable-Baselines3 SAC implementation on the same environment as a
# baseline for the custom agent above.

# NOTE: this rebinds the notebook-level `config` to the SB3 configuration, so
# the SAC cells above must not be re-run after this cell without reloading
# their own YAML file first.
with open('configs/bipedal_walker_sb3.yaml', 'r') as f:
    config = yaml.safe_load(f)

sb3_params = get_sb3_sac_params(env, config, seed, env_id="BipedalWalker-v3")

# Keep only the policy and the environment; every other entry returned by the
# helper is dropped, so SB3 falls back to its own defaults for those settings.
# NOTE(review): this also discards whatever the `seed` argument produced —
# confirm that an unseeded, default-hyperparameter baseline is intended.
keys_to_keep = ["policy", "env"]
sb3_params = {k: v for k, v in sb3_params.items() if k in keys_to_keep}

writer = SummaryWriter(dir_sb3)
try:
    sb3_sac = SB3_SAC(**sb3_params)
    # presumably ends training once `max_episodes` episodes were logged — confirm
    callback = RobustEpisodeLogger(
        writer=writer,
        max_episodes=num_episodes,
        save_dir=dir_sb3,
        verbose=0
    )

    # Step budget large enough for `num_episodes` episodes of maximum length.
    sb3_sac.learn(
        total_timesteps=num_episodes * env.spec.max_episode_steps,
        callback=callback,
        progress_bar=True
    )
    # save the trained agent
    sb3_sac.save(f"{dir_sb3}/sb3_sac_agent")
finally:
    # Always release the TensorBoard writer and the environment, even when
    # training is interrupted (e.g. kernel interrupt) or raises.
    writer.close()
    env.close()

Output()