In [1]:
%pip install imageio imageio-ffmpeg pygame numerize pathlib casadi stable-baselines3 tensorboard "stable-baselines3[extra]"  pyvirtualdisplay ipywidgets --quiet
%pip install "gymnasium[other]" --quiet

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import copy
import gymnasium as gym
import stable_baselines3
from SimulationConfigLoader import SimulationLoader
from Simulation import MapEntity, Map, ArticulatedVehicle, Simulation
from ParkingEnv import ParkingEnv
import random
import Visualization as Visualization
from casadi import cos, sin, tan
from typing import Any, SupportsFloat
from stable_baselines3 import PPO, SAC
import torch
import os
from IPython.display import HTML, display
from numerize import numerize
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
import platform
from IPython.display import clear_output

ModuleNotFoundError: No module named 'src'

In [None]:
# Directory name for logs. NOTE(review): no visible cell reads this variable —
# presumably meant for TensorBoard/training logs; confirm it is still needed.
log_dir = "logs"

#### Funções de treinamento e avaliação

In [None]:
def evaluate_model(model: "PPO | SAC", iterations: int = 10):
    """Run several evaluation episodes and collect their total rewards.

    Each episode is delegated to ``run_episode`` with a freshly drawn integer
    seed in ``[0, 999]``.

    Args:
        model: Trained policy; forwarded unchanged to ``run_episode``. The
            annotation is a string because this notebook evaluates SAC models
            as well as PPO ones.
        iterations: Number of episodes to run. Values <= 0 yield an empty list.

    Returns:
        list: One total reward per episode, in execution order.
    """
    return [run_episode(model, random.randrange(1000)) for _ in range(iterations)]
    
def run_episode_and_save_video(model):
    """Run one deterministic episode in a fresh ``ParkingEnv`` and record it.

    Every frame returned by ``env.render()`` is appended to a
    ``Visualization.VideoRecorder`` writing ``simulation.mp4`` at 10 fps.

    Args:
        model: Policy exposing ``predict(observation, deterministic=True)``.

    Returns:
        float: Sum of the rewards collected during the episode.
    """
    video_recorder = Visualization.VideoRecorder("simulation.mp4", fps=10)
    env = None
    total_reward = 0.0
    try:
        env = ParkingEnv()
        observation, info = env.reset()
        done = False
        while not done:
            action, _ = model.predict(observation, deterministic=True)
            observation, reward, terminated, truncated, info = env.step(action)
            total_reward += float(reward)
            video_recorder.append(env.render())
            done = terminated or truncated
    finally:
        # Always finalize the video file and release the environment, even when
        # the rollout raises or is interrupted. The recorder is closed first,
        # matching the original shutdown order.
        try:
            video_recorder.close()
        finally:
            if env is not None:
                env.close()
    return total_reward

def run_episode(model, seed = None):
    """Run one deterministic episode in a fresh ``ParkingEnv`` and return its reward.

    Args:
        model: Policy exposing ``predict(observation, deterministic=True)``.
        seed: Passed as the sole positional argument to the ``ParkingEnv``
            constructor (``None`` by default).

    Returns:
        float: Sum of the rewards collected until the episode terminates or
        is truncated.
    """
    env = ParkingEnv(seed)
    total_reward = 0.0
    try:
        observation, info = env.reset()
        done = False
        while not done:
            action, _ = model.predict(observation, deterministic=True)
            observation, reward, terminated, truncated, info = env.step(action)
            total_reward += float(reward)
            done = terminated or truncated
    finally:
        # Release the environment even if the rollout raises or is interrupted.
        env.close()
    return total_reward


from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
import platform


#### Funções auxiliares para visualização

In [None]:
# @title Play Video function
from IPython.display import HTML
from base64 import b64encode
import platform

# pyvirtualdisplay is imported on every platform except Windows.
if platform.system() != 'Windows':
    from pyvirtualdisplay import Display
else:
    Display = None

# create the directory to store the video(s)
os.makedirs("./video", exist_ok=True)

# Start a headless virtual display (skipped on Windows, where Display is None).
# The handle is stored in `virtual_display` rather than `display` so that it does
# not overwrite the IPython `display` function imported at the top of the notebook.
virtual_display = None
if Display is not None:
    virtual_display = Display(visible=False, size=(2000, 1500))
    virtual_display.start()

# Utility functions for recording videos of the gym environment and
# displaying them inline in the notebook.
def render_mp4(videopath: str) -> str:
    """Build an HTML ``<video>`` element that embeds an MP4 file inline.

    The file content is base64-encoded into a ``data:video/mp4`` URI, so the
    returned markup does not reference the file on disk.

    Args:
        videopath: Path of the MP4 file to embed.

    Returns:
        str: The ``<video>`` markup, or a ``<p>`` "not found" message when
        ``videopath`` does not exist.
    """
    if not os.path.exists(videopath):
        return f'<p>Video file not found: {videopath}</p>'
    # Use a context manager so the file handle is closed deterministically.
    with open(videopath, 'rb') as video_file:
        base64_encoded_mp4 = b64encode(video_file.read()).decode()
    return (
        '<video width=400 controls><source src="data:video/mp4;'
        f'base64,{base64_encoded_mp4}" type="video/mp4"></video>'
    )

def record_and_display_video_manual(env, model, video_name, num_episodes=1):
    """
    Records a video manually using Visualization.VideoRecorder and returns it
    as an inline HTML element.

    All episodes are appended to a single file, ``video/<video_name>.mp4``
    (10 fps). The environment is reset before each episode but is NOT closed
    here; the caller keeps ownership of it.

    Args:
        env: The gymnasium environment.
        model: The trained model (must expose ``predict``).
        video_name (str): The name to use for the video file.
        num_episodes (int): The number of episodes to record (default is 1).

    Returns:
        IPython ``HTML`` object wrapping the markup produced by ``render_mp4``.
    """
    os.makedirs("./video", exist_ok=True)

    video_path = f"video/{video_name}.mp4"
    video_recorder = Visualization.VideoRecorder(video_path, fps=10)

    total_reward = 0.0
    try:
        for _ in range(num_episodes):
            observation, info = env.reset()
            done = False
            while not done:
                action, _states = model.predict(observation, deterministic=True)
                observation, reward, terminated, truncated, info = env.step(action)
                total_reward += float(reward)
                video_recorder.append(env.render())
                done = terminated or truncated
    finally:
        # Finalize the video file even if the rollout raises or is interrupted,
        # so the recorder is never left open with a half-written file.
        video_recorder.close()

    print(f"\nTotal reward: {total_reward}")
    print(f"Video saved to: {video_path}")

    return HTML(render_mp4(video_path))

def record_and_display_video(env, model, video_name, num_episodes=1):
    """
    Records a video of the agent performing in the environment and displays it.

    The environment is wrapped in ``gym.wrappers.RecordVideo`` (output folder
    ``video``, file prefix ``ParkingEnv_<video_name>``). The wrapped environment
    is closed, and the temporary virtual display (if one was started) is
    stopped, even if the rollout raises.

    Args:
        env: The gymnasium environment.
        model: The trained model.
        video_name (str): The name to use for the video file.
        num_episodes (int): The number of episodes to record (default is 1).

    Returns:
        IPython ``HTML`` object: the embedded video, or a "not found" message
        when no matching ``.mp4`` file exists in ``video/``.
    """
    import glob

    # create the directory to store the video(s)
    os.makedirs("./video", exist_ok=True)

    env_name = "ParkingEnv"
    total_reward = 0.0

    # Use a virtual display for rendering (skipped on Windows). The local is
    # named `virtual_display` so it cannot be confused with IPython's `display`.
    virtual_display = None
    if platform.system() != 'Windows' and Display is not None:
        virtual_display = Display(visible=False, size=(1400, 900))
        virtual_display.start()

    try:
        env = gym.wrappers.RecordVideo(
            env,
            video_folder="video",
            name_prefix=f"{env_name}_{video_name}",
            episode_trigger=lambda episode_id: episode_id < num_episodes
        )
        try:
            observation, _ = env.reset()
            episode_count = 0
            done = False

            # Always runs at least one episode; resets between episodes until
            # `num_episodes` have finished.
            while not done:
                action, _states = model.predict(observation, deterministic=True)
                observation, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                total_reward += float(reward)
                if done:
                    episode_count += 1
                    if episode_count < num_episodes:
                        observation, _ = env.reset()
                        done = False
        finally:
            # Closing the wrapper is done unconditionally so a failed rollout
            # does not leave the recording open.
            env.close()
    finally:
        # Stop the virtual display if it was started
        if virtual_display is not None:
            virtual_display.stop()

    print(f"\nTotal reward: {total_reward}")

    # Find the video file that was created. Results are sorted because
    # glob.glob returns matches in arbitrary order.
    video_files = sorted(glob.glob(f"video/{env_name}_{video_name}*.mp4"))

    if not video_files:
        # Try alternative pattern
        video_files = sorted(glob.glob(f"video/*{video_name}*.mp4"))

    if not video_files:
        # List all video files for debugging
        all_videos = glob.glob("video/*.mp4")
        print(f"Warning: Expected video file not found. Available video files: {all_videos}")
        return HTML("<p>Video file not found. Check the video directory.</p>")

    # Use the first matching video file (lexicographically smallest name)
    video_path = video_files[0]
    print(f"Found video file: {video_path}")

    # show video
    return HTML(render_mp4(video_path))

In [None]:
from train import train_sac

In [None]:
# --- Training configuration ---
# `algorithm` selects the training branch; the training cell only handles "SAC".
algorithm = "SAC"
# Checkpoints are looked up as <model_save_dir>/<model_name>.zip.
model_save_dir = "models"
model_name = "SAC_Improved_V1"
# Both values are forwarded to train_sac (as total_timesteps / save_every).
total_training_timesteps = 20_000_000
save_every = 100_000

# Make sure the checkpoint directory exists before training starts.
os.makedirs(model_save_dir, exist_ok=True)

In [None]:
# Only SAC is supported; reject anything else before doing any work.
if algorithm != "SAC":
    raise ValueError(f"Algoritmo {algorithm} não suportado")

# If a saved model already exists, load it and continue training from it;
# otherwise hand `None` to train_sac.
if os.path.exists(os.path.join(model_save_dir, model_name + ".zip")):
    model_save_path = os.path.join(model_save_dir, model_name + ".zip")
    model = SAC.load(model_save_path)
else:
    model = None

model = train_sac(
    model,
    total_timesteps=total_training_timesteps,
    save_every=save_every,
    save_path=model_save_dir,
    save_name=model_name,
)


#### Carregar modelo já existente

In [8]:
# Reload the checkpoint stored at <model_save_dir>/<model_name>.zip.
model_save_path = os.path.join(model_save_dir, f"{model_name}.zip")
model = SAC.load(model_save_path)

In [16]:
# Record one deterministic episode with the loaded model and show it inline.
env = ParkingEnv()
try:
    episode_video = record_and_display_video_manual(env, model, model_name, num_episodes=1)
finally:
    # record_and_display_video_manual does not close the environment it is
    # given, so release it here once recording is over (or has failed).
    env.close()
episode_video

[in#0/rawvideo @ 0x44e62200] Error during demuxing: Immediate exit requested
[out#0/mp4 @ 0x44e76cc0] Error writing trailer: Immediate exit requested
[out#0/mp4 @ 0x44e76cc0] Error closing file: Immediate exit requested


KeyboardInterrupt: 

In [None]:
# Evaluate the model over 10 episodes and print summary statistics of the returns.
total_rewards = np.array(evaluate_model(model, 10))
print(
    f"mean reward {total_rewards.mean()}",
    f"std reward {total_rewards.std()}",
    f"min reward {total_rewards.min()}",
    f"max reward {total_rewards.max()}",
    sep="\n",
)
