<a href="https://colab.research.google.com/github/ezzeddinegasmi/DRL_comparative_study/blob/main/sac_breakout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 1 — Installation des dépendances

In [1]:
!pip install stable-baselines3[extra] pygame moviepy

Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cublas_cu12-12.4.5.8-py

In [None]:
import sys

IN_COLAB = "google.colab" in sys.modules

if IN_COLAB:
    !pip install gymnasium==1.0.0

In [8]:
import random
from typing import List, Tuple

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from IPython.display import clear_output
from torch.distributions import Normal

In [9]:
def seed_torch(seed):
    torch.manual_seed(seed)
    if torch.backends.cudnn.enabled:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True


seed = 777
random.seed(seed)
np.random.seed(seed)
seed_torch(seed)

2 — Vérification du GPU

In [10]:
import torch
print("GPU disponible ?", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Nom du GPU :", torch.cuda.get_device_name(0))

GPU disponible ? False


3 — Définition de l’environnement
    BreakoutContinuous

In [11]:
import gym
from gym import spaces
import numpy as np
import random

class BreakoutContinuousEnv(gym.Env):
    metadata = {'render.modes': ['rgb_array']}

    def __init__(self):
        super(BreakoutContinuousEnv, self).__init__()
        self.screen_width = 400
        self.screen_height = 300
        self.paddle_width = 60
        self.paddle_height = 10
        self.ball_size = 8

        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.observation_space = spaces.Box(
            low=np.array([0, 0, -5, -5, 0]),
            high=np.array([self.screen_width, self.screen_height, 5, 5, self.screen_width]),
            dtype=np.float32
        )
        self.reset()
        self.render_mode = False

    def reset(self):
        self.paddle_x = self.screen_width / 2
        self.ball_x = self.screen_width / 2
        self.ball_y = self.screen_height / 2
        self.ball_vx = random.choice([-3, 3])
        self.ball_vy = -3
        self.score = 0
        self.done = False
        return self._get_obs()

    def _get_obs(self):
        return np.array([self.ball_x, self.ball_y, self.ball_vx, self.ball_vy, self.paddle_x], dtype=np.float32)

    def step(self, action):
        move = float(action[0]) * 10
        self.paddle_x = np.clip(self.paddle_x + move, 0, self.screen_width - self.paddle_width)

        self.ball_x += self.ball_vx
        self.ball_y += self.ball_vy

        if self.ball_x <= 0 or self.ball_x >= self.screen_width:
            self.ball_vx *= -1
        if self.ball_y <= 0:
            self.ball_vy *= -1

        if (self.ball_y + self.ball_size >= self.screen_height - self.paddle_height) and \
           (self.paddle_x <= self.ball_x <= self.paddle_x + self.paddle_width):
            self.ball_vy *= -1
            self.score += 1
            reward = 1.0
        elif self.ball_y > self.screen_height:
            self.done = True
            reward = -10.0
        else:
            reward = -0.01

        return self._get_obs(), reward, self.done, {}

    def render(self, mode='rgb_array'):
        import pygame
        if not self.render_mode:
            pygame.init()
            self.screen = pygame.Surface((self.screen_width, self.screen_height))
            self.render_mode = True

        self.screen.fill((0, 0, 0))
        pygame.draw.circle(self.screen, (255, 255, 255), (int(self.ball_x), int(self.ball_y)), self.ball_size)
        pygame.draw.rect(
            self.screen,
            (0, 255, 0),
            pygame.Rect(int(self.paddle_x), self.screen_height - self.paddle_height, self.paddle_width, self.paddle_height)
        )
        return pygame.surfarray.array3d(self.screen).swapaxes(0, 1)

    def close(self):
        self.render_mode = False

4 — Entraînement du modèle SAC

In [13]:
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
import os

log_dir = "./logs/"
os.makedirs(log_dir, exist_ok=True)

env = Monitor(BreakoutContinuousEnv(), log_dir)
model = SAC("MlpPolicy", env, verbose=1, device="cuda")
model.learn(total_timesteps=50000)

model.save("sac_breakout_continuous")
del model
env.close()


AssertionError: 

In [14]:
from stable_baselines3.common.results_plotter import load_results, ts2xy
import matplotlib.pyplot as plt

results = load_results(log_dir)
x, y = ts2xy(results, 'timesteps')

plt.figure(figsize=(12, 6))
plt.plot(x, y, label='Reward per episode')
plt.xlabel("Timesteps")
plt.ylabel("Reward")
plt.title("SAC Training Rewards (via Monitor)")
plt.legend()
plt.grid(True)
plt.show()


LoadMonitorResultsError: No monitor files of the form *monitor.csv found in ./logs/

5 — Chargement du modèle et préparation de la vidéo

In [15]:
env = BreakoutContinuousEnv()
model = SAC.load("sac_breakout_continuous", env=env, device="cuda")

from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder

vec_env = DummyVecEnv([lambda: BreakoutContinuousEnv()])
video_folder = "./videos/"
video_length = 300

vec_env = VecVideoRecorder(vec_env, video_folder,
                           record_video_trigger=lambda x: x == 0,
                           video_length=video_length,
                           name_prefix="sac-breakout")

obs = vec_env.reset()
for i in range(video_length):
    action, _ = model.predict(obs, deterministic=True)
    obs, _, dones, _ = vec_env.step(action)
    if dones:
        break

vec_env.close()


FileNotFoundError: [Errno 2] No such file or directory: 'sac_breakout_continuous.zip'

6 — Affichage de la vidéo dans Colab
python
Copier
Modifier


In [16]:
import glob
from IPython.display import HTML
from base64 import b64encode

video_file = glob.glob("./videos/*.mp4")[0]

mp4 = open(video_file, 'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML(f"""
<video width=500 controls>
    <source src="{data_url}" type="video/mp4">
</video>
""")


IndexError: list index out of range