<a href="https://colab.research.google.com/github/ezzeddinegasmi/DRL_comparative_study/blob/main/SAC23Avr_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [83]:
import sys

import gymnasium
import stable_baselines3

# Report the installed library versions so the run can be reproduced later.
print("SB3:", stable_baselines3.__version__)
print("Gymnasium:", gymnasium.__version__)

SB3: 2.3.2
Gymnasium: 0.29.1


In [None]:
IN_COLAB = "google.colab" in globals()
if IN_COLAB:
    !pip install -U "stable-baselines3[extra]==2.3.2" gymnasium pygame --quiet

# =========================================================
# 2) Imports and headless fix -----------------------------
# ---------------------------------------------------------
import os
import random

import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import SAC
from stable_baselines3.common.env_checker import check_env

# Colab has no display: unless a video driver is already configured, select
# SDL's "dummy" driver so that pygame does not crash.
if "SDL_VIDEODRIVER" not in os.environ:
    os.environ["SDL_VIDEODRIVER"] = "dummy"

# =========================================================
# 3) Continuous Breakout environment ----------------------
# ---------------------------------------------------------
class BreakoutContinuousEnv(gym.Env):
    """Minimal Gymnasium-compatible Breakout-style paddle game.

    Observation (``float32``, shape ``(5,)``):
        ``[ball_x, ball_y, ball_vx, ball_vy, pad_x]``, always clipped to
        ``observation_space``.
    Action (``float32``, shape ``(1,)``):
        Paddle displacement in ``[-1, 1]``; scaled to at most 10 pixels per
        step.
    Reward:
        ``+1.0`` when the paddle returns the ball, ``-10.0`` when the ball
        falls below the floor (the episode terminates), ``-0.01`` otherwise.
    Episodes are truncated after 500 steps.
    """

    metadata = {"render_modes": ["human"], "render_fps": 60}

    def __init__(self, render_mode: str | None = None):
        """Create the spaces and put the game in its initial (seed 42) state.

        Args:
            render_mode: ``"human"`` to draw every step with pygame, or
                ``None`` for no rendering.
        """
        super().__init__()
        self.render_mode = render_mode
        # Playfield dimensions in pixels.
        self.W, self.H = 400, 300
        # Paddle width / height, and ball radius as drawn by ``render``.
        self.P_W, self.P_H, self.BALL = 60, 10, 8
        # Spaces.
        self.action_space = spaces.Box(low=-1.0, high=1.0,
                                       shape=(1,), dtype=np.float32)
        low = np.array([0, 0, -5, -5, 0], dtype=np.float32)
        high = np.array([self.W, self.H, 5, 5, self.W], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high,
                                            dtype=np.float32)

        # pygame is initialised lazily on the first ``render`` call.
        self._pygame_ready, self.clock = False, None
        self.reset(seed=42)

    # ---------- helpers ----------
    def _obs(self):
        """Return the current observation, clipped to ``observation_space``.

        Clipping matters on the terminal frame, where ``ball_y`` is already
        below the floor (greater than ``H``) and would otherwise fall outside
        the declared bounds.
        """
        raw = np.array(
            [self.ball_x, self.ball_y, self.ball_vx,
             self.ball_vy, self.pad_x], dtype=np.float32)
        return np.clip(raw, self.observation_space.low,
                       self.observation_space.high)

    # ---------- Gymnasium API ----------
    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed for the environment RNG. When ``None`` the
                existing RNG keeps advancing, so a run that was seeded once
                stays reproducible across later unseeded resets.
            options: Unused; accepted for Gymnasium API compatibility.
        """
        super().reset(seed=seed)
        self.pad_x = self.W / 2
        self.ball_x, self.ball_y = self.W / 2, self.H / 2
        # Draw the serve direction from Gymnasium's seeded generator rather
        # than a throw-away ``random.Random(seed)``, which was re-seeded from
        # OS entropy on every unseeded reset.
        self.ball_vx = int(self.np_random.choice([-3, 3]))
        self.ball_vy = -3
        self.score, self.t = 0, 0
        self.terminated = self.truncated = False
        return self._obs(), {}

    def step(self, action):
        """Advance the game by one frame.

        Args:
            action: Array-like whose first element is the paddle command; it
                is clipped to the declared ``[-1, 1]`` range.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        self.t += 1
        # 1) Paddle: enforce the action bounds, then keep it on screen.
        command = float(np.clip(
            np.asarray(action, dtype=np.float32).ravel()[0], -1.0, 1.0))
        self.pad_x = float(np.clip(self.pad_x + command * 10,
                                   0, self.W - self.P_W))
        # 2) Ball: move, then reflect off the side walls and the ceiling.
        # Reflecting the position (not only the velocity) keeps the ball
        # inside the playfield and therefore inside the observation bounds.
        self.ball_x += self.ball_vx
        self.ball_y += self.ball_vy
        if self.ball_x <= 0:
            self.ball_x = abs(self.ball_x)
            self.ball_vx = abs(self.ball_vx)
        elif self.ball_x >= self.W:
            self.ball_x = 2 * self.W - self.ball_x
            self.ball_vx = -abs(self.ball_vx)
        if self.ball_y <= 0:
            self.ball_y = abs(self.ball_y)
            self.ball_vy = abs(self.ball_vy)
        # 3) Paddle / floor collisions.
        reward = -0.01
        reaches_paddle = self.ball_y + self.BALL >= self.H - self.P_H
        over_paddle = self.pad_x <= self.ball_x <= self.pad_x + self.P_W
        # Only a descending ball can be hit. Without the direction check a
        # ball caught below the paddle top flipped its velocity every step
        # and paid out +1 each time.
        if self.ball_vy > 0 and reaches_paddle and over_paddle:
            self.ball_vy = -abs(self.ball_vy)
            self.score += 1
            reward = 1.0
        elif self.ball_y > self.H:
            self.terminated = True
            reward = -10.0
        # 4) Time limit.
        if self.t >= 500:
            self.truncated = True
        # Rendering.
        if self.render_mode == "human":
            self.render()
        return self._obs(), reward, self.terminated, self.truncated, {}

    # ---------- rendering ----------
    def _init_pygame(self):
        """Initialise pygame, the window and the frame-rate clock."""
        import pygame
        pygame.init()
        self.screen = pygame.display.set_mode((self.W, self.H))
        pygame.display.set_caption("Breakout continuous")
        self.clock = pygame.time.Clock()
        self._pygame_ready = True

    def render(self):
        """Draw the ball and the paddle, then cap the frame rate."""
        import pygame
        if not self._pygame_ready:
            self._init_pygame()
        self.screen.fill((0, 0, 0))
        pygame.draw.circle(self.screen, (255, 255, 255),
                           (int(self.ball_x), int(self.ball_y)), self.BALL)
        pygame.draw.rect(self.screen, (0, 255, 0),
                         pygame.Rect(int(self.pad_x), self.H - self.P_H,
                                     self.P_W, self.P_H))
        pygame.display.flip()
        self.clock.tick(self.metadata["render_fps"])

    def close(self):
        """Shut pygame down if it was started; safe to call repeatedly."""
        if self._pygame_ready:
            import pygame
            pygame.quit()
            self._pygame_ready = False

# =========================================================
# 4) Quick validation -------------------------------------
# ---------------------------------------------------------
import sys
import traceback

try:
    # Check the environment against the Stable-Baselines3 API checker.
    env = BreakoutContinuousEnv()
    check_env(env, warn=True)
    print("✅ check_env passed")

    # Short SAC run to confirm that training starts and steps without error.
    model = SAC(policy="MlpPolicy", env=env, verbose=1)
    model.learn(total_timesteps=1_000)
    print("✅ learn 1 000 steps OK")
except Exception:
    # Notebook boundary: print the traceback into the cell output (stdout)
    # instead of propagating the error.
    traceback.print_exc(file=sys.stdout)

✅ check_env passed
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 198      |
|    ep_rew_mean     | -11.7    |
| time/              |          |
|    episodes        | 4        |
|    fps             | 33       |
|    time_elapsed    | 23       |
|    total_timesteps | 792      |
| train/             |          |
|    actor_loss      | 43.2     |
|    critic_loss     | 36.3     |
|    ent_coef        | 1.23     |
|    ent_coef_loss   | -0.608   |
|    learning_rate   | 0.0003   |
|    n_updates       | 691      |
---------------------------------
