In [None]:
# Cellule 1: Imports et Configuration globale
import pygame
import numpy as np
import math
import random

# Configuration de l'√©cran
SCREEN_WIDTH = 1200
SCREEN_HEIGHT = 800
FPS = 60

# Palette de couleurs
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
BLUE = (0, 100, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
ORANGE = (255, 165, 0)
YELLOW = (255, 255, 0)
DARK_RED = (139, 0, 0)
PURPLE = (128, 0, 128)
CYAN = (0, 255, 255)

print("‚úì Configuration charg√©e")
print(f"R√©solution: {SCREEN_WIDTH}x{SCREEN_HEIGHT}")


pygame 2.6.1 (SDL 2.28.4, Python 3.12.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
‚úì Configuration charg√©e
R√©solution: 1200x800


  from pkg_resources import resource_stream, resource_exists


In [2]:
# Cellule 2: D√©finition des bou√©es (obstacles)
class Buoy:
    """
    Repr√©sente une bou√©e obstacle dans l'environnement
    Utilis√© pour la d√©tection de collision
    """
    
    def __init__(self, x, y, radius=15, color=ORANGE):
        self.x = x
        self.y = y
        self.radius = radius
        self.color = color
        self.collision = False  # Flag pour visualisation collision
        
    def draw(self, screen):
        """Affichage de la bou√©e"""
        current_color = DARK_RED if self.collision else self.color
        
        # Cercle principal
        pygame.draw.circle(screen, current_color, (int(self.x), int(self.y)), self.radius)
        pygame.draw.circle(screen, BLACK, (int(self.x), int(self.y)), self.radius, 2)
        
        # Point central
        pygame.draw.circle(screen, WHITE, (int(self.x), int(self.y)), 3)
        
    def check_collision_with_boat(self, boat):
        """
        D√©tection de collision cercle-rectangle
        Retourne True si collision d√©tect√©e
        """
        # Test rapide: distance entre centres
        dx = self.x - boat.x
        dy = self.y - boat.y
        distance_center = math.sqrt(dx**2 + dy**2)
        
        if distance_center < self.radius + boat.width / 2:
            self.collision = True
            return True
        
        # Test pr√©cis: distance aux coins du bateau
        corners = boat.get_corners()
        for corner in corners:
            dx = self.x - corner[0]
            dy = self.y - corner[1]
            distance = math.sqrt(dx**2 + dy**2)
            
            if distance < self.radius:
                self.collision = True
                return True
        
        self.collision = False
        return False
    
    def get_position(self):
        """Retourne (x, y, radius) pour int√©gration path planning"""
        return (self.x, self.y, self.radius)

print("‚úì Classe Buoy d√©finie")


‚úì Classe Buoy d√©finie


In [3]:
# Cellule 3: Points de d√©part et d'arriv√©e
class Waypoint:
    """
    Repr√©sente un point de navigation (START ou GOAL)
    Utilis√© pour d√©finir les objectifs de navigation
    """
    
    def __init__(self, x, y, waypoint_type='goal', radius=25):
        self.x = x
        self.y = y
        self.radius = radius
        self.type = waypoint_type  # 'start' ou 'goal'
        self.reached = False
        
    def draw(self, screen, font):
        """Affichage du waypoint avec label"""
        if self.type == 'start':
            color = GREEN
            text = 'S'
            inner_color = (0, 200, 0)
        else:
            color = RED if not self.reached else CYAN
            text = 'G'
            inner_color = (200, 0, 0) if not self.reached else (0, 200, 200)
        
        # Cercles concentriques
        pygame.draw.circle(screen, color, (int(self.x), int(self.y)), self.radius)
        pygame.draw.circle(screen, inner_color, (int(self.x), int(self.y)), self.radius - 5)
        pygame.draw.circle(screen, BLACK, (int(self.x), int(self.y)), self.radius, 3)
        
        # Label
        text_surface = font.render(text, True, WHITE)
        text_rect = text_surface.get_rect(center=(int(self.x), int(self.y)))
        screen.blit(text_surface, text_rect)
        
    def check_reached(self, boat, tolerance=30):
        """V√©rifie si le bateau a atteint ce waypoint"""
        dx = self.x - boat.x
        dy = self.y - boat.y
        distance = math.sqrt(dx**2 + dy**2)
        
        if distance < tolerance:
            self.reached = True
            return True
        return False
    
    def get_distance(self, boat):
        """Distance euclidienne au bateau"""
        dx = self.x - boat.x
        dy = self.y - boat.y
        return math.sqrt(dx**2 + dy**2)

print("‚úì Classe Waypoint d√©finie")


‚úì Classe Waypoint d√©finie


In [4]:
# Cellule 4: Mod√®le cin√©matique du bateau diff√©rentiel
class DifferentialBoat:
    """
    Bateau avec entra√Ænement diff√©rentiel (2 roues)
    Impl√©mente la cin√©matique: v = (v_r + v_l)/2, œâ = (v_r - v_l)/L
    """
    
    def __init__(self, x, y, theta, wheel_base=40, wheel_radius=10):
        # √âtat courant
        self.x = x
        self.y = y
        self.theta = theta  # Orientation en radians
        
        # √âtat initial (pour reset)
        self.initial_x = x
        self.initial_y = y
        self.initial_theta = theta
        
        # Param√®tres physiques
        self.wheel_base = wheel_base      # Distance entre roues (L)
        self.wheel_radius = wheel_radius  # Rayon des roues (r)
        
        # Commandes (vitesses angulaires des roues)
        self.omega_left = 0.0
        self.omega_right = 0.0
        
        # Vitesses du robot
        self.v = 0.0      # Vitesse lin√©aire
        self.omega = 0.0  # Vitesse angulaire
        
        # Dimensions pour affichage
        self.length = 50
        self.width = 30
        
        # Trajectoire
        self.trail = []
        self.max_trail_length = 300
        
        # M√©triques
        self.collision_count = 0
        self.total_distance = 0.0
        
    def set_wheel_velocities(self, omega_left, omega_right):
        """D√©finit les commandes (vitesses angulaires des roues)"""
        self.omega_left = omega_left
        self.omega_right = omega_right
        
    def update_kinematics(self, dt):
        """
        Mise √† jour de la cin√©matique diff√©rentielle
        √âquations:
        - v_wheel = r * œâ_wheel
        - v = (v_right + v_left) / 2
        - œâ = (v_right - v_left) / L
        - dx/dt = v * cos(Œ∏)
        - dy/dt = v * sin(Œ∏)
        - dŒ∏/dt = œâ
        """
        # Vitesses lin√©aires des roues
        v_left = self.wheel_radius * self.omega_left
        v_right = self.wheel_radius * self.omega_right
        
        # Vitesses du robot
        self.v = (v_right + v_left) / 2.0
        self.omega = (v_right - v_left) / self.wheel_base
        
        # Tracking distance
        distance_step = abs(self.v * dt)
        self.total_distance += distance_step
        
        # Int√©gration (m√©thode d'Euler)
        self.x += self.v * math.cos(self.theta) * dt
        self.y += self.v * math.sin(self.theta) * dt
        self.theta += self.omega * dt
        
        # Normalisation de l'angle [-œÄ, œÄ]
        self.theta = (self.theta + math.pi) % (2 * math.pi) - math.pi
        
        # Mise √† jour trajectoire
        self.trail.append((int(self.x), int(self.y)))
        if len(self.trail) > self.max_trail_length:
            self.trail.pop(0)
    
    def reset_to_start(self):
        """R√©initialise le bateau √† sa position initiale"""
        self.x = self.initial_x
        self.y = self.initial_y
        self.theta = self.initial_theta
        self.trail = []
        self.collision_count = 0
        self.total_distance = 0.0
        self.omega_left = 0.0
        self.omega_right = 0.0
        self.v = 0.0
        self.omega = 0.0
    
    def get_corners(self):
        """Calcule les 4 coins du rectangle repr√©sentant le bateau"""
        corners = [
            (-self.length / 2, -self.width / 2),
            (self.length / 2, -self.width / 2),
            (self.length / 2, self.width / 2),
            (-self.length / 2, self.width / 2)
        ]
        
        # Rotation + translation
        rotated_corners = []
        for corner in corners:
            x_rot = corner[0] * math.cos(self.theta) - corner[1] * math.sin(self.theta)
            y_rot = corner[0] * math.sin(self.theta) + corner[1] * math.cos(self.theta)
            rotated_corners.append((self.x + x_rot, self.y + y_rot))
        
        return rotated_corners
    
    def draw(self, screen):
        """Affichage du bateau et de sa trajectoire"""
        # Trajectoire
        if len(self.trail) > 1:
            pygame.draw.lines(screen, (100, 200, 100), False, self.trail, 2)
        
        # Corps du bateau
        corners = self.get_corners()
        pygame.draw.polygon(screen, BLUE, corners)
        pygame.draw.polygon(screen, BLACK, corners, 2)
        
        # Indicateur de direction (avant)
        front_x = self.x + (self.length / 2) * math.cos(self.theta)
        front_y = self.y + (self.length / 2) * math.sin(self.theta)
        pygame.draw.circle(screen, RED, (int(front_x), int(front_y)), 5)
        
    def get_state(self):
        """Retourne l'√©tat complet (utile pour RL)"""
        return {
            'x': self.x,
            'y': self.y,
            'theta': self.theta,
            'v': self.v,
            'omega': self.omega,
            'omega_left': self.omega_left,
            'omega_right': self.omega_right,
            'collision_count': self.collision_count,
            'total_distance': self.total_distance
        }

print("‚úì Classe DifferentialBoat d√©finie")


‚úì Classe DifferentialBoat d√©finie


In [5]:
# Cellule 5: Environnement de simulation complet
class BoatSimulation:
    """
    Gestionnaire de la simulation compl√®te
    Int√®gre: bateau, bou√©es, waypoints, collisions, rendering
    Mission: Retour au point de d√©part
    """
    
    def __init__(self):
        pygame.init()
        self.screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
        pygame.display.set_caption("Simulation Bateau Diff√©rentiel - Mission Circulaire")
        self.clock = pygame.time.Clock()
        self.font = pygame.font.Font(None, 24)
        self.large_font = pygame.font.Font(None, 48)
        
        # Waypoints - Mission circulaire (retour au d√©part)
        self.start_point = Waypoint(100, 400, 'start')
        self.goal_point = Waypoint(self.start_point.x, self.start_point.y, 'goal')
        
        # Bateau initialis√© au start
        self.boat = DifferentialBoat(
            x=self.start_point.x,
            y=self.start_point.y,
            theta=0
        )
        
        # === CONFIGURATION DES BOU√âES ICI ===
        self.buoys = []
        # Exemple: une seule bou√©e
        self.buoys.append(Buoy(x=600, y=400, radius=20))
        
        # OU g√©n√©rer al√©atoirement:
        # self.generate_buoys(num_buoys=8)
        
        # √âtat de la simulation
        self.running = True
        self.paused = False
        self.goal_reached = False
        self.setting_goal = False
        
        # === Mission circulaire: Flag pour √©viter victoire imm√©diate ===
        self.mission_started = False  # Mission commence apr√®s s'√™tre √©loign√© du start
        self.min_distance_from_start = 100  # Distance minimale pour valider d√©part
        
    def generate_buoys(self, num_buoys=8):
        """G√©n√®re des bou√©es al√©atoires en √©vitant start, goal et bateau"""
        self.buoys = []
        
        for _ in range(num_buoys):
            valid_position = False
            attempts = 0
            
            while not valid_position and attempts < 100:
                x = random.randint(50, SCREEN_WIDTH - 50)
                y = random.randint(50, SCREEN_HEIGHT - 50)
                
                # V√©rifier distances minimales
                dist_start = math.sqrt((x - self.start_point.x)**2 + (y - self.start_point.y)**2)
                dist_goal = math.sqrt((x - self.goal_point.x)**2 + (y - self.goal_point.y)**2)
                dist_boat = math.sqrt((x - self.boat.x)**2 + (y - self.boat.y)**2)
                
                if dist_start > 80 and dist_goal > 80 and dist_boat > 80:
                    valid_position = True
                
                attempts += 1
            
            if valid_position:
                radius = random.randint(12, 18)
                self.buoys.append(Buoy(x, y, radius))
        
    def handle_input(self):
        """Contr√¥le manuel du bateau par clavier"""
        keys = pygame.key.get_pressed()
        
        if self.paused or self.goal_reached:
            return
        
        omega_left = 0.0
        omega_right = 0.0
        
        # Contr√¥le diff√©rentiel
        if keys[pygame.K_UP]:
            omega_left = 5.0
            omega_right = 5.0
        elif keys[pygame.K_DOWN]:
            omega_left = -5.0
            omega_right = -5.0
            
        if keys[pygame.K_LEFT]:
            omega_left -= 3.0
            omega_right += 3.0
        elif keys[pygame.K_RIGHT]:
            omega_left += 3.0
            omega_right -= 3.0
        
        self.boat.set_wheel_velocities(omega_left, omega_right)
        
    def check_collisions(self):
        """V√©rifie toutes les collisions bateau-bou√©es"""
        collision_detected = False
        
        for buoy in self.buoys:
            if buoy.check_collision_with_boat(self.boat):
                collision_detected = True
                
        if collision_detected:
            self.boat.collision_count += 1
            
        return collision_detected
    
    def check_goal_reached(self):
        """
        V√©rifie si le goal est atteint (seulement apr√®s s'√™tre √©loign√© du d√©part)
        Phase 1: S'√©loigner du d√©part (min_distance_from_start)
        Phase 2: Revenir au d√©part pour atteindre le goal
        """
        distance_to_start = self.start_point.get_distance(self.boat)
        
        # √âtape 1: V√©rifier si la mission a commenc√© (s'√©loigner du d√©part)
        if not self.mission_started:
            if distance_to_start > self.min_distance_from_start:
                self.mission_started = True
                print(f"‚úÖ Mission commenc√©e! (distance du d√©part: {distance_to_start:.1f} px)")
            return  # Ne pas v√©rifier le goal tant que pas parti
        
        # √âtape 2: Une fois parti, v√©rifier si on est revenu au goal
        if self.goal_point.check_reached(self.boat):
            if not self.goal_reached:
                self.goal_reached = True
                state = self.boat.get_state()
                print(f"\nüéâ MISSION ACCOMPLIE! Retour au d√©part r√©ussi!")
                print(f"   Distance parcourue: {state['total_distance']:.1f} px")
                print(f"   Collisions: {state['collision_count']}")
        
    def draw_info(self):
        """Affiche les informations de debug/t√©l√©m√©trie"""
        state = self.boat.get_state()
        distance_to_start = self.start_point.get_distance(self.boat)
        
        # Statut mission
        if not self.mission_started:
            mission_status = f"D√©part - S'√©loigner ({distance_to_start:.0f}/{self.min_distance_from_start:.0f}px)"
        else:
            mission_status = "En cours - Revenir au d√©part"
        
        info_texts = [
            f"Mission: {mission_status}",
            f"Position: ({state['x']:.1f}, {state['y']:.1f})",
            f"Angle: {math.degrees(state['theta']):.1f}¬∞",
            f"Vitesse: {state['v']:.2f} px/s",
            f"Distance au d√©part: {distance_to_start:.1f} px",
            f"Distance totale: {state['total_distance']:.1f} px",
            f"Collisions: {state['collision_count']}",
            "",
            "Contr√¥les:",
            "‚Üë/‚Üì: Avancer/Reculer",
            "‚Üê/‚Üí: Tourner",
            "R: Reset | G: Nouveau goal (clic)",
            "N: Nouveaux obstacles",
            "SPACE: Pause | ESC: Quitter"
        ]
        
        y_offset = 10
        for text in info_texts:
            surface = self.font.render(text, True, BLACK)
            self.screen.blit(surface, (10, y_offset))
            y_offset += 25
            
    def draw_success_message(self):
        """Overlay de succ√®s quand goal atteint"""
        if self.goal_reached:
            # Overlay vert semi-transparent
            overlay = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
            overlay.set_alpha(128)
            overlay.fill((0, 255, 0))
            self.screen.blit(overlay, (0, 0))
            
            # Message principal
            success_text = self.large_font.render("MISSION ACCOMPLIE!", True, WHITE)
            text_rect = success_text.get_rect(center=(SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
            self.screen.blit(success_text, text_rect)
            
            # Instructions
            restart_text = self.font.render("Appuyez sur R pour recommencer", True, WHITE)
            restart_rect = restart_text.get_rect(center=(SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2 + 50))
            self.screen.blit(restart_text, restart_rect)
            
    def reset(self):
        """R√©initialise l'√©pisode de navigation"""
        self.boat.reset_to_start()
        self.goal_reached = False
        self.goal_point.reached = False
        self.mission_started = False  # R√©initialiser le flag de mission
        for buoy in self.buoys:
            buoy.collision = False
    
    def run(self):
        """Boucle principale de simulation"""
        while self.running:
            dt = self.clock.tick(FPS) / 1000.0  # Delta time en secondes
            
            # === √âv√©nements ===
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.running = False
                elif event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        self.running = False
                    elif event.key == pygame.K_r:
                        self.reset()
                    elif event.key == pygame.K_g:
                        self.setting_goal = True
                        print("Cliquez pour placer le nouveau goal")
                    elif event.key == pygame.K_n:
                        self.generate_buoys(random.randint(6, 12))
                        self.reset()
                    elif event.key == pygame.K_SPACE:
                        self.paused = not self.paused
                
                elif event.type == pygame.MOUSEBUTTONDOWN and self.setting_goal:
                    mouse_x, mouse_y = pygame.mouse.get_pos()
                    self.goal_point = Waypoint(mouse_x, mouse_y, 'goal')
                    self.setting_goal = False
                    self.reset()
                    print(f"Nouveau goal: ({mouse_x}, {mouse_y})")
            
            # === Update ===
            if not self.paused and not self.goal_reached:
                self.handle_input()
                self.boat.update_kinematics(dt)
                self.check_collisions()
                self.check_goal_reached()
            
            # === Rendering ===
            self.screen.fill(WHITE)
            
            # Waypoints - Afficher seulement le start (goal = start)
            self.start_point.draw(self.screen, self.font)
            
            # Obstacles
            for buoy in self.buoys:
                buoy.draw(self.screen)
            
            # Bateau
            self.boat.draw(self.screen)
            
            # UI
            self.draw_info()
            
            if self.paused:
                pause_text = self.font.render("PAUSE", True, RED)
                self.screen.blit(pause_text, (SCREEN_WIDTH // 2 - 30, 10))
            
            if self.setting_goal:
                goal_text = self.font.render("Cliquez pour placer le GOAL", True, PURPLE)
                self.screen.blit(goal_text, (SCREEN_WIDTH // 2 - 120, 10))
            
            self.draw_success_message()
            
            pygame.display.flip()
        
        pygame.quit()

print("‚úì Classe BoatSimulation d√©finie (Mission circulaire)")


‚úì Classe BoatSimulation d√©finie (Mission circulaire)


In [6]:
# Cellule 6: Ex√©cution de la simulation
# ‚ö†Ô∏è Cette cellule bloque jusqu'√† fermeture de la fen√™tre Pygame

if __name__ == "__main__":
    sim = BoatSimulation()
    sim.run()
    print("\n‚úì Simulation termin√©e")



‚úì Simulation termin√©e


In [7]:
# ============================================================================
# CELLULE 6: Environnement RL OPTIMIS√â - Approche progressive
# ============================================================================

import gymnasium as gym
from gymnasium import spaces
import numpy as np

class BoatNavigationEnv(gym.Env):
    '''
    Environnement simplifi√© avec r√©compenses progressives claires
    '''
    metadata = {'render_modes': ['human'], 'render_fps': 60}
    
    def __init__(self, render_mode=None):
        super().__init__()
        
        # Actions: vitesses des roues
        self.action_space = spaces.Box(
            low=np.array([-5.0, -5.0]),
            high=np.array([5.0, 5.0]),
            dtype=np.float32
        )
        
        # Observation: 8 valeurs
        self.observation_space = spaces.Box(
            low=-np.ones(8),
            high=np.ones(8),
            dtype=np.float32
        )
        
        # Positions
        self.start_position = np.array([100.0, 400.0])
        self.buoy_position = np.array([600.0, 400.0])
        
        # Point de passage derri√®re la bou√©e
        self.waypoint_position = np.array([800.0, 400.0])
        
        # Cr√©er le bateau
        self.boat = DifferentialBoat(
            x=self.start_position[0],
            y=self.start_position[1],
            theta=0
        )
        
        # Cr√©er la bou√©e
        self.buoy = Buoy(x=self.buoy_position[0], y=self.buoy_position[1], radius=25)
        
        # √âtats de la mission (3 phases)
        self.phase = 0  # 0: aller au waypoint, 1: retour, 2: termin√©
        self.steps = 0
        self.max_steps = 1000
        
        self.waypoint_reached = False
        self.goal_reached = False
        
        # R√©compenses cumul√©es (pour debug)
        self.episode_reward = 0
        
        self.render_mode = render_mode
        
    def _get_observation(self):
        '''Observation normalis√©e et claire'''
        boat_pos = np.array([self.boat.x, self.boat.y])
        
        # Objectif selon la phase
        if self.phase == 0:
            target = self.waypoint_position
        else:
            target = self.start_position
        
        # Vecteur vers l'objectif
        vec_to_target = target - boat_pos
        dist_to_target = np.linalg.norm(vec_to_target)
        angle_to_target = math.atan2(vec_to_target[1], vec_to_target[0])
        relative_angle = self._normalize_angle(angle_to_target - self.boat.theta)
        
        # Distance √† la bou√©e
        dist_to_buoy = np.linalg.norm(self.buoy_position - boat_pos)
        
        # Observation normalis√©e
        obs = np.array([
            boat_pos[0] / 600.0 - 1,           # Position X normalis√©e [-1, 1]
            boat_pos[1] / 400.0 - 1,           # Position Y normalis√©e [-1, 1]
            np.cos(self.boat.theta),           # Orientation (cos)
            np.sin(self.boat.theta),           # Orientation (sin)
            dist_to_target / 1000.0,           # Distance √† l'objectif
            relative_angle / np.pi,            # Angle vers l'objectif [-1, 1]
            self.boat.v / 50.0,                # Vitesse
            float(self.phase)                  # Phase de la mission
        ], dtype=np.float32)
        
        return np.clip(obs, -1, 1)
    
    def _normalize_angle(self, angle):
        '''Normalise un angle entre -œÄ et œÄ'''
        return (angle + math.pi) % (2 * math.pi) - math.pi
    
    def _compute_reward(self, collision):
        '''R√©compense SIMPLE et CLAIRE'''
        reward = 0.0
        
        boat_pos = np.array([self.boat.x, self.boat.y])
        dist_buoy = np.linalg.norm(self.buoy_position - boat_pos)
        
        # PHASE 0: Aller au waypoint derri√®re la bou√©e
        if self.phase == 0:
            dist_waypoint = np.linalg.norm(self.waypoint_position - boat_pos)
            
            # R√©compense proportionnelle: plus on est proche, mieux c'est
            # Distance initiale ‚âà 700px, on veut aller vers 0
            reward = (700 - dist_waypoint) / 100.0
            
            # Bonus pour avancer vite
            reward += self.boat.v * 0.1
            
            # V√©rifier si waypoint atteint
            if dist_waypoint < 50 and not self.waypoint_reached:
                self.waypoint_reached = True
                self.phase = 1
                reward += 500.0  # GROS bonus pour avoir atteint le waypoint
                print(f"‚úÖ Waypoint atteint! Retour au d√©part. Steps: {self.steps}")
        
        # PHASE 1: Retour au d√©part
        elif self.phase == 1:
            dist_start = np.linalg.norm(self.start_position - boat_pos)
            
            # R√©compense proportionnelle
            # Distance initiale ‚âà 700px
            reward = (700 - dist_start) / 100.0
            
            # Bonus pour avancer vite
            reward += self.boat.v * 0.1
            
            # V√©rifier si arriv√© au d√©part
            if dist_start < 40 and not self.goal_reached:
                self.goal_reached = True
                self.phase = 2
                reward += 10000.0  # √âNORME bonus pour mission accomplie
                print(f"üéâ MISSION ACCOMPLIE! Total steps: {self.steps}")
        
        # P√âNALIT√â pour collision avec la bou√©e
        if collision:
            reward = -50000.0
        
        # P√©nalit√© si trop proche de la bou√©e (zone de danger)
        if dist_buoy < 35:
            reward -= 100.0
        
        # Petite p√©nalit√© temporelle (encourage efficacit√©)
        reward -= 10
        
        return reward
    
    def reset(self, seed=None, options=None):
        '''Reset l'environnement'''
        super().reset(seed=seed)
        
        # Reset bateau
        self.boat.x = self.start_position[0]
        self.boat.y = self.start_position[1]
        self.boat.theta = 0
        self.boat.v = 0.0
        self.boat.omega = 0.0
        self.boat.omega_left = 0.0
        self.boat.omega_right = 0.0
        self.boat.trail = []
        
        # Reset mission
        self.phase = 0
        self.waypoint_reached = False
        self.goal_reached = False
        self.steps = 0
        self.episode_reward = 0
        
        return self._get_observation(), {}
    
    def step(self, action):
        '''Ex√©cute une action'''
        # Appliquer l'action
        omega_left, omega_right = np.clip(action, -5.0, 5.0)
        self.boat.set_wheel_velocities(float(omega_left), float(omega_right))
        self.boat.update_kinematics(1/60.0)
        
        self.steps += 1
        
        # V√©rifier collision
        collision = self.buoy.check_collision_with_boat(self.boat)
        
        # Calculer r√©compense
        reward = self._compute_reward(collision)
        self.episode_reward += reward
        
        # Conditions de terminaison
        terminated = self.goal_reached or collision
        truncated = self.steps >= self.max_steps
        
        # Sortie d'√©cran
        if (self.boat.x < 0 or self.boat.x > SCREEN_WIDTH or 
            self.boat.y < 0 or self.boat.y > SCREEN_HEIGHT):
            reward -= 10000.0
            terminated = True
        
        observation = self._get_observation()
        
        # Info
        info = {
            'phase': self.phase,
            'waypoint_reached': self.waypoint_reached,
            'goal_reached': self.goal_reached,
            'collision': collision,
            'episode_reward': self.episode_reward,
            'distance_to_buoy': np.linalg.norm(self.buoy_position - np.array([self.boat.x, self.boat.y]))
        }
        
        return observation, reward, terminated, truncated, info
    
    def render(self):
        pass
    
    def close(self):
        pass

print("‚úì Classe BoatNavigationEnv d√©finie (Version Optimale)")


‚úì Classe BoatNavigationEnv d√©finie (Version Optimale)


In [8]:
# ============================================================================
# CELLULE 7: Entra√Ænement OPTIMIS√â
# ============================================================================

from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# Cr√©er l'environnement
print("Cr√©ation de l'environnement...")
env = BoatNavigationEnv()

# V√©rifier
check_env(env)
print("‚úì Environnement valide!\n")

# PPO avec hyperparam√®tres optimis√©s pour ce probl√®me
print("Cr√©ation du mod√®le PPO...")
model = PPO(
    "MlpPolicy",
    env,
    learning_rate=0.03,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.9,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,  # Encourage l'exploration
    verbose=1
)

# Entra√Æner
print("\nüöÄ D√©but de l'entra√Ænement (300k steps)...\n")
model.learn(total_timesteps=300000)

# Sauvegarder
model.save("boat_navigation_ppo")
print("\n‚úÖ Entra√Ænement termin√©!")


Cr√©ation de l'environnement...
‚úì Environnement valide!

Cr√©ation du mod√®le PPO...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  gym.logger.warn(
  gym.logger.warn(



üöÄ D√©but de l'entra√Ænement (300k steps)...

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1e+03     |
|    ep_rew_mean     | -1.01e+04 |
| time/              |           |
|    fps             | 1735      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
----------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 221       |
|    ep_rew_mean          | -1.17e+04 |
| time/                   |           |
|    fps                  | 1149      |
|    iterations           | 2         |
|    time_elapsed         | 3         |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 46.190575 |
|    clip_fraction        | 0.9       |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.9      |
|    explained_variance   | -0.0004   |
|

In [9]:
# ============================================================================
# CELLULE 8: Test du mod√®le entra√Æn√© AVEC VISUALISATION (VERSION CORRIG√âE)
# ============================================================================

import pygame

# Charger le mod√®le
model = PPO.load("boat_navigation_ppo", env=env)

# Initialiser Pygame pour la visualisation
pygame.init()
screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("RL Boat Navigation - Test du mod√®le")
clock = pygame.time.Clock()
font = pygame.font.Font(None, 24)
large_font = pygame.font.Font(None, 36)

# Tester sur plusieurs √©pisodes
print("\nüéÆ Test du mod√®le entra√Æn√© avec visualisation:\n")
episodes = 5
success_count = 0

for ep in range(episodes):
    obs, info = env.reset()
    done = False
    total_reward = 0
    steps = 0
    running = True
    
    print(f"\n--- √âpisode {ep+1}/{episodes} ---")
    
    while not done and running:
        # G√©rer les √©v√©nements Pygame
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                done = True
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    running = False
                    done = True
                elif event.key == pygame.K_SPACE:
                    # Pause
                    paused = True
                    while paused:
                        for e in pygame.event.get():
                            if e.type == pygame.KEYDOWN and e.key == pygame.K_SPACE:
                                paused = False
        
        if not running:
            break
        
        # Pr√©dire l'action
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        steps += 1
        done = terminated or truncated
        
        # === AFFICHAGE ===
        screen.fill(WHITE)
        
        # Dessiner le point de d√©part (vert)
        pygame.draw.circle(screen, GREEN, (int(env.start_position[0]), int(env.start_position[1])), 25)
        pygame.draw.circle(screen, (0, 200, 0), (int(env.start_position[0]), int(env.start_position[1])), 20)
        text_s = font.render('START', True, WHITE)
        text_rect = text_s.get_rect(center=(int(env.start_position[0]), int(env.start_position[1])))
        screen.blit(text_s, text_rect)
        
        # Dessiner le waypoint (point cible derri√®re la bou√©e) - CYAN
        pygame.draw.circle(screen, CYAN, (int(env.waypoint_position[0]), int(env.waypoint_position[1])), 20)
        pygame.draw.circle(screen, BLACK, (int(env.waypoint_position[0]), int(env.waypoint_position[1])), 20, 2)
        text_w = font.render('WP', True, BLACK)
        text_rect = text_w.get_rect(center=(int(env.waypoint_position[0]), int(env.waypoint_position[1])))
        screen.blit(text_w, text_rect)
        
        # Dessiner la bou√©e
        buoy_color = DARK_RED if info['collision'] else ORANGE
        pygame.draw.circle(screen, buoy_color, (int(env.buoy_position[0]), int(env.buoy_position[1])), 25)
        pygame.draw.circle(screen, BLACK, (int(env.buoy_position[0]), int(env.buoy_position[1])), 25, 2)
        pygame.draw.circle(screen, WHITE, (int(env.buoy_position[0]), int(env.buoy_position[1])), 3)
        
        # Dessiner la trajectoire du bateau
        if len(env.boat.trail) > 1:
            pygame.draw.lines(screen, (100, 200, 100), False, env.boat.trail, 2)
        
        # Dessiner le bateau
        corners = env.boat.get_corners()
        pygame.draw.polygon(screen, BLUE, corners)
        pygame.draw.polygon(screen, BLACK, corners, 2)
        
        # Indicateur de direction (avant du bateau)
        front_x = env.boat.x + (env.boat.length / 2) * math.cos(env.boat.theta)
        front_y = env.boat.y + (env.boat.length / 2) * math.sin(env.boat.theta)
        pygame.draw.circle(screen, RED, (int(front_x), int(front_y)), 5)
        
        # Phase texte
        phase_text = {
            0: "Phase 1 - Aller au waypoint",
            1: "Phase 2 - Retour au d√©part",
            2: "Mission termin√©e"
        }
        
        # Informations √† l'√©cran
        info_texts = [
            f"√âpisode: {ep+1}/{episodes}",
            f"Steps: {steps}",
            f"R√©compense: {total_reward:.2f}",
            f"{phase_text.get(info['phase'], 'Phase inconnue')}",
            f"Distance bou√©e: {info['distance_to_buoy']:.1f} px",
            f"Vitesse: {env.boat.v:.2f} px/s",
            f"Waypoint atteint: {'‚úÖ' if info['waypoint_reached'] else '‚ùå'}",
            f"Mission r√©ussie: {'‚úÖ' if info['goal_reached'] else '‚ùå'}",
            "",
            "SPACE: Pause | ESC: Quitter"
        ]
        
        y_offset = 10
        for text in info_texts:
            surface = font.render(text, True, BLACK)
            screen.blit(surface, (10, y_offset))
            y_offset += 25
        
        # Message de succ√®s
        if info['goal_reached']:
            overlay = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
            overlay.set_alpha(100)
            overlay.fill(GREEN)
            screen.blit(overlay, (0, 0))
            
            success_text = large_font.render("‚úÖ MISSION R√âUSSIE!", True, WHITE)
            text_rect = success_text.get_rect(center=(SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2))
            screen.blit(success_text, text_rect)
        
        # Message de collision
        if info['collision']:
            collision_text = large_font.render("‚ùå COLLISION!", True, RED)
            text_rect = collision_text.get_rect(center=(SCREEN_WIDTH // 2, 50))
            screen.blit(collision_text, text_rect)
        
        pygame.display.flip()
        clock.tick(60)  # 60 FPS
    
    if not running:
        break
    
    if info['goal_reached']:
        success_count += 1
    
    print(f"√âpisode {ep+1}: R√©compense={total_reward:.2f}, Steps={steps}, Succ√®s={'‚úÖ' if info['goal_reached'] else '‚ùå'}")
    
    # Pause de 2 secondes entre les √©pisodes
    pygame.time.wait(2000)

pygame.quit()
print(f"\nüìä Taux de r√©ussite: {success_count}/{episodes} ({100*success_count/episodes:.0f}%)")


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

üéÆ Test du mod√®le entra√Æn√© avec visualisation:


--- √âpisode 1/5 ---
√âpisode 1: R√©compense=-51495.58, Steps=547, Succ√®s=‚ùå

--- √âpisode 2/5 ---
√âpisode 2: R√©compense=-51495.58, Steps=547, Succ√®s=‚ùå

--- √âpisode 3/5 ---
√âpisode 3: R√©compense=-51495.58, Steps=547, Succ√®s=‚ùå

--- √âpisode 4/5 ---
√âpisode 4: R√©compense=-51495.58, Steps=547, Succ√®s=‚ùå

--- √âpisode 5/5 ---
√âpisode 5: R√©compense=-51495.58, Steps=547, Succ√®s=‚ùå

üìä Taux de r√©ussite: 0/5 (0%)
