In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [2]:
from ultralytics import YOLO
import cv2
import torch

In [3]:
print(torch.cuda.is_available())

True


### Modèle de détection 

In [48]:
import cv2
import torch
import pyttsx3
import threading
import queue
import time
from ultralytics import YOLO

# ================== CONFIGURATION ==================
class Config:
    MODEL_PATH = "model_after_data3.pt"
    VIDEO_PATH = "video2.mp4"  # Mettre ici le chemin de la vidéo
    KNOWN_WIDTH = 0.5  # En mètres (largeur estimée de l'objet)
    FOCAL_LENGTH = 1000  # A ajuster selon la caméra
    ALERT_DISTANCE_MIN = 0.5
    ALERT_DISTANCE_MAX = 4.0


# ================== INITIALISATIONS ==================
model = YOLO(Config.MODEL_PATH)
engine = pyttsx3.init()
alert_queue = queue.Queue()
last_alert_message_added = ""


# ================== ESTIMATION DE DISTANCE ==================
KNOWN_OBJECT_HEIGHTS = {
    "person": 1.7,         
    "bicycle": 1.2,        
    "car": 1.5,             
    "motorcycle": 1.4,     
    "airplane": 10.0,       
    "bus": 3.0,            
    "train": 4.0,           
    "truck": 3.0,          
    "boat": 2.0,           
    "traffic light": 3.0,   
    "fire hydrant": 1.0,    
    "stop sign": 0.75,     
    "parking meter": 1.0,  
    "bench": 1.0,          
    "bird": 0.3,           
    "cat": 0.4,            
    "dog": 0.6,             
    "horse": 1.5,           
    "sheep": 1.0,          
    "cow": 1.5,           
    "elephant": 3.0,       
    "bear": 1.2,          
    "zebra": 1.2,         
    "giraffe": 5.0,         
    "backpack": 0.5,      
    "umbrella": 1.0,       
    "handbag": 0.3,        
    "tie": 0.3,             
    "suitcase": 0.6,       
    "frisbee": 0.2,      
    "skis": 1.8,          
    "snowboard": 1.2,       
    "sports ball": 0.25,    
    "kite": 1.0,            
    "baseball bat": 1.0,   
    "baseball glove": 0.3,  
    "skateboard": 0.2,      
    "surfboard": 2.0,      
    "tennis racket": 0.5,   
    "bottle": 0.3,        
    "wine glass": 0.25,     
    "cup": 0.2,            
    "fork": 0.2,            
    "knife": 0.25,        
    "spoon": 0.2,          
    "bowl": 0.3,           
    "banana": 0.2,         
    "apple": 0.2,         
    "sandwich": 0.1,       
    "orange": 0.2,          
    "broccoli": 0.3,       
    "carrot": 0.3,          
    "hot dog": 0.1,        
    "pizza": 0.3,         
    "donut": 0.2,        
    "cake": 0.3,        
    "chair": 1.0,         
    "couch": 1.0,          
    "potted plant": 0.5,    
    "bed": 1.0,             
    "dining table": 1.0,    
    "toilet": 0.75,         
    "tv": 1.0,             
    "laptop": 0.3,          
    "mouse": 0.05,          
    "remote": 0.1,        
    "keyboard": 0.2,      
    "cell phone": 0.2,    
    "microwave": 0.5,     
    "oven": 1.0,          
    "toaster": 0.3,        
    "sink": 0.75,          
    "refrigerator": 1.8,    
    "book": 0.25,          
    "clock": 0.3,          
    "vase": 0.3,            
    "scissors": 0.2,     
    "teddy bear": 0.4,    
    "hair drier": 0.25,     
    "toothbrush": 0.2,      
}

def estimate_distance_perspective(box_height, y_bottom, frame_height, label, focal_length=1000):
    """
    Estime la distance avec prise en compte de la perspective et de la hauteur réelle de l’objet.
    
    box_height : hauteur de la bounding box (en pixels)
    y_bottom : coordonnée Y du bas de la box (plus bas dans l'image = plus proche)
    frame_height : hauteur totale de l’image (pixels)
    label : nom de l’objet détecté
    focal_length : focale estimée (à ajuster)
    """
    # Obtenir la hauteur réelle selon le label
    known_height = KNOWN_OBJECT_HEIGHTS.get(label, 1.0)

    if box_height <= 0:
        return -1

    # Distance brute classique
    distance = (known_height * focal_length) / box_height

    # Correction selon la perspective (plus bas = plus proche)
    perspective_ratio = 1 - (y_bottom / frame_height)
    correction = 1 + (perspective_ratio * 0.5)  # corrige jusqu'à 50% selon la hauteur

    corrected_distance = distance * correction

    # Clamp à une plage réaliste
    corrected_distance = max(0.1, min(corrected_distance, 20.0))
    return round(corrected_distance, 2)

# ================== SYNTHÈSE VOCALE ==================
def speak_alert():
    while True:
        message = alert_queue.get()
        if message == "exit":
            break
        engine.say(message)
        engine.runAndWait()


# ================== TRAITEMENT DES DÉTECTIONS ==================
LAST_ALERT_TIME = 0  # Variable pour suivre le dernier temps d'alerte

def process_detection(results, frame, frame_width):
    global last_alert_message_added, LAST_ALERT_TIME
    obstacles = []
    current_time = time.time()  # Temps actuel en secondes

    for result in results:
        boxes = result.boxes
        if boxes is None or boxes.xyxy is None:
            continue

        for i in range(len(boxes.xyxy)):
            x1, y1, x2, y2 = map(int, boxes.xyxy[i])
            # Vérifier que 'cls' est un indice valide
            cls = int(boxes.cls[i])

            # S'assurer que 'cls' est dans les limites de 'model.names'
            if cls < len(model.names):
                label = model.names[cls]
            else:
                label = "Classe inconnue"  # Ou une autre valeur par défaut


            # Calcul de la hauteur de la boîte
            box_height = y2 - y1
            y_bottom = y2  # Position verticale du bas de la boîte
            frame_height = frame.shape[0]

            # Estimation de la distance en utilisant la perspective et la taille du label
            distance = estimate_distance_perspective(
                box_height=box_height,
                y_bottom=y_bottom,
                frame_height=frame_height,
                label=label
            )

            # Filtrer les objets selon la distance (entre ALERT_DISTANCE_MIN et ALERT_DISTANCE_MAX)
            if Config.ALERT_DISTANCE_MIN < distance < Config.ALERT_DISTANCE_MAX:
                # Calcul de la position de l'objet (gauche, droite ou devant)
                center_x = (x1 + x2) // 2
                position = (
                    "à gauche" if center_x < frame_width // 3
                    else "à droite" if center_x > (2 * frame_width) // 3
                    else "devant"
                )

                # Ajouter l'obstacle à la liste
                obstacles.append((distance, position, label, x1, y1, x2, y2))

    # Trier les obstacles par distance (les plus proches d'abord)
    obstacles.sort(key=lambda x: x[0])

    # Si des obstacles sont détectés
    if obstacles:
        _, position, label, _, _, _, _ = obstacles[0]  # Le plus proche
        alert_message = f"{label} {position}"
        print(alert_message)

        # Vérifier que l'alerte n'a pas été donnée trop récemment
        if alert_message != last_alert_message_added and (current_time - LAST_ALERT_TIME) > 0.5:
            alert_queue.put(alert_message)
            last_alert_message_added = alert_message
            LAST_ALERT_TIME = current_time  # Mettre à jour le temps de la dernière alerte
    else:
        last_alert_message_added = ""

    return obstacles


# ================== FONCTION PRINCIPALE ==================
def main():
    cap = cv2.VideoCapture(Config.VIDEO_PATH)
    if not cap.isOpened():
        print("❌ Erreur : Impossible d'ouvrir la vidéo")
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    alert_thread = threading.Thread(target=speak_alert, daemon=True)
    alert_thread.start()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = model(frame)
        obstacles = process_detection(results, frame, frame_width)

        for distance, position, label, x1, y1, x2, y2 in obstacles:
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(
                frame,
                f"{label} {distance:.2f}m ({position})",
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 255, 0),
                2,
            )

        cv2.imshow("Détection d'obstacles", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    alert_queue.put("exit")
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()


0: 384x640 7 persons, 1 vehicle, 9.0ms
Speed: 1.5ms preprocess, 9.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)


Exception in thread Thread-123 (speak_alert):
Traceback (most recent call last):
  File "c:\Users\adela\anaconda3\Lib\threading.py", line 1075, in _bootstrap_inner
    self.run()
  File "c:\Users\adela\anaconda3\Lib\threading.py", line 1012, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\adela\AppData\Local\Temp\ipykernel_18732\26090170.py", line 146, in speak_alert
  File "c:\Users\adela\anaconda3\Lib\site-packages\pyttsx3\engine.py", line 180, in runAndWait
    raise RuntimeError('run loop already started')
RuntimeError: run loop already started


person à droite

0: 384x640 8 persons, 1 vehicle, 11.4ms
Speed: 1.4ms preprocess, 11.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
person à droite

0: 384x640 6 persons, 8.8ms
Speed: 2.4ms preprocess, 8.8ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)
person à droite

0: 384x640 1 barrier, 1 bike, 8 persons, 10.3ms
Speed: 1.3ms preprocess, 10.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
person à droite

0: 384x640 1 barrier, 1 bike, 8 persons, 10.7ms
Speed: 1.7ms preprocess, 10.7ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)
person à droite

0: 384x640 1 bike, 8 persons, 7.6ms
Speed: 1.7ms preprocess, 7.6ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)
person à droite

0: 384x640 1 bike, 7 persons, 7.7ms
Speed: 1.2ms preprocess, 7.7ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
person devant

0: 384x640 1 bike, 8 persons, 5.9ms
Speed: 1.1ms preprocess, 5.9ms i