In [1]:
import cv2
from ultralytics import YOLO
import time
import numpy as np
import supervision as sv
import pyttsx3
import threading
import pygame # <<< For directional audio
import math # <<< For distance calculation

# --- 1. Pygame Audio Engine (Directional) ---
# Opens a stereo mixer and loads the alert sample. If either step fails,
# `alert_sound` is set to None so the rest of the script can still run
# without the directional beep.
print("Initializing Audio (Pygame Mixer)...")
try:
    pygame.mixer.init(frequency=22050, size=-16, channels=2, buffer=4096)
    # ### MAKE SURE YOU HAVE THIS FILE ###
    alert_sound = pygame.mixer.Sound('danger.mp3')
    print("Pygame Mixer ready. Loaded 'danger.mp3'.")
except (pygame.error, FileNotFoundError) as e:
    # pygame 2.x reports a missing sound file as FileNotFoundError rather than
    # pygame.error, so both must be caught for the hint below to be shown.
    print(f"Error initializing pygame.mixer or loading file: {e}")
    print("### PLEASE MAKE SURE 'danger.mp3' IS IN THE FOLDER ###")
    alert_sound = None

# --- 2. Text-to-Speech Engine ---
# A single pyttsx3 engine is created here and shared by every spoken alert.
print("Initializing Text-to-Speech engine...")
tts_engine = pyttsx3.init()
tts_engine.setProperty('volume', 1.0)
tts_engine.setProperty('rate', 175)
print("TTS engine ready.")

# --- 3. Model & Supervision Components ---
print("Loading YOLOv8 model...")
model = YOLO('yolov8n.pt')
print("Model loaded.")

# Tracker plus the two annotators used to draw boxes and their text labels.
print("Initializing Supervision components...")
label_annotator = sv.LabelAnnotator(
    text_color=sv.Color.BLACK,
    text_scale=0.5,
    text_thickness=1,
)
box_annotator = sv.BoxAnnotator(thickness=2)
tracker = sv.ByteTrack()
print("Supervision components ready.")

# (RENAMED) Placeholders initialised to None; nothing in the visible part of
# this script assigns them again.
personal_zone_annotator = None
personal_zone = None

# --- 4. Logic & State Variables ---
# Per-track memory used to compare an object's box against its previous sighting.
object_history = {} # {track_id: [cx, cy, area, approach_count, last_seen_frame]}
# State name of the most recent spoken alert; a new alert is only spoken when
# the current state differs from this value.
last_announced_state = "SAFE"
# time.time() stamp of the last spoken alert (0 = nothing spoken yet).
last_alert_time = 0
TTS_COOLDOWN = 3  # Cooldown in seconds between TTS alerts
# True while a background TTS thread is talking; cleared by say_alert().
is_speaking = False

# --- 5. Class Filtering ---
# 0: person, 1: bicycle, 2: car, 3: motorcycle, 5: bus, 6: train, 7: truck, 9: traffic light
# Detections whose class id is not listed here are discarded before tracking.
TARGET_CLASS_IDS = [0, 1, 2, 3, 5, 6, 7, 9]
# Subset of the ids above that is collected into the per-frame `vehicles` list.
VEHICLE_CLASS_IDS = [1, 2, 3, 5, 6, 7]
# Mapping used to turn a class id into the name shown in box labels.
CLASS_NAMES = model.model.names

# --- 6. Tunables ---
YOLO_CONF = 0.35              # Confidence threshold passed to the model as `conf`
MIN_MOVE_DIST = 4.0             # Pixels a box centre must move since its last sighting to be 'moving'
APPROACH_MULTIPLIER = 1.04      # 4% area growth since the last sighting counts as one 'approach' step
APPROACH_SUSTAIN_FRAMES = 3     # approach_count (+1 on growth, -1 otherwise) must reach this value
# NOTE(review): not referenced in the visible part of this script; is_collision()
# has its own default of 0.02. The value 0.05 corresponds to 5% IoU.
IOU_COLLISION_THRESHOLD = 0.05  # 5% overlap for IoU collision
STALE_TRACK_FRAMES = 30         # Drop tracks not seen in this many frames
STATE_STABLE_FRAMES = 3         # Require state to be stable for this long
# The main loop multiplies this by 1.5 before comparing it with the vehicle's
# horizontal distance from the frame centre.
PATH_ALIGNMENT_THRESHOLD = 200  # Max X-distance for secondary "approaching" check
# NOTE(review): not referenced in the visible part of this script.
CAUTION_X_DIST_MULTIPLIER = 1.75 # (NEW) Makes caution-zone 75% wider than danger-zone

# --- 7. Helper Functions ---
def say_alert(text_to_say):
    """Speak ``text_to_say`` on the shared TTS engine and wait until it is done.

    Any exception raised while speaking is printed instead of propagated. The
    module-level ``is_speaking`` flag is reset to False on every exit path.
    """
    global is_speaking
    try:
        try:
            tts_engine.say(text_to_say)
            tts_engine.runAndWait()
        except Exception as tts_error:
            print(f"Error in TTS: {tts_error}")
    finally:
        # Always release the flag so later alerts are not blocked.
        is_speaking = False

def play_panned_alert(car_center_x, frame_width):
    """Play the loaded alert sound, panned toward the obstacle's side.

    Args:
        car_center_x: Horizontal pixel position of the obstacle's centre.
        frame_width: Width of the video frame in pixels.

    The pan is ``car_center_x / frame_width`` clamped to [0, 1]. The left
    channel volume is ``1 - pan`` and the right channel volume is ``pan``.
    Returns immediately when no alert sound is loaded. Playback errors are
    printed, not raised.
    """
    if not alert_sound:
        return

    if frame_width > 0:
        pan = max(0.0, min(1.0, car_center_x / frame_width))
    else:
        # No usable width to normalise against: play centred rather than let
        # a ZeroDivisionError escape (the division is outside the try below).
        pan = 0.5
    left_volume = 1.0 - pan
    right_volume = pan

    try:
        # True = force: take over the longest-running channel if all are busy.
        channel = pygame.mixer.find_channel(True)
        channel.set_volume(left_volume, right_volume)
        channel.play(alert_sound)
    except Exception as e:
        print(f"Error playing panned sound: {e}")

def is_collision(boxA, boxB, iou_threshold=0.02):
    """Return True when two ``xyxy`` boxes overlap with IoU >= ``iou_threshold``.

    Boxes are indexable as ``[x1, y1, x2, y2]``. Boxes that do not overlap, or
    only touch along an edge, give False regardless of the threshold. Each box
    area is floored at 1 before the union is computed.
    """
    # Corners of the intersection rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    overlap = max(0, right - left) * max(0, bottom - top)
    if overlap == 0:
        return False

    def _floored_area(box):
        # Width * height, never less than 1.
        return max(1, (box[2] - box[0]) * (box[3] - box[1]))

    union = _floored_area(boxA) + _floored_area(boxB) - overlap
    return overlap / float(union) >= iou_threshold

def box_center_and_area(xyxy):
    """Return ``(cx, cy, area)`` for a box given as ``[x1, y1, x2, y2]``.

    ``cx`` and ``cy`` are the midpoints of the box along each axis; ``area``
    is width * height, floored at 1.
    """
    x1, y1, x2, y2 = xyxy[0], xyxy[1], xyxy[2], xyxy[3]
    center = ((x1 + x2) / 2.0, (y1 + y2) / 2.0)
    return (*center, max(1, (x2 - x1) * (y2 - y1)))

# --- 8. Main Processing Loop ---
# For every frame: detect -> filter -> track -> derive per-object motion ->
# decide a safety state -> speak / beep on state changes -> annotate -> show.
video_path = 'sample3.mp4' # ### CHECK YOUR VIDEO PATH ###
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print(f"Error: Could not open video file at {video_path}")
else:
    print("Starting improved video processing loop...")
    try:
        # stable_state is the state seen on the previous frame; stable_counter
        # counts how many consecutive frames it has been repeated.
        stable_state = last_announced_state
        stable_counter = 0
        frame_idx = 0

        while True:
            success, frame = cap.read()
            if not success:
                print("Video finished or failed to read a frame.")
                break

            frame_idx += 1
            # Frame size drives both the zone geometry and the audio panning.
            # Slicing (instead of a 3-way unpack) also accepts 2-D frames.
            frame_height, frame_width = frame.shape[:2]

            # --- NEW: Define Multi-Layered Proximity Zones (Tuned) ---
            # Built once, on the first frame, because they depend on frame size.
            if frame_idx == 1:
                print("Defining tuned multi-layered proximity zones...")

                # 1. DANGER ZONE (Red) - Your customized values
                danger_points = np.array([
                    (int(frame_width * 0.1), frame_height), # Bottom-left
                    (int(frame_width * 0.9), frame_height), # Bottom-right
                    (int(frame_width * 0.70), int(frame_height * 0.875)), # Top-right
                    (int(frame_width * 0.30), int(frame_height * 0.875))  # Top-left
                ], dtype=np.int32)

                danger_zone = sv.PolygonZone(polygon=danger_points)
                danger_zone_annotator = sv.PolygonZoneAnnotator(
                    zone=danger_zone, color=sv.Color.from_hex("#FF0000"), # Red
                    thickness=2, text_thickness=1, text_scale=0.5
                )

                # 2. CAUTION ZONE (Yellow) - Your customized values
                # The bottom corners deliberately extend past the frame edges.
                caution_points = np.array([
                    (int(frame_width * -0.2), frame_height), # Bottom-left
                    (int(frame_width * 1.2), frame_height), # Bottom-right
                    (int(frame_width * 0.80), int(frame_height * 0.75)), # Top-right
                    (int(frame_width * 0.20), int(frame_height * 0.75))  # Top-left
                ], dtype=np.int32)

                caution_zone = sv.PolygonZone(polygon=caution_points)
                caution_zone_annotator = sv.PolygonZoneAnnotator(
                    zone=caution_zone, color=sv.Color.from_hex("#FFFF00"), # Yellow
                    thickness=2, text_thickness=1, text_scale=0.5
                )

                # 3. AWARENESS ZONE (Green) - Your customized values
                awareness_points = np.array([
                    (int(frame_width * -0.5), frame_height), # Bottom-left
                    (int(frame_width * 1.5), frame_height), # Bottom-right
                    (int(frame_width * 0.90), int(frame_height * 0.65)), # Top-right
                    (int(frame_width * 0.10), int(frame_height * 0.65))  # Top-left
                ], dtype=np.int32)

                awareness_zone = sv.PolygonZone(polygon=awareness_points)
                awareness_zone_annotator = sv.PolygonZoneAnnotator(
                    zone=awareness_zone, color=sv.Color.from_hex("#00FF00"), # Green
                    thickness=2, text_thickness=1, text_scale=0.5
                )

                print("All 3 custom zones defined.")
            # --- END ---

            # 1. DETECT
            results = model(frame, conf=YOLO_CONF, verbose=False)[0]

            # 2. CONVERT & FILTER (keep only the classes we care about)
            detections = sv.Detections.from_ultralytics(results)
            mask = np.isin(detections.class_id, TARGET_CLASS_IDS)
            detections = detections[mask]

            # 3. TRACK
            tracked_detections = tracker.update_with_detections(detections)

            # --- 4. ANALYZE ---
            labels = []
            pedestrians = []
            vehicles = [] # List to hold all vehicle dicts
            traffic_light_detected = False
            seen_track_ids = set()

            for xyxy, confidence, class_id, tracker_id in zip(
                tracked_detections.xyxy,
                tracked_detections.confidence,
                tracked_detections.class_id,
                tracked_detections.tracker_id
            ):
                xyxy = [float(x) for x in xyxy]
                seen_track_ids.add(tracker_id)
                class_name = CLASS_NAMES.get(class_id, "Unknown")
                cx, cy, area = box_center_and_area(xyxy)

                # --- State Calculation ---
                is_moving = False
                is_approaching = False
                approach_count = 0

                if tracker_id not in object_history:
                    # First time seeing this object: nothing to compare against.
                    object_history[tracker_id] = [cx, cy, area, 0, frame_idx]
                else:
                    # Compare to the previous sighting of this track.
                    prev_x, prev_y, prev_area, prev_approach_count, _ = object_history[tracker_id]

                    movement_distance = math.sqrt((cx - prev_x)**2 + (cy - prev_y)**2)
                    is_moving = movement_distance > MIN_MOVE_DIST

                    # A growing box counts up (capped at 10), anything else
                    # counts down, so a single noisy frame does not flip it.
                    if area > (prev_area * APPROACH_MULTIPLIER):
                        approach_count = min(10, prev_approach_count + 1) # Increment
                    else:
                        approach_count = max(0, prev_approach_count - 1) # Decrement

                    is_approaching = approach_count >= APPROACH_SUSTAIN_FRAMES

                # Update history with new values
                object_history[tracker_id] = [cx, cy, area, approach_count, frame_idx]
                # --- End State Calculation ---

                # Build label text
                label_text = f"ID={tracker_id} {class_name} {confidence:.2f}"
                if is_moving:
                    label_text += " (Moving)"
                if is_approaching:
                    label_text += " (Approaching)"

                # Class-specific groupings
                if class_id == 0:  # pedestrian
                    pedestrians.append(xyxy)
                elif class_id in VEHICLE_CLASS_IDS:
                    vehicles.append({
                        'xyxy': xyxy,
                        'cx': cx,
                        'area': area,
                        'is_moving': is_moving,
                        'is_approaching': is_approaching
                    })
                elif class_id == 9:
                    traffic_light_detected = True

                # Add label (exactly one per tracked detection, in order)
                labels.append(label_text)

            # Prune stale tracks
            stale_ids = [tid for tid, val in object_history.items() if (frame_idx - val[4]) > STALE_TRACK_FRAMES]
            for tid in stale_ids:
                del object_history[tid]

            # --- 5. DECIDE (Safety State) - ESCALATION LOGIC ---

            # All tracked objects except traffic lights are potential obstacles.
            obstacle_mask = np.isin(tracked_detections.class_id, TARGET_CLASS_IDS) & (tracked_detections.class_id != 9)
            obstacle_detections = tracked_detections[obstacle_mask]

            # --- Check Zones in Priority Order ---
            obstacles_in_danger_zone = danger_zone.trigger(detections=obstacle_detections)
            obstacles_in_caution_zone = caution_zone.trigger(detections=obstacle_detections)
            obstacles_in_awareness_zone = awareness_zone.trigger(detections=obstacle_detections)

            # --- Set Defaults (these ARE the "all clear" outcome) ---
            current_state = "SAFE"
            status_message = "STATUS: SAFE"
            tts_message = None
            danger_context = None

            # --- Priority 1: DANGER ZONE (Red) ---
            if np.any(obstacles_in_danger_zone):
                # THIS IS THE KEY: was the previous frame already a danger state?
                if stable_state in ("DANGER_PROBABLE", "DANGER_IMMINENT"):
                    # It's a SUSTAINED danger. ESCALATE!
                    current_state = "DANGER_IMMINENT"
                    tts_message = "Collision Imminent!"
                    status_message = "DANGER: COLLISION IMMINENT!"
                else:
                    # It's a NEW danger. Set to PROBABLE (Caution).
                    current_state = "DANGER_PROBABLE"
                    tts_message = "Caution! Probable collision!"
                    status_message = "CAUTION: PROBABLE COLLISION"

                # Pan the audio toward the first obstacle found in the zone.
                obs_coords = obstacle_detections[obstacles_in_danger_zone].xyxy[0]
                obs_cx, _, _ = box_center_and_area(obs_coords)
                danger_context = {'center_x': obs_cx}

            # --- Priority 2: CAUTION ZONE (Yellow) ---
            elif np.any(obstacles_in_caution_zone):
                # Treat Yellow Zone as "Probable" danger
                current_state = "DANGER_PROBABLE"
                tts_message = "Caution! Too close!"
                status_message = "CAUTION: TOO CLOSE!"

                obs_coords = obstacle_detections[obstacles_in_caution_zone].xyxy[0]
                obs_cx, _, _ = box_center_and_area(obs_coords)
                danger_context = {'center_x': obs_cx}

            # --- Priority 3: AWARENESS ZONE (Green) ---
            elif np.any(obstacles_in_awareness_zone):
                current_state = "CAUTION_NEARBY"
                tts_message = "Caution. Obstacle nearby."
                status_message = "CAUTION: OBSTACLE NEARBY"

            # --- Priority 4: Approaching Vehicle / Traffic Light (Outside Zones) ---
            else:
                # First vehicle that is moving, has been growing for long
                # enough, and is roughly aligned with the frame centre.
                approaching_vehicle = next(
                    (
                        v for v in vehicles
                        if v['is_approaching'] and v['is_moving']
                        and abs(v['cx'] - (frame_width / 2.0)) < (PATH_ALIGNMENT_THRESHOLD * 1.5)
                    ),
                    None,
                )

                if approaching_vehicle is not None:
                    current_state = "CAUTION_NEARBY"
                    tts_message = "Caution. Approaching vehicle."
                    status_message = "CAUTION: APPROACHING VEHICLE"
                    danger_context = {'center_x': approaching_vehicle['cx']}
                elif traffic_light_detected:
                    # Evaluated even when no vehicle is tracked; previously this
                    # was nested under `elif vehicles:` and therefore skipped
                    # whenever the vehicle list was empty.
                    current_state = "CAUTION_NEARBY"
                    tts_message = "Caution. Traffic light detected."
                    status_message = "CAUTION: TRAFFIC LIGHT"
                # Priority 5: All Clear -> the SAFE defaults set above stand.

            # --- 6. SPEAK (TTS + Directional Audio) ---
            current_time = time.time()
            if current_state == stable_state:
                stable_counter += 1
            else:
                stable_counter = 0
                stable_state = current_state

            # --- STABILITY BYPASS ---
            is_stable_enough = (stable_counter >= STATE_STABLE_FRAMES)

            # BYPASS: Instantly alert ONLY for "Imminent" danger
            if current_state == "DANGER_IMMINENT":
                is_stable_enough = True
            # --- END BYPASS ---

            # Once the scene has settled back to SAFE, forget the last spoken
            # alert. Without this reset, a later recurrence of the same state
            # would compare equal to last_announced_state and stay silent.
            if is_stable_enough and current_state == "SAFE":
                last_announced_state = "SAFE"

            # Check if we should speak
            if ( is_stable_enough
                 and current_state != last_announced_state
                 and (current_time - last_alert_time) > TTS_COOLDOWN
                 and not is_speaking
                 and tts_message is not None):

                # --- Trigger Audio ---
                # Play panned sound ONLY for "Imminent" danger
                if current_state == "DANGER_IMMINENT" and danger_context:
                    print(f"!!! DANGER: Panning audio to X={danger_context['center_x']} !!!")
                    play_panned_alert(danger_context['center_x'], frame_width)

                # --- Start non-blocking TTS ---
                # is_speaking is set here and cleared by say_alert() when done.
                is_speaking = True
                last_alert_time = current_time
                last_announced_state = current_state
                print(f"[FRAME {frame_idx}] STATE CHANGE -> {current_state} | TTS: '{tts_message}'")
                threading.Thread(target=say_alert, args=(tts_message,), daemon=True).start()
                # --- End Audio ---

            # --- 7. ANNOTATE ---
            annotated_frame = box_annotator.annotate(scene=frame.copy(), detections=tracked_detections)
            annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=tracked_detections, labels=labels)

            # --- Draw All Proximity Zones (outermost first) ---
            if awareness_zone_annotator:
                annotated_frame = awareness_zone_annotator.annotate(scene=annotated_frame, label="NEARBY")
            if caution_zone_annotator:
                annotated_frame = caution_zone_annotator.annotate(scene=annotated_frame, label="TOO CLOSE")
            if danger_zone_annotator:
                annotated_frame = danger_zone_annotator.annotate(scene=annotated_frame, label="DANGER ZONE")
            # --- END ---

            # --- Highlight boxes based on ZONE ---
            # The states checked here are the ones the DECIDE step actually
            # produces for the red/yellow zones; the previous check against
            # "DANGER"/"CAUTION" could never be true.
            if current_state in ("DANGER_IMMINENT", "DANGER_PROBABLE"):
                # Get all obstacle boxes that are in the zones
                danger_boxes = obstacle_detections[obstacles_in_danger_zone].xyxy
                caution_boxes = obstacle_detections[obstacles_in_caution_zone].xyxy

                # Yellow first, so the thicker red outline drawn afterwards is
                # not painted over for an obstacle that triggers both zones.
                for x1, y1, x2, y2 in caution_boxes:
                    cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,255,255), 2) # Yellow

                for x1, y1, x2, y2 in danger_boxes:
                    cv2.rectangle(annotated_frame, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 3) # Red, thick
            # --- END ---

            # Status text (colours are BGR)
            status_color = (0, 0, 255) # Default Red for DANGER
            if current_state == "SAFE":
                status_color = (0, 255, 0) # Green
            elif current_state == "CAUTION_NEARBY" or current_state == "DANGER_PROBABLE":
                status_color = (0, 255, 255) # Yellow

            # Use the status_message from the DECIDE block
            cv2.putText(annotated_frame, status_message, (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1.2, status_color, 3, cv2.LINE_AA)

            # --- 8. DISPLAY & DEBUG OUTPUT ---
            cv2.imshow("YOLOv8 + Supervision Tracking (Improved)", annotated_frame)

            # Debug print every 30th frame
            if frame_idx % 30 == 0:
                print(f"Frame {frame_idx} | Peds: {len(pedestrians)} | Vehicles: {len(vehicles)} | State: {current_state}")

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("'q' key pressed. Exiting...")
                break

    except KeyboardInterrupt:
        print("Process interrupted by user.")
    finally:
        print("Cleaning up resources...")
        cap.release()
        cv2.destroyAllWindows()
        pygame.mixer.quit() # Quit pygame
        print("Script finished.")

pygame 2.6.1 (SDL 2.28.4, Python 3.11.9)
Hello from the pygame community. https://www.pygame.org/contribute.html
Initializing Audio (Pygame Mixer)...
Pygame Mixer ready. Loaded 'danger.mp3'.
Initializing Text-to-Speech engine...
TTS engine ready.
Loading YOLOv8 model...
Model loaded.
Initializing Supervision components...
Supervision components ready.
Starting improved video processing loop...
Defining tuned multi-layered proximity zones...
All 3 custom zones defined.
[FRAME 4] STATE CHANGE -> DANGER_PROBABLE | TTS: 'Caution! Too close!'
Frame 30 | Peds: 1 | Vehicles: 5 | State: DANGER_PROBABLE
Frame 60 | Peds: 1 | Vehicles: 6 | State: DANGER_PROBABLE
Frame 90 | Peds: 1 | Vehicles: 6 | State: DANGER_PROBABLE
Frame 120 | Peds: 0 | Vehicles: 5 | State: DANGER_PROBABLE
Frame 150 | Peds: 0 | Vehicles: 7 | State: DANGER_PROBABLE
Frame 180 | Peds: 0 | Vehicles: 7 | State: DANGER_PROBABLE
Frame 210 | Peds: 0 | Vehicles: 7 | State: DANGER_PROBABLE
Frame 240 | Peds: 0 | Vehicles: 5 | State: DAN