Human Detection, Object Detection, Crowd Detection, Hazard Detection, Anomaly Detection, and Vehicle Detection Using YOLOv8

In [None]:
# Install necessary libraries
!pip install ultralytics
from typing import Dict, List

# Import necessary libraries
import torch
import cv2
import numpy as np
from ultralytics import YOLO

# YOLO model for comprehensive detection
class ComprehensiveDetection:
    """Wrapper around a YOLOv8 checkpoint that returns detections as NumPy arrays."""

    def __init__(self):
        print("Initializing YOLO for comprehensive detection tasks...")
        # 'yolov8x.pt' can be swapped for a smaller checkpoint if needed.
        self.model = YOLO('yolov8x.pt')

    def detect_objects(self, frame: np.ndarray) -> Dict:
        """Run the model on one frame and unpack the first result.

        Args:
            frame: Image passed straight to the model.

        Returns:
            Dict with keys ``'boxes'`` (xyxy coordinates), ``'labels'`` (the
            result's ``names`` mapping), ``'class_ids'`` and ``'confidence'``.
            The three array entries are empty arrays when the result carries
            no boxes object.
        """
        prediction = self.model(frame)[0]
        found = prediction.boxes

        if found is None:
            coords = np.array([])
            classes = np.array([])
            scores = np.array([])
        else:
            coords = found.xyxy.cpu().numpy()
            classes = found.cls.cpu().numpy()
            scores = found.conf.cpu().numpy()

        return {
            'boxes': coords,
            'labels': prediction.names,
            'class_ids': classes,
            'confidence': scores,
        }

# Function to display all detected objects with bounding boxes and labels
def display_objects(frame, detection_results):
    """Draw a green box and a "label: confidence" caption for every detection.

    Args:
        frame: Image that is annotated in place through OpenCV drawing calls.
        detection_results: Dict with the keys ``'boxes'``, ``'labels'``,
            ``'class_ids'`` and ``'confidence'`` (the structure returned by
            ``ComprehensiveDetection.detect_objects``).

    Returns:
        None. The annotations are written into ``frame``.
    """
    labels = detection_results['labels']
    detections = zip(
        detection_results['boxes'],
        detection_results['class_ids'],
        detection_results['confidence'],
    )

    for box, class_id, confidence in detections:
        x1, y1, x2, y2 = (int(v) for v in box)
        # Class ids arrive as floats (they are read from a float array), so
        # convert before the lookup: a float key only happens to work when
        # `labels` is a dict and fails outright for a list/tuple of names.
        label = labels[int(class_id)]

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Green box for all objects
        cv2.putText(frame, f"{label}: {float(confidence):.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Function to process real-time webcam stream
def process_real_time_webcam(detector: "ComprehensiveDetection"):
    """Stream the default webcam, annotate each frame and show it in a window.

    The loop ends when a frame cannot be read or when 'q' is pressed in the
    preview window.

    Args:
        detector: Object exposing ``detect_objects(frame)``; its result is
            handed to ``display_objects``.

    Returns:
        None.
    """
    cap = cv2.VideoCapture(0)  # 0 for the default webcam

    if not cap.isOpened():
        print("Error: Could not open video stream.")
        return

    try:
        while True:
            ret, frame = cap.read()  # Capture frame-by-frame
            if not ret:
                print("Failed to grab frame")
                break

            detection_results = detector.detect_objects(frame)
            display_objects(frame, detection_results)
            cv2.imshow("Webcam - Comprehensive Object Detection", frame)

            # Press 'q' to quit the real-time stream
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs even when detection or drawing raises, so the camera is not
        # left locked and the preview window is not left open.
        cap.release()
        cv2.destroyAllWindows()

# Instantiate the YOLO model and start real-time webcam stream
# Constructing the detector prints an initialisation message and loads 'yolov8x.pt'.
detector = ComprehensiveDetection()
# Runs the capture loop; returns after 'q' is pressed or a frame read fails.
process_real_time_webcam(detector)


Human Detection, Object Detection, Crowd Detection, Hazard Detection, Anomaly Detection, and Vehicle Detection Using YOLOv5

In [4]:
!git clone https://github.com/ultralytics/yolov5.git


Cloning into 'yolov5'...


In [5]:
%cd yolov5
!pip install -r requirements.txt


D:\Hadi e Learning\yolov5
Collecting gitpython>=3.1.30 (from -r requirements.txt (line 5))
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting numpy>=1.23.5 (from -r requirements.txt (line 7))
  Downloading numpy-2.1.2-cp310-cp310-win_amd64.whl.metadata (59 kB)
     -------------------------------------- 59.7/59.7 kB 315.3 kB/s eta 0:00:00
Collecting pillow>=10.3.0 (from -r requirements.txt (line 9))
  Downloading pillow-11.0.0-cp310-cp310-win_amd64.whl.metadata (9.3 kB)
Collecting requests>=2.32.2 (from -r requirements.txt (line 12))
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting thop>=0.1.1 (from -r requirements.txt (line 14))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting tqdm>=4.66.3 (from -r requirements.txt (line 17))
  Downloading tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 57.6/57.6 kB 1.0 MB/s eta 0:00:00
Collecting setuptools>=70.0.0 (fr

  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
djoser 2.1.0 requires djangorestframework-simplejwt<5.0.0,>=4.3.0, but you have djangorestframework-simplejwt 5.1.0 which is incompatible.
tensorflow-intel 2.13.0 requires numpy<=1.24.3,>=1.22, but you have numpy 1.26.4 which is incompatible.

[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import torch
import cv2
import numpy as np
from typing import Dict, List
# YOLOv5 model for comprehensive detection
class YOLOv5Detection:
    """Wrapper around a YOLOv5 model loaded through ``torch.hub``."""

    def __init__(self, model_name='yolov5x'):
        """Load the requested checkpoint.

        Args:
            model_name: Hub entry point to load (yolov5s, yolov5m, yolov5l,
                yolov5x).
        """
        # The original f-string was unterminated (SyntaxError); the text is
        # restored to match the message recorded in the notebook output.
        print(f"Initializing YOLOv5 ({model_name}) for comprehensive detection tasks...")
        self.model = torch.hub.load('ultralytics/yolov5', model_name, pretrained=True)

    def detect_objects(self, frame: np.ndarray) -> "pandas.DataFrame":
        """Run inference on one frame.

        Args:
            frame: Image passed straight to the model.

        Returns:
            The first entry of ``results.pandas().xyxy``. The drawing code in
            this notebook iterates it with ``iterrows()`` and reads the
            columns ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``name`` and
            ``confidence``.
        """
        results = self.model(frame)
        return results.pandas().xyxy[0]

def display_objects(frame, detection_results):
    """Annotate ``frame`` in place with one green box and caption per row.

    Args:
        frame: Image handed to the OpenCV drawing calls.
        detection_results: Table iterated with ``iterrows()``; each row must
            provide ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``name`` and
            ``confidence``.
    """
    green = (0, 255, 0)

    for _, det in detection_results.iterrows():
        left, top = int(det['xmin']), int(det['ymin'])
        right, bottom = int(det['xmax']), int(det['ymax'])
        name = det['name']
        score = det['confidence']

        cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
        # Caption sits 10 px above the top-left corner of the box.
        cv2.putText(frame, f"{name}: {score:.2f}", (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

def process_real_time_webcam(detector: "YOLOv5Detection"):
    """Stream the default webcam through the detector and preview the result.

    The loop ends when a frame cannot be read or when 'q' is pressed in the
    preview window.

    Args:
        detector: Object exposing ``detect_objects(frame)``; its result is
            handed to ``display_objects``.

    Returns:
        None.
    """
    # Open the webcam feed
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open video stream.")
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            detection_results = detector.detect_objects(frame)
            display_objects(frame, detection_results)
            cv2.imshow("Webcam - YOLOv5 Object Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if inference or
        # drawing raised part-way through the loop.
        cap.release()
        cv2.destroyAllWindows()

# Instantiate the YOLOv5 model (you can use 'yolov5s' for faster, lighter model or 'yolov5x' for more accuracy)
# Constructing the detector prints an initialisation message and loads the model via torch.hub.
detector = YOLOv5Detection(model_name='yolov5x')
# Runs the capture loop; returns after 'q' is pressed or a frame read fails.
process_real_time_webcam(detector)


Initializing YOLOv5 (yolov5s) for comprehensive detection tasks...


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|█████████████████████████████████████████████████████████████████████████████| 14.1M/14.1M [00:03<00:00, 3.75MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


Motion Detection, Object Detection, and Human Detection Implemented Using YOLOv5

In [2]:
!pip install torch torchvision
!pip install ultralytics
!pip install supervision
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt

import io
import torch
import cv2
import numpy as np
import supervision as sv
import threading
from queue import Queue
import time
from IPython.display import display, clear_output
import PIL.Image
import ipywidgets as widgets
from threading import Thread, Event

class VideoStreamWidget:
    """Reads frames from a capture source on a background thread.

    Frames are buffered in a bounded queue (128 entries) and handed out in
    order by ``read()``. Can be used as a context manager: entering starts the
    reader thread, leaving stops it and releases the capture.
    """

    def __init__(self, src=0):
        """Open the capture source.

        Args:
            src: Source passed to ``cv2.VideoCapture``.
        """
        self.capture = cv2.VideoCapture(src)
        self.status = True
        self.thread = Thread(target=self._update_frame, args=(), daemon=True)
        self.stopped = False
        self.q = Queue(maxsize=128)

    def start(self):
        """Start the reader thread and return ``self`` for chaining."""
        self.thread.start()
        return self

    def _update_frame(self):
        """Reader loop: fill the queue until stopped or the source runs dry."""
        while not self.stopped:
            if self.q.full():
                time.sleep(0.001)
                continue

            ret, frame = self.capture.read()
            if not ret:
                # Only raise the flag here. Calling stop() from this thread
                # would try to join the current thread, which raises
                # RuntimeError; the consumer's stop() releases the capture.
                self.stopped = True
                break
            self.q.put(frame)

    def read(self):
        """Return the next buffered frame.

        Returns:
            The oldest queued frame, or ``None`` once the stream has stopped
            and the queue is drained. Polling (instead of a bare blocking
            ``get``) keeps the caller from waiting forever on a finished
            stream.
        """
        while True:
            if not self.q.empty():
                return self.q.get()
            if self.stopped:
                # Re-check so a frame queued just before the flag was set is
                # not dropped.
                return None if self.q.empty() else self.q.get()
            time.sleep(0.001)

    def stop(self):
        """Signal the reader to finish, wait for it, and release the capture."""
        self.stopped = True
        worker = self.thread
        if worker.is_alive() and worker is not threading.current_thread():
            worker.join()
        self.capture.release()

    def __enter__(self):
        return self.start()

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

class RealTimeVisionAnalyzer:
    """Runs motion detection and YOLOv5 object detection on a video stream.

    Frames are pulled from a ``VideoStreamWidget`` on a worker thread,
    annotated, JPEG-encoded and pushed into an ``ipywidgets.Image``.
    """

    def __init__(self):
        # Initialize YOLO model
        self.model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.back_sub = cv2.createBackgroundSubtractorMOG2(history=500,
                                                           varThreshold=16,
                                                           detectShadows=True)

        self.enable_motion = True
        self.enable_detection = True
        # NOTE(review): this flag is stored but never read by any method of
        # this class, so toggling it currently has no effect.
        self.enable_pose = True

        self.stop_stream = False
        self.stream_thread = None
        self.output_widget = None

    def process_frame(self, frame):
        """Return an annotated copy of ``frame``; the input is not modified.

        Args:
            frame: BGR image as delivered by OpenCV capture.

        Returns:
            Copy of the frame with green 'Motion' boxes and blue detection
            boxes drawn for whichever analytics are enabled.
        """
        processed_frame = frame.copy()

        if self.enable_motion:
            self._draw_motion(frame, processed_frame)
        if self.enable_detection:
            self._draw_detections(frame, processed_frame)

        return processed_frame

    def _draw_motion(self, frame, canvas):
        """Box every foreground contour larger than 500 px² onto ``canvas``."""
        fg_mask = self.back_sub.apply(frame)
        _, thresh = cv2.threshold(fg_mask, 30, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            if cv2.contourArea(contour) > 500:
                (x, y, w, h) = cv2.boundingRect(contour)
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(canvas, 'Motion', (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    def _draw_detections(self, frame, canvas):
        """Run YOLOv5 on the clean frame and draw its boxes onto ``canvas``."""
        # Inference uses the untouched input, not the canvas: motion boxes
        # already painted on the canvas would otherwise be fed to the model.
        # The channel flip converts OpenCV's BGR order to the RGB order the
        # hub model expects for NumPy input.
        rgb_input = np.ascontiguousarray(frame[..., ::-1])
        detections = self.model(rgb_input).pandas().xyxy[0]

        for _, detection in detections.iterrows():
            x1, y1, x2, y2 = map(int, detection[['xmin', 'ymin', 'xmax', 'ymax']])
            label = f"{detection['name']} {detection['confidence']:.2f}"
            cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(canvas, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    def start_stream(self, source=0):
        """Start the video stream with processing"""
        self.stop_stream = False
        self.stream_thread = Thread(target=self._stream_worker, args=(source,))
        self.stream_thread.daemon = True
        self.stream_thread.start()

    def _stream_worker(self, source):
        """Worker thread for video streaming"""
        stream = VideoStreamWidget(source)
        stream.start()

        try:
            # Widget creation sits inside the try block so the stream is
            # stopped even if displaying the widget fails.
            if self.output_widget is None:
                self.output_widget = widgets.Image(format='jpeg')
                display(self.output_widget)

            while not self.stop_stream:
                frame = stream.read()
                if frame is None:
                    # A reader may hand back None when the source has ended.
                    break
                processed_frame = self.process_frame(frame)
                rgb_frame = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
                jpg_data = PIL.Image.fromarray(rgb_frame)
                img_byte_arr = io.BytesIO()
                jpg_data.save(img_byte_arr, format='JPEG')
                self.output_widget.value = img_byte_arr.getvalue()

        except Exception as e:
            print(f"Streaming error: {str(e)}")
        finally:
            stream.stop()

    def stop_streaming(self):
        """Stop the video stream"""
        self.stop_stream = True
        if self.stream_thread and self.stream_thread.is_alive():
            self.stream_thread.join()

def create_streaming_interface():
    """Build and display the control panel for a ``RealTimeVisionAnalyzer``.

    The panel has a source dropdown, three analytics checkboxes and
    Start/Stop buttons. Start copies the checkbox values onto the analyzer
    and launches its stream; Stop halts the stream and re-enables the
    controls.
    """
    analyzer = RealTimeVisionAnalyzer()

    source_dropdown = widgets.Dropdown(
        options=[('Webcam', 0), ('IP Camera', 'rtsp://your_camera_ip')],
        value=0,
        description='Source:'
    )
    motion_checkbox = widgets.Checkbox(value=True, description='Motion Detection')
    detection_checkbox = widgets.Checkbox(value=True, description='Object Detection')
    pose_checkbox = widgets.Checkbox(value=True, description='Pose Estimation')

    start_button = widgets.Button(description='Start Stream')
    stop_button = widgets.Button(description='Stop Stream', disabled=True)

    def _set_running(running):
        # While streaming only the Stop button stays usable.
        start_button.disabled = running
        stop_button.disabled = not running
        source_dropdown.disabled = running

    def on_start_click(b):
        analyzer.enable_motion = motion_checkbox.value
        analyzer.enable_detection = detection_checkbox.value
        analyzer.enable_pose = pose_checkbox.value

        _set_running(True)
        analyzer.start_stream(source_dropdown.value)

    def on_stop_click(b):
        analyzer.stop_streaming()
        _set_running(False)

    start_button.on_click(on_start_click)
    stop_button.on_click(on_stop_click)

    # Layout: source selector, row of checkboxes, row of buttons.
    checkbox_row = widgets.HBox([motion_checkbox, detection_checkbox, pose_checkbox])
    button_row = widgets.HBox([start_button, stop_button])
    display(widgets.VBox([source_dropdown, checkbox_row, button_row]))

# Entry point: announce start-up, then build and display the control panel.
if __name__ == "__main__":
    print("Starting Real-Time Video Analytics...")
    create_streaming_interface()




[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


D:\Hadi e Learning\yolov5\yolov5


Cloning into 'yolov5'...

[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Starting Real-Time Video Analytics...


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|█████████████████████████████████████████████████████████████████████████████| 14.1M/14.1M [00:01<00:00, 11.1MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


VBox(children=(Dropdown(description='Source:', options=(('Webcam', 0), ('IP Camera', 'rtsp://your_camera_ip'))…

Image(value=b'', format='jpeg')

In [3]:
!pip install torch torchvision
!pip install ultralytics
!pip install supervision
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt

import torch
import cv2
import numpy as np
import supervision as sv
import threading
from queue import Queue
import time
from IPython.display import display, clear_output
import PIL.Image
import ipywidgets as widgets
from threading import Thread, Event
import io  
import os
import sys
from datetime import datetime

class VideoStreamWidget:
    """Reads frames from a capture source on a background thread.

    Frames are buffered in a bounded queue (128 entries) and handed out in
    order by ``read()``. Can be used as a context manager: entering starts the
    reader thread, leaving stops it and releases the capture.
    """

    def __init__(self, src=0):
        """Open the capture source.

        Args:
            src: Source passed to ``cv2.VideoCapture``.
        """
        self.capture = cv2.VideoCapture(src)
        self.status = True
        self.thread = Thread(target=self._update_frame, args=(), daemon=True)
        self.stopped = False
        self.q = Queue(maxsize=128)

    def start(self):
        """Start the reader thread and return ``self`` for chaining."""
        self.thread.start()
        return self

    def _update_frame(self):
        """Reader loop: fill the queue until stopped or the source runs dry."""
        while not self.stopped:
            if self.q.full():
                time.sleep(0.001)
                continue

            ret, frame = self.capture.read()
            if not ret:
                # Only raise the flag here. Calling stop() from this thread
                # would try to join the current thread, which raises
                # RuntimeError; the consumer's stop() releases the capture.
                self.stopped = True
                break
            self.q.put(frame)

    def read(self):
        """Return the next buffered frame.

        Returns:
            The oldest queued frame, or ``None`` once the stream has stopped
            and the queue is drained. Polling (instead of a bare blocking
            ``get``) keeps the caller from waiting forever on a finished
            stream.
        """
        while True:
            if not self.q.empty():
                return self.q.get()
            if self.stopped:
                # Re-check so a frame queued just before the flag was set is
                # not dropped.
                return None if self.q.empty() else self.q.get()
            time.sleep(0.001)

    def stop(self):
        """Signal the reader to finish, wait for it, and release the capture."""
        self.stopped = True
        worker = self.thread
        if worker.is_alive() and worker is not threading.current_thread():
            worker.join()
        self.capture.release()

    def __enter__(self):
        return self.start()

    def __exit__(self, exc_type, exc_value, traceback):
        self.stop()

class RealTimeVisionAnalyzer:
    """Runs motion detection and YOLOv5 object detection on a video stream.

    Frames are pulled from a ``VideoStreamWidget`` on a worker thread,
    annotated, JPEG-encoded and pushed into an ``ipywidgets.Image``. Every
    30th processed frame is also written to ``output_dir``.
    """

    def __init__(self):
        self.model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.back_sub = cv2.createBackgroundSubtractorMOG2(history=500,
                                                           varThreshold=16,
                                                           detectShadows=True)

        self.enable_motion = True
        self.enable_detection = True
        # NOTE(review): this flag is stored but never read by any method of
        # this class, so toggling it currently has no effect.
        self.enable_pose = True

        self.stop_stream = False
        self.stream_thread = None
        self.output_widget = None

        # Snapshots are written here, relative to the working directory.
        self.output_dir = 'output_frames'
        os.makedirs(self.output_dir, exist_ok=True)

    def process_frame(self, frame):
        """Process frame with all enabled analytics.

        Args:
            frame: BGR image as delivered by OpenCV capture.

        Returns:
            Annotated copy of the frame; the input itself is not modified.
        """
        processed_frame = frame.copy()

        if self.enable_motion:
            self._draw_motion(frame, processed_frame)
        if self.enable_detection:
            self._draw_detections(frame, processed_frame)

        return processed_frame

    def _draw_motion(self, frame, canvas):
        """Box every foreground contour larger than 500 px² onto ``canvas``."""
        fg_mask = self.back_sub.apply(frame)
        _, thresh = cv2.threshold(fg_mask, 30, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            if cv2.contourArea(contour) > 500:
                (x, y, w, h) = cv2.boundingRect(contour)
                cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(canvas, 'Motion', (x, y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    def _draw_detections(self, frame, canvas):
        """Run YOLOv5 on the clean frame and draw its boxes onto ``canvas``."""
        # Inference uses the untouched input, not the canvas: motion boxes
        # already painted on the canvas would otherwise be fed to the model.
        # The channel flip converts OpenCV's BGR order to the RGB order the
        # hub model expects for NumPy input.
        rgb_input = np.ascontiguousarray(frame[..., ::-1])
        detections = self.model(rgb_input).pandas().xyxy[0]

        for _, detection in detections.iterrows():
            x1, y1, x2, y2 = map(int, detection[['xmin', 'ymin', 'xmax', 'ymax']])
            label = f"{detection['name']} {detection['confidence']:.2f}"
            cv2.rectangle(canvas, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(canvas, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    def start_stream(self, source=0):
        """Start the video stream with processing"""
        self.stop_stream = False
        self.stream_thread = Thread(target=self._stream_worker, args=(source,))
        self.stream_thread.daemon = True
        self.stream_thread.start()

    def _stream_worker(self, source):
        """Worker thread for video streaming"""
        stream = VideoStreamWidget(source)
        stream.start()

        try:
            # Widget creation sits inside the try block so the stream is
            # stopped even if displaying the widget fails.
            if self.output_widget is None:
                self.output_widget = widgets.Image(format='jpeg', width='100%')
                display(self.output_widget)

            frame_count = 0
            while not self.stop_stream:
                frame = stream.read()
                if frame is None:
                    # A reader may hand back None when the source has ended.
                    break

                processed_frame = self.process_frame(frame)

                rgb_frame = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
                jpg_data = PIL.Image.fromarray(rgb_frame)
                img_byte_arr = io.BytesIO()
                jpg_data.save(img_byte_arr, format='JPEG')
                self.output_widget.value = img_byte_arr.getvalue()

                # Persist every 30th frame; the file name has one-second
                # resolution, so saves within the same second overwrite.
                frame_count += 1
                if frame_count % 30 == 0:
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    save_path = os.path.join(self.output_dir, f'frame_{timestamp}.jpg')
                    cv2.imwrite(save_path, processed_frame)

        except Exception as e:
            print(f"Streaming error: {str(e)}")
        finally:
            stream.stop()

    def stop_streaming(self):
        """Stop the video stream"""
        self.stop_stream = True
        if self.stream_thread and self.stream_thread.is_alive():
            self.stream_thread.join()

def create_streaming_interface():
    """Build and display the control panel for a ``RealTimeVisionAnalyzer``.

    The panel has a source dropdown, three analytics checkboxes, Start/Stop
    buttons and a status line. Errors raised by the click handlers are shown
    in the status line instead of propagating.
    """
    analyzer = RealTimeVisionAnalyzer()

    source_dropdown = widgets.Dropdown(
        options=[('Webcam', 0), ('IP Camera', 'rtsp://your_camera_ip')],
        value=0,
        description='Source:'
    )
    motion_checkbox = widgets.Checkbox(value=True, description='Motion Detection')
    detection_checkbox = widgets.Checkbox(value=True, description='Object Detection')
    pose_checkbox = widgets.Checkbox(value=True, description='Pose Estimation')

    start_button = widgets.Button(description='Start Stream')
    stop_button = widgets.Button(description='Stop Stream', disabled=True)
    status_label = widgets.Label(value='Status: Ready')

    def _set_running(running):
        # While streaming only the Stop button stays usable.
        start_button.disabled = running
        stop_button.disabled = not running
        source_dropdown.disabled = running

    def on_start_click(b):
        try:
            analyzer.enable_motion = motion_checkbox.value
            analyzer.enable_detection = detection_checkbox.value
            analyzer.enable_pose = pose_checkbox.value

            _set_running(True)

            status_label.value = 'Status: Starting stream...'
            analyzer.start_stream(source_dropdown.value)
            status_label.value = 'Status: Streaming'
        except Exception as e:
            # Report the failure and hand the controls back to the user.
            status_label.value = f'Status: Error - {str(e)}'
            _set_running(False)

    def on_stop_click(b):
        try:
            analyzer.stop_streaming()
            _set_running(False)
            status_label.value = 'Status: Ready'
        except Exception as e:
            status_label.value = f'Status: Error - {str(e)}'

    start_button.on_click(on_start_click)
    stop_button.on_click(on_stop_click)

    # Layout: source selector, row of checkboxes, row of buttons, status line.
    checkbox_row = widgets.HBox([motion_checkbox, detection_checkbox, pose_checkbox])
    button_row = widgets.HBox([start_button, stop_button])
    display(widgets.VBox([source_dropdown, checkbox_row, button_row, status_label]))

# Entry point: announce start-up, then build and display the control panel.
if __name__ == "__main__":
    print("Starting Real-Time Video Analytics...")
    create_streaming_interface()




[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


D:\Hadi e Learning\yolov5\yolov5\yolov5


Cloning into 'yolov5'...


Starting Real-Time Video Analytics...



[notice] A new release of pip is available: 23.3.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|█████████████████████████████████████████████████████████████████████████████| 14.1M/14.1M [00:01<00:00, 10.2MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


VBox(children=(Dropdown(description='Source:', options=(('Webcam', 0), ('IP Camera', 'rtsp://your_camera_ip'))…

Image(value=b'', format='jpeg', width='100%')

In [14]:
import cv2
import mediapipe as mp
import torch
import numpy as np
from torchvision import models
import torchvision.transforms as transforms

# Load YOLO model for action detection (here, YOLOv5 as an example)
# NOTE(review): 'yolov5s' is loaded without custom weights, so it presumably
# reports generic object classes rather than actions — confirm.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Pretrained YOLO model

# Load MediaPipe Pose for pose detection
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# MediaPipe drawing utils to draw landmarks
mp_drawing = mp.solutions.drawing_utils

# Set up video capture for real-time video streaming
cap = cv2.VideoCapture(0)  # 0 is the webcam device number

# Define preprocessing transformations for YOLO input
# NOTE(review): `transform` is not referenced by the rest of this cell; the
# model is called directly on the RGB frame.
transform = transforms.Compose([
    transforms.ToTensor()
])

def detect_action(image):
    """Pass ``image`` to the module-level YOLO ``model`` and return its output unchanged."""
    return model(image)

def draw_pose(image, landmarks):
    """Render each landmark set in ``landmarks`` onto ``image`` and return the image.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        landmarks: Iterable of landmark lists; each one is drawn together
            with ``mp_pose.POSE_CONNECTIONS``.

    Returns:
        The same ``image`` object that was passed in.
    """
    for pose_landmarks in landmarks:
        # First spec is green, second is red-channel-zero (255, 0, 0); both
        # use thickness 2 and radius 2.
        first_spec = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)
        second_spec = mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(
            image,
            pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            first_spec,
            second_spec,
        )
    return image

# Run the real-time video stream loop
try:
    while True:
        ret, frame = cap.read()  # Read frame from video stream
        if not ret:
            break

        # Convert BGR to RGB since OpenCV uses BGR and YOLO uses RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = detect_action(rgb_frame)

        # Draw bounding boxes for YOLO detections above the confidence threshold
        for bbox in results.xyxy[0]:
            x1, y1, x2, y2, conf, cls = bbox
            if conf > 0.5:
                top_left = (int(x1), int(y1))
                cv2.rectangle(frame, top_left, (int(x2), int(y2)), (0, 255, 0), 2)
                label = f"{model.names[int(cls)]}: {conf:.2f}"
                cv2.putText(frame, label, (top_left[0], top_left[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # MediaPipe Pose estimation runs on the same RGB frame, marked
        # read-only before it is handed over.
        rgb_frame.flags.writeable = False
        results_pose = pose.process(rgb_frame)

        # Draw pose landmarks on the frame if any poses are detected
        if results_pose.pose_landmarks:
            frame = draw_pose(frame, [results_pose.pose_landmarks])

            # Landmark coordinates are scaled by the frame size to get pixel
            # positions for the name captions.
            height, width, _ = frame.shape
            for idx, landmark in enumerate(results_pose.pose_landmarks.landmark):
                cx, cy = int(landmark.x * width), int(landmark.y * height)
                cv2.putText(frame, f"{mp_pose.PoseLandmark(idx).name}", (cx, cy),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 1)

        # Display the frame with both YOLO detections and pose landmarks
        cv2.imshow('Real-time Action Detection & Pose Estimation', frame)

        # Press 'q' to break the loop and exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even when inference or drawing raises (or the kernel
    # is interrupted), so the camera is not left locked.
    cap.release()
    pose.close()
    cv2.destroyAllWindows()


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [16]:
import cv2
import torch
import numpy as np
from torchvision import models
import torchvision.transforms as transforms

# Load YOLO model for hazard detection (specific use case: fire, weapons, etc.)
# NOTE(review): 'yolov5s' is loaded without custom weights; presumably its
# label set does not include 'fire' or 'weapon' — confirm against model.names.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')  # Pretrained YOLO model

# Load segmentation model (Mask R-CNN)
# NOTE(review): `pretrained=True` may be deprecated in newer torchvision
# releases in favour of a `weights=` argument — confirm for the installed version.
segmentation_model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)
segmentation_model.eval()  # inference mode; the model is only used for prediction here

# Set up video capture for real-time video streaming
cap = cv2.VideoCapture(0)  # 0 is the webcam device number

# Define preprocessing transformations for segmentation model input
# (used by segment_objects to turn a frame into a tensor)
transform = transforms.Compose([
    transforms.ToTensor()
])

def segment_objects(image):
    """Segment a single image with the module-level Mask R-CNN model.

    The image is converted with the module-level ``transform``, given a
    leading batch dimension, and run through ``segmentation_model`` with
    gradient tracking disabled.

    Returns:
        The first (and only) element of the model's output list.
    """
    with torch.no_grad():
        batch = transform(image).unsqueeze(0)
        return segmentation_model(batch)[0]

def hazard_detection(results, hazard_labels=('fire', 'knife', 'weapon')):
    """Pick the detections whose class name is on the hazard list.

    Args:
        results: YOLO result object; ``results.xyxy[0]`` is iterated as rows
            of ``(x1, y1, x2, y2, conf, cls)``.
        hazard_labels: Class names treated as hazards. A label can only match
            if it appears in ``model.names``.
            NOTE(review): presumably the stock checkpoint knows 'knife' but
            not 'fire' or 'weapon' — confirm, or load custom weights.

    Returns:
        List of ``(label, confidence, (x1, y1, x2, y2))`` tuples with the
        confidence as a plain float and the box corners truncated to ints.
    """
    wanted = set(hazard_labels)
    hazards = []
    for x1, y1, x2, y2, conf, cls in results.xyxy[0]:
        label = model.names[int(cls)]
        if label in wanted:
            # float() turns a 0-dim tensor into a plain number so callers can
            # compare or serialise it without tensor semantics.
            hazards.append((label, float(conf), (int(x1), int(y1), int(x2), int(y2))))
    return hazards

def anomaly_detection(prev_frame, curr_frame, min_changed_pixels=1000):
    """Flag a frame pair as anomalous when almost nothing moved between them.

    The frames are differenced, converted to grayscale, blurred and
    thresholded; the pixels that survive the threshold are counted.

    Args:
        prev_frame: Previous BGR frame.
        curr_frame: Current BGR frame.
        min_changed_pixels: Number of changed pixels below which the scene is
            considered static (e.g. something is stuck).

    Returns:
        True when fewer than ``min_changed_pixels`` pixels changed, otherwise
        False. Also False when either frame is missing, because no comparison
        is possible.
    """
    if prev_frame is None or curr_frame is None:
        return False

    diff = cv2.absdiff(prev_frame, curr_frame)
    gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
    blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0)
    _, thresh = cv2.threshold(blur_diff, 20, 255, cv2.THRESH_BINARY)

    # Count changed pixels. Summing the mask instead would add 255 per pixel
    # and make the threshold 255 times stricter than its stated meaning.
    changed_pixels = int(np.count_nonzero(thresh))
    return changed_pixels < min_changed_pixels

# Masks whose detection score is at or below this value are not drawn.
MASK_SCORE_THRESHOLD = 0.5

# Initialize previous frame for anomaly detection
ret, prev_frame = cap.read()
if not ret:
    print("Error: Could not read an initial frame from the video stream.")

try:
    # Skip the loop entirely when the initial read failed: there is no
    # previous frame to compare against and the stream is likely unusable.
    while ret:
        ret, frame = cap.read()  # Read frame from video stream
        if not ret:
            break

        # Keep an unannotated copy: motion must be measured on camera pixels,
        # not on the boxes, masks and text drawn below.
        raw_frame = frame.copy()

        # Both models are fed RGB; OpenCV delivers BGR.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Object Segmentation with Mask R-CNN (painted first so hazard boxes
        # drawn afterwards stay visible on top of the masks)
        segmentation_results = segment_objects(rgb_frame)
        keep = segmentation_results['scores'] > MASK_SCORE_THRESHOLD
        masks = segmentation_results['masks'][keep].cpu().numpy()
        for instance_mask in masks:
            # Each entry is (1, H, W) at the input resolution; threshold the
            # soft mask into a boolean pixel selection.
            frame[instance_mask[0] > 0.5] = [0, 255, 0]

        # Hazard Detection using YOLO model
        results = model(rgb_frame)
        for label, conf, (x1, y1, x2, y2) in hazard_detection(results):
            cv2.putText(frame, f"HAZARD: {label} ({conf:.2f})", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red box for hazard

        # Anomaly Detection (based on movement or lack thereof)
        if anomaly_detection(prev_frame, raw_frame):
            cv2.putText(frame, "Anomaly Detected!", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Update the previous frame for the next iteration
        prev_frame = raw_frame

        # Display the frame with segmentation, hazard, and anomaly indicators
        cv2.imshow('Real-time Segmentation, Hazard & Anomaly Detection', frame)

        # Press 'q' to break the loop and exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if a model call or drawing step raises.
    cap.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [19]:
import cv2
import torch
import numpy as np
from torchvision import models
import torchvision.transforms as transforms

# Pretrained YOLOv5s fetched through torch.hub; acts as the hazard detector
# (intended use case: fire, smoke, unsafe conditions)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Live video source: webcam at device index 0
cap = cv2.VideoCapture(0)

# Preprocessing pipeline (image -> tensor) for any segmentation or additional models, if needed
transform = transforms.Compose([transforms.ToTensor()])

def hazard_detection(results, hazard_classes=('fire', 'smoke', 'fire extinguisher', 'danger sign', 'explosive')):
    """Pick out the detections whose class label counts as a hazard.

    Args:
        results: YOLO inference output. ``results.xyxy[0]`` is iterated as rows
            of ``(x1, y1, x2, y2, conf, cls)`` for the first image. When the
            object exposes a ``names`` lookup it is used to translate class
            ids to labels; otherwise the module-level ``model.names`` is used.
        hazard_classes: Labels to report. Defaults to the original example
            list (fire, smoke, fire extinguisher, danger sign, explosive).
            NOTE(review): the stock COCO-pretrained YOLOv5 checkpoints do not
            appear to define any of these labels, in which case the default
            always yields an empty list - pass labels the loaded model really
            emits, or load custom weights. Confirm against ``model.names``.

    Returns:
        list of ``(label, confidence, (x1, y1, x2, y2))`` tuples, one per
        matching detection, in detection order. ``confidence`` is a plain
        ``float``; the box corners are truncated to ``int`` pixel coordinates.
    """
    # Prefer the class-name table attached to this particular result so the
    # function does not depend on which model happens to be bound globally.
    names = getattr(results, 'names', None)
    if names is None:
        names = model.names

    wanted = set(hazard_classes)
    hazards = []
    for x1, y1, x2, y2, conf, cls in results.xyxy[0]:
        label = names[int(cls)]
        if label in wanted:
            # float(conf): hand callers a Python number rather than a 0-d tensor
            hazards.append((label, float(conf), (int(x1), int(y1), int(x2), int(y2))))
    return hazards

def anomaly_detection(prev_frame, curr_frame, min_moving_pixels=1000, diff_threshold=20):
    """Flag an anomaly when too little changed between two consecutive frames.

    The frames are differenced, converted to grayscale, blurred with a 5x5
    Gaussian and binarised; the number of pixels that survive the threshold
    is taken as the amount of movement.

    Only the low-movement case is detected (e.g. something is stuck); sudden
    spikes in movement are not evaluated by this function.

    Args:
        prev_frame: Previous BGR frame, or ``None`` if none is available.
        curr_frame: Current BGR frame, or ``None``.
        min_moving_pixels: Fewer changed pixels than this counts as an anomaly.
        diff_threshold: Per-pixel intensity difference (after blur) above
            which a pixel is considered changed.

    Returns:
        ``True`` when the changed-pixel count is below ``min_moving_pixels``;
        ``False`` otherwise, and also when either frame is ``None`` because
        no comparison is possible.
    """
    # A failed capture hands back None; there is nothing to compare in that case.
    if prev_frame is None or curr_frame is None:
        return False

    # Calculate absolute difference between frames for motion detection
    diff = cv2.absdiff(prev_frame, curr_frame)
    gray_diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
    blur_diff = cv2.GaussianBlur(gray_diff, (5, 5), 0)
    _, thresh = cv2.threshold(blur_diff, diff_threshold, 255, cv2.THRESH_BINARY)

    # Count changed pixels. Summing `thresh` instead would add 255 per pixel,
    # turning the threshold into roughly four pixels rather than a pixel count.
    moving_pixels = int(np.count_nonzero(thresh))

    # Low movement can indicate anomaly (e.g., something is stuck)
    return moving_pixels < min_moving_pixels

# Initialize previous frame for anomaly detection.
# If this first read fails prev_frame is None and the anomaly check below is
# skipped until a real frame has been captured.
ret, prev_frame = cap.read()

# Run the real-time video stream loop
try:
    while True:
        ret, frame = cap.read()  # Read frame from video stream
        if not ret:
            break

        # Un-annotated snapshot for motion comparison. Boxes and banner text
        # are drawn on `frame` only; comparing annotated frames would make the
        # "Anomaly Detected!" text itself register as movement on the next
        # iteration and the banner would switch itself off every other frame.
        raw_frame = frame.copy()

        # Convert BGR to RGB since YOLO uses RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Hazard Detection using YOLO model
        results = model(rgb_frame)
        hazards = hazard_detection(results)
        for label, conf, (x1, y1, x2, y2) in hazards:
            # Draw red boxes around detected hazards
            cv2.putText(frame, f"HAZARD: {label} ({conf:.2f})", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red box for hazard

        # Anomaly Detection (based on movement or lack thereof), on clean frames
        if prev_frame is not None and anomaly_detection(prev_frame, raw_frame):
            cv2.putText(frame, "Anomaly Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Update the previous frame for the next iteration
        prev_frame = raw_frame

        # Display the frame with hazard and anomaly indicators
        cv2.imshow('Real-time Hazard & Anomaly Detection', frame)

        # Press 'q' to break the loop and exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if inference raises or the kernel is interrupted,
    # so the webcam is not left locked and the window does not hang.
    cap.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients
Adding AutoShape... 


In [2]:
import cv2
import torch
import numpy as np

# Pretrained YOLOv5x fetched through torch.hub; acts as the hazard detector
model = torch.hub.load('ultralytics/yolov5', 'yolov5x')
# Detection confidence threshold - lower it if needed
model.conf = 0.25

# Labels treated as hazards, chosen from what the model can actually output
# (knife and fire hydrant only)
hazard_classes = ['knife', 'fire hydrant']

def hazard_detection(results):
    """Filter YOLO detections down to the labels listed in ``hazard_classes``.

    Walks the rows of ``results.xyxy[0]`` - each unpacked as
    ``(x1, y1, x2, y2, conf, cls)`` - looks the class id up in ``model.names``
    and keeps the row when that label is in the module-level
    ``hazard_classes`` list ('knife' and 'fire hydrant').

    Returns:
        list of ``(label, conf, (x1, y1, x2, y2))`` tuples in detection order,
        with the box corners converted to ``int``.
    """
    found = []
    for detection in results.xyxy[0]:
        x1, y1, x2, y2, conf, cls = detection
        label = model.names[int(cls)]
        if label not in hazard_classes:
            continue  # not one of the configured hazard labels
        corners = tuple(map(int, (x1, y1, x2, y2)))
        found.append((label, conf, corners))
    return found

# Set up video capture for real-time video streaming
cap = cv2.VideoCapture(0)  # 0 is the webcam device number

# Initialize previous frame. Nothing in this cell reads prev_frame; it is
# still maintained (as an un-annotated frame) in case another cell uses it.
ret, prev_frame = cap.read()

# Run the real-time video stream loop
try:
    while True:
        ret, frame = cap.read()  # Read frame from video stream
        if not ret:
            break

        # Clean snapshot taken before any boxes or captions are drawn on `frame`
        raw_frame = frame.copy()

        # Convert BGR to RGB since YOLO uses RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Hazard Detection using YOLO model
        results = model(rgb_frame)
        hazards = hazard_detection(results)
        for label, conf, (x1, y1, x2, y2) in hazards:
            # Draw red boxes around detected hazards
            cv2.putText(frame, f"HAZARD: {label} ({conf:.2f})", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)  # Red box for hazard

        # Update the previous frame for the next iteration
        prev_frame = raw_frame

        # Display the frame with hazard indicators
        cv2.imshow('Real-time Hazard Detection', frame)

        # Press 'q' to break the loop and exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if inference raises or the kernel is interrupted,
    # so the webcam is not left locked and the window does not hang.
    cap.release()
    cv2.destroyAllWindows()


Using cache found in C:\Users\aoons/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-10-24 Python-3.10.11 torch-2.0.0+cpu CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5x.pt to yolov5x.pt...
100%|███████████████████████████████████████████████████████████████████████████████| 166M/166M [00:27<00:00, 6.27MB/s]

Fusing layers... 
YOLOv5x summary: 444 layers, 86705005 parameters, 0 gradients
Adding AutoShape... 
