In [1]:
import cv2
import time
import os
from ultralytics import YOLO

In [2]:
class Config:
    '''
    Tunable settings shared by the object-detection code in this notebook.

    Everything is a plain class attribute, so values are read as
    Config.<NAME> without creating an instance.
    '''
    # --- Model / inference ---
    MODEL_WEIGHTS = 'yolov8n.pt'      # default weights handed to YOLO()
    CONFIDENCE_THRESHOLD = 0.5        # passed as `conf` when running inference
    # NOTE(review): `device` is not referenced by any code visible in this
    # notebook - confirm whether it was meant to be forwarded to the model.
    device = 'cpu'

    # --- Drawing colours (OpenCV colour tuples) ---
    BOX_COLOR = (0, 255, 0)           # green
    TEXT_COLOR = (0, 0, 0)            # black
    TEXT_BG_COLOR = BOX_COLOR         # label background matches the box outline

class ObjectDetector:
    '''
    Wrapper around a YOLO model for frame-by-frame object detection.

    Loads the weights once in __init__, then offers single-frame inference,
    OpenCV annotation of the detections, and a live display loop with an
    FPS overlay.
    '''
    def __init__(self, model_path=None):
        '''
        Initialize the YOLO detector.

        Args:
            model_path: Value handed to YOLO(); when None or empty,
                Config.MODEL_WEIGHTS is used instead.

        Raises:
            Exception: any error raised by YOLO() while loading is printed
                and then re-raised unchanged.
        '''
        self.model_name = model_path if model_path else Config.MODEL_WEIGHTS
        print(f"Loading YOLO model: {self.model_name}")
        try:
            self.model = YOLO(self.model_name)
        except Exception as e:
            print(f"error loading model: {e}")
            raise

    def predict_frame(self, frame):
        '''
        Runs inference on a single frame.

        Args:
            frame: The image passed straight to the model call.

        Returns:
            The first element of the model's results (one frame in, so one
            result out).
        '''
        # NOTE(review): Config.device is defined but not forwarded here -
        # confirm whether inference was meant to be pinned to that device.
        results = self.model(frame, conf=Config.CONFIDENCE_THRESHOLD, verbose=False)
        return results[0]

    @staticmethod
    def _label_layout(y1, label_height=20, text_offset=5):
        '''
        Pick the vertical placement of a label strip for a box whose top is y1.

        Returns:
            (strip_top, strip_bottom, text_baseline_y). The strip sits directly
            above the box when there is room; otherwise it is moved just
            inside the box so it is not cut off by the top of the image.
        '''
        if y1 >= label_height:
            strip_top = y1 - label_height
        else:
            strip_top = max(y1, 0)
        strip_bottom = strip_top + label_height
        return strip_top, strip_bottom, strip_bottom - text_offset

    @staticmethod
    def _compute_fps(prev_time, now):
        '''
        Frames per second from two timestamps.

        Returns 0.0 when there is no previous timestamp yet or when the
        interval is not positive, instead of dividing by zero.
        '''
        if prev_time is None:
            return 0.0
        elapsed = now - prev_time
        return 1.0 / elapsed if elapsed > 0 else 0.0

    def annotate_frame(self, frame, result):
        '''
        Draws bounding boxes and labels on the frame using OpenCV.

        Args:
            frame: Source image; it is copied, the original is not drawn on.
            result: Object exposing `.boxes`, where each box provides
                `xyxy`, `conf` and `cls` (as returned by predict_frame).

        Returns:
            The annotated copy of the frame.
        '''
        annotated_frame = frame.copy()
        for box in result.boxes:
            # Box corners as integer pixel coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Confidence and class name make up the label text
            conf = float(box.conf[0])
            cls = int(box.cls[0])
            label = f"{self.model.names[cls]} {conf:.2f}"

            # Bounding box outline
            cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), Config.BOX_COLOR, 2)

            # Filled strip behind the label, as wide as the rendered text
            (text_w, _), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
            strip_top, strip_bottom, text_y = self._label_layout(y1)
            cv2.rectangle(annotated_frame, (x1, strip_top), (x1 + text_w, strip_bottom),
                          Config.TEXT_BG_COLOR, -1)

            # Label text on top of the strip
            cv2.putText(annotated_frame, label, (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, Config.TEXT_COLOR, 1)
        return annotated_frame

    def run_live_stream(self, source=0):
        '''
        Capture video from a webcam or video file and runs detection.

        Each frame is run through predict_frame and annotate_frame, stamped
        with an FPS counter and shown in a window until the stream ends or
        'q' is pressed.

        Args:
            source: Passed to cv2.VideoCapture (default 0).
        '''
        cap = cv2.VideoCapture(source)
        if not cap.isOpened():
            print(f"Error: Could not open video source {source}")
            cap.release()
            return
        print(f"Starting detection on source: {source}")
        print("press 'q' to exit")

        # No previous frame yet, so the first overlay reads "FPS: 0"
        prev_frame_time = None

        try:
            while True:
                success, frame = cap.read()
                if not success:
                    print('End of stream or failed to read frame')
                    break
                # 1. Inference
                result = self.predict_frame(frame)
                # 2. Annotation
                display_frame = self.annotate_frame(frame, result)
                # 3. Calculate and display FPS (monotonic clock for intervals)
                new_frame_time = time.perf_counter()
                fps = self._compute_fps(prev_frame_time, new_frame_time)
                prev_frame_time = new_frame_time
                cv2.putText(display_frame, f"FPS: {int(fps)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # 4. Show Frame
                cv2.imshow("Object Detection System (YOLOv8)", display_frame)

                # Exit condition
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture and close windows even if a frame fails
            cap.release()
            cv2.destroyAllWindows()

In [3]:
def train_custom_model(self, data_yaml_path, epochs=10):
    """
    Train a model starting from the 'yolov8n.pt' weights on a custom dataset.

    Args:
        self: Unused. NOTE(review): this function is defined at module level,
            not inside a class, so the first argument must be supplied
            explicitly by the caller - confirm whether it was meant to be a
            method.
        data_yaml_path: Forwarded to model.train as `data`.
        epochs: Forwarded to model.train as `epochs` (default 10).

    Returns:
        Whatever model.train(...) returns.
    """
    train_kwargs = {"data": data_yaml_path, "epochs": epochs, "imgsz": 640}
    training_output = YOLO('yolov8n.pt').train(**train_kwargs)
    print("Training complete. Best model saved as best.pt")
    return training_output

In [4]:
if __name__ == "__main__":
    # Build the detector; with no argument it falls back to Config.MODEL_WEIGHTS
    detector = ObjectDetector()

    # --- Mode 1 (active): real-time detection on video source 0 ---
    print("Launching Webcam...")
    detector.run_live_stream(0)

    # --- Mode 2: run on a video file instead (uncomment to use) ---
    # detector.run_live_stream(source='path/to/your/video.mp4')

    # --- Mode 3: train on custom data, requires a dataset.yaml (uncomment to use) ---
    # train_custom_model from the preceding cell is a module-level function,
    # not an ObjectDetector method, so it is called directly; its first
    # parameter is unused.
    # train_custom_model(detector, 'dataset.yaml', epochs=50)

Loading YOLO model: yolov8n.pt
Launching Webcam...
Starting detection on source: 0
press 'q' to exit


In [5]:
def train_custom_model(data='datasets/data.yaml', epochs=50, imgsz=640, batch=8,
                       name='my_custom_model', weights='yolov8n.pt'):
    """
    Trains a YOLOv8 model on a custom dataset.

    Called with no arguments it uses the same settings that were previously
    hard-coded in the body.

    Args:
        data: Path to the dataset 'data.yaml' config file.
        epochs: Number of training epochs (use a small value such as 5 just
            to test that the code runs).
        imgsz: Training image size.
        batch: Images per batch; reduce if you run out of memory.
        name: Requested name of the output run folder.
        weights: Starting weights passed to YOLO(); 'yolov8n.pt' (Nano) is
            small and fast.

    Returns:
        The value returned by model.train(...), or None when training raised
        (the error is printed, not re-raised).
    """
    print("Initializing Training Process...")

    # 1. Load a model (errors while loading propagate to the caller)
    model = YOLO(weights)

    # 2. Train the model
    try:
        print("Starting training... this may take a while depending on your computer speed.")
        results = model.train(
            data=data,
            epochs=epochs,
            imgsz=imgsz,
            batch=batch,
            name=name,
        )
    except Exception as e:
        print(f"Error during training: {e}")
        print("Make sure your 'data.yaml' path is correct and your images are labeled!")
        return None

    # The run folder may not be exactly `name` (the captured training log in
    # this notebook shows 'my_custom_model2'), so report the path recorded by
    # the trainer and only fall back to the conventional location if it is
    # not available.
    trainer = getattr(model, "trainer", None)
    best_path = getattr(trainer, "best", None) or f"runs/detect/{name}/weights/best.pt"

    print("Training Complete!")
    print(f"Your new brain is saved at: {best_path}")
    return results

if __name__ == "__main__":
    # Starts training with train_custom_model()'s default settings; the
    # dataset config is expected at 'datasets/data.yaml', so set up the
    # 'datasets' folder before running this cell.
    train_custom_model()

Initializing Training Process...
Starting training... this may take a while depending on your computer speed.
Ultralytics 8.3.240  Python-3.12.3 torch-2.7.1+cu118 CUDA:0 (Quadro P1000, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=datasets/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=my_custom_model2, nbs=64,