# Simple Classification

In [None]:
from ultralytics import YOLO
import cv2
import os

# Track vehicles in a video with a pretrained YOLO model and let ultralytics
# write the annotated video under the `expt` project directory.
input_vid_location = "videos/sample_usa.mp4"
expt = "results3/"
# Explicit run name so the output location is predictable; the final message
# used to mention this folder even though the name was never passed.
run_name = "vehicle_detection"

# Output directory
os.makedirs(expt, exist_ok=True)

# Load YOLO model
model = YOLO('yolo11x.pt')

# Process video and save results.
# stream=True makes track() return a generator, so frames are only processed
# while the loop below consumes it.
results = model.track(
    source=input_vid_location,
    save=True,
    project=expt,
    name=run_name,
    classes=[2, 3, 5, 7],  # car, motorcycle, bus, truck
    conf=0.5,
    show=False,
    imgsz=640,
    stream=True,
)

frame_count = 0
save_dir = None
for r in results:
    frame_count += 1
    # NOTE(review): ultralytics is expected to expose the real output folder
    # on each result (it may add a numeric suffix when the folder already
    # exists); fall back to the configured path if the attribute is missing.
    save_dir = getattr(r, "save_dir", None) or save_dir
    if frame_count % 500 == 0:
        print(f"Processed {frame_count} frames...")

# print(f"Complete! Total frames: {frame_count}")
print(f"Output video saved in: {save_dir or os.path.join(expt, run_name)}")


video 1/1 (frame 1/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 7 cars, 2 trucks, 113.8ms
video 1/1 (frame 2/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 7 cars, 2 trucks, 51.3ms
video 1/1 (frame 3/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 8 cars, 1 bus, 1 truck, 51.3ms
video 1/1 (frame 4/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 8 cars, 2 trucks, 51.0ms
video 1/1 (frame 5/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 8 cars, 2 trucks, 51.2ms
Processed 500 frames...
video 1/1 (frame 6/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Surveillance Analysis\videos\sample_usa.mp4: 384x640 8 cars, 2 trucks, 51.1ms
video 1/1 (frame 7/836) c:\Users\Jaath\Documents\Py Projects\AI-Driven Sur

LPR With Basic Color Separation

In [5]:
from ultralytics import YOLO
import os
import cv2
import numpy as np
import easyocr
from collections import defaultdict
import time

def get_dominant_color(image):
    """Return the dominant color of the central part of an image.

    The middle third of the image (in both dimensions) is resized to 30x30
    and clustered into three groups with ``cv2.kmeans``; the center of the
    most populated cluster is returned.

    Args:
        image: Non-empty image array. The pixels are reshaped to rows of
            three values, so a 3-channel image is assumed (BGR when the
            image comes from OpenCV -- confirm against the caller).

    Returns:
        The winning cluster center as a length-3 float32 array, in the same
        channel order as ``image``.

    Note:
        K-means is started from random centers, so results can vary slightly
        between calls on the same image.
    """
    h, w = image.shape[:2]

    # Sample from center regions to avoid shadows/reflections
    center_y, center_x = h // 2, w // 2
    sample_h, sample_w = h // 3, w // 3

    y1 = max(0, center_y - sample_h // 2)
    y2 = min(h, center_y + sample_h // 2)
    x1 = max(0, center_x - sample_w // 2)
    x2 = min(w, center_x + sample_w // 2)

    center_region = image[y1:y2, x1:x2]

    # For crops smaller than 6 px in either dimension the half-window is 0,
    # which yields an empty slice and makes cv2.resize raise. Use the whole
    # image instead of crashing on tiny detections.
    if center_region.size == 0:
        center_region = image

    # Downsample for speed
    small = cv2.resize(center_region, (30, 30))
    data = np.float32(small.reshape((-1, 3)))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 5, 1.0)
    k = 3
    _, labels, centers = cv2.kmeans(data, k, None, criteria, 5, cv2.KMEANS_RANDOM_CENTERS)

    # The dominant color is the center of the cluster with the most pixels.
    dominant_color = centers[np.argmax(np.bincount(labels.flatten()))]
    return dominant_color

def color_name(bgr):
    """Convert a BGR triple to a coarse color name.

    Args:
        bgr: Iterable of three channel values in blue, green, red order.
            The thresholds below assume a 0-255 scale.

    Returns:
        One of "White", "Black", "Light Gray", "Gray", "Dark Gray", "Yellow",
        "Purple", "Cyan", "Red", "Orange", "Green", "Blue" or "Brown".
    """
    # Plain floats avoid wrap-around when the input holds uint8 values.
    b, g, r = (float(c) for c in bgr)

    # Calculate brightness and saturation
    brightness = (r + g + b) / 3
    max_val = max(r, g, b)
    min_val = min(r, g, b)
    saturation = (max_val - min_val) / max_val if max_val > 0 else 0

    # White/Gray/Black based on brightness and saturation
    if brightness > 200 and saturation < 0.2:
        return "White"
    if brightness < 50:
        return "Black"
    if saturation < 0.3:
        if brightness > 150:
            return "Light Gray"
        if brightness > 100:
            return "Gray"
        return "Dark Gray"

    margin = 20

    # Two-channel mixes: both channels clearly exceed the third and neither
    # exceeds the other by more than the margin. (Requiring two channels to
    # each be the single dominant one, as before, can never be satisfied.)
    # These cases never overlap with the single-channel dominance tests
    # below, so colors with one dominant channel are classified as before.
    if (min(r, g) > b + margin and abs(r - g) <= margin
            and saturation > 0.4 and min(r, g) > 150):
        # The brightness gate keeps darker red+green mixes for "Brown" below.
        return "Yellow"
    if min(r, b) > g + margin and abs(r - b) <= margin:
        return "Purple"
    if min(g, b) > r + margin and abs(g - b) <= margin:
        return "Cyan"

    # Single-channel dominance
    r_dom = r > max(g, b) + margin
    g_dom = g > max(r, b) + margin
    b_dom = b > max(r, g) + margin

    if r_dom:  # Red spectrum
        if r > 120 and g < 80 and b < 80:
            return "Red"
        if r > 100 and g > 60:
            return "Orange"
        return "Red"
    if g_dom:  # Green spectrum
        return "Green"
    if b_dom:  # Blue spectrum
        return "Blue"
    if r > 100 and g > 70 and b < 60:  # Brown
        return "Brown"

    # Fallback based on the largest channel
    if max_val == r:
        return "Red"
    if max_val == g:
        return "Green"
    return "Blue"

def assign_plate_to_vehicle(plate_bbox, vehicle_bboxes):
    """Pick the vehicle a license plate belongs to.

    Only vehicles whose box contains the plate's center point are considered;
    among those, the one whose own center lies nearest to the plate center
    wins (the first one encountered wins a tie).

    Args:
        plate_bbox: Plate box as ``(x1, y1, x2, y2)``.
        vehicle_bboxes: Mapping of vehicle id to ``(x1, y1, x2, y2)`` box.

    Returns:
        The id of the chosen vehicle, or ``None`` when no vehicle box
        contains the plate center.
    """
    left, top, right, bottom = plate_bbox
    cx = (left + right) / 2
    cy = (top + bottom) / 2

    def contains_plate(box):
        bx1, by1, bx2, by2 = box
        return bx1 <= cx <= bx2 and by1 <= cy <= by2

    def offset_from_center(entry):
        bx1, by1, bx2, by2 = entry[1]
        mid_x = (bx1 + bx2) / 2
        mid_y = (by1 + by2) / 2
        return ((cx - mid_x) ** 2 + (cy - mid_y) ** 2) ** 0.5

    enclosing = [entry for entry in vehicle_bboxes.items() if contains_plate(entry[1])]
    if not enclosing:
        return None

    # min() keeps the earliest entry among equal distances.
    return min(enclosing, key=offset_from_center)[0]

# Vehicle tracking + license plate recognition over a video.
# For every processed frame: track vehicles, detect plates, attach each plate
# to the vehicle that contains it, OCR the plate once per vehicle, and write
# an annotated copy of the frame to the output video.

# USER SETTINGS
input_vid_location = "videos/sam.mp4"
license_plate_model_path = "runs/detect/train2/weights/best.pt"  # trained model
expt = "results/10"
frame_limit = 100  # Stop after this many processed frames (falsy = no limit)
skip_frames = 1  # Process every frame for better tracking (must be >= 1)

# SETUP
os.makedirs(expt, exist_ok=True)
vehicle_model = YOLO('yolo11x.pt')
plate_model = YOLO(license_plate_model_path)  # Your trained license plate detector
vehicle_model.fuse()

# Initialize OCR reader
reader = easyocr.Reader(['en'], gpu=True)

cap = cv2.VideoCapture(input_vid_location)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty video and report.
    cap.release()
    raise IOError(f"Could not open input video: {input_vid_location}")

# Keep the fractional frame rate (e.g. 29.97); truncating it to an int changes
# the playback speed of the output. Some sources report 0, so fall back to an
# arbitrary but usable default.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(f"{expt}/vehicle_complete.mp4", fourcc, fps, (width, height))

license_plates = []                # unique plate strings, in detection order
vehicle_cache = defaultdict(dict)  # track id -> {'color': ..., 'plate': ...}
tracked_plates = set()             # plate strings already recorded
frame_count = 0                    # frames read and counted
processed_frames = 0               # frames that went through detection

start_time = time.time()

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or (frame_limit and processed_frames >= frame_limit):
            break

        frame_count += 1

        # Skip frames for performance (skipped frames are written unannotated)
        if frame_count % skip_frames != 0:
            out.write(frame)
            continue

        processed_frames += 1

        # Untouched pixels for color sampling and OCR: `frame` gets boxes and
        # labels drawn on it below, which would otherwise leak into the crops.
        clean_frame = frame.copy()

        # Detect vehicles
        vehicle_results = vehicle_model.track(frame, classes=[2, 3, 5, 7], conf=0.5, persist=True)

        # Detect license plates in the same frame
        plate_results = plate_model(frame, conf=0.3)

        # Store vehicle bounding boxes for plate assignment
        current_vehicles = {}

        if vehicle_results[0].boxes is not None:
            for box in vehicle_results[0].boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                class_id = int(box.cls[0])
                track_id = int(box.id[0]) if box.id is not None else -1

                # Boxes without a track id are neither labeled nor cached.
                if track_id == -1:
                    continue

                current_vehicles[track_id] = (x1, y1, x2, y2)
                vehicle_type = vehicle_model.names[class_id]

                # Get or calculate vehicle color (computed once per track id)
                if 'color' in vehicle_cache[track_id]:
                    color = vehicle_cache[track_id]['color']
                else:
                    vehicle_crop = clean_frame[y1:y2, x1:x2]
                    if vehicle_crop.size > 0:
                        dominant_color = get_dominant_color(vehicle_crop)
                        color = color_name(dominant_color)
                        vehicle_cache[track_id]['color'] = color
                    else:
                        color = "Unknown"

                # Get cached plate text if available
                plate_text = vehicle_cache[track_id].get('plate', '')

                # Draw vehicle box and label
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{color} {vehicle_type}{plate_text}", (x1, y1-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Process license plates and assign to vehicles
        if plate_results[0].boxes is not None:
            for plate_box in plate_results[0].boxes:
                px1, py1, px2, py2 = map(int, plate_box.xyxy[0])

                # Draw license plate bounding box
                cv2.rectangle(frame, (px1, py1), (px2, py2), (255, 0, 0), 2)

                # Assign plate to vehicle
                assigned_vehicle = assign_plate_to_vehicle((px1, py1, px2, py2), current_vehicles)

                # Explicit None check: an id must not be rejected for being falsy.
                if assigned_vehicle is None or 'plate' in vehicle_cache[assigned_vehicle]:
                    continue

                # Crop license plate from the unannotated frame
                plate_crop = clean_frame[py1:py2, px1:px2]
                if plate_crop.size == 0:
                    continue

                try:
                    # Preprocess plate image
                    gray = cv2.cvtColor(plate_crop, cv2.COLOR_BGR2GRAY)
                    gray = cv2.equalizeHist(gray)

                    # Resize for better OCR (upscale to at least 200x100)
                    h, w = gray.shape
                    if h < 100 or w < 200:
                        scale = max(200/w, 100/h)
                        gray = cv2.resize(gray, None, fx=scale, fy=scale)

                    # OCR on license plate
                    ocr_results = reader.readtext(gray,
                                                  allowlist='ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-',
                                                  width_ths=0.7,
                                                  height_ths=0.7)

                    # Keep the most confident reading that looks like a plate
                    best_text = ""
                    best_conf = 0

                    for (bbox, text, conf) in ocr_results:
                        text = ''.join(c for c in text if c.isalnum() or c == '-').upper()

                        if conf > 0.5 and len(text) >= 3 and conf > best_conf:
                            best_conf = conf
                            best_text = text

                    if best_text and best_text not in tracked_plates:
                        license_plates.append(best_text)
                        tracked_plates.add(best_text)
                        plate_text = f" - {best_text}"
                        vehicle_cache[assigned_vehicle]['plate'] = plate_text
                        print(f"New plate detected: {best_text} (Vehicle ID: {assigned_vehicle})")

                except Exception as e:
                    # Best effort: one failed OCR must not abort the whole video.
                    print(f"OCR error: {e}")

        out.write(frame)

        if processed_frames % 50 == 0:
            elapsed = time.time() - start_time
            fps_current = processed_frames / elapsed
            print(f"Processed {processed_frames}/{frame_limit if frame_limit else '?'} frames "
                  f"({fps_current:.2f} fps avg)")
finally:
    # Always finalize the video files, even if processing fails midway.
    cap.release()
    out.release()

# Save results
with open(f"{expt}/license_plates.txt", "w") as f:
    f.write("Detected License Plates:\n")
    f.write("=" * 30 + "\n")
    # license_plates is already duplicate-free (guarded by tracked_plates);
    # iterating it directly keeps the report in detection order instead of
    # arbitrary set order.
    for i, plate in enumerate(license_plates, 1):
        f.write(f"{i}. {plate}\n")

    f.write(f"\nTotal unique plates: {len(set(license_plates))}\n")
    f.write(f"Total detections: {len(license_plates)}\n")

total_time = time.time() - start_time
avg_fps = processed_frames / total_time

print(f"Complete! Video: {expt}/vehicle_complete.mp4")
print(f"Found {len(set(license_plates))} unique plates")
print(f"Plates saved: {expt}/license_plates.txt")
print(f"Processing time: {total_time:.2f}s ({avg_fps:.2f} fps average)")
print(f"Processed {processed_frames} frames out of {frame_count} total frames")

YOLO11x summary (fused): 190 layers, 56,919,424 parameters, 0 gradients, 194.9 GFLOPs

0: 384x640 11 cars, 3 trucks, 51.5ms
Speed: 2.2ms preprocess, 51.5ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 License_Plates, 8.2ms
Speed: 1.4ms preprocess, 8.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 3 trucks, 51.0ms
Speed: 2.1ms preprocess, 51.0ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 License_Plates, 6.8ms
Speed: 1.5ms preprocess, 6.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 trucks, 96.5ms
Speed: 2.4ms preprocess, 96.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 License_Plates, 8.8ms
Speed: 1.7ms preprocess, 8.8ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 cars, 3 trucks, 50.4ms
Speed: 1.7ms preprocess, 50.4ms inference, 2.2ms postprocess per image at s