
<h1 align=center><font size = 5>CAPSTONE PROJECT</font></h1>
<h2 align=center><font size = 5>AIML Certification Programme</font></h2>



## Student Name and ID:
Mention your name and ID if done individually<br>
If done as a group, clearly mention the contribution of each group member, both qualitatively and as a percentage.<br>
1. KUNA MURALI (ID: 2024AIML030)                          

2. MADIRE MAHESHKUMAR (ID: 2024AIML079)

3. V VIJAY KUMAR (ID: 2024AIML100)

4. GADIGA MOUNESWAR BABU (ID: 2024AIML095)


## Helmet Violation Detection from Indian CCTV Video

**Problem statement:**
    Detect and flag two-wheeler helmet violations (helmetless riding) from traffic camera frames in Indian cities in real-time.

**Description:**
Create a computer vision system using YOLOv8 and object tracking to detect two-wheeler riders and classify helmet usage. Optionally perform license plate OCR for enforcement.

**Dataset:**

    •	Indian Helmet Detection Dataset
    
    •	Research-generated dataset of Indian two-wheeler violations (images+video with annotations for helmet & plate) 

   

## Setup

Install dependencies and import libraries:

In [None]:
# Install the third-party packages used by this notebook at pinned versions.
!pip install opencv-python==4.9.0.80
!pip install matplotlib==3.8.4
!pip install numpy==1.26.4
!pip install pillow==10.3.0
!pip install pandas==2.2.2
!pip install seaborn==0.13.2
!pip install scikit-learn==1.4.2
!pip install torch==2.3.0
!pip install notebook==7.2.0
!pip install albumentations==1.4.8
!pip install albucore==0.0.16
!pip install ultralytics==8.0.134
# NOTE(review): the --upgrade below replaces the ultralytics and torch versions
# pinned above with the newest available releases, so those two pins have no
# lasting effect — confirm whether the pins or the upgrade is intended.
!pip install --upgrade ultralytics torch

In [None]:
import sys
import os
import random
import shutil
import hashlib
import warnings
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
warnings.filterwarnings("ignore")
from PIL import Image, ImageDraw, ImageEnhance, ImageFilter
import albumentations as A
import glob
import pandas as pd
import seaborn as sns
from itertools import combinations
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Make the project's helper modules importable: add the ``scripts`` directory
# that sits next to the current working directory, plus each of its immediate
# sub-directories, to the module search path.
scripts_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'scripts'))
sys.path.append(scripts_dir)
for entry in os.listdir(scripts_dir):
    entry_path = os.path.join(scripts_dir, entry)
    if not os.path.isdir(entry_path):
        continue  # only directories are added to sys.path
    sys.path.append(entry_path)

from utils import show_images_grid, split_and_copy_dataset, show_random_images_grid
from flip import HorizontalFlip
from zoom import DynamicZoomer
from mosaic import MosaicAugmentor
from cutout import CutoutAugmentor
from synthetic import SyntheticImageAugmentor
from edgedetect import EdgeDetectAugmentor
from cutmix import CutMixAugmentor
from rotate import RotateAugmentor
from shadow import ShadowCastingAugmentor
from grayscale import GrayscaleAugmentor
from noise import NoiseInjectionAugmentor

## Dataset Splitting and Copying for Train/Validation Sets
These functions split an image dataset into training and validation sets and organize the files into the expected directory structure. They work with datasets whose label files are in YOLO format.

**split_and_copy_dataset**

This function performs a straightforward split of a dataset into training and validation subsets based on a specified ratio, then copies images and corresponding label files to destination folders.

**Workflow**:

Deletes any existing data in destination folders for a clean start.

Reads all images in source folder, shuffles them randomly.

Splits shuffled images into training and validation sets per split_ratio.

Copies images and associated label .txt files to train/val folders accordingly.

Prints the count of images copied to each subset.

**split_and_copy_all_processed**

This function extends the above with support for multiple augmentation subfolders inside a processed root directory. It samples a fraction of images from each augmentation folder, then splits and copies them similarly.

**Workflow**:

Iterates over augmentation folders in processed_root.

For each augmentation, samples a fraction (sample_ratio) of images randomly.

Splits sampled images into train and val sets per split_ratio.

Copies sampled, split images and labels to destination folders under model/train/<aug_type> and model/val/<aug_type>.

Removes the `train-` prefix from augmentation folder names before copying.

Prints processed folder and counts per split.

Cleans and recreates destination directories before copying.

In [None]:
def split_and_copy_dataset(
    src_img_dir,
    src_lbl_dir,
    dst_train_img_dir,
    dst_train_lbl_dir,
    dst_val_img_dir,
    dst_val_lbl_dir,
    split_ratio=0.9,
    seed=None,
):
    """
    Split an image/label dataset into train and val sets and copy the files.

    The images found in ``src_img_dir`` (``.jpg``, ``.jpeg`` and ``.png``,
    matched case-insensitively) are shuffled, the first ``split_ratio``
    fraction goes to the train folders and the remainder to the val folders.
    For every image, the label file ``<stem>.txt`` from ``src_lbl_dir`` is
    copied alongside it when it exists; images without a label file are still
    copied.

    All four destination folders are deleted and recreated, so any previous
    content in them is lost.

    Args:
        src_img_dir: Folder containing the source images.
        src_lbl_dir: Folder containing the source label files.
        dst_train_img_dir: Destination folder for training images.
        dst_train_lbl_dir: Destination folder for training labels.
        dst_val_img_dir: Destination folder for validation images.
        dst_val_lbl_dir: Destination folder for validation labels.
        split_ratio: Fraction of images assigned to the train set (0..1).
        seed: Optional seed for a reproducible split. When ``None`` the
            global ``random`` state is used.

    Raises:
        ValueError: If ``split_ratio`` is outside the range 0..1.
        OSError: If ``src_img_dir`` cannot be listed (raised before any
            destination folder is touched).
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    image_exts = ('.jpg', '.jpeg', '.png')

    # List the source first: a wrong source path must fail before the existing
    # destination data is deleted. Sorting makes a seeded shuffle reproducible
    # regardless of the order in which the OS lists the directory.
    img_files = sorted(
        f for f in os.listdir(src_img_dir) if f.lower().endswith(image_exts)
    )
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(img_files)

    split_idx = int(len(img_files) * split_ratio)
    train_files = img_files[:split_idx]
    val_files = img_files[split_idx:]

    # Remove every destination first and only then recreate them, so that
    # nested destination paths cannot delete a freshly created folder.
    dst_dirs = (dst_train_img_dir, dst_train_lbl_dir, dst_val_img_dir, dst_val_lbl_dir)
    for dst_dir in dst_dirs:
        if os.path.exists(dst_dir):
            shutil.rmtree(dst_dir)
    for dst_dir in dst_dirs:
        os.makedirs(dst_dir, exist_ok=True)

    def copy_files(file_list, img_dst, lbl_dst):
        """Copy each image and, when present, its label file."""
        for img_file in file_list:
            label_name = os.path.splitext(img_file)[0] + '.txt'
            shutil.copy2(os.path.join(src_img_dir, img_file), os.path.join(img_dst, img_file))
            lbl_src_path = os.path.join(src_lbl_dir, label_name)
            if os.path.exists(lbl_src_path):
                shutil.copy2(lbl_src_path, os.path.join(lbl_dst, label_name))

    copy_files(train_files, dst_train_img_dir, dst_train_lbl_dir)
    copy_files(val_files, dst_val_img_dir, dst_val_lbl_dir)
    print(f"Copied {len(train_files)} images to train and {len(val_files)} images to val folders.")

def split_and_copy_all_processed(processed_root, dst_model_root, split_ratio=0.9, sample_ratio=0.2, seed=None):
    """
    Sample, split and copy every augmentation folder under ``processed_root``.

    Each sub-folder of ``processed_root`` that contains both an ``images`` and
    a ``labels`` directory is treated as one augmentation type. A random
    sample of its images (``.jpg``, ``.jpeg``, ``.png``, case-insensitive) is
    taken, split into train/val by ``split_ratio`` and copied, together with
    the matching ``<stem>.txt`` label files when they exist, to
    ``<dst_model_root>/train/<name>/{images,labels}`` and
    ``<dst_model_root>/val/<name>/{images,labels}``. ``<name>`` is the folder
    name with a leading ``train-`` removed.

    The destination folders of every processed augmentation are deleted and
    recreated. Augmentation folders that contain no images are skipped.

    Args:
        processed_root: Folder holding one sub-folder per augmentation type.
        dst_model_root: Root folder receiving the ``train`` and ``val`` trees.
        split_ratio: Fraction of the sampled images assigned to train (0..1).
        sample_ratio: Fraction of each folder's images to sample. At least one
            image and at most all images of a folder are taken.
        seed: Optional seed for reproducible sampling. When ``None`` the
            global ``random`` state is used.

    Raises:
        ValueError: If ``split_ratio`` is outside the range 0..1.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    prefix = 'train-'
    image_exts = ('.jpg', '.jpeg', '.png')
    rng = random if seed is None else random.Random(seed)

    def reset_dir(path):
        """Delete ``path`` if present and create it empty."""
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path, exist_ok=True)

    def copy_files(file_list, img_src, lbl_src, img_dst, lbl_dst):
        """Rebuild the destination folders and copy images plus labels."""
        reset_dir(img_dst)
        reset_dir(lbl_dst)
        for img_file in file_list:
            label_name = os.path.splitext(img_file)[0] + '.txt'
            shutil.copy2(os.path.join(img_src, img_file), os.path.join(img_dst, img_file))
            lbl_src_path = os.path.join(lbl_src, label_name)
            if os.path.exists(lbl_src_path):
                shutil.copy2(lbl_src_path, os.path.join(lbl_dst, label_name))

    # Sorted listings keep a seeded run independent of the OS listing order.
    for aug_type in sorted(os.listdir(processed_root)):
        aug_img_dir = os.path.join(processed_root, aug_type, 'images')
        aug_lbl_dir = os.path.join(processed_root, aug_type, 'labels')
        if not (os.path.isdir(aug_img_dir) and os.path.isdir(aug_lbl_dir)):
            continue

        img_files = sorted(
            f for f in os.listdir(aug_img_dir) if f.lower().endswith(image_exts)
        )
        if not img_files:
            # Sampling from an empty folder would raise; there is nothing to copy.
            print(f"Skipping augmentation: {aug_type} | no images found")
            continue

        # Strip only a leading 'train-'; the same text elsewhere in the name is kept.
        dst_folder = aug_type[len(prefix):] if aug_type.startswith(prefix) else aug_type
        dst_train_img = os.path.join(dst_model_root, 'train', dst_folder, 'images')
        dst_train_lbl = os.path.join(dst_model_root, 'train', dst_folder, 'labels')
        dst_val_img = os.path.join(dst_model_root, 'val', dst_folder, 'images')
        dst_val_lbl = os.path.join(dst_model_root, 'val', dst_folder, 'labels')

        # Clamp to [1, len(img_files)] so tiny folders and ratios above 1 both work.
        sample_size = min(len(img_files), max(1, int(len(img_files) * sample_ratio)))
        # sample() already returns the picks in random order, so the slices
        # below form a random train/val split without a further shuffle.
        sampled_files = rng.sample(img_files, sample_size)

        split_idx = int(len(sampled_files) * split_ratio)
        train_files = sampled_files[:split_idx]
        val_files = sampled_files[split_idx:]

        copy_files(train_files, aug_img_dir, aug_lbl_dir, dst_train_img, dst_train_lbl)
        copy_files(val_files, aug_img_dir, aug_lbl_dir, dst_val_img, dst_val_lbl)
        print(f"Processed augmentation: {dst_folder} | Train: {len(train_files)} | Val: {len(val_files)}")

# Split the raw training data 90/10 into the model's train/val "raw" folders.
split_and_copy_dataset(
    split_ratio=0.9,
    src_img_dir='../data/raw/train/images',
    src_lbl_dir='../data/raw/train/labels',
    dst_train_img_dir='../data/model/train/raw/images',
    dst_val_img_dir='../data/model/val/raw/images',
    dst_train_lbl_dir='../data/model/train/raw/labels',
    dst_val_lbl_dir='../data/model/val/raw/labels',
)

# Sample 20% of each augmentation folder under ../data/processed and split
# every sample 90/10 into ../data/model/train/<aug> and ../data/model/val/<aug>.
split_and_copy_all_processed(
    processed_root='../data/processed',
    dst_model_root='../data/model',
    split_ratio=0.9,
    sample_ratio=0.2,
)


## Visualizing Object Detection Results on Test Images
This code snippet demonstrates how to visualize YOLOv8 model predictions alongside ground truth annotations on a sample of test images.

**Workflow**
1.  **Load Pretrained Model**
The YOLOv8 model is loaded from saved weights located in the project directory (best.pt).

2. **Prepare Test Data Paths**
Paths to test images and their corresponding label files (in YOLO format) are specified.

3. **Random Sampling of Test Images**
A configurable number (num_images) of test images are randomly selected to display.

4. **Color Coding for Bounding Boxes**
A predefined set of RGB colors assigns unique colors to each object class for easy distinction during visualization.

**Benefits**
Allows side-by-side comparison of model output with ground truth to assess detection quality visually.

Color-coded bounding boxes enable easy distinction between different object classes.

Random sampling helps to get an unbiased view of model performance over the test set.

**Usage**
Modify num_images to control how many test images are visualized in each run. Ensure test image and label directories are correctly set to the dataset locations.

In [None]:
import random
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import os
from ultralytics import YOLO

# Load the YOLO model from the saved weights file
model = YOLO('../runs/train/motorbike_yolov8s/weights/best.pt')

# Path to test images and label files
test_img_dir = '../data/raw/test/images'
test_label_dir = '../data/raw/test/labels'

# Configure number of images to display
num_images = 10

# Match extensions case-insensitively (e.g. ".JPG"), the same way the
# dataset-splitting functions filter their image files.
img_files = sorted(
    f for f in os.listdir(test_img_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
num_images = min(num_images, len(img_files))  # Avoid exceeding available images
selected_files = random.sample(img_files, num_images)

# Outline colour (RGB) used for each class id when drawing boxes
colors = {0: (255, 0, 0), 1: (0, 255, 0), 2: (0, 255, 255), 3: (255, 165, 0), 4: (0, 0, 255)}

def draw_boxes(image_path, boxes, class_ids=None):
    """
    Return an RGB copy of an image with one rectangle outlined per box.

    Args:
        image_path: Path of the image file to open.
        boxes: Iterable of boxes, each passed unchanged to
            ``ImageDraw.rectangle`` (the callers in this notebook supply
            ``(x1, y1, x2, y2)`` pixel coordinates).
        class_ids: Optional sequence parallel to ``boxes``. Each id selects the
            outline colour from the module-level ``colors`` palette. When
            ``None``, every box is drawn in red.

    Returns:
        The annotated ``PIL.Image.Image`` in RGB mode.
    """
    default_color = (255, 0, 0)
    # Colour for class ids missing from the palette. Previously such an id
    # raised KeyError; magenta is not used by any palette entry.
    unknown_class_color = (255, 0, 255)

    img = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(img)
    for i, box in enumerate(boxes):
        if class_ids is None:
            color = default_color
        else:
            color = colors.get(int(class_ids[i]), unknown_class_color)
        draw.rectangle(box, outline=color, width=2)
    return img

def read_label_file(label_file, image_path):
    """
    Read a YOLO-format label file and return pixel-space boxes and class ids.

    Each non-blank line must hold five whitespace-separated fields:
    ``class_id x_center y_center width height``, where the four coordinates
    are fractions of the image size. They are converted to
    ``(x1, y1, x2, y2)`` corner coordinates in pixels using the size of the
    image at ``image_path``.

    Args:
        label_file: Path of the ``.txt`` label file.
        image_path: Path of the image the labels belong to.

    Returns:
        A ``(boxes, class_ids)`` tuple of parallel lists. Both lists are empty
        when the label file does not exist or contains no annotations (the
        dataset-splitting code copies images that have no label file, so such
        images are expected).

    Raises:
        ValueError: If a non-blank line does not have exactly five fields or a
            field cannot be parsed as a number.
    """
    boxes = []
    class_ids = []
    if not os.path.exists(label_file):
        return boxes, class_ids

    with open(label_file, 'r') as f:
        rows = [(line_no, line.split()) for line_no, line in enumerate(f, start=1)]
    # Blank lines (e.g. a trailing empty line) carry no annotation.
    rows = [(line_no, parts) for line_no, parts in rows if parts]
    if not rows:
        return boxes, class_ids

    # The image size is the same for every line: read it once and close the file.
    with Image.open(image_path) as img:
        img_w, img_h = img.size

    for line_no, parts in rows:
        if len(parts) != 5:
            raise ValueError(
                f"{label_file}:{line_no}: expected 5 fields "
                f"(class x_center y_center width height), got {len(parts)}"
            )
        class_id = int(parts[0])
        x_center, y_center, w, h = map(float, parts[1:])
        x1 = (x_center - w/2) * img_w
        y1 = (y_center - h/2) * img_h
        x2 = (x_center + w/2) * img_w
        y2 = (y_center + h/2) * img_h
        boxes.append((x1, y1, x2, y2))
        class_ids.append(class_id)
    return boxes, class_ids

# Render one row per sampled test image: original | ground truth | prediction.
if num_images == 0:
    # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
    print(f"No test images found in {test_img_dir}; nothing to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single row, so the
    # axes[i, j] indexing below also works when only one image is shown.
    fig, axes = plt.subplots(num_images, 3, figsize=(15, num_images*5), squeeze=False)

    for i, img_file in enumerate(selected_files):
        img_path = os.path.join(test_img_dir, img_file)
        label_path = os.path.join(test_label_dir, os.path.splitext(img_file)[0] + '.txt')

        # Original Image
        orig_img = Image.open(img_path).convert("RGB")
        axes[i, 0].imshow(orig_img)
        axes[i, 0].set_title("Original Image")
        axes[i, 0].axis('off')

        # Ground Truth: an image without a label file is shown with no boxes
        # instead of aborting the whole figure.
        if os.path.exists(label_path):
            gt_boxes, gt_classes = read_label_file(label_path, img_path)
        else:
            gt_boxes, gt_classes = [], []
        img_gt = draw_boxes(img_path, gt_boxes, class_ids=gt_classes)
        axes[i, 1].imshow(img_gt)
        axes[i, 1].set_title("Ground Truth")
        axes[i, 1].axis('off')

        # Model Prediction
        results = model(img_path)
        boxes_pred = results[0].boxes.xyxy.cpu().numpy()
        class_ids_pred = results[0].boxes.cls.cpu().numpy().astype(int)
        img_pred = draw_boxes(img_path, boxes_pred, class_ids=class_ids_pred)
        axes[i, 2].imshow(img_pred)
        axes[i, 2].set_title("Model Prediction")
        axes[i, 2].axis('off')

    plt.tight_layout()
    plt.show()
