In [20]:
import gdown

file_id = "15hGDLhsx8bLgLcIRD5DhYt5iBxnjNF1M"
output_path = "data/wider_face.zip"  # Specify the output path and filename

# Download the file
gdown.download(f"https://drive.google.com/uc?id={file_id}", output_path, quiet=False)


Downloading...
From (original): https://drive.google.com/uc?id=15hGDLhsx8bLgLcIRD5DhYt5iBxnjNF1M
From (redirected): https://drive.google.com/uc?id=15hGDLhsx8bLgLcIRD5DhYt5iBxnjNF1M&confirm=t&uuid=69ec88d8-6769-446a-95e7-6f29034846ab
To: c:\Users\Hunter Rogers\Desktop\Personal\University of Utah\Deep Learning\DeepLearningProject-1\data\wider_face.zip
100%|██████████| 1.47G/1.47G [01:06<00:00, 22.2MB/s]


'data/wider_face.zip'

In [6]:
def parse_widerface_annotations(annotation_file):
    annotations = {}
    with open(annotation_file, 'r') as f:
        lines = f.readlines()
    i = 0
    while i < len(lines):
        image_path = lines[i].strip()  # First line should be the image path
        try:
            # Attempt to parse the number of faces, skip if it fails
            num_faces = int(lines[i + 1].strip())
            boxes = []
            for j in range(i + 2, i + 2 + num_faces):
                x, y, w, h = map(int, lines[j].strip().split()[:4])
                boxes.append([x, y, w, h])
            annotations[image_path] = boxes
            i += 2 + num_faces  # Move to the next image
        except ValueError:
            # Skip any malformed or unexpected lines
            print(f"Skipping line {i} due to parsing error: {lines[i]}")
            i += 1
    return annotations

# Usage with corrected file path

annotations = parse_widerface_annotations(r'data/wider_face_split/wider_face_split/wider_face_train_bbx_gt.txt')


Skipping line 10423 due to parsing error: 0 0 0 0 0 0 0 0 0 0 

Skipping line 86538 due to parsing error: 0 0 0 0 0 0 0 0 0 0 

Skipping line 133393 due to parsing error: 0 0 0 0 0 0 0 0 0 0 

Skipping line 145713 due to parsing error: 0 0 0 0 0 0 0 0 0 0 



In [8]:
import os
from PIL import Image

# Example: Load an image and its bounding boxes
image_dir = "data/wider_face/WIDER_train/images"
image_name = "0--Parade/0_Parade_marchingband_1_849.jpg"  # Example image path from annotations
image_path = os.path.join(image_dir, image_name)

# Load image and bounding boxes
image = Image.open(image_path)
boxes = annotations[image_name]  # Bounding boxes for this image

print("Image size:", image.size)
print("Bounding boxes:", boxes)


Image size: (1024, 1385)
Bounding boxes: [[449, 330, 122, 149]]


In [12]:
from PIL import Image, ImageDraw

def display_image_with_boxes(image_path, boxes):
    # Open the image
    image = Image.open(image_path).convert("RGB")
    draw = ImageDraw.Draw(image)
    
    # Draw each bounding box in yellow
    for box in boxes:
        x, y, w, h = box  # Coordinates for each bounding box
        draw.rectangle([x, y, x + w, y + h], outline="yellow", width=3)
    
    # Display the image
    image.show()

# Example usage
image_name = "6--Funeral/6_Funeral_Funeral_6_66.jpg"  # Replace with your example image
image_path = os.path.join(image_dir, image_name)

if image_name in annotations:
    boxes = annotations[image_name]
    display_image_with_boxes(image_path, boxes)
else:
    print(f"No annotations found for {image_name}")


In [None]:
import os
import shutil
from PIL import Image
import random

# Define paths
data_dir = 'data/wider_face/WIDER_train/images'  # Adjusted to the base images directory
label_file = 'data/wider_face_split/wider_face_split/wider_face_train_bbx_gt.txt'
organized_data_dir = 'OrganizedData'

# Set up directories
images_dir = os.path.join(organized_data_dir, 'images')
labels_dir = os.path.join(organized_data_dir, 'labels')
os.makedirs(images_dir, exist_ok=True)
os.makedirs(labels_dir, exist_ok=True)
for split in ['train', 'val', 'test']:
    os.makedirs(os.path.join(images_dir, split), exist_ok=True)
    os.makedirs(os.path.join(labels_dir, split), exist_ok=True)

# Build a dictionary to map image names to their full paths
image_path_map = {}
for root, _, files in os.walk(data_dir):
    for file in files:
        if file.endswith('.jpg'):
            image_path_map[file] = os.path.join(root, file)

# Read label data
with open(label_file, 'r') as f:
    lines = f.readlines()

# Process the labels
image_data = []
i = 0
while i < len(lines):
    image_name = lines[i].strip()
    if not image_name.endswith('.jpg'):
        print(f"Skipping invalid image name at line {i}: {image_name}")
        i += 1
        continue

    # Check if the next line contains a valid integer
    try:
        num_faces = int(lines[i + 1].strip())
    except ValueError:
        print(f"Skipping invalid entry at line {i + 1}: {lines[i + 1].strip()}")
        i += 2  # Skip to the next image entry
        continue
    
    boxes = []
    for j in range(num_faces):
        try:
            # Read bounding box data
            box_data = lines[i + 2 + j].strip()
            parts = list(map(int, box_data.split()))
            if len(parts) >= 4:
                x, y, w, h = parts[:4]
                boxes.append([x, y, w, h])
        except Exception as e:
            print(f"Error reading bounding box data at line {i + 2 + j}: {e}")

    image_data.append((image_name, boxes))
    i += 2 + num_faces

# Split data into train, val, test
random.shuffle(image_data)
train_split = int(len(image_data) * 0.7)
val_split = int(len(image_data) * 0.2)
train_data = image_data[:train_split]
val_data = image_data[train_split:train_split + val_split]
test_data = image_data[train_split + val_split:]

# Updated save_data function
def save_data(data, split):
    for image_name, boxes in data:
        # Look up the full image path
        src_image_path = image_path_map.get(os.path.basename(image_name))
        if not src_image_path:
            print(f"Image not found in directory: {image_name}")
            continue
        
        dst_image_path = os.path.join(images_dir, split, os.path.basename(image_name))

        # Copy the image
        try:
            shutil.copy2(src_image_path, dst_image_path)
            print(f"Copied image to {dst_image_path}")
        except Exception as e:
            print(f"Error copying image {src_image_path} to {dst_image_path}: {e}")
            continue
        
        # Open image to get its dimensions
        try:
            with Image.open(src_image_path) as img:
                img_width, img_height = img.size
        except Exception as e:
            print(f"Error opening image {src_image_path}: {e}")
            continue
        
        # Save the label in YOLO format
        label_path = os.path.join(labels_dir, split, os.path.basename(image_name).replace('.jpg', '.txt'))
        try:
            with open(label_path, 'w') as label_file:
                for box in boxes:
                    x, y, w, h = box
                    x_center = (x + w / 2) / img_width
                    y_center = (y + h / 2) / img_height
                    width = w / img_width
                    height = h / img_height
                    label_file.write(f"0 {x_center} {y_center} {width} {height}\n")
                print(f"Label saved to {label_path}")
        except Exception as e:
            print(f"Error writing label file {label_path}: {e}")

# Save each dataset split
save_data(train_data, 'train')
save_data(val_data, 'val')
save_data(test_data, 'test')

print("Data organization complete!")


In [6]:
import os

# Define paths for the images and labels directories
organized_data_dir = 'OrganizedData'
images_dir = os.path.join(organized_data_dir, 'images')
labels_dir = os.path.join(organized_data_dir, 'labels')

# Loop through each split and check for matching label files
splits = ['train', 'val', 'test']
for split in splits:
    print(f"\nChecking {split} split:")

    # Get lists of image and label file names without extensions
    image_files = set(f.split('.')[0] for f in os.listdir(os.path.join(images_dir, split)) if f.endswith('.jpg'))
    label_files = set(f.split('.')[0] for f in os.listdir(os.path.join(labels_dir, split)) if f.endswith('.txt'))

    # Find images without corresponding labels
    images_without_labels = image_files - label_files
    if images_without_labels:
        print(f"Images without labels: {len(images_without_labels)}")
        for img in images_without_labels:
            print(f"  {img}.jpg")
    else:
        print("All images have corresponding labels.")

    # Find labels without corresponding images
    labels_without_images = label_files - image_files
    if labels_without_images:
        print(f"Labels without images: {len(labels_without_images)}")
        for lbl in labels_without_images:
            print(f"  {lbl}.txt")
    else:
        print("All labels have corresponding images.")



Checking train split:
All images have corresponding labels.
All labels have corresponding images.

Checking val split:
All images have corresponding labels.
All labels have corresponding images.

Checking test split:
All images have corresponding labels.
All labels have corresponding images.


In [None]:
import os
from pathlib import Path

# Define paths
dataset_yaml = 'dataset.yaml'  # Path to the dataset YAML file
project_dir = 'runs/train'             # Directory where training runs are saved
model_name = 'yolov5s'                 # Select a model size (e.g., yolov5s, yolov5m, yolov5l, yolov5x)

# Run training
!python yolov5/train.py --img 640 --batch 16 --epochs 100 --data "C:\Users\Hunter Rogers\Desktop\Personal\University of Utah\Deep Learning\DeepLearningProject-1\dataset.yaml" --cfg "yolov5/models/yolov5s.yaml" --weights "yolov5s.pt" --project "runs/train" --name "face_detection" --cache




In [13]:
import os

# Print current working directory
print(os.getcwd())


c:\Users\Hunter Rogers\Desktop\Personal\University of Utah\Deep Learning\DeepLearningProject-1
