# Download the images according to class_names

In [None]:
from coco_dataset import coco_dataset_download as cocod

# --- Download configuration ---
# COCO instances annotation file handed to the downloader.
annotations_path = 'annotations_trainval2017/annotations/instances_train2017.json'
# How many images to request for every class.
images_count = 5000
# Classes to download images for.
class_names = ['person']

# Request the images one class at a time, announcing each class first.
for class_name in class_names:
    print(f"Downloading images for class: {class_name}")
    cocod.coco_dataset_download(
        class_name,
        images_count,
        annotations_path,
    )

print("Download complete for all classes!")


# COCO ==> YOLO format

In [None]:
import os
import json

# Convert the COCO annotations of the already-downloaded images into YOLO
# label files (one "<image stem>.txt" per image, one line per bounding box).

# Define the directories.
# Paths are built with os.path.join so they contain no backslash escape
# sequences and resolve on any operating system, not only on Windows.
downloaded_images_dir = os.path.join('dataset', 'train', 'images')  # Directory where your downloaded images are stored
coco_annotation_file = os.path.join(
    'annotations_trainval2017', 'annotations', 'instances_train2017.json'
)  # Path to your COCO annotations file
# NOTE(review): the validation-split step reads labels from
# dataset/train/labels, so the files written here have to be moved there
# (or this directory changed) before that step runs - confirm the workflow.
yolo_output_dir = 'yolo_labels'  # Directory where YOLO labels will be saved
os.makedirs(yolo_output_dir, exist_ok=True)

# Categories of interest
categories_of_interest = ['person']

# Category mapping from COCO to YOLO (assumed in this case, adjust if needed)
category_mapping = {'person': 0}

# Load COCO annotations (JSON is UTF-8; do not rely on the platform default)
with open(coco_annotation_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Map COCO category id -> category name for the classes of interest
category_ids = {cat['id']: cat['name'] for cat in coco_data['categories'] if cat['name'] in categories_of_interest}

# Build the lookup tables once, instead of rescanning the full image and
# annotation lists for every downloaded file.
# setdefault keeps the first entry seen for a file name, matching a linear
# "first match" search.
images_by_file_name = {}
for img in coco_data['images']:
    images_by_file_name.setdefault(img['file_name'], img)

# Only annotations of the categories of interest are indexed, grouped by the
# image they belong to and kept in their original order.
annotations_by_image_id = {}
for ann in coco_data['annotations']:
    if ann['category_id'] in category_ids:
        annotations_by_image_id.setdefault(ann['image_id'], []).append(ann)

# Get the list of downloaded images
downloaded_images = [img for img in os.listdir(downloaded_images_dir) if img.endswith('.jpg')]

# Process only the downloaded images
for img_file_name in downloaded_images:
    # splitext strips only the final extension, so a name containing extra
    # dots still yields a label file that matches the image stem.
    img_base_name = os.path.splitext(img_file_name)[0]

    # Find the image information in the COCO dataset
    img_info = images_by_file_name.get(img_file_name)

    if img_info is None:
        print(f"Image {img_file_name} not found in COCO dataset.")
        continue

    img_id = img_info['id']
    img_width = img_info['width']
    img_height = img_info['height']

    # Get annotations for this image (already filtered by category)
    anns = annotations_by_image_id.get(img_id, [])

    if not anns:
        print(f"No relevant annotations for {img_file_name}.")
        continue

    # Prepare YOLO label file
    yolo_label_file = os.path.join(yolo_output_dir, f"{img_base_name}.txt")

    # Open label file for writing YOLO annotations
    with open(yolo_label_file, 'w') as f:
        for ann in anns:
            # Bounding box in COCO format (top-left x, top-left y, width, height)
            x, y, width, height = ann['bbox']

            # Convert to YOLO format (normalized center x, center y, width, height)
            x_center = (x + width / 2) / img_width
            y_center = (y + height / 2) / img_height
            norm_width = width / img_width
            norm_height = height / img_height

            # Get the class ID based on category
            class_id = category_mapping[category_ids[ann['category_id']]]

            # Write YOLO format: class_id, x_center, y_center, width, height
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}\n")

    print(f"Converted annotations for image: {img_file_name}")

print("COCO to YOLO conversion completed for downloaded images.")


# Select 500 random images per class for the validation dataset

In [None]:
import os
import random
import shutil

# Move a random sample of image/label pairs from the training folders into
# the validation folders, sampling separately for each class id.

# Define the paths to the images and labels.
# os.path.join avoids backslash escape sequences in the literals and keeps
# the paths valid on every operating system.
images_dir = os.path.join('dataset', 'train', 'images')  # Folder containing all images
labels_dir = os.path.join('dataset', 'train', 'labels')  # Folder containing all labels

# Define the validation directories
val_images_dir = os.path.join('dataset', 'val', 'images')  # Folder to store validation images
val_labels_dir = os.path.join('dataset', 'val', 'labels')  # Folder to store validation labels

# Ensure the validation directories exist
os.makedirs(val_images_dir, exist_ok=True)
os.makedirs(val_labels_dir, exist_ok=True)

# Number of images to select per category
num_images_per_category = 500

# Dictionary to store images by category (class id -> list of image file names)
category_images = {}

# Step 1: Collect images by category based on the labels
for label_file in os.listdir(labels_dir):
    if not label_file.endswith('.txt'):
        continue

    # Image and label share the same stem; swap only the final extension.
    image_filename = os.path.splitext(label_file)[0] + '.jpg'

    # A label without its image cannot be moved as a pair; leaving it out
    # here prevents shutil.move from failing halfway through the split.
    if not os.path.isfile(os.path.join(images_dir, image_filename)):
        print(f"Skipping {label_file}: no matching image {image_filename}.")
        continue

    # Read the label file to determine the category (YOLO format: the class
    # id is the first field of each line).
    with open(os.path.join(labels_dir, label_file), 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # Ignore blank lines instead of raising IndexError
            class_id = int(fields[0])
            category_images.setdefault(class_id, []).append(image_filename)
            break  # Consider only the first class ID for simplicity

# Step 2: Randomly select images from each category and move them to validation folders
for class_id, images in category_images.items():
    if len(images) > num_images_per_category:
        selected_images = random.sample(images, num_images_per_category)
    else:
        selected_images = images  # Take all images if there are not more than the limit

    # Move selected images and their corresponding labels to validation folders
    for image_file in selected_images:
        label_name = os.path.splitext(image_file)[0] + '.txt'

        image_src_path = os.path.join(images_dir, image_file)
        label_src_path = os.path.join(labels_dir, label_name)

        image_dst_path = os.path.join(val_images_dir, image_file)
        label_dst_path = os.path.join(val_labels_dir, label_name)

        # Move the image and label to the validation set
        shutil.move(image_src_path, image_dst_path)
        shutil.move(label_src_path, label_dst_path)

    print(f"Selected {len(selected_images)} images for category {class_id}")

print("Validation set creation completed.")
