# Process all annotations

In [None]:
import os
import xml.etree.ElementTree as ET
import logging
import yaml
from importlib import reload

# Reset the logging module first so the configuration below takes effect in Jupyter
reload(logging)

# Every record goes both to a log file on disk and to the notebook output stream
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("path_to_processing_log.txt"),
        logging.StreamHandler(),
    ],
)

def read_class_names(class_names_path):
    """Load the class-id -> class-name mapping from a text file.

    Every non-blank line must look like ``<class_id>: <class_name>``. Only the
    first ``': '`` on a line separates the two fields, so a class name may
    itself contain ``': '``. Blank lines are ignored.

    Args:
        class_names_path: Path to the class list file.

    Returns:
        dict mapping class-id strings to class-name strings, in file order.
        The order matters: ``convert_annotation`` uses the position of a key
        in this mapping as the YOLO class index.

    Raises:
        ValueError: If a non-blank line contains no ``': '`` separator.
    """
    class_mapping = {}
    with open(class_names_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            entry = line.strip()
            if not entry:
                # e.g. an empty line at the end of the file; it names no class
                continue
            class_id, separator, class_name = entry.partition(': ')
            if not separator:
                raise ValueError(
                    f"{class_names_path}:{line_number}: expected '<id>: <name>', got {entry!r}"
                )
            class_mapping[class_id.strip()] = class_name.strip()
    return class_mapping

def convert_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a corner-format box into normalised centre/size format.

    Args:
        bbox: Four values ``(xmin, ymin, xmax, ymax)``.
        img_width: Image width used to normalise the horizontal values.
        img_height: Image height used to normalise the vertical values.

    Returns:
        ``[x_center, y_center, width, height]``, each divided by the matching
        image dimension.
    """
    left, top, right, bottom = bbox
    return [
        (left + right) / (2 * img_width),    # horizontal centre
        (top + bottom) / (2 * img_height),   # vertical centre
        (right - left) / img_width,          # box width
        (bottom - top) / img_height,         # box height
    ]

def convert_annotation(annotation_path, class_mapping, output_dir):
    """Convert one XML annotation file into a YOLO label file.

    Every ``<object>`` whose ``<name>`` is a key of *class_mapping* becomes one
    line ``"<class_index> <x_center> <y_center> <width> <height>"``, where the
    class index is the position of the name in *class_mapping* and the box
    values are normalised by the image size read from ``<size>``. The lines
    are written to ``<output_dir>/<annotation file stem>.txt``. No file is
    written when no object matches.

    Args:
        annotation_path: Path to the XML annotation file.
        class_mapping: Ordered mapping whose keys are the class ids to keep.
        output_dir: Directory for the label file; created when needed.
    """
    root = ET.parse(annotation_path).getroot()

    # The image size is the same for every object in the file, so read it once.
    # A missing or non-positive size cannot be normalised against; skip the
    # file with a warning instead of aborting the whole conversion run.
    size = root.find('size')
    if size is None:
        logging.warning("Skipping %s: no <size> element.", annotation_path)
        return
    img_width = int(size.find('width').text)
    img_height = int(size.find('height').text)
    if img_width <= 0 or img_height <= 0:
        logging.warning("Skipping %s: invalid image size %dx%d.",
                        annotation_path, img_width, img_height)
        return

    # Position of each class id in the mapping is its YOLO class index; build
    # the lookup once instead of scanning the key list for every object.
    class_indices = {class_id: idx for idx, class_id in enumerate(class_mapping)}

    output_lines = []
    for obj in root.iter('object'):
        cls_idx = class_indices.get(obj.find('name').text)
        if cls_idx is None:
            # Class is not part of the requested subset
            continue
        xmlbox = obj.find('bndbox')
        corners = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        bb = convert_bbox_to_yolo(corners, img_width, img_height)
        output_lines.append(f"{cls_idx} {' '.join(map(str, bb))}")

    if output_lines:
        os.makedirs(output_dir, exist_ok=True)
        stem = os.path.splitext(os.path.basename(annotation_path))[0]
        output_file_path = os.path.join(output_dir, stem + '.txt')
        with open(output_file_path, 'w') as file:
            file.write('\n'.join(output_lines))

def process_annotations(annotation_dir, output_dir, class_mapping, valid_subdirs=None, is_validation=False):
    """Convert all XML annotations of one dataset split to YOLO label files.

    Args:
        annotation_dir: For validation, the directory holding the ``.xml``
            files directly; for training, the parent of the subdirectories
            listed in *valid_subdirs*.
        output_dir: Directory that receives the generated label files.
        class_mapping: Class mapping passed through to ``convert_annotation``.
        valid_subdirs: Names of the training subdirectories to process.
            ``None`` (the default) is treated as "no subdirectories".
        is_validation: When true, *annotation_dir* is scanned directly and
            *valid_subdirs* is ignored.
    """
    if is_validation:
        _convert_xml_files_in(annotation_dir, class_mapping, output_dir)
        return

    # `or ()` keeps the documented default (None) from raising TypeError
    for subdir in valid_subdirs or ():
        subdir_path = os.path.join(annotation_dir, subdir)
        if not os.path.isdir(subdir_path):
            logging.warning("Annotation subdirectory not found, skipping: %s", subdir_path)
            continue
        _convert_xml_files_in(subdir_path, class_mapping, output_dir)


def _convert_xml_files_in(directory, class_mapping, output_dir):
    """Run ``convert_annotation`` on every ``.xml`` file directly inside *directory*."""
    for filename in os.listdir(directory):
        if filename.endswith('.xml'):
            convert_annotation(os.path.join(directory, filename), class_mapping, output_dir)
                    
def create_data_yaml(output_dir, class_mapping):
    """Write ``data.yaml`` describing the dataset into *output_dir*.

    The file contains the ``train`` and ``val`` image directories
    (``<output_dir>/images/train`` and ``<output_dir>/images/val``), the number
    of classes ``nc`` and the class ``names`` in mapping order.

    Args:
        output_dir: Dataset root directory; created if it does not exist.
        class_mapping: Ordered mapping whose values are the class names.
    """
    data = {
        'train': os.path.join(output_dir, 'images', 'train'),
        'val': os.path.join(output_dir, 'images', 'val'),
        'nc': len(class_mapping),
        'names': list(class_mapping.values())
    }
    # The label conversion only creates directories when it writes a label
    # file, so the root may not exist yet. An empty string means the current
    # directory, which os.makedirs would reject.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, 'data.yaml'), 'w') as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False)
    logging.info("data.yaml created successfully.")

def main():
    """Convert the DET annotations to YOLO labels and write ``data.yaml``.

    Validation annotations are read from ``Annotations/DET/val``; training
    annotations from the listed ``ILSVRC2014_train_*`` subdirectories of
    ``Annotations/DET/train``. Labels are written to ``labels/val`` and
    ``labels/train`` below the output directory. The ``path_to_*`` strings
    appear to be placeholders for real locations.
    """
    dataset_dir = 'path_to_ILSVRC_2017_DET'
    yolo_output_dir = 'path_to_output_dir'
    class_names_path = 'path_to_object_detection.txt'
    valid_subdirs = [
        'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001',
        'ILSVRC2014_train_0002', 'ILSVRC2014_train_0003', 'ILSVRC2014_train_0004',
        'ILSVRC2014_train_0005', 'ILSVRC2014_train_0006'
    ]

    # Parse the class list once and share it between all three steps so they
    # are guaranteed to use the same class order (and the file is read once).
    class_mapping = read_class_names(class_names_path)

    # Process validation annotations
    val_annotation_dir = os.path.join(dataset_dir, 'Annotations', 'DET', 'val')
    val_output_labels_dir = os.path.join(yolo_output_dir, 'labels', 'val')
    process_annotations(val_annotation_dir, val_output_labels_dir, class_mapping, is_validation=True)

    # Process training annotations
    train_annotation_dir = os.path.join(dataset_dir, 'Annotations', 'DET', 'train')
    train_output_labels_dir = os.path.join(yolo_output_dir, 'labels', 'train')
    process_annotations(train_annotation_dir, train_output_labels_dir, class_mapping,
                        valid_subdirs=valid_subdirs, is_validation=False)

    # Generate data.yaml
    create_data_yaml(yolo_output_dir, class_mapping)

    logging.info("Processing complete.")

# Entry point: run the full conversion when this file/cell is executed directly
if __name__ == "__main__":
    main()

In [None]:
import os

def count_files_in_directory(directory_path, extension='.txt'):
    """Count the files below *directory_path* whose name ends with *extension*.

    The directory is walked recursively with ``os.walk``. The comparison is
    case-sensitive.

    Args:
        directory_path: Root directory to search.
        extension: File-name suffix to match (anything ``str.endswith``
            accepts). Defaults to ``'.txt'``.

    Returns:
        Number of matching files as an int.
    """
    file_count = 0
    for _root, _dirs, filenames in os.walk(directory_path):
        file_count += sum(name.endswith(extension) for name in filenames)
    return file_count

# Directory that holds the generated validation label (.txt) files
resulting_dir_path = 'path_to_new_labels/val'

# Count the .txt files below that directory (recursively) and report the total
total_txt_files = count_files_in_directory(resulting_dir_path)
print(f"Total number of .txt files in '{resulting_dir_path}': {total_txt_files}")


In [None]:
import os

def count_files_in_directory(directory_path, extension='.txt'):
    """Count the files below *directory_path* whose name ends with *extension*.

    The directory is walked recursively with ``os.walk``. The comparison is
    case-sensitive.

    Args:
        directory_path: Root directory to search.
        extension: File-name suffix to match (anything ``str.endswith``
            accepts). Defaults to ``'.txt'``.

    Returns:
        Number of matching files as an int.
    """
    file_count = 0
    for _root, _dirs, filenames in os.walk(directory_path):
        file_count += sum(name.endswith(extension) for name in filenames)
    return file_count

# Directory that holds the generated training label (.txt) files
resulting_dir_path = 'path_to_new_labels/train'

# Count the .txt files below that directory (recursively) and report the total
total_txt_files = count_files_in_directory(resulting_dir_path)
print(f"Total number of .txt files in '{resulting_dir_path}': {total_txt_files}")


# Copy over images

In [None]:
import os
import shutil

def copy_train_images_with_annotations(train_images_dir, annotations_dir, target_dir, valid_subdirs):
    """Copy the training images that have a label file into one flat directory.

    For each name in *valid_subdirs* that is a directory under
    *train_images_dir*, every entry whose base name (without extension) has a
    matching ``<base name>.txt`` in *annotations_dir* is copied to
    *target_dir*. Subdirectories that do not exist are skipped silently.

    Args:
        train_images_dir: Parent directory of the image subdirectories.
        annotations_dir: Directory containing the ``.txt`` label files.
        target_dir: Destination directory; created when missing.
        valid_subdirs: Iterable of subdirectory names to scan.
    """
    os.makedirs(target_dir, exist_ok=True)

    for subdir_name in valid_subdirs:
        source_dir = os.path.join(train_images_dir, subdir_name)
        if not os.path.isdir(source_dir):
            continue

        for entry in os.listdir(source_dir):
            stem, _extension = os.path.splitext(entry)
            label_path = os.path.join(annotations_dir, stem + '.txt')
            # Only images with a corresponding label file are wanted
            if not os.path.exists(label_path):
                continue
            shutil.copy(os.path.join(source_dir, entry), os.path.join(target_dir, entry))

# Source images, generated label files and destination for the training split
train_images_dir = 'path_to_old_train_images'
annotations_dir = 'path_to_new_labels/train'
target_dir = 'path_to_new_images/train'
# Training subdirectories to take images from (same list as used in main()
# for the annotation conversion)
valid_subdirs = [
    'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001',
    'ILSVRC2014_train_0002', 'ILSVRC2014_train_0003', 'ILSVRC2014_train_0004',
    'ILSVRC2014_train_0005', 'ILSVRC2014_train_0006'
]

# Copy only the images for which a label file exists
copy_train_images_with_annotations(train_images_dir, annotations_dir, target_dir, valid_subdirs)

In [None]:
import os
import shutil

def copy_val_images(val_images_dir, target_dir):
    """Copy every regular file from *val_images_dir* into *target_dir*.

    Unlike the training copy, no label file is required for an image to be
    copied. Entries that are not regular files (e.g. subdirectories) are
    skipped.

    Args:
        val_images_dir: Directory containing the validation images.
        target_dir: Destination directory; created when missing.
    """
    os.makedirs(target_dir, exist_ok=True)
    for entry in os.listdir(val_images_dir):
        source_path = os.path.join(val_images_dir, entry)
        if not os.path.isfile(source_path):
            # shutil.copy cannot copy a directory and would abort the whole run
            continue
        shutil.copy(source_path, os.path.join(target_dir, entry))

# Source and destination directories for the validation images
val_images_dir = 'path_to_old_val_images'
target_dir = 'path_to_new_val_images'

# Copy all validation images (no label check is applied for this split)
copy_val_images(val_images_dir, target_dir)


In [None]:
import os

def count_files_in_directory(directory_path, extension='.JPEG'):
    """Count the files below *directory_path* whose name ends with *extension*.

    The directory is walked recursively with ``os.walk``. The comparison is
    case-sensitive, so the default matches ``.JPEG`` but not ``.jpeg``.

    Args:
        directory_path: Root directory to search.
        extension: File-name suffix to match (anything ``str.endswith``
            accepts). Defaults to ``'.JPEG'``.

    Returns:
        Number of matching files as an int.
    """
    file_count = 0
    for _root, _dirs, filenames in os.walk(directory_path):
        file_count += sum(name.endswith(extension) for name in filenames)
    return file_count

# Directory that holds the copied training images (.JPEG files)
resulting_dir_path = 'path_to_new_images/train'

# Count the image files below that directory (recursively) and report the total
total_jpeg_files = count_files_in_directory(resulting_dir_path)
print(f"Total number of .jpeg files in '{resulting_dir_path}': {total_jpeg_files}")


In [None]:
import os

def count_files_in_directory(directory_path, extension='.JPEG'):
    """Count the files below *directory_path* whose name ends with *extension*.

    The directory is walked recursively with ``os.walk``. The comparison is
    case-sensitive, so the default matches ``.JPEG`` but not ``.jpeg``.

    Args:
        directory_path: Root directory to search.
        extension: File-name suffix to match (anything ``str.endswith``
            accepts). Defaults to ``'.JPEG'``.

    Returns:
        Number of matching files as an int.
    """
    file_count = 0
    for _root, _dirs, filenames in os.walk(directory_path):
        file_count += sum(name.endswith(extension) for name in filenames)
    return file_count

# Directory that holds the copied validation images (.JPEG files)
# NOTE(review): the validation copy cell writes to 'path_to_new_val_images';
# confirm that both placeholders are meant to point at the same directory.
resulting_dir_path = 'path_to_new_images/val'

# Count the image files below that directory (recursively) and report the total
total_jpeg_files = count_files_in_directory(resulting_dir_path)
print(f"Total number of .jpeg files in '{resulting_dir_path}': {total_jpeg_files}")


# Plot sample annotations

In [None]:
# IPython magic: select matplotlib's inline backend so figures appear in the notebook output
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import os

def plot_image_with_annotations(image_path, annotation_path):
    """Show an image with the boxes from its label file drawn in red.

    Args:
        image_path: Path to the image file.
        annotation_path: Path to the label file. Each non-blank line holds
            five numbers, ``class_id x_center y_center width height``; the
            four box values are scaled by the image width/height before
            drawing. Blank lines are ignored.
    """
    # The context manager releases the image file handle once the pixels have
    # been handed to matplotlib.
    with Image.open(image_path) as img:
        img_width, img_height = img.width, img.height
        _fig, ax = plt.subplots(1)
        ax.imshow(img)

    # Load and plot the annotations
    with open(annotation_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            _class_id, x_center, y_center, width, height = map(float, fields)
            # Scale the relative values to pixel units
            x_center, y_center, width, height = (x_center * img_width, y_center * img_height,
                                                 width * img_width, height * img_height)
            # Rectangle is anchored at its top-left corner, not its centre
            rect = patches.Rectangle((x_center - width / 2, y_center - height / 2), width, height,
                                     linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
    plt.show()

# Copied training images and their generated label files
images_dir = 'path_to_new_images/train'
annotations_dir = 'path_to_new_labels/train'

# Assuming image filenames without extension are the same as annotation filenames
# Only the first three images (in sorted name order) are plotted
image_files = sorted([f for f in os.listdir(images_dir) if f.endswith('.JPEG')])[:3]  # Adjust extension if needed

for img_name in image_files:
    image_path = os.path.join(images_dir, img_name)
    # Label file shares the image's base name, with a .txt extension
    annotation_path = os.path.join(annotations_dir, os.path.splitext(img_name)[0] + '.txt')
    plot_image_with_annotations(image_path, annotation_path)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import os

# Path to the class list file (one '<id>: <name>' entry per line)
class_names_path = 'path_to_classes_for_object_detection.txt'

# Read class names from the file
def read_class_names(class_names_path):
    """Load a class-index -> class-name mapping from a text file.

    Every non-blank line must look like ``<class_id>: <class_name>``; the id
    part is discarded and the class is keyed by its zero-based position among
    the non-blank lines. Only the first ``': '`` separates the fields, so a
    class name may itself contain ``': '``.

    Args:
        class_names_path: Path to the class list file.

    Returns:
        dict mapping int indices (0, 1, 2, ...) to class-name strings.

    Raises:
        ValueError: If a non-blank line contains no ``': '`` separator.
    """
    class_mapping = {}
    with open(class_names_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            entry = line.strip()
            if not entry:
                # Blank lines name no class and must not consume an index
                continue
            _, separator, class_name = entry.partition(': ')
            if not separator:
                raise ValueError(
                    f"{class_names_path}:{line_number}: expected '<id>: <name>', got {entry!r}"
                )
            class_mapping[len(class_mapping)] = class_name.strip()
    return class_mapping

# Index -> class name lookup used to label the plotted boxes
class_mapping = read_class_names(class_names_path)

def plot_image_with_annotations(image_path, annotation_path, class_mapping):
    """Show an image with its labelled boxes: red rectangles, blue class names.

    Args:
        image_path: Path to the image file.
        annotation_path: Path to the label file. Each non-blank line holds
            five numbers, ``class_id x_center y_center width height``; the
            four box values are scaled by the image width/height before
            drawing. Blank lines are ignored.
        class_mapping: Mapping from int class index to class name. An index
            missing from the mapping is shown as its number instead of
            aborting the plot.
    """
    # The context manager releases the image file handle once the pixels have
    # been handed to matplotlib.
    with Image.open(image_path) as img:
        img_width, img_height = img.width, img.height
        _fig, ax = plt.subplots(1)
        ax.imshow(img)

    with open(annotation_path) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            class_id, x_center, y_center, width, height = map(float, fields)
            # Scale the relative values to pixel units
            x_center, y_center, width, height = (x_center * img_width, y_center * img_height,
                                                 width * img_width, height * img_height)
            # Rectangle is anchored at its top-left corner, not its centre
            left, top = x_center - width / 2, y_center - height / 2
            rect = patches.Rectangle((left, top), width, height,
                                     linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            # Display class name at the box's top-left corner
            class_index = int(class_id)
            label = class_mapping.get(class_index, str(class_index))
            ax.text(left, top, label, color='blue', fontsize=12)

    plt.show()

# Copied training images and their generated label files
images_dir = 'path_to_new_images/train'
annotations_dir = 'path_to_new_labels/train'
# Only the first three images (in sorted name order) are plotted
image_files = sorted([f for f in os.listdir(images_dir) if f.endswith('.JPEG')])[:3]  

for img_name in image_files:
    image_path = os.path.join(images_dir, img_name)
    # Label file shares the image's base name, with a .txt extension
    annotation_path = os.path.join(annotations_dir, os.path.splitext(img_name)[0] + '.txt')
    plot_image_with_annotations(image_path, annotation_path, class_mapping)
