This notebook translated the COCO 1.0 JSON file export from CVAT into YOLO .txt files after annotating images on the CVAT website.

In [1]:
# Let's start by loading the JSON file and examining its structure
import json

input_directory = '/Users/eric/Desktop/2-Career/Projects/ObjectDetection/coco_to_yolo'
# Load the JSON content
json_path = f'{input_directory}/61_200.json'
with open(json_path, 'r') as file:
    json_data = json.load(file)

# Let's check the keys at the top level of the JSON structure, and the first image with an annotation
first_annotation = json_data['annotations'][0]
first_image_with_annotations = int(first_annotation['image_id'])

json_data.keys()
for key in json_data.keys():
    if isinstance(json_data[key], list):
        print(json_data[key][0])
    else:
        print(json_data[key])

json_data['images'][first_image_with_annotations - 1], first_annotation



{'name': '', 'id': 0, 'url': ''}
{'contributor': '', 'date_created': '', 'description': '', 'url': '', 'version': '', 'year': ''}
{'id': 1, 'name': 'license plate', 'supercategory': ''}
{'id': 1, 'width': 3840, 'height': 2160, 'file_name': '100-00000.png', 'license': 0, 'flickr_url': '', 'coco_url': '', 'date_captured': 0}
{'id': 1, 'image_id': 19, 'category_id': 1, 'segmentation': [], 'area': 5352.394699999999, 'bbox': [171.13, 881.1, 82.97, 64.51], 'iscrowd': 0, 'attributes': {'occluded': False, 'rotation': 0.0}}


({'id': 19,
  'width': 3840,
  'height': 2160,
  'file_name': '101-00006.png',
  'license': 0,
  'flickr_url': '',
  'coco_url': '',
  'date_captured': 0},
 {'id': 1,
  'image_id': 19,
  'category_id': 1,
  'segmentation': [],
  'area': 5352.394699999999,
  'bbox': [171.13, 881.1, 82.97, 64.51],
  'iscrowd': 0,
  'attributes': {'occluded': False, 'rotation': 0.0}})

In [2]:
import os

# Define the directory for saving the .txt files
output_labels_dir = f'{input_directory}/labels'
output_images_dir = f'{input_directory}/images'
os.makedirs(output_labels_dir, exist_ok=True)
os.makedirs(output_images_dir, exist_ok=True)

def normalize_coordinates(points, img_width, img_height):
    """
    Normalize the coordinates by dividing by the image width and height.
    The points list is assumed to be in the format [x1, y1, x2, y2, ...].
    """
    normalized_points = []
    for i, point in enumerate(points):
        normalized_point = point / img_width if i % 2 == 0 else point / img_height
        normalized_point = round(normalized_point, 5)
        normalized_points.append(normalized_point)
    return normalized_points

def process_annotation(ann, img_width, img_height):
    """
    Process a single annotation, returning a formatted string
    containing the class (always 0) followed by the normalized segmentation points.
    If segmentation is not available, use normalized bounding box coordinates.
    """
    if 'segmentation' in ann and ann['segmentation']:
        # Use segmentation if available, and normalize the points
        points = [coord for segment in ann['segmentation'] for coord in segment]
        normalized_points = normalize_coordinates(points, img_width, img_height)
        return '0 ' + ' '.join(map(str, normalized_points))
    else:
        # Fall back to bounding box if segmentation is not available, and normalize
        bbox = ann['bbox']
        # Convert COCO bbox [x,y,width,height] format to [x1,y1,x2,y2] format
        x_tl, y_tl, width, height = bbox
        x_center, y_center = x_tl + width / 2, y_tl + height / 2
        box_points = [x_center, y_center, width, height]
        normalized_box_points = normalize_coordinates(box_points, img_width, img_height)
        return '0 ' + ' '.join(map(str, normalized_box_points))

def create_normalized_annotation_files(images, annotations):
    """
    Create a .txt file for each image with normalized annotations.
    """
    for img in images:
        img_width, img_height = img['width'], img['height']  # Assuming these are always 3840x2160 for 4k images
        # Create a .txt file for each image
        file_path = os.path.join(output_dir, f"{'.'.join(img['file_name'].split('.')[:-1])}.txt")
        with open(file_path, 'w') as file:
            # Write annotations related to the image
            for ann in annotations:
                if ann['image_id'] == img['id']:
                    file.write(process_annotation(ann, img_width, img_height) + '\n')

# Remove previous files to avoid confusion
for filename in sorted(os.listdir(output_dir)):
    if not os.path.isfile(filename):
        continue
    file_path = os.path.join(output_dir, filename)
    os.remove(file_path)

# Process the images and annotations from the JSON file with normalization
create_normalized_annotation_files(json_data['images'], json_data['annotations'])

# Provide the path to the directory containing the updated output files
output_dir


'labels_coco_to_yolo'

In [5]:
import os
images = sorted(os.listdir(output_images_dir))
labels = sorted(os.listdir(output_labels_dir))
images_set = set(images)
labels_set = set(labels)
len(images), len(labels), images[0], labels[0]

(3034, 3034, '100-00000.png', '100-00000.txt')

In [7]:
mutual_file_names = set()

for image in images:
    if image[:-4] + '.txt' not in labels:
        mutual_file_names.add(image)

for label in labels:
    if label[:-4] + '.png' not in images:
        mutual_file_names.add(label)

len(mutual_file_names), mutual_file_names

(0, set())

In [8]:
import os
import random
import shutil

images = sorted(os.listdir('images_coco_to_yolo'))
labels = sorted(os.listdir('labels_coco_to_yolo'))
for image, label in zip(images, labels):    
    # check if it is a file:
    if not os.path.isfile(os.path.join('images_coco_to_yolo', image)):
        continue
    if random.random() <= 0.8:
        # move and replace if it already exists
        shutil.move(os.path.join('images_coco_to_yolo', image), 'dataset/train/images')
        shutil.move(os.path.join('labels_coco_to_yolo', label), 'dataset/train/labels')
    else:
        shutil.move(os.path.join('images_coco_to_yolo', image), 'dataset/valid/images')
        shutil.move(os.path.join('labels_coco_to_yolo', label), 'dataset/valid/labels')



In [9]:
# See the proportion of images with no annotations (True Negatives)
for folder in ['train', 'valid']:
    image_dir = f'dataset/{folder}/images'
    image_filenames = os.listdir(image_dir)
    label_dir = f'dataset/{folder}/labels'
    label_filenames = os.listdir(label_dir)
    num_empty = 0
    for label_filename in label_filenames:
        with open(os.path.join(label_dir, label_filename), 'r') as file:
            contents = file.read()
            if not contents:
                num_empty += 1
    print(f'Percentage of True Negatives in {folder} set: {num_empty / len(label_filenames) * 100:.2f}%')

Percentage of True Negatives in train set: 30.76%
Percentage of True Negatives in valid set: 29.13%
