In [1]:
import json


# COCO annotation files whose category ids should be renumbered contiguously.
annotations_files = [
    '/data/students/juan/train/grocery-item-segmentation-yolo/dataset_v4/annotations/instances_train.json',
    # '/data/students/juan/train/grocery-item-segmentation-yolo/dataset_v3/annotations/instances_val.json',
]

for file_idx, file_path in enumerate(annotations_files):
    # Load the annotations JSON file (JSON text is UTF-8, so do not rely on the locale default)
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Collect the category ids actually referenced by annotations, in ascending order
    existing_ids = sorted({item['category_id'] for item in data['annotations']})

    # Map each referenced id onto a contiguous range starting from 1
    id_mapping = {old_id: new_id for new_id, old_id in enumerate(existing_ids, start=1)}

    # Rewrite the id on every annotation
    for item in data['annotations']:
        item['category_id'] = id_mapping[item['category_id']]

    # Keep the category table in sync with the renumbered annotations; otherwise
    # the saved file would pair new annotation ids with stale category ids.
    # Categories that no annotation references are numbered after the used ones
    # so that every id in the table stays unique.
    next_unused_id = len(id_mapping) + 1
    for category in data.get('categories', []):
        if category['id'] in id_mapping:
            category['id'] = id_mapping[category['id']]
        else:
            category['id'] = next_unused_id
            next_unused_id += 1

    # Save the corrected annotations back to a JSON file
    with open(f'/data/students/juan/train/grocery-item-segmentation-yolo/dataset_v4/annotations/corrected_annotations_{file_idx}.json', 'w') as f:
        json.dump(data, f, indent=4)

    print(f"IDs have been corrected and saved in 'corrected_annotations_{file_idx}.json'")


IDs have been corrected and saved in 'corrected_annotations_0.json'


In [2]:
from ultralytics.data.converter import convert_coco

# Convert the COCO annotations found under `labels_dir` to YOLO labels,
# exporting segmentation polygons and leaving the category ids un-remapped
# (cls91to80=False turns off the COCO 91 -> 80 class-id translation).
convert_coco(
    labels_dir='/data/students/juan/train/grocery-item-segmentation-yolo/dataset_v4/annotations',
    use_segments=True,
    cls91to80=False,
)

Annotations /raid/students/juan/train/grocery-item-segmentation-yolo/dataset_v4/annotations/corrected_annotations_0.json: 100%|██████████| 122/122 [00:00<00:00, 9055.77it/s]

COCO data converted successfully.
Results saved to /raid/students/juan/train/grocery-item-segmentation-yolo/coco_converted





In [3]:
import os

def clip_segmentation_labels(label_dir):
    """
    Clamp every coordinate in a directory of YOLO segmentation labels to [0, 1].

    Every entry of ``label_dir`` whose name ends in ``.txt`` is read and then
    overwritten in place. Each kept line is written as the class id followed
    by the clamped coordinates, separated by single spaces. Lines with fewer
    than two whitespace-separated tokens (including blank lines) are dropped,
    and the rewritten file has no trailing newline.

    Args:
        label_dir (str): Path to the directory containing YOLO segmentation label files.
    """
    for entry_name in os.listdir(label_dir):
        if not entry_name.endswith('.txt'):
            continue

        txt_path = os.path.join(label_dir, entry_name)
        with open(txt_path, 'r') as handle:
            raw_lines = handle.readlines()

        rewritten = []
        for raw in raw_lines:
            tokens = raw.strip().split()
            if len(tokens) <= 1:
                # Nothing but (at most) a class id: not a usable label line.
                continue

            class_id, *coord_tokens = tokens
            # Parse everything first so a malformed number aborts before any clamping.
            coords = [float(tok) for tok in coord_tokens]
            # Out-of-range values collapse to the int bounds 0 / 1; in-range floats pass through.
            clamped = [max(0, min(1, value)) for value in coords]
            rewritten.append(" ".join([class_id, *map(str, clamped)]))

        # Replace the file contents with the clamped lines.
        with open(txt_path, 'w') as handle:
            handle.write("\n".join(rewritten))

    print("Segmentation labels clipped successfully.")

# Clip the train split first, then the val split (paths are relative to the
# notebook's working directory).
label_train_directory = "dataset_v3/labels/train/"
label_val_directory = "dataset_v3/labels/val/"

for split_label_directory in (label_train_directory, label_val_directory):
    clip_segmentation_labels(split_label_directory)


Segmentation labels clipped successfully.
Segmentation labels clipped successfully.


In [9]:
import os

# Sanity check: report training images that have no label file with the same stem.
dataset_v3_prefix = '/data/students/juan/train/grocery-item-segmentation-yolo/dataset_v3/'
train_image_file_path = os.path.join(dataset_v3_prefix, 'images', 'train')
train_label_file_path = os.path.join(dataset_v3_prefix, 'labels', 'train')

train_image_files = os.listdir(train_image_file_path)
train_label_files = os.listdir(train_label_file_path)

print(len(train_image_files))
print(len(train_label_files))

print(len(train_image_files) == len(train_label_files))

# Compare by file stem. os.path.splitext removes only the final extension, so
# names containing extra dots (e.g. "img.v2.jpg") keep their full stem instead
# of being truncated at the first dot.
train_image_files = [os.path.splitext(img)[0] for img in train_image_files]
train_label_files = [os.path.splitext(label)[0] for label in train_label_files]

# A set makes each membership test constant-time; the result keeps listing order.
label_stems = set(train_label_files)
images_not_in_labels = [img for img in train_image_files if img not in label_stems]

print(images_not_in_labels)


2460
2457
False
['240461', '090474', '040488']
