In [3]:

import json
import os
from PIL import Image

# Set the paths for the input and output directories
input_dir = '/media/Datacenter_storage/Ji/valdo_dataset/valdo_t2s_cmbOnly_GAN/images/train'
output_dir = '/media/Datacenter_storage/Ji/valdo_dataset/valdo_t2s_cmbOnly_GAN/annotations'

# Define the categories for the COCO dataset
categories = [{"id": 0, "name": "cmb"}]

# Define the COCO dataset dictionary
coco_dataset = {
    "info": {},
    "licenses": [],
    "categories": categories,
    "images": [],
    "annotations": []
}

# Loop through the images in the input directory
for image_file in os.listdir(input_dir):
    
    # Load the image and get its dimensions
    image_path = os.path.join(input_dir, image_file)
    image = Image.open(image_path)
    width, height = image.size
    
    # Add the image to the COCO dataset
    image_dict = {
        "id": int(image_file.split('.')[0]),
        "width": width,
        "height": height,
        "file_name": image_file
    }
    coco_dataset["images"].append(image_dict)
    
    # Load the bounding box annotations for the image
    with open(os.path.join(input_dir, f'{image_file.split(".")[0]}.txt')) as f:
        annotations = f.readlines()
    
    # Loop through the annotations and add them to the COCO dataset
    for ann in annotations:
        x, y, w, h = map(float, ann.strip().split()[1:])
        x_min, y_min = int((x - w / 2) * width), int((y - h / 2) * height)
        x_max, y_max = int((x + w / 2) * width), int((y + h / 2) * height)
        ann_dict = {
            "id": len(coco_dataset["annotations"]),
            "image_id": int(image_file.split('.')[0]),
            "category_id": 0,
            "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
            "area": (x_max - x_min) * (y_max - y_min),
            "iscrowd": 0
        }
        coco_dataset["annotations"].append(ann_dict)

# Save the COCO dataset to a JSON file
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_dataset, f)

ValueError: invalid literal for int() with base 10: 'sub-228_slice_091'

In [None]:
import json
import os
from PIL import Image
from datetime import datetime

def yolo_to_coco(yolo_dir, output_file, class_names_file=None):
    """
    Convert YOLO format dataset to COCO format
    
    Args:
        yolo_dir: Directory containing YOLO format files (images and labels)
        output_file: Output COCO JSON file path
        class_names_file: Optional file containing class names (one per line)
    """
    
    # Initialize COCO format structure
    coco_format = {
        "info": {
            "description": "YOLO to COCO conversion",
            "version": "1.0",
            "year": datetime.now().year,
            "contributor": "YOLO to COCO Converter",
            "date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        },
        "licenses": [
            {
                "id": 1,
                "name": "Unknown",
                "url": ""
            }
        ],
        "categories": [],
        "images": [],
        "annotations": []
    }
    
    # Load class names
    class_names = []
    if class_names_file and os.path.exists(class_names_file):
        with open(class_names_file, 'r') as f:
            class_names = [line.strip() for line in f.readlines()]
    else:
        # If no class names file, we'll collect unique class IDs from labels
        print("No class names file provided. Will use generic names.")
    
    # Find all image files
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    image_files = []
    
    for file in os.listdir(yolo_dir):
        if any(file.lower().endswith(ext) for ext in image_extensions):
            image_files.append(file)
    
    image_id = 1
    annotation_id = 1
    category_ids = set()
    
    for image_file in image_files:
        image_path = os.path.join(yolo_dir, image_file)
        
        # Get image dimensions
        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except Exception as e:
            print(f"Error reading image {image_file}: {e}")
            continue
        
        # Add image info
        image_info = {
            "id": image_id,
            "width": width,
            "height": height,
            "file_name": image_file,
            "license": 1,
            "flickr_url": "",
            "coco_url": "",
            "date_captured": ""
        }
        coco_format["images"].append(image_info)
        
        # Look for corresponding label file
        label_file = os.path.splitext(image_file)[0] + '.txt'
        label_path = os.path.join(yolo_dir, label_file)
        
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                lines = f.readlines()
            
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                
                parts = line.split()
                if len(parts) < 5:
                    continue
                
                class_id = int(parts[0])
                x_center = float(parts[1])
                y_center = float(parts[2])
                bbox_width = float(parts[3])
                bbox_height = float(parts[4])
                
                # Convert YOLO format (normalized) to COCO format (absolute)
                x_center_abs = x_center * width
                y_center_abs = y_center * height
                bbox_width_abs = bbox_width * width
                bbox_height_abs = bbox_height * height
                
                # COCO bbox format: [x_min, y_min, width, height]
                x_min = x_center_abs - bbox_width_abs / 2
                y_min = y_center_abs - bbox_height_abs / 2
                
                # Add annotation
                annotation = {
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": class_id + 1,  # COCO categories start from 1
                    "bbox": [x_min, y_min, bbox_width_abs, bbox_height_abs],
                    "area": bbox_width_abs * bbox_height_abs,
                    "iscrowd": 0,
                    "segmentation": []
                }
                coco_format["annotations"].append(annotation)
                
                category_ids.add(class_id)
                annotation_id += 1
        
        image_id += 1
    
    # Create categories
    for cat_id in sorted(category_ids):
        if cat_id < len(class_names):
            category_name = class_names[cat_id]
        else:
            category_name = f"class_{cat_id}"
        
        category = {
            "id": cat_id + 1,  # COCO categories start from 1
            "name": category_name,
            "supercategory": ""
        }
        coco_format["categories"].append(category)
    
    # Save COCO format JSON
    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=2)
    
    print(f"Conversion complete!")
    print(f"Total images: {len(coco_format['images'])}")
    print(f"Total annotations: {len(coco_format['annotations'])}")
    print(f"Total categories: {len(coco_format['categories'])}")
    print(f"COCO format saved to: {output_file}")

def create_class_names_file(yolo_dir, output_file):
    """
    Helper function to create a class names file from YOLO labels
    """
    class_ids = set()
    
    for file in os.listdir(yolo_dir):
        if file.endswith('.txt'):
            file_path = os.path.join(yolo_dir, file)
            try:
                with open(file_path, 'r') as f:
                    lines = f.readlines()
                
                for line in lines:
                    line = line.strip()
                    if line:
                        parts = line.split()
                        if len(parts) >= 5 and parts[0].isdigit():
                            class_ids.add(int(parts[0]))
            except:
                continue
    
    # Create generic class names
    with open(output_file, 'w') as f:
        for class_id in sorted(class_ids):
            f.write(f"class_{class_id}\n")
    
    print(f"Created class names file with {len(class_ids)} classes: {output_file}")

# Example usage
if __name__ == "__main__":
    # Configuration
    # yolo_directory = "path/to/your/yolo/dataset"  # Directory with images and .txt label files
    # coco_output_file = "annotations.json"        # Output COCO JSON file
    yolo_directory = '/media/Datacenter_storage/Ji/valdo_dataset/valdo_t2s_cmbOnly_GAN/images/train'
    coco_output_file = '/media/Datacenter_storage/Ji/valdo_dataset/valdo_t2s_cmbOnly_GAN/annotations.json'
    class_names_file = "classes.txt"             # Optional: file with class names
    
    # If you don't have a class names file, create one
    # create_class_names_file(yolo_directory, class_names_file)
    
    # Convert YOLO to COCO
    yolo_to_coco(yolo_directory, coco_output_file, class_names_file)

No class names file provided. Will use generic names.


IsADirectoryError: [Errno 21] Is a directory: '/media/Datacenter_storage/Ji/valdo_dataset/valdo_t2s_cmbOnly_GAN/annotations'