In [None]:
# This Python code converts a dataset in YOLO format into the COCO format. 
# The YOLO dataset contains images of bottles and the bounding box annotations in the 
# YOLO format. The COCO format is a widely used format for object detection datasets.

# The input and output directories are specified in the code. The categories for 
# the COCO dataset are also defined, with only one category for "bottle". A dictionary for the COCO dataset is initialized with empty values for "info", "licenses", "images", and "annotations".

# The code then loops through each image in the input directory. The dimensions 
# of the image are extracted and added to the COCO dataset as an "image" dictionary, 
# including the file name and an ID. The bounding box annotations for each image are 
# read from a text file with the same name as the image file, and the coordinates are 
# converted to the COCO format. The annotations are added to the COCO dataset as an 
# "annotation" dictionary, including an ID, image ID, category ID, bounding box coordinates,
# area, and an "iscrowd" flag.

# The COCO dataset is saved as a JSON file in the output directory.

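# YOLO class index -> class name mapping (kept as a comment; as bare text
# inside a code cell these lines are not valid Python):
#   0: "crazing"
#   1: "inclusion"
#   2: "patches"
#   3: "pitted_surface"
#   4: "rolled-in"
#   5: "scratches"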


import json
import os
from PIL import Image

# Input (YOLO) and output (COCO) directories -- placeholder paths, edit before running
input_dir, output_dir = '/path/to/yolo/dataset', '/path/to/coco/dataset'

# Single-category dataset: the only class is "bottle" with id 0
categories = [dict(id=0, name="bottle")]

# Skeleton of the COCO document; "images" and "annotations" start out empty
# and "categories" refers to the list defined just above.
coco_dataset = dict(
    info={},
    licenses=[],
    categories=categories,
    images=[],
    annotations=[],
)

# Loop through the images in the input directory.
# The YOLO label files (.txt) sit next to the images in input_dir, so only
# entries with an image extension are opened. sorted() keeps the output order,
# and with it the annotation ids, reproducible: os.listdir() order is arbitrary.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
for image_file in sorted(os.listdir(input_dir)):
    base_name, extension = os.path.splitext(image_file)
    if extension.lower() not in image_extensions:
        continue

    # Load the image and get its dimensions; the context manager closes the
    # underlying file handle as soon as the size has been read.
    image_path = os.path.join(input_dir, image_file)
    with Image.open(image_path) as image:
        width, height = image.size

    # Add the image to the COCO dataset.
    # The image id is the integer before the first '.' of the file name.
    image_id = int(image_file.split('.')[0])
    coco_dataset["images"].append({
        "id": image_id,
        "width": width,
        "height": height,
        "file_name": image_file
    })

    # Load the bounding box annotations for the image. The label file has the
    # image's name with the extension replaced by .txt; an image without a
    # label file is kept in "images" but contributes no annotations.
    label_path = os.path.join(input_dir, base_name + '.txt')
    if not os.path.exists(label_path):
        continue
    with open(label_path) as f:
        annotations = f.readlines()

    # Loop through the annotations and add them to the COCO dataset
    for ann in annotations:
        fields = ann.split()
        if not fields:
            continue  # blank (e.g. trailing) line in the label file

        # fields[0] is the YOLO class index (unused: there is one category);
        # the other four are centre x, centre y, width and height, expressed
        # as fractions of the image size.
        x, y, w, h = map(float, fields[1:])
        x_min, y_min = int((x - w / 2) * width), int((y - h / 2) * height)
        x_max, y_max = int((x + w / 2) * width), int((y + h / 2) * height)
        box_width, box_height = x_max - x_min, y_max - y_min

        coco_dataset["annotations"].append({
            # Ids start at 1: pycocotools' evaluator stores the matched
            # annotation id and tests it for truthiness, so an id of 0 would
            # be read as "no match".
            "id": len(coco_dataset["annotations"]) + 1,
            "image_id": image_id,
            "category_id": 0,
            # COCO boxes are [top-left x, top-left y, width, height] in pixels
            "bbox": [x_min, y_min, box_width, box_height],
            "area": box_width * box_height,
            "iscrowd": 0
        })

# Save the COCO dataset to a JSON file (creating the output directory if needed)
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_dataset, f)


In [2]:
import json
import os
from PIL import Image

# Dataset locations (absolute, machine-specific paths): the YOLO split is
# expected to hold an "images" and a "labels" sub-directory.
input_dir = '/home/andrew/works/shpad_add/Detection-of-welding-seams/dataset_copy/train'
images_dir, labels_dir = (os.path.join(input_dir, sub) for sub in ('images', 'labels'))
output_dir = '/home/andrew/works/shpad_add/Detection-of-welding-seams/coco_data/train'

# COCO categories: each class gets its position in this sequence as its id
categories = [
    {"id": class_id, "name": class_name}
    for class_id, class_name in enumerate(
        ("crazing", "inclusion", "patches", "pitted_surface", "rolled-in", "scratches")
    )
]

# Skeleton of the COCO document; "images" and "annotations" start out empty
coco_dataset = dict(
    info={},
    licenses=[],
    categories=categories,
    images=[],
    annotations=[],
)

# Function to get category ID by name
def get_category_id(name):
    """Return the id of the category called ``name``.

    The lookup scans the module-level ``categories`` list in order and
    returns the ``"id"`` of the first entry whose ``"name"`` equals ``name``.

    Args:
        name: Category name to look for.

    Returns:
        The matching category id, or ``None`` when no category has that name.
    """
    matching_ids = (entry["id"] for entry in categories if entry["name"] == name)
    return next(matching_ids, None)

# Loop through the images in the input directory.
# sorted() keeps the order of "images", and with it the annotation ids,
# reproducible across runs: os.listdir() returns entries in arbitrary order.
for image_file in sorted(os.listdir(images_dir)):
    if not image_file.endswith('.jpg'):
        continue

    # Load the image and get its dimensions; the context manager closes the
    # underlying file handle as soon as the size has been read.
    image_path = os.path.join(images_dir, image_file)
    with Image.open(image_path) as image:
        width, height = image.size

    # Add the image to the COCO dataset.
    # The image id is the integer formed by the first whitespace-separated
    # token of the file name's text before its first '.'.
    image_id = int(image_file.split('.')[0].split()[0])
    coco_dataset["images"].append({
        "id": image_id,
        "width": width,
        "height": height,
        "file_name": image_file
    })

    # Load the bounding box annotations for the image. The label file has the
    # image's name with the extension replaced by .txt; an image without a
    # label file is kept in "images" but contributes no annotations.
    label_file = os.path.splitext(image_file)[0] + '.txt'
    label_path = os.path.join(labels_dir, label_file)
    if not os.path.exists(label_path):
        continue
    with open(label_path) as f:
        annotations = f.readlines()

    # Loop through the annotations and add them to the COCO dataset
    for ann in annotations:
        ann_parts = ann.split()
        if not ann_parts:
            continue  # blank (e.g. trailing) line in the label file

        # YOLO line layout: class index, then centre x, centre y, width and
        # height expressed as fractions of the image size.
        class_id = int(ann_parts[0])
        x, y, w, h = map(float, ann_parts[1:])
        x_min, y_min = int((x - w / 2) * width), int((y - h / 2) * height)
        x_max, y_max = int((x + w / 2) * width), int((y + h / 2) * height)
        box_width, box_height = x_max - x_min, y_max - y_min

        coco_dataset["annotations"].append({
            # Ids start at 1: pycocotools' evaluator stores the matched
            # annotation id and tests it for truthiness, so an id of 0 would
            # be read as "no match".
            "id": len(coco_dataset["annotations"]) + 1,
            "image_id": image_id,
            "category_id": class_id,
            # COCO boxes are [top-left x, top-left y, width, height] in pixels
            "bbox": [x_min, y_min, box_width, box_height],
            "area": box_width * box_height,
            "iscrowd": 0
        })

# Save the COCO dataset to a JSON file
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, 'annotations.json'), 'w') as f:
    json.dump(coco_dataset, f)
