In [14]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import kwcoco
import os
from pathlib import Path
import shutil
import numpy as np
from tqdm import tqdm

def _category_line(cats, cat_id, indent=""):
  """Format one ``"<id>: <name>"`` line; the name is left blank for ids with no named category."""
  try:
    name = cats[cat_id]["name"]
  except KeyError:
    # Gap in the id sequence (or a category without a name): keep the slot, leave it empty.
    return f"{indent}{cat_id}:\n"
  return f"{indent}{cat_id}: {name}\n"


def _image_size(coco, gid):
  """Return ``(width, height)`` in pixels for image ``gid``."""
  img = coco.imgs[gid]
  width, height = img.get("width"), img.get("height")
  if not width or not height:
    # Size is not recorded in the dataset: decode the image to measure it.
    shape = coco.load_image(gid).shape
    height, width = shape[0], shape[1]
  return width, height


def _bbox_to_yolo(bbox, img_w, img_h):
  """Convert an ``[x, y, w, h]`` pixel box to normalized ``(cx, cy, w, h)``, clipped to the image."""
  x, y, w, h = bbox
  # Clip the corners first. Clamping centre and size independently would leave a box
  # that overhangs the image edge still extending outside [0, 1].
  x1 = min(max(x, 0), img_w)
  y1 = min(max(y, 0), img_h)
  x2 = min(max(x + w, 0), img_w)
  y2 = min(max(y + h, 0), img_h)
  return (
    (x1 + x2) / 2 / img_w,
    (y1 + y2) / 2 / img_h,
    max(0, x2 - x1) / img_w,
    max(0, y2 - y1) / img_h,
  )


def coco2yolo(coco: "kwcoco.coco_dataset.CocoDataset", output_dir=".", dataset_name: str = None):
  """Export a kwcoco dataset as a YOLOv5-style dataset directory.

  Creates ``<output_dir>/<dataset_name>/`` containing:

  * ``categories.txt``      -- one ``"<id>: <name>"`` line per category id
  * ``<dataset_name>.yaml`` -- YOLOv5 training config (``nc`` and ``names``)
  * ``images/``             -- copies of the source images
  * ``labels/``             -- one ``<image stem>.txt`` per copied image, one line
    ``"<category_id> <cx> <cy> <w> <h>"`` per annotation, normalized by image size

  Category ids are written unchanged as the class index, so the ``names`` table spans
  ``0..max(category id)`` and ids with no category get an empty name.

  Args:
    coco: Dataset to export.
    output_dir: Directory under which the dataset directory is created.
    dataset_name: Name of the dataset directory and yaml file. When it is not a
      string, the part of ``coco.fpath``'s file name before the first ``.`` is used.

  Returns:
    None. Images whose source file does not exist are skipped (no copy, no label file).
  """
  # Preparation
  if dataset_name is None or not isinstance(dataset_name, str):
    dataset_name = str(Path(coco.fpath).name.split(".")[0])

  base_save_dir = Path(output_dir) / Path(dataset_name)
  save_images_dir = base_save_dir / "images"
  save_labels_dir = base_save_dir / "labels"

  save_images_dir.mkdir(parents=True, exist_ok=True)
  save_labels_dir.mkdir(parents=True, exist_ok=True)

  print(str(base_save_dir))

  # -1 for a dataset without categories, so both tables below come out empty.
  max_cat_id = max(coco.cats.keys(), default=-1)

  # Write out the categories. The listing starts at 1 unless a category 0 really
  # exists, in which case it is included instead of being dropped.
  first_cat_id = 0 if 0 in coco.cats else 1
  with open(f'{base_save_dir}/categories.txt', mode="w") as f:
    for cat_id in range(first_cat_id, max_cat_id + 1):
      f.write(_category_line(coco.cats, cat_id))

  # Write out the YOLOv5 training config.
  with open(f'{base_save_dir}/{dataset_name}.yaml', mode="w") as f:
    # NOTE: the dataset root below is hard-coded and independent of output_dir.
    f.write("path:  ../__datasets__/train_dataset/\n")
    f.write(f"train: {Path(dataset_name)}/images\n")
    f.write(f"val:   {Path(dataset_name)}/images\n")
    f.write(f"test:  {Path(dataset_name)}/images\n")
    f.write("\n")
    # nc must equal the number of entries listed under names (ids 0..max).
    f.write(f"nc: {max_cat_id + 1}\n")

    f.write("names: \n")
    for cat_id in range(0, max_cat_id + 1):
      f.write(_category_line(coco.cats, cat_id, indent="  "))

  # Write out the annotations.
  for gid in tqdm(list(coco.imgs.keys()), desc="coco2yolo"):
    img_src_path = Path(coco.get_image_fpath(gid))
    if not img_src_path.exists():
      continue

    img_dst_path = save_images_dir / img_src_path.name
    shutil.copyfile(img_src_path, img_dst_path)

    img_w, img_h = _image_size(coco, gid)

    # Use the stem (everything before the last suffix) so "a.b.jpg" maps to
    # "a.b.txt" rather than being truncated to "a.txt".
    label_path = save_labels_dir / f"{img_dst_path.stem}.txt"
    with open(label_path, mode="w") as f:
      for aid in coco.gid_to_aids[gid]:
        ann = coco.index.anns[aid]
        center_x, center_y, width, height = _bbox_to_yolo(ann["bbox"], img_w, img_h)
        f.write(f"{ann['category_id']} {center_x} {center_y} {width} {height}\n")

  return None
  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Each entry pairs a kwcoco annotation file with the image root assigned to the loaded dataset.
datasets = [
  dict(
    json_path="./__datasets__/sentan_dataset/sentan_dataset/sentan_dataset.json",
    img_root="./__datasets__/sentan_dataset/sentan_dataset/",
  ),
]

for dataset in datasets:
  img_root = Path(dataset["img_root"])

  coco = kwcoco.CocoDataset(data=dataset["json_path"])
  coco.img_root = dataset["img_root"]

  # The export is named "<parent directory>_<image directory>" of the image root.
  coco2yolo(
    coco=coco,
    output_dir="./__yolo_datasets__/train_dataset/",
    dataset_name=f"{img_root.parent.name}_{img_root.name}",
  )


__yolo_datasets__/train_dataset/sentan_dataset_sentan_dataset


coco2yolo: 100%|██████████| 9/9 [00:00<00:00, 89.02it/s]
