In [6]:
import os
import shutil
import json
import cv2
import matplotlib.pyplot as plt
import yaml
import glob
import copy

## 訓練データセットの作成

In [7]:
# 整形後のデータの保存先
os.makedirs("./after_data/train/images", exist_ok=True)
os.makedirs("./after_data/train/labels", exist_ok=True)

# annotationデータのjsonのpathと作成したlabelデータの保存先
json_path = "./before_data/train/_annotations.coco.json"

# jsonファイルを開く
with open(json_path, "r") as f:
    json_load = json.load(f)

In [8]:
# jsonfileのkeyの確認
json_load.keys()

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])

In [9]:
# カテゴリの確認
json_load["categories"]

[{'id': 0, 'name': 'shellfish', 'supercategory': 'none'},
 {'id': 1, 'name': 'Crab', 'supercategory': 'shellfish'},
 {'id': 2, 'name': 'Lobster', 'supercategory': 'shellfish'},
 {'id': 3, 'name': 'Shrimp', 'supercategory': 'shellfish'}]

In [10]:
# 0番目のannotationsの確認
json_load["annotations"][0]

{'id': 0,
 'image_id': 0,
 'category_id': 2,
 'bbox': [179, 164, 94, 108],
 'area': 10152,
 'segmentation': [],
 'iscrowd': 0}

In [11]:
# 0番目の画像情報
json_load["images"][0]

{'id': 0,
 'license': 1,
 'file_name': 'f0a31b9c190e138a_jpg.rf.01424894847982a705e985ba6c48e235.jpg',
 'height': 416,
 'width': 416,
 'date_captured': '2020-07-31T02:42:06+00:00'}

In [19]:
# 整形前のimgデータとjsonデータが入っているフォルダまでのpath
input_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/before_data/train"

# 整形後のimgデータとjsonデータが入るフォルダまでのpath
output_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/after_data/train/"

In [20]:
# 整形前のimageデータのファイルネームの取得する関数
file_names = []
def load_images_from_folder(folder):
  count = 0
  for filename in os.listdir(folder):
        source = os.path.join(folder,filename)
        destination = f"{output_path}images/img{count}.jpg"

        try:
            shutil.copy(source, destination)
            print("File copied successfully.")
        # If source and destination are same
        except shutil.SameFileError:
            print("Source and destination represents the same file.")

        file_names.append(filename)
        count += 1

In [21]:
# ファイルネームを取得
load_images_from_folder(input_path)

File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied 

In [22]:
# image_idを入力として受け取り、ロードされたjsonファイルから関連画像注釈を取得する
def get_img_ann(image_id):
    img_ann = []
    isFound = False
    for ann in json_load['annotations']:
        if ann['image_id'] == image_id:
            img_ann.append(ann)
            isFound = True
    if isFound:
        return img_ann
    else:
        return None

In [23]:
# filename を入力として受け取り、ロードされた JSON から対応する画像データを取得する関数
def get_img(filename):
  for img in json_load['images']:
    if img['file_name'] == filename:
      return img

In [24]:
# バウンディングボックスが書かれたtextファイルを作成
count = 0

for filename in file_names:
  # Extracting image 
  img = get_img(filename)
  img_id = img['id']
  img_w = img['width']
  img_h = img['height']

  # Get Annotations for this image
  img_ann = get_img_ann(img_id)

  if img_ann:
    # Opening file for current image
    file_object = open(f"{output_path}labels/img{count}.txt", "a")

    for ann in img_ann:
      current_category = ann['category_id'] - 1 # As yolo format labels start from 0 
      current_bbox = ann['bbox']
      x = current_bbox[0]
      y = current_bbox[1]
      w = current_bbox[2]
      h = current_bbox[3]
      
      # Finding midpoints
      x_centre = (x + (x+w))/2
      y_centre = (y + (y+h))/2
      
      # Normalization
      x_centre = x_centre / img_w
      y_centre = y_centre / img_h
      w = w / img_w
      h = h / img_h
      
      # Limiting upto fix number of decimal places
      x_centre = format(x_centre, '.6f')
      y_centre = format(y_centre, '.6f')
      w = format(w, '.6f')
      h = format(h, '.6f')
          
      # Writing current object 
      file_object.write(f"{current_category} {x_centre} {y_centre} {w} {h}\n")

    file_object.close()
    count += 1  # This should be outside the if img_ann block.

TypeError: 'NoneType' object is not subscriptable

## 検証データセットの作成

In [25]:
# 整形後のデータの保存先
os.makedirs("./after_data/valid/images", exist_ok=True)
os.makedirs("./after_data/valid/labels", exist_ok=True)

# annotationデータのjsonのpathと作成したlabelデータの保存先
json_path = "./before_data/valid/_annotations.coco.json"

# jsonファイルを開く
with open(json_path, "r") as f:
    json_load = json.load(f)

# 整形前のimgデータとjsonデータが入っているフォルダまでのpath
input_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/before_data/valid"

# 整形後のimgデータとjsonデータが入るフォルダまでのpath
output_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/after_data/valid/"

# yolo用のimageデータの保存先
os.makedirs("C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/after_data/valid/images", exist_ok=True)

In [26]:
# 整形前のimageデータのファイルネームの取得してfile_namesに格納
# そしてoutput_pathのimagesにコピー
file_names = []
load_images_from_folder(input_path)

File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied 

In [27]:
# image_idを入力として受け取り、ロードされたjsonファイルから関連画像注釈を取得する
def get_img_ann(image_id):
    img_ann = []
    isFound = False
    for ann in json_load['annotations']:
        if ann['image_id'] == image_id:
            img_ann.append(ann)
            isFound = True
    if isFound:
        return img_ann
    else:
        return None

In [28]:
# filename を入力として受け取り、ロードされた JSON から対応する画像データを取得する関数
def get_img(filename):
  for img in json_load['images']:
    if img['file_name'] == filename:
      return img

In [29]:
# バウンディングボックスが書かれたtextファイルを作成
count = 0

for filename in file_names:
  # Extracting image 
  img = get_img(filename)
  img_id = img['id']
  img_w = img['width']
  img_h = img['height']

  # Get Annotations for this image
  img_ann = get_img_ann(img_id)

  if img_ann:
    # Opening file for current image
    file_object = open(f"{output_path}labels/img{count}.txt", "a")

    for ann in img_ann:
      current_category = ann['category_id'] - 1 # As yolo format labels start from 0 
      current_bbox = ann['bbox']
      x = current_bbox[0]
      y = current_bbox[1]
      w = current_bbox[2]
      h = current_bbox[3]
      
      # Finding midpoints
      x_centre = (x + (x+w))/2
      y_centre = (y + (y+h))/2
      
      # Normalization
      x_centre = x_centre / img_w
      y_centre = y_centre / img_h
      w = w / img_w
      h = h / img_h
      
      # Limiting upto fix number of decimal places
      x_centre = format(x_centre, '.6f')
      y_centre = format(y_centre, '.6f')
      w = format(w, '.6f')
      h = format(h, '.6f')
          
      # Writing current object 
      file_object.write(f"{current_category} {x_centre} {y_centre} {w} {h}\n")

    file_object.close()
    count += 1  # This should be outside the if img_ann block.

TypeError: 'NoneType' object is not subscriptable

## test_data

In [30]:
# 整形後のデータの保存先
os.makedirs("./after_data/test/images", exist_ok=True)
os.makedirs("./after_data/test/labels", exist_ok=True)

# annotationデータのjsonのpathと作成したlabelデータの保存先
json_path = "./before_data/test/_annotations.coco.json"

# jsonファイルを開く
with open(json_path, "r") as f:
    json_load = json.load(f)

# 整形前のimgデータとjsonデータが入っているフォルダまでのpath
input_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/before_data/test"

# 整形後のimgデータとjsonデータが入るフォルダまでのpath
output_path = "C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/after_data/test/"

# yolo用のimageデータの保存先
os.makedirs("C:/Users/yuuki/Kaggle/object_detection_try/shellfish_v2/after_data/test/images", exist_ok=True)

In [31]:
# 整形前のimageデータのファイルネームの取得してfile_namesに格納
# そしてoutput_pathのimagesにコピー
file_names = []
load_images_from_folder(input_path)

File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied successfully.
File copied 

In [32]:
# image_idを入力として受け取り、ロードされたjsonファイルから関連画像注釈を取得する
def get_img_ann(image_id):
    img_ann = []
    isFound = False
    for ann in json_load['annotations']:
        if ann['image_id'] == image_id:
            img_ann.append(ann)
            isFound = True
    if isFound:
        return img_ann
    else:
        return None

In [33]:
# filename を入力として受け取り、ロードされた JSON から対応する画像データを取得する関数
def get_img(filename):
  for img in json_load['images']:
    if img['file_name'] == filename:
      return img

In [34]:
# バウンディングボックスが書かれたtextファイルを作成
count = 0

for filename in file_names:
  # Extracting image 
  img = get_img(filename)
  img_id = img['id']
  img_w = img['width']
  img_h = img['height']

  # Get Annotations for this image
  img_ann = get_img_ann(img_id)

  if img_ann:
    # Opening file for current image
    file_object = open(f"{output_path}labels/img{count}.txt", "a")

    for ann in img_ann:
      current_category = ann['category_id'] - 1 # As yolo format labels start from 0 
      current_bbox = ann['bbox']
      x = current_bbox[0]
      y = current_bbox[1]
      w = current_bbox[2]
      h = current_bbox[3]
      
      # Finding midpoints
      x_centre = (x + (x+w))/2
      y_centre = (y + (y+h))/2
      
      # Normalization
      x_centre = x_centre / img_w
      y_centre = y_centre / img_h
      w = w / img_w
      h = h / img_h
      
      # Limiting upto fix number of decimal places
      x_centre = format(x_centre, '.6f')
      y_centre = format(y_centre, '.6f')
      w = format(w, '.6f')
      h = format(h, '.6f')
          
      # Writing current object 
      file_object.write(f"{current_category} {x_centre} {y_centre} {w} {h}\n")

    file_object.close()
    count += 1  # This should be outside the if img_ann block.

TypeError: 'NoneType' object is not subscriptable