In [1]:
import os
import glob
import json
import cv2
import datetime
from tqdm import tqdm

In [2]:
# Working-directory setup: locate the project root (identified by its folder
# name) among the components of the current directory, so the notebook can be
# run from the project root or from any folder beneath it.
PROJECT_DIR_NAME = 'synthesis-car-od'

# Normalise Windows separators so the path can be handled as '/'-separated parts.
path = os.getcwd().replace('\\', '/')
path_parts = path.split('/')

# Match a whole path component instead of a substring: str.find() returns -1
# when nothing matches (which silently produced a meaningless path prefix) and
# also matches inside longer folder names such as 'synthesis-car-od-main'.
if PROJECT_DIR_NAME not in path_parts:
    raise ValueError(
        "Run this notebook from inside a '{}' directory; "
        "the current directory is '{}'.".format(PROJECT_DIR_NAME, path)
    )

# Keep everything up to and including the first matching component, and always
# end with a trailing slash.
root_depth = path_parts.index(PROJECT_DIR_NAME) + 1
working_dir = '/'.join(path_parts[:root_depth]) + '/'
working_dir

'd:/Assignments/dacon/synthesis-car-od/'

In [3]:
# Dataset locations: data/ sits under the project root, with train/ and test/
# inside it. The trailing slashes are part of the stored values.
DATA_PATH = os.path.join(working_dir, 'data/')
TRAIN_PATH, TEST_PATH = (
    os.path.join(DATA_PATH, subdir) for subdir in ('train/', 'test/')
)

In [4]:
# COCO "categories" section, built from classes.txt (one class per line).
categories_list = []

with open(os.path.join(DATA_PATH, 'classes.txt'), 'r') as cls_file:
    # The category id is the zero-based line number within classes.txt.
    # NOTE(review): if the leading field of each line is an explicit class id,
    # confirm it equals the line number, because the annotation files'
    # category ids must refer to the same numbering.
    for cls_id, line in enumerate(cls_file):
        line = line.strip()

        # Blank lines would otherwise become categories with an empty name.
        # They are skipped but still consume a line number, so the ids of the
        # real entries stay exactly as before.
        if not line:
            continue

        # category_name: the last comma-separated field of the line
        category_name = line.split(',')[-1]

        categories_list.append({
            'id': cls_id,
            'name': category_name,
        })

# Train Data

In [5]:
# Sorted paths of the training images (*.png) and their annotation files
# (*.txt), both taken from the train folder.
train_imgs, anns = (
    sorted(glob.glob(os.path.join(TRAIN_PATH, pattern)))
    for pattern in ('*.png', '*.txt')
)

In [6]:
# Build the COCO "images" and "annotations" sections for the training split.
ISCROWD = 0

# Images and annotation files are paired purely by their sorted order, so both
# lists must have the same length; zip() would otherwise silently drop the
# surplus entries and could pair images with the wrong annotation file.
if len(train_imgs) != len(anns):
    raise ValueError(
        'Found {} training images but {} annotation files; '
        'they must match one-to-one.'.format(len(train_imgs), len(anns))
    )

train_images_list = []
annotations_list = []
ann_id = 0  # running id, unique across all images

for img_id, (img_path, ann_path) in enumerate(zip(tqdm(train_imgs), anns)):
    # date_captured: the image file's modification time
    date_captured = datetime.datetime.fromtimestamp(
        os.path.getmtime(img_path)
    ).strftime('%Y-%m-%d %H:%M:%S')

    # file_name: the last two path components (parent folder + file name)
    file_name = '/'.join(img_path.replace('\\', '/').split('/')[-2:])

    # width & height; cv2.imread returns None instead of raising when the file
    # cannot be decoded, so fail here with the offending path.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError('Could not read image: {}'.format(img_path))
    img_h, img_w = img.shape[:2]

    # images
    train_images_list.append({
        'id' : img_id,
        'width' : img_w,
        'height' : img_h,
        'file_name' : file_name,
        'date_captured' : date_captured
    })

    with open(ann_path, 'r') as ann_file:
        for line_no, line in enumerate(ann_file, start=1):
            # Split on any whitespace so repeated spaces or tabs do not yield
            # empty fields, and skip blank lines instead of crashing on them.
            fields = line.split()
            if not fields:
                continue

            # category_id: first field, possibly written as a float ("3.0")
            category_id = int(float(fields[0]))

            # The next eight fields are read as four (x, y) corner points.
            coords = [float(value) for value in fields[1:]]
            if len(coords) < 8:
                raise ValueError(
                    '{}, line {}: expected 8 coordinates, got {}'.format(
                        ann_path, line_no, len(coords))
                )

            xs = coords[0:8:2]
            ys = coords[1:8:2]

            # bbox: axis-aligned box enclosing the four corners, stored as
            # [x_min, y_min, width, height]
            x_min = min(xs)
            y_min = min(ys)
            width = max(xs) - x_min
            height = max(ys) - y_min

            # annotations; 'area' is part of the COCO annotation format and is
            # set here to the area of the enclosing box.
            annotations_list.append({
                'id' : ann_id,
                'image_id' : img_id,
                'category_id' : category_id,
                'bbox' : [x_min, y_min, width, height],
                'area' : width * height,
                'iscrowd' : ISCROWD,
            })

            ann_id += 1
    

100%|██████████| 6481/6481 [05:12<00:00, 20.76it/s]


In [7]:
# Assemble the training split into a single COCO-style dictionary.
coco_train = dict(
    categories=categories_list,
    images=train_images_list,
    annotations=annotations_list,
)

In [8]:
# Write the training annotations to data/coco_train.json (2-space indented).
coco_train_path = os.path.join(DATA_PATH, 'coco_train.json')
with open(coco_train_path, 'w') as out_file:
    json.dump(coco_train, out_file, indent=2)

# Test Data

In [9]:
# Sorted paths of the test images (*.png) in the test folder.
test_imgs = glob.glob(os.path.join(TEST_PATH, '*.png'))
test_imgs.sort()

In [10]:
# Build the COCO "images" section for the test split (no annotations exist).
test_images_list = []

for img_id, img_path in enumerate(tqdm(test_imgs)):
    # date_captured: the image file's modification time
    date_captured = datetime.datetime.fromtimestamp(
        os.path.getmtime(img_path)
    ).strftime('%Y-%m-%d %H:%M:%S')

    # file_name: the last two path components (parent folder + file name)
    file_name = '/'.join(img_path.replace('\\', '/').split('/')[-2:])

    # width & height; cv2.imread returns None instead of raising when the file
    # cannot be decoded, so fail here with the offending path.
    img = cv2.imread(img_path)
    if img is None:
        raise ValueError('Could not read image: {}'.format(img_path))
    img_h, img_w = img.shape[:2]

    # images
    test_images_list.append({
        'id' : img_id,
        'width' : img_w,
        'height' : img_h,
        'file_name' : file_name,
        'date_captured' : date_captured
    })
    

100%|██████████| 3400/3400 [02:41<00:00, 21.03it/s]


In [11]:
# Assemble the test split into a COCO-style dictionary; it reuses the training
# categories and carries an empty annotation list.
coco_test = dict(
    categories=categories_list,
    images=test_images_list,
    annotations=[],
)

In [12]:
# Write the test image list to data/coco_test.json (2-space indented).
coco_test_path = os.path.join(DATA_PATH, 'coco_test.json')
with open(coco_test_path, 'w') as out_file:
    json.dump(coco_test, out_file, indent=2)