In [1]:
import json
from PIL import Image
import numpy as np
from tqdm import tqdm
import pandas as pd
import os

### 경로 지정

In [2]:
# Location of the original json annotation files: the root folder and the
# per-split json file names (joined with the root folder when loading).
base_path_ori = '/opt/ml/input/data'
train_json_fname = 'cv_train_0.json'
val_json_fname = 'cv_val_0.json'
test_json_fname = 'test.json'

# Folder where the images are saved in the ADE format
base_path_dst = '/opt/ml/mmseg_dataset' 

### category idx 지정

In [3]:
from pycocotools.coco import COCO

def get_classname(classID, cats):
    """Look up the name of the category whose ``'id'`` equals ``classID``.

    :param classID: category id to search for
    :param cats: sequence of category dicts, each holding ``'id'`` and ``'name'``
    :return: the ``'name'`` of the first matching category, or the string
             ``"None"`` when no category has that id
    """
    matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
    # The fallback is the literal string "None" (not the None object).
    return next(matching_names, "None")

# Class name -> label index used as the pixel value in the masks.
# The position of a name in the list is its index, so Background is 0.
category_names = dict(zip(
    ['Background', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass',
     'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing'],
    np.arange(11),
))
category_names

{'Background': 0,
 'General trash': 1,
 'Paper': 2,
 'Paper pack': 3,
 'Metal': 4,
 'Glass': 5,
 'Plastic': 6,
 'Styrofoam': 7,
 'Plastic bag': 8,
 'Battery': 9,
 'Clothing': 10}

### 이미지 저장 함수

In [4]:
def saveImages(src_path, json_fname, dst_path, phase='train'):
    """
    Export a COCO-style dataset into an ADE-style folder layout.

    For every image listed in the json file, the image is written to
    ``<dst_path>/images/<phase>/<name>.jpg`` and a single-channel label mask to
    ``<dst_path>/annotations/<phase>/<name>.png``. ``<name>`` is the image's
    ``file_name`` with every ``/`` replaced by ``-`` and ``.jpg`` removed.

    :param src_path: parent folder that holds the json file and the images
    :param json_fname: json file name
    :param dst_path: root folder for the newly generated image files
    :param phase: train/val/test; for 'test' the annotations are ignored and
                  every mask is all zeros (background)
    """
    coco = COCO(os.path.join(src_path, json_fname))

    # Output folders. Both live under dst_path: the annotation folder used to be
    # created under the global base_path_dst while the masks were written under
    # dst_path, so saving failed whenever the two differed.
    image_dir = os.path.join(dst_path, 'images', phase)
    label_dir = os.path.join(dst_path, 'annotations', phase)
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    # The category list does not depend on the image, so load it once.
    cats = coco.loadCats(coco.getCatIds())

    # COCO has already parsed the json file; take the image records from its
    # index instead of reading and parsing the file a second time.
    for image_infos in tqdm(coco.loadImgs(coco.getImgIds())):
        new_fname = image_infos['file_name'].replace('/', '-').replace('.jpg', '')

        # masks : 2D array of size (height x width)
        # each pixel holds the label index from category_names
        # Background = 0, General trash = 1, ... , Clothing = 10
        # uint8 makes Pillow write an 8-bit single-channel ("L") PNG.
        # NOTE(review): the previous code cast to signed np.int8 before
        # Image.fromarray; confirm downstream loaders expect 8-bit label PNGs.
        masks = np.zeros((image_infos["height"], image_infos["width"]), dtype=np.uint8)
        if phase != 'test':
            anns = coco.loadAnns(coco.getAnnIds(imgIds=image_infos['id']))
            # Paint the largest areas first so that smaller objects drawn
            # later are not hidden by larger overlapping ones.
            for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
                className = get_classname(ann['category_id'], cats)
                masks[coco.annToMask(ann) == 1] = category_names[className]

        # Load the image with PIL and close the file handle once it is saved.
        with Image.open(os.path.join(src_path, image_infos['file_name'])) as images:
            images.save(os.path.join(image_dir, new_fname + '.jpg'))
        Image.fromarray(masks).save(os.path.join(label_dir, new_fname + '.png'))

In [5]:
# Convert each split in turn: train, then val, then test.
for split_json_fname, split_phase in (
    (train_json_fname, 'train'),
    (val_json_fname, 'val'),
    (test_json_fname, 'test'),
):
    saveImages(base_path_ori, split_json_fname, base_path_dst, split_phase)

loading annotations into memory...
Done (t=2.54s)
creating index...
index created!


100%|██████████| 2379/2379 [01:09<00:00, 34.26it/s]


loading annotations into memory...
Done (t=0.79s)
creating index...
index created!


100%|██████████| 772/772 [00:23<00:00, 33.46it/s]


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


100%|██████████| 624/624 [00:13<00:00, 47.86it/s]
