In [3]:
import json
from PIL import Image
import numpy as np
from tqdm import tqdm
import pandas as pd
import os

In [4]:
def _read_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Raw annotation files for each split, kept as plain Python objects.
# The same three files are handed to pycocotools' COCO loader further down.
train_json = _read_json('/opt/ml/input/data/train.json')
val_json = _read_json('/opt/ml/input/data/val.json')
test_json = _read_json('/opt/ml/input/data/test.json')


In [65]:
# Create the output directory layout: one images/ and one annotations/ folder
# per split.
# - exist_ok=True makes the cell safe to re-run and removes the
#   check-then-create race of `if not exists: makedirs`.
# - The 'test' folders are included because the test-split export cell writes
#   into images/test and annotations/test.
for subdir in ('images', 'annotations'):
    for split in ('train', 'val', 'test'):
        os.makedirs(os.path.join('/opt/ml/mmseg_dataset', subdir, split), exist_ok=True)

In [7]:
from pycocotools.coco import COCO

def get_classname(classID, cats):
    """Return the name of the category whose ``'id'`` equals ``classID``.

    Args:
        classID: Category id to look up.
        cats: Sequence of category dicts, each with ``'id'`` and ``'name'`` keys.

    Returns:
        The ``'name'`` of the first matching category, or the string ``"None"``
        (not the ``None`` object) when no category matches.
    """
    matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
    return next(matching_names, "None")

# Lookup table from class label to the integer pixel value written into the
# masks. A label's position in the list is its pixel value (Background = 0).
category_names = dict(zip(
    ['Background', 'General trash', 'Paper', 'Paper pack', 'Metal', 'Glass',
     'Plastic', 'Styrofoam', 'Plastic bag', 'Battery', 'Clothing'],
    np.arange(11),
))

category_names



{'Background': 0,
 'General trash': 1,
 'Paper': 2,
 'Paper pack': 3,
 'Metal': 4,
 'Glass': 5,
 'Plastic': 6,
 'Styrofoam': 7,
 'Plastic bag': 8,
 'Battery': 9,
 'Clothing': 10}

In [72]:
# Export the train split: one JPEG per image plus a single-channel PNG mask
# whose pixel values are the class ids from `category_names`.
dataset_path = '/opt/ml/input/data'
image_out_dir = '/opt/ml/mmseg_dataset/images/train'
mask_out_dir = '/opt/ml/mmseg_dataset/annotations/train'
# No-op when the directories already exist; lets this cell run on its own.
os.makedirs(image_out_dir, exist_ok=True)
os.makedirs(mask_out_dir, exist_ok=True)

coco = COCO('/opt/ml/input/data/train.json')

# The category list is identical for every image, so load it once up front.
cats = coco.loadCats(coco.getCatIds())

# Iterate over the image ids actually present in the annotation file instead
# of assuming they are exactly 0..N-1.
for image_id in tqdm(coco.getImgIds()):
    image_infos = coco.loadImgs([image_id])[0]

    # Flatten "dir/name.jpg" into "dir-name" so all outputs share one folder.
    new_fname = image_infos['file_name'].replace('/', '-').replace('.jpg', '')

    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_infos['id']))

    # masks: 2D array of shape (height, width); each pixel holds a class id.
    # Background = 0, General trash = 1, ..., Clothing = 10.
    # uint8 makes Pillow write a plain 8-bit grayscale ("L") PNG; a signed
    # int8 array would be mapped to the 32-bit integer mode "I" instead.
    masks = np.zeros((image_infos["height"], image_infos["width"]), dtype=np.uint8)

    # Paint the largest annotations first so that smaller objects, drawn
    # later, stay visible wherever they overlap a larger one.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        masks[coco.annToMask(ann) == 1] = category_names[className]

    # Load the image with PIL and write it to the output folder; the context
    # manager closes the source file handle right away.
    # NOTE(review): this decodes and re-encodes the JPEG (lossy) — confirm
    # that re-encoding rather than copying the file is intended.
    with Image.open(os.path.join(dataset_path, image_infos['file_name'])) as image:
        image.save(os.path.join(image_out_dir, new_fname + '.jpg'))
    Image.fromarray(masks).save(os.path.join(mask_out_dir, new_fname + '.png'))


loading annotations into memory...
Done (t=3.46s)
creating index...
index created!


100%|██████████| 2617/2617 [01:17<00:00, 33.88it/s]


In [73]:
# Export the validation split: one JPEG per image plus a single-channel PNG
# mask whose pixel values are the class ids from `category_names`.
dataset_path = '/opt/ml/input/data'
image_out_dir = '/opt/ml/mmseg_dataset/images/val'
mask_out_dir = '/opt/ml/mmseg_dataset/annotations/val'
# No-op when the directories already exist; lets this cell run on its own.
os.makedirs(image_out_dir, exist_ok=True)
os.makedirs(mask_out_dir, exist_ok=True)

coco = COCO('/opt/ml/input/data/val.json')

# The category list is identical for every image, so load it once up front.
cats = coco.loadCats(coco.getCatIds())

# Iterate over the image ids actually present in the annotation file instead
# of assuming they are exactly 0..N-1.
for image_id in tqdm(coco.getImgIds()):
    image_infos = coco.loadImgs([image_id])[0]

    # Flatten "dir/name.jpg" into "dir-name" so all outputs share one folder.
    new_fname = image_infos['file_name'].replace('/', '-').replace('.jpg', '')

    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_infos['id']))

    # masks: 2D array of shape (height, width); each pixel holds a class id.
    # Background = 0, General trash = 1, ..., Clothing = 10.
    # uint8 makes Pillow write a plain 8-bit grayscale ("L") PNG; a signed
    # int8 array would be mapped to the 32-bit integer mode "I" instead.
    masks = np.zeros((image_infos["height"], image_infos["width"]), dtype=np.uint8)

    # Paint the largest annotations first so that smaller objects, drawn
    # later, stay visible wherever they overlap a larger one.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        masks[coco.annToMask(ann) == 1] = category_names[className]

    # Load the image with PIL and write it to the output folder; the context
    # manager closes the source file handle right away.
    # NOTE(review): this decodes and re-encodes the JPEG (lossy) — confirm
    # that re-encoding rather than copying the file is intended.
    with Image.open(os.path.join(dataset_path, image_infos['file_name'])) as image:
        image.save(os.path.join(image_out_dir, new_fname + '.jpg'))
    Image.fromarray(masks).save(os.path.join(mask_out_dir, new_fname + '.png'))


loading annotations into memory...
Done (t=0.88s)
creating index...
index created!


100%|██████████| 655/655 [00:19<00:00, 33.22it/s]


In [10]:
# Export the test split: one JPEG per image plus an all-background
# placeholder mask of the same size.
dataset_path = '/opt/ml/input/data'
image_out_dir = '/opt/ml/mmseg_dataset/images/test'
mask_out_dir = '/opt/ml/mmseg_dataset/annotations/test'
# Make sure the output directories exist before writing (no-op if they do).
os.makedirs(image_out_dir, exist_ok=True)
os.makedirs(mask_out_dir, exist_ok=True)

coco = COCO('/opt/ml/input/data/test.json')

# Iterate over the image ids actually present in the annotation file instead
# of assuming they are exactly 0..N-1.
for image_id in tqdm(coco.getImgIds()):
    image_infos = coco.loadImgs([image_id])[0]

    # Flatten "dir/name.jpg" into "dir-name" so all outputs share one folder.
    new_fname = image_infos['file_name'].replace('/', '-').replace('.jpg', '')

    # masks: 2D array of shape (height, width). No annotations are painted
    # for this split, so every pixel stays 0 (Background) — presumably
    # because the test file carries no labels; verify against the data.
    # uint8 makes Pillow write a plain 8-bit grayscale ("L") PNG; a signed
    # int8 array would be mapped to the 32-bit integer mode "I" instead.
    masks = np.zeros((image_infos["height"], image_infos["width"]), dtype=np.uint8)

    # Load the image with PIL and write it to the output folder; the context
    # manager closes the source file handle right away.
    # NOTE(review): this decodes and re-encodes the JPEG (lossy) — confirm
    # that re-encoding rather than copying the file is intended.
    with Image.open(os.path.join(dataset_path, image_infos['file_name'])) as image:
        image.save(os.path.join(image_out_dir, new_fname + '.jpg'))
    Image.fromarray(masks).save(os.path.join(mask_out_dir, new_fname + '.png'))


loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


100%|██████████| 624/624 [00:13<00:00, 47.45it/s]


In [79]:
# Sanity check on one exported validation mask: the largest pixel value must
# be a valid class id (the class table tops out at 10).
img = np.array(Image.open('/opt/ml/mmseg_dataset/annotations/val/batch_01_vt-0172.png'))
img.max()

10