In [1]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
import random
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools import mask
from PassionateMix import *

# Dataset root, relative to the current working directory.
dataset_path  = './segmentation/input/data/'

# Annotation file to augment; it is also indexed through the COCO API.
json_path = f'{dataset_path}train.json'
coco = COCO(json_path)

# Output locations for the augmented images.
# NOTE(review): save_dir is an absolute path while dataset_path is relative,
# so both only refer to the same tree when the notebook runs from /opt/ml.
save_dir = '/opt/ml/segmentation/input/data/augmented/'
# Presumably the sub-directory prefix recorded in the generated file names
# (it is passed to the save/annotation helpers) -- confirm in PassionateMix.
file_name_dir = 'augmented/'

loading annotations into memory...
Done (t=3.62s)
creating index...
index created!


In [2]:
# Category id of the object class to augment.
mask_class = 9

# Category names, taken from the `name` column of the class dictionary.
df = pd.read_csv('./segmentation/baseline_code/class_dict.csv')
category_names = df['name'].tolist()

# Raw annotation JSON that will be augmented.
with open(json_path, 'r') as f:
    json_data = json.load(f)

In [3]:
# Collect the id of every image that has at least one annotation of the
# target class (`mask_class`).
#
# The ids come from the COCO index itself instead of range(len(images)), so
# the cell no longer depends on image ids being exactly 0..N-1.
foreground_images = []

for image_id in tqdm(coco.getImgIds()):
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    # Only membership matters here, so the annotations need no ordering.
    if any(ann['category_id'] == mask_class for ann in anns):
        foreground_images.append(image_id)

100%|██████████| 2617/2617 [00:00<00:00, 66090.01it/s]


In [4]:
# Id of the last image record in the JSON data. It is kept because it is a
# notebook-level name, but the candidate list below does not depend on it.
last_images_id = json_data['images'][-1]['id']

# Background candidates: every image id that is NOT in foreground_images,
# i.e. images without the target class, onto which objects will be pasted.
#
# The ids are read from the COCO index rather than range(last_images_id):
# that range dropped the last image id and assumed contiguous ids from 0.
# A set keeps the membership test cheap; sorting keeps ascending id order.
foreground_image_ids = set(foreground_images)
background_images = [
    image_id
    for image_id in sorted(coco.getImgIds())
    if image_id not in foreground_image_ids
]

In [5]:
random.seed(16)

# Upper bound: with 250 or more foreground images, keep a random 250 of them.
if len(foreground_images) >= 250:
    foreground_images = random.sample(foreground_images, 250)

# Doubling an empty list never grows it, so the loop below would spin forever
# when no image contains the target class. Fail loudly instead.
if not foreground_images:
    raise ValueError(f'no image contains mask_class={mask_class}; nothing to augment')

# Lower bound: with fewer than 125 images, repeat the list until it has at
# least 125 entries (at most 248, since the last doubling starts below 125).
while len(foreground_images) < 125:
    foreground_images *= 2

In [6]:
# Fixed seed so the foreground/background pairing is reproducible.
random.seed(16)

# k = number of images to synthesize, one per selected foreground image.
k = len(foreground_images)

# Draw k entries from each pool, foreground first and background second.
fg_image_idx, bg_image_idx = (
    random.sample(pool, k) for pool in (foreground_images, background_images)
)
print(k, *map(len, (fg_image_idx, bg_image_idx)))

136 136 136


In [7]:
# For each (foreground, background) pair: cut the `mask_class` object out of
# the foreground image, paste it onto the background image, save the merged
# image and record the new annotation in json_data.
#
# zip() has no length, so tqdm is given an explicit total to render a bar.
n_pairs = min(len(fg_image_idx), len(bg_image_idx))

for fg_idx, bg_idx in tqdm(zip(fg_image_idx, bg_image_idx), total=n_pairs):
    # Background: load the image and its mask.
    bg_masks, bg_images, bg_image_infos = get_image_detail(dataset_path, bg_idx, coco, category_names)

    # Skip images with no background pixel (mask value 0) at all. The check
    # runs before find_background_space so that helper is never asked to
    # search a mask that has no free space.
    if (bg_masks == 0).sum() == 0:
        continue

    # Locate the free region that will receive the pasted object.
    edge, empty_ymax, empty_xmax = find_background_space(bg_masks)

    # Foreground: load the image and extract the target-class object.
    fg_masks, fg_images, fg_image_infos = get_image_detail(dataset_path, fg_idx, coco, category_names)
    fg, fg_bbox = find_foreground_bbox(fg_masks, fg_images, mask_class)

    # Resize the object to fit the free background region.
    resized_fg, segmentation_mask = resize_foreground_backgroud(fg_masks, fg, fg_bbox, empty_ymax, empty_xmax, edge)

    # Merge background and foreground, then write the result to disk.
    merged_image = merge_bg_fg(bg_images, resized_fg, empty_ymax, empty_xmax, edge)
    save_merged_image(save_dir, file_name_dir, merged_image, bg_image_infos)

    # Register the new annotation for the pasted object in the JSON data.
    json_data, annotation = make_coco_annotation(file_name_dir, bg_image_infos, segmentation_mask, mask_class, json_data)
    json_data['annotations'].append(annotation)

136it [01:26,  1.68it/s]


In [None]:
# Once augmentation has finished, write the extended annotation data next to
# the original dataset files.
output_json_path = dataset_path + 'passionatemix.json'

with open(output_json_path, 'w') as outfile:
    json.dump(json_data, outfile)