In [None]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
import random
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools import mask
from PassionateMix_v2 import *
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from matplotlib.patches import Patch
import webcolors
# Dataset root directory
dataset_path  = '/opt/ml/segmentation/input/data/'

# Annotation file to augment, plus a pycocotools index built from it
json_path = dataset_path + 'mix_9+341_10+264.json'
coco = COCO(json_path)

# Output locations for the augmented images:
#   save_dir      - absolute directory passed to save_merged_image
#   file_name_dir - directory prefix passed to save_merged_image / make_coco_annotation
save_dir = '/opt/ml/segmentation/input/data/augmented/'
file_name_dir = 'augmented/'

In [None]:
# Category id of the class whose objects will be augmented
mask_class = 3

# Category names, taken from the `name` column of the class dictionary CSV
df = pd.read_csv('/opt/ml/segmentation/baseline_code/class_dict.csv')
category_names = list(df.name)

# Raw contents of the JSON annotation file that is going to be augmented
with open(json_path, mode='r') as json_file:
    json_data = json.load(json_file)

In [None]:
# Collect the ids of all images that contain at least one annotation of mask_class.
#
# The real image ids known to `coco` are iterated (in ascending order) instead of
# the positions 0..N-1, so the scan no longer raises KeyError when image ids are
# not exactly 0..N-1. When they are, the resulting list is unchanged.
foreground_images = []

for image_id in tqdm(sorted(coco.getImgIds())):
    ann_ids = coco.getAnnIds(imgIds=image_id)
    anns = coco.loadAnns(ann_ids)

    # A single matching annotation is enough; annotation order is irrelevant here.
    if any(ann['category_id'] == mask_class for ann in anns):
        foreground_images.append(image_id)

In [None]:
# Id of the last image entry in the JSON data
last_images_id = json_data['images'][-1]['id']

# Background candidates: every image known to `coco` that does NOT contain
# mask_class, so that mask_class objects can be pasted onto them.
# Using the real image ids (ascending) fixes the off-by-one of
# `range(last_images_id)`, which silently dropped the last image, and does not
# rely on ids being contiguous. The set makes the membership test O(1).
foreground_id_set = set(foreground_images)
background_images = [
    image_id for image_id in sorted(coco.getImgIds())
    if image_id not in foreground_id_set
]
len(foreground_images)

In [None]:




# Dataset statistics: counts of (super)categories, annotations and images,
# followed by a horizontal bar plot of the number of annotations per category.
categories = json_data['categories']
anns = json_data['annotations']
imgs = json_data['images']
nr_cats = len(categories)
nr_annotations = len(anns)
nr_images = len(imgs)

# Load categories and super categories
cat_names = []
super_cat_names = []
super_cat_ids = {}
for cat_it in categories:
    cat_names.append(cat_it['name'])
    super_cat_name = cat_it['supercategory']
    # Register every supercategory exactly once, even when categories sharing a
    # supercategory are not listed next to each other.
    if super_cat_name not in super_cat_ids:
        super_cat_ids[super_cat_name] = len(super_cat_names)
        super_cat_names.append(super_cat_name)
nr_super_cats = len(super_cat_names)

print('Number of super categories:', nr_super_cats)
print('Number of categories:', nr_cats)
print('Number of annotations:', nr_annotations)
print('Number of images:', nr_images)

# Count annotations per category. Category ids are mapped to their position in
# `categories` explicitly, so the histogram stays aligned with `cat_names` even
# if ids do not run 1..N in list order (id 0 would otherwise wrap to index -1).
cat_index_by_id = {cat_it['id']: pos for pos, cat_it in enumerate(categories)}
cat_histogram = np.zeros(nr_cats,dtype=int)
for ann in anns:
    cat_histogram[cat_index_by_id[ann['category_id']]] += 1

# Initialize the matplotlib figure
f, ax = plt.subplots(figsize=(5,5))

# Convert to DataFrame (NOTE: this rebinds `df`, which previously held the class
# dictionary CSV) and sort by annotation count, largest first.
# Keyword arguments are required by pandas >= 2.0.
df = pd.DataFrame({'Categories': cat_names, 'Number of annotations': cat_histogram})
df = df.sort_values('Number of annotations', ascending=False)

# Plot the histogram on the axes created above
ax.set_title("category distribution of train_all set ")
plot_1 = sns.barplot(x="Number of annotations", y="Categories", data=df, label="Total", color="b", ax=ax)

# Category labeling: restore the original category order
sorted_temp_df = df.sort_index()

# Prepend a row for label 0 (background) so every existing category shifts to label + 1.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): the label text "Backgroud" is misspelled; left unchanged because
# code outside this cell may match on it.
sorted_df = pd.DataFrame(["Backgroud"], columns = ["Categories"])
sorted_df = pd.concat([sorted_df, sorted_temp_df], ignore_index=True)

In [None]:
# Count how many annotations each category id has across all images indexed by `coco`.
# The counters are initialised from the category ids present in the dataset rather
# than a hard-coded 1..10, and the real image ids are iterated instead of the
# positions 0..N-1 (which raised KeyError for non-contiguous ids).
dic_cls = {cat_id: 0 for cat_id in sorted(coco.getCatIds())}
for image_id in tqdm(sorted(coco.getImgIds())):
    for ann in coco.loadAnns(coco.getAnnIds(imgIds=image_id)):
        dic_cls[ann['category_id']] += 1
print(dic_cls)

In [None]:
# Number of images that contain mask_class. random.sample(foreground_images, k)
# is called with this list later, so k must not exceed this value.
len(foreground_images)

In [None]:
# k: number of foreground/background pairs to draw for augmentation.
k = 335

# Fixed seed so the same pairs are drawn on every run.
random.seed(16)

# Draw k entries without replacement from each pool, foreground pool first.
fg_image_idx, bg_image_idx = [
    random.sample(pool, k) for pool in (foreground_images, background_images)
]
print(k, len(fg_image_idx), len(bg_image_idx))

In [None]:
# For every sampled (foreground, background) pair: cut the mask_class object out
# of the foreground image, fit it into the free area of the background image,
# save the merged image and record its annotation in json_data.
# NOTE: this cell mutates json_data in place, so re-running it appends again.
# `total` is passed explicitly because zip() has no length for tqdm to read.
n_pairs = min(len(fg_image_idx), len(bg_image_idx))
for fg_idx, bg_idx in tqdm(zip(fg_image_idx, bg_image_idx), total=n_pairs):
    # Background handling
    bg_masks, bg_images, bg_image_infos = get_image_detail(dataset_path, bg_idx, coco, category_names)
    print('bg_image_infos:',bg_image_infos)

    # Skip this pair when the background mask has no pixel equal to 0, i.e. there
    # is no free space. The check now runs BEFORE find_background_space so the
    # helper is never asked to search a mask without any free area.
    if (bg_masks == 0).sum() == 0:
        continue
    edge, empty_ymax, empty_xmax = find_background_space(bg_masks)

    # Foreground handling
    fg_masks, fg_images, fg_image_infos = get_image_detail(dataset_path, fg_idx, coco, category_names)
    fg, fg_bbox = find_foreground_bbox(fg_masks, fg_images, mask_class)

    # Resize the foreground crop (presumably to fit the free region - confirm in PassionateMix_v2)
    resized_fg, segmentation_mask = resize_foreground_backgroud(fg_masks, fg, fg_bbox, empty_ymax, empty_xmax, edge)

    # Merge background and foreground, then save the result
    merged_image = merge_bg_fg(bg_images, resized_fg, empty_ymax, empty_xmax, edge)
    save_merged_image(save_dir, file_name_dir, merged_image, bg_image_infos, fg_image_infos, mask_class)

    # Update the JSON data: the helper returns the updated json_data and the new
    # annotation, which is then appended to the annotation list here.
    json_data, annotation = make_coco_annotation(file_name_dir, bg_image_infos, fg_image_infos, mask_class, segmentation_mask, json_data)
    json_data['annotations'].append(annotation)

In [None]:
# Once augmentation has finished, write the updated JSON data to a new file
# next to the input annotations.
output_json_path = dataset_path + 'mix_9+341_10+264_3+335.json'
with open(output_json_path, mode='w') as json_out:
    json.dump(json_data, json_out)