In [None]:
import pandas as pd
import numpy as np

import json
from pycocotools.coco import COCO

from tqdm import tqdm
import os
import copy

import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import cv2
import webcolors

### 시각화를 위한 함수 및 변수 초기화

In [None]:
def get_classname(classID, cats):
    """Look up the name of the category whose ``'id'`` equals ``classID``.

    Args:
        classID: Category id to search for.
        cats: Sequence of category dicts, each providing ``'id'`` and ``'name'``.

    Returns:
        The name of the first matching category, or the string ``"None"``
        when no category has that id.
    """
    matching_names = (cat['name'] for cat in cats if cat['id'] == classID)
    return next(matching_names, "None")

def create_trash_label_colormap(class_dict=None):
    """Creates a label colormap used in Trash segmentation.

    Args:
        class_dict: Optional table whose rows are ``(name, r, g, b)``; only its
            ``.values`` attribute is used. Defaults to the module-level
            ``class_colormap`` DataFrame.

    Returns:
        A ``uint8`` array of shape ``(num_rows, 3)`` where row ``i`` holds the
        RGB color of the ``i``-th row of the table.
    """
    if class_dict is None:
        # Resolved at call time so the table may be defined after this function.
        class_dict = class_colormap

    # Size the colormap from the table instead of assuming exactly 11 classes.
    rgb_rows = [(r, g, b) for _, r, g, b in class_dict.values]

    # reshape keeps the (N, 3) shape even when the table is empty.
    return np.array(rgb_rows, dtype=np.uint8).reshape(-1, 3)

def label_to_color_image(label, colormap=None):
    """Adds color defined by the dataset colormap to the label.

    Args:
        label: A 2D array with integer type, storing the segmentation label.
        colormap: Optional ``(num_classes, 3)`` array of colors indexed by
            label value. When omitted, the trash colormap returned by
            ``create_trash_label_colormap()`` is used.

    Returns:
        result: An array of shape ``label.shape + (3,)`` with the colormap's
                dtype. Each element is the color indexed by the corresponding
                element of the input label.

    Raises:
        ValueError: If label is not of rank 2, contains a negative value, or
              contains a value larger than the colormap's maximum entry.
    """
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')

    if colormap is None:
        colormap = create_trash_label_colormap()

    if np.max(label) >= len(colormap):
        raise ValueError('label value too large.')

    # Negative indices would silently wrap around to colors at the end of the map.
    if np.min(label) < 0:
        raise ValueError('label value must be non-negative.')

    return colormap[label]

# Class names; a name's position in this list is the pixel value used for it
# when the label masks are painted.
category_names = [
    "Background",
    "General trash",
    "Paper",
    "Paper pack",
    "Metal",
    "Glass",
    "Plastic",
    "Styrofoam",
    "Plastic bag",
    "Battery",
    "Clothing",
]

# Legend data: each row of the class dictionary CSV is (category, r, g, b).
class_colormap = pd.read_csv("../../../class_dict.csv")

category_and_rgb = [
    [name, (red, green, blue)]
    for name, red, green, blue in class_colormap.values
]

# One legend patch per category, filled and outlined with the category color.
legend_elements = [
    Patch(facecolor=hex_color, edgecolor=hex_color, label=name)
    for name, hex_color in (
        (name, webcolors.rgb_to_hex(rgb)) for name, rgb in category_and_rgb
    )
]

In [None]:
# The CSV was uploaded to Google Drive.
# Its `file_name` column lists the images that are excluded from the training set.
df = pd.read_csv('./ver3/image.csv')

In [None]:
# Preview the first three rows of the exclusion table.
df.head(3)

In [None]:
# File names of the images to exclude, as a plain Python list.
file_name_list = df['file_name'].tolist()

len(file_name_list)

In [None]:
# Directory holding the COCO-format annotation JSON files (train_all.json, train_test.json).
data_dir = '../../../../data'

In [None]:
# Load train_all.json and pull out its top-level COCO sections.
train_json_path = os.path.join(data_dir, 'train_all.json')
with open(train_json_path, 'r', encoding='UTF-8') as train_json:
    train_data = json.load(train_json)

# Dataset-level metadata that is written back unchanged.
info = train_data['info']
licenses = train_data['licenses']
categories = train_data['categories']

# Image and annotation records that the following cells filter and relabel.
train_images = train_data['images']
train_annotations = train_data['annotations']

### 이미지 제외하기

In [None]:
# Drop every image whose file name is in `file_name_list`, then renumber the
# remaining images and their annotations with consecutive ids starting at 0.

# Set membership is O(1); the list was previously searched once per annotation.
excluded_file_names = set(file_name_list)

# Group annotations by their original image id once, preserving their order,
# instead of scanning the whole annotation list for every image.
annotations_by_image_id = {}
for ann in train_annotations:
    annotations_by_image_id.setdefault(ann['image_id'], []).append(ann)

new_train_images = []
new_train_annotations = []

train_annotation_id = 0
train_image_id = 0

cnt = 0  # number of excluded images

for img in tqdm(train_images):

    if img['file_name'] in excluded_file_names:
        cnt += 1
        continue

    # Copy the kept annotations so the originals in train_annotations stay untouched.
    for ann in annotations_by_image_id.get(img['id'], []):
        new_ann = copy.deepcopy(ann)
        new_ann['id'] = train_annotation_id
        new_ann['image_id'] = train_image_id
        new_train_annotations.append(new_ann)
        train_annotation_id += 1

    new_img = copy.deepcopy(img)
    new_img['id'] = train_image_id
    new_train_images.append(new_img)
    train_image_id += 1

print(f'{cnt} images excluded.')
            

### Plastic bag 내부 object의 category가 외부 object의 category와 겹치지 않을 때, category_id 변경해주기

In [None]:
# Annotations that sit inside a plastic bag and therefore need their category changed.
df_anno = pd.read_csv('./ver3/anno.csv')

In [None]:
# Renumbered ids of the images that are listed in anno.csv.
# Build the lookup once instead of converting the column to a list per image.
anno_file_names = set(df_anno.file_name)

img_id = [img['id'] for img in new_train_images if img['file_name'] in anno_file_names]

len(img_id)

In [None]:
# Save the filtered dataset as train_test.json inside data_dir, the same
# location the COCO loader reads it back from.
with open(os.path.join(data_dir, 'train_test.json'), 'w', encoding='UTF-8') as new_json:
    json.dump({'info': info, 'licenses': licenses, 'images': new_train_images,
               'annotations': new_train_annotations, 'categories': categories}, new_json, indent=1)

In [None]:
# Load train_test.json from data_dir through the pycocotools COCO API.
# This handle is the one the "before" visualizations read from.
coco = COCO(os.path.join(data_dir, 'train_test.json'))

#### 바꾸기 전 시각화

In [None]:
# Directory that image file names are resolved against.
dataset_path = '../../../../data/'

# Category metadata is the same for every image, so look it up once.
cat_ids = coco.getCatIds()
cats = coco.loadCats(cat_ids)

for image_id in img_id:

    image_infos = coco.loadImgs(image_id)[0]
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    # Label canvas sized from the image metadata instead of a fixed 512x512.
    masks = np.zeros((image_infos['height'], image_infos['width']))

    # Paint the largest objects first so smaller ones painted later are not hidden.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        pixel_value = category_names.index(className)
        masks[coco.annToMask(ann) == 1] = pixel_value
    masks = masks.astype(np.int8)
    masks = label_to_color_image(masks)

    image_path = os.path.join(dataset_path, image_infos['file_name'])
    images = cv2.imread(image_path)
    if images is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV loads BGR; convert to RGB for matplotlib.
    images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)

    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

    ax1.imshow(images)
    ax1.grid(False)
    ax1.set_title("input image : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.imshow(masks)
    ax2.grid(False)
    ax2.set_title("masks : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)

    # Render this figure now so open figures do not pile up over the loop.
    plt.show()

In [None]:
# For each image listed in anno.csv, relabel every annotation whose category id
# appears in that row's `category` field (a comma-separated list of ids).

# Category id that the listed annotations are relabeled to.
# NOTE(review): assumed to be the "Plastic bag" id in train_all.json -- confirm.
PLASTIC_BAG_CATEGORY_ID = 8

# file name -> renumbered image ids, built once instead of scanning per CSV row.
image_ids_by_file_name = {}
for img in new_train_images:
    image_ids_by_file_name.setdefault(img['file_name'], []).append(img['id'])

# image id -> set of category ids that must be replaced on that image.
categories_to_replace = {}
for file_name, category_field in zip(df_anno.file_name, df_anno.category):
    # str() tolerates a column pandas parsed as integers; every listed id is
    # honored rather than only the first two.
    category_ids = {int(token) for token in str(category_field).split(',') if token.strip()}
    for image_id in image_ids_by_file_name.get(file_name, []):
        categories_to_replace.setdefault(image_id, set()).update(category_ids)

# Single pass over the annotations, mutating them in place.
for ann in new_train_annotations:
    if ann['category_id'] in categories_to_replace.get(ann['image_id'], ()):
        ann['category_id'] = PLASTIC_BAG_CATEGORY_ID



In [None]:
# Save the relabeled dataset as train_test.json inside data_dir, the same
# location the COCO loader reads it back from.
with open(os.path.join(data_dir, 'train_test.json'), 'w', encoding='UTF-8') as new_json:
    json.dump({'info': info, 'licenses': licenses, 'images': new_train_images,
               'annotations': new_train_annotations, 'categories': categories}, new_json, indent=1)

In [None]:
# Load the re-saved train_test.json from data_dir.
# This handle is the one the "after" visualization reads from.
coco_test = COCO(os.path.join(data_dir, 'train_test.json'))

#### 바꾼 후 시각화

In [None]:
# Category metadata is the same for every image, so look it up once.
cat_ids = coco_test.getCatIds()
cats = coco_test.loadCats(cat_ids)

for image_id in img_id:

    image_infos = coco_test.loadImgs(image_id)[0]
    anns = coco_test.loadAnns(coco_test.getAnnIds(imgIds=image_id))

    # Label canvas sized from the image metadata instead of a fixed 512x512.
    masks = np.zeros((image_infos['height'], image_infos['width']))

    # Paint the largest objects first so smaller ones painted later are not hidden.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        pixel_value = category_names.index(className)
        masks[coco_test.annToMask(ann) == 1] = pixel_value
    masks = masks.astype(np.int8)
    masks = label_to_color_image(masks)

    image_path = os.path.join(dataset_path, image_infos['file_name'])
    images = cv2.imread(image_path)
    if images is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV loads BGR; convert to RGB for matplotlib.
    images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)

    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 20))

    ax1.imshow(images)
    ax1.grid(False)
    ax1.set_title("input image : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.imshow(masks)
    ax2.grid(False)
    ax2.set_title("masks : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)

    # Render this figure now so open figures do not pile up over the loop.
    plt.show()

#### Plastic bag 내부 object의 category와 외부 object의 category가 겹친다면, Plastic bag bbox 안에 있는 객체의 category 바꿔주기

In [None]:
# Table whose `file_name` column selects the images handled by the bbox-containment relabeling.
df_bbox = pd.read_csv('./ver3/bbox.csv')

In [None]:
# Display the bbox table.
df_bbox

In [None]:
# Renumbered ids of the images that are listed in bbox.csv.
# Build the lookup once instead of converting the column to a list per image.
bbox_file_names = set(df_bbox.file_name)

img_id = [img['id'] for img in new_train_images if img['file_name'] in bbox_file_names]

img_id

#### 바꾸기 전 시각화

In [None]:
# NOTE(review): `coco` was loaded before the category relabeling driven by
# anno.csv, so this shows the data prior to both relabeling steps -- confirm
# that this is the intended "before" state.

# Category metadata is the same for every image, so look it up once.
cat_ids = coco.getCatIds()
cats = coco.loadCats(cat_ids)

for image_id in img_id:

    image_infos = coco.loadImgs(image_id)[0]
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    # Label canvas sized from the image metadata instead of a fixed 512x512.
    masks = np.zeros((image_infos['height'], image_infos['width']))

    # Paint the largest objects first so smaller ones painted later are not hidden.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        pixel_value = category_names.index(className)
        masks[coco.annToMask(ann) == 1] = pixel_value
    masks = masks.astype(np.int8)
    masks = label_to_color_image(masks)

    image_path = os.path.join(dataset_path, image_infos['file_name'])
    images = cv2.imread(image_path)
    if images is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV loads BGR; convert to RGB for matplotlib.
    images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)

    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

    ax1.imshow(images)
    ax1.grid(False)
    ax1.set_title("input image : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.imshow(masks)
    ax2.grid(False)
    ax2.set_title("masks : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)

    # Render this figure now so open figures do not pile up over the loop.
    plt.show()

In [None]:
# For the selected images, relabel every annotation whose bbox lies entirely
# inside the bbox of an annotation with the plastic-bag category id.

# Category id that acts as the container and that contained objects receive.
# NOTE(review): assumed to be the "Plastic bag" id in train_all.json -- confirm.
PLASTIC_BAG_CATEGORY_ID = 8


def _bbox_contains(outer, inner):
    """Return True when bbox ``inner`` lies fully inside bbox ``outer`` (both ``[x, y, w, h]``)."""
    outer_x_min, outer_y_min = outer[0], outer[1]
    outer_x_max, outer_y_max = outer_x_min + outer[2], outer_y_min + outer[3]

    inner_x_min, inner_y_min = inner[0], inner[1]
    inner_x_max, inner_y_max = inner_x_min + inner[2], inner_y_min + inner[3]

    return (outer_x_min <= inner_x_min and outer_y_min <= inner_y_min
            and outer_x_max >= inner_x_max and outer_y_max >= inner_y_max)


# Collect the annotations of the selected images once (keeping their order)
# instead of rescanning the full annotation list for every image and bag.
selected_image_ids = set(img_id)
annotations_by_image = {}
for ann in new_train_annotations:
    if ann['image_id'] in selected_image_ids:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

for image_annotations in annotations_by_image.values():
    for container in image_annotations:
        # The category is checked at visit time, so an annotation relabeled
        # earlier in this pass can itself act as a container later on.
        if container['category_id'] != PLASTIC_BAG_CATEGORY_ID:
            continue

        for candidate in image_annotations:
            if candidate is not container and _bbox_contains(container['bbox'], candidate['bbox']):
                candidate['category_id'] = PLASTIC_BAG_CATEGORY_ID

In [None]:
# Save the relabeled dataset as train_test.json inside data_dir, the same
# location the COCO loader reads it back from.
with open(os.path.join(data_dir, 'train_test.json'), 'w', encoding='UTF-8') as new_json:
    json.dump({'info': info, 'licenses': licenses, 'images': new_train_images,
               'annotations': new_train_annotations, 'categories': categories}, new_json, indent=1)

In [None]:
# Re-load train_test.json from data_dir, replacing the previous coco_test handle.
# The "after" visualization below reads from it.
coco_test = COCO(os.path.join(data_dir, 'train_test.json'))

#### 바꾼 후 시각화

In [None]:
# Category metadata is the same for every image, so look it up once.
cat_ids = coco_test.getCatIds()
cats = coco_test.loadCats(cat_ids)

for image_id in img_id:

    image_infos = coco_test.loadImgs(image_id)[0]
    anns = coco_test.loadAnns(coco_test.getAnnIds(imgIds=image_id))

    # Label canvas sized from the image metadata instead of a fixed 512x512.
    masks = np.zeros((image_infos['height'], image_infos['width']))

    # Paint the largest objects first so smaller ones painted later are not hidden.
    for ann in sorted(anns, key=lambda ann: ann['area'], reverse=True):
        className = get_classname(ann['category_id'], cats)
        pixel_value = category_names.index(className)
        masks[coco_test.annToMask(ann) == 1] = pixel_value
    masks = masks.astype(np.int8)
    masks = label_to_color_image(masks)

    image_path = os.path.join(dataset_path, image_infos['file_name'])
    images = cv2.imread(image_path)
    if images is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV loads BGR; convert to RGB for matplotlib.
    images = cv2.cvtColor(images, cv2.COLOR_BGR2RGB)

    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 20))

    ax1.imshow(images)
    ax1.grid(False)
    ax1.set_title("input image : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.imshow(masks)
    ax2.grid(False)
    ax2.set_title("masks : {}".format(image_infos['file_name']), fontsize = 15)

    ax2.legend(handles=legend_elements, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0)

    # Render this figure now so open figures do not pile up over the loop.
    plt.show()