## Pseudo Labeling
submission.csv 파일을 통해 test dataset의 pseudo labeling json 파일 생성하기

In [None]:
import os
import cv2
import numpy as np
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import random
import pandas as pd
from PIL import Image
import json

In [None]:
# TODO: name of the experiment whose predictions should be converted
input_exp = '[test]example'
# TODO: set the directory that holds the experiment's results
result_dir = os.path.join('/opt/ml/baseline/mmdetection/configs/_teamconfig_/', input_exp)
result_dir = os.path.join(result_dir, 'results')

# Locate the submission file without assuming its name (it may have been saved
# under a different one). Hidden entries and sub-directories are ignored, and
# the listing is sorted so the choice does not depend on os.listdir() order.
candidates = sorted(
    file_name for file_name in os.listdir(result_dir)
    if not file_name.startswith('.')
    and os.path.isfile(os.path.join(result_dir, file_name))
)
# Prefer real csv files; fall back to every visible file so a submission saved
# without the extension is still picked up.
csv_candidates = [file_name for file_name in candidates
                  if file_name.lower().endswith('.csv')]
if csv_candidates:
    candidates = csv_candidates
if not candidates:
    # Fail here with a clear message instead of a NameError on submission_dir.
    raise FileNotFoundError('no submission file found in ' + result_dir)
if len(candidates) > 1:
    print('multiple candidate files found, using the last one :', candidates)
submission_dir = os.path.join(result_dir, candidates[-1])
print(submission_dir)

In [None]:
# Load the submission file found at `submission_dir` into a DataFrame.
submission_df = pd.read_csv(submission_dir)
# Bare expression so the notebook renders the table as the cell output.
submission_df

In [None]:
# Convert every row of `submission_df` into COCO-style image / annotation dicts.
#
# Results (module-level names):
#   new_images       - one image entry per row that has at least one prediction
#   new_annotations  - one annotation entry per predicted box
#   unsearch_images  - image ids of rows whose prediction string is empty
#   image_num / annotation_num - next unused id after the loop
#
# Ids start at 500000 (presumably to stay clear of ids already used in the
# train json - verify against that file).
image_num = 500000
annotation_num = 500000

new_images = []
new_annotations = []
unsearch_images = []

# Materialise each column once and walk them in lockstep; calling .tolist()
# inside the loop rebuilt both lists for every row (quadratic in row count).
image_id_column = submission_df['image_id'].tolist()
prediction_column = submission_df['PredictionString'].tolist()

for image_id, predict_string in zip(image_id_column, prediction_column):
    # str() also turns a missing value (NaN) into the single token 'nan',
    # which the length check below treats as "no prediction".
    predict_list = str(predict_string).split()
    if len(predict_list) <= 1:
        print('unsearch image :', image_id)
        unsearch_images.append(image_id)
        continue
    if len(predict_list) % 6 != 0:
        # np.reshape would raise ValueError as well, but without naming the row.
        raise ValueError(
            'malformed PredictionString for image {}: expected a multiple of 6 '
            'values, got {}'.format(image_id, len(predict_list)))
    predict_list = np.reshape(predict_list, (-1, 6))

    for predict in predict_list:
        # Each group of six tokens: category, one unused value (index 1,
        # presumably the confidence score), then xmin, ymin, xmax, ymax.
        category_id = predict[0]
        xmin = float(predict[2])
        ymin = float(predict[3])
        xmax = float(predict[4])
        ymax = float(predict[5])
        width = xmax - xmin
        height = ymax - ymin
        new_annotations.append({
            'image_id': image_num,
            'category_id': int(category_id),
            'area': round(width * height, 2),
            # Stored as [x, y, width, height] rather than corner coordinates.
            'bbox': [round(xmin, 1), round(ymin, 1), round(width, 1), round(height, 1)],
            'iscrowd': 0,
            'id': annotation_num})
        annotation_num += 1

    new_images.append({
        # NOTE(review): the size is hard-coded; assumes every test image is
        # 1024x1024 - confirm against the dataset.
        'width': 1024,
        'height': 1024,
        'file_name': image_id,
        'license': 0,
        'flickr_url': None,
        'coco_url': None,
        'date_captured': None,
        'id': image_num
    })
    image_num += 1

In [None]:
# Bare expression so the notebook shows the collected `unsearch_images` list.
unsearch_images

## bbox를 찾지 못한 test 이미지들을 확인해보기

In [None]:
# Preview one of the images listed in `unsearch_images`.
if unsearch_images:
    n = 0   # TODO: index into unsearch_images of the image to inspect
    image_path = os.path.join('/opt/ml/dataset/', unsearch_images[n])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or missing file by returning None;
        # stop here with the path instead of failing inside cvtColor.
        raise FileNotFoundError('could not read image: ' + image_path)
    # OpenCV loads BGR; convert to RGB and scale to [0, 1] floats for imshow.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    plt.figure(figsize=(4, 4))
    plt.xticks([])
    plt.yticks([])
    plt.imshow(image)

In [None]:
# TODO : 합칠 train json 파일 경로
train_json = '/opt/ml/dataset/train2_swj_battery_mosaic2.json'
with open(train_json, 'r') as f:
    data = json.load(f)
    info = data['info']
    licenses = data['licenses']
    images = data['images']
    categories = data['categories']
    annotations = data['annotations']

In [None]:
# Append the pseudo-labelled entries to the train lists in place.
# NOTE: this mutates `images` / `annotations`, so running the cell a second
# time appends the same entries again.
images += new_images
annotations += new_annotations


def _write_coco_json(path, image_entries, annotation_entries):
    """Dump a COCO-style json that reuses the train file's info, licenses and categories."""
    payload = {
        'info': info,
        'licenses': licenses,
        'images': image_entries,
        'annotations': annotation_entries,
        'categories': categories,
    }
    with open(path, 'w') as out_file:
        json.dump(payload, out_file)


# train + test labeling json file
train_test_labeling = '/opt/ml/dataset/train_test_labeling.json'
_write_coco_json(train_test_labeling, images, annotations)

# test-only labeling json file
test_labeling = '/opt/ml/dataset/test_labeling.json'
_write_coco_json(test_labeling, new_images, new_annotations)

## Test Image BBOX 확인해보기

In [None]:
# Draw a 4x4 grid of randomly chosen pseudo-labelled images with their boxes.
data_dir = '/opt/ml/dataset'
annotation = test_labeling
coco = COCO(annotation)

classes = ["General trash", "Paper", "Paper pack", "Metal", "Glass", "Plastic", "Styrofoam", "Plastic bag", "Battery", "Clothing"]
LABELCOLORS = ['red', 'orange', 'yellow', 'greenyellow', 'green', 'turquoise', 'blue', 'indigo', 'purple', 'black']
# plt.figure rather than plt.subplots: the latter also creates a full-figure
# axes that stays visible underneath the 4x4 grid added below.
fig = plt.figure(figsize=(12, 12))

# Sample from the ids stored in the json itself. The previous
# randrange(500000, image_num-1) could never pick the last image (the stop
# value is exclusive) and raised when fewer than two images existed.
candidate_ids = coco.getImgIds()
if not candidate_ids:
    print('no images found in', annotation)
panel_count = 16 if candidate_ids else 0

for i in range(1, panel_count + 1):
    image_info = coco.loadImgs(random.choice(candidate_ids))[0]
    # Load the annotations once and derive both boxes and class names from them.
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_info['id']))
    box = [ann['bbox'] for ann in anns]
    cat = [coco.loadCats(ann['category_id'])[0]['name'] for ann in anns]

    image_path = os.path.join(data_dir, image_info['file_name'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for an unreadable or missing file.
        raise FileNotFoundError('could not read image: ' + image_path)
    # OpenCV loads BGR; convert to RGB and scale to [0, 1] floats for imshow.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0

    ax = fig.add_subplot(4, 4, i, aspect=1)
    ax.set_title(image_info['file_name'])
    ax.set_xticks([])
    ax.set_yticks([])
    for (x, y, w, h), c in zip(box, cat):
        # The colour is looked up by class name, so the json's category names
        # must match the entries of `classes`.
        label_color = LABELCOLORS[classes.index(c)]
        ax.add_patch(patches.Rectangle((x, y), w, h, edgecolor=label_color, linewidth=1, fill=False))
        # The former `if annotation:` guard tested the (non-empty) json path
        # and was always true, so the label is drawn unconditionally.
        ax.text(x, y - 5, c, fontsize=9, color=label_color)
    ax.imshow(image)