# Ensemble - WBF
이 주피터 노트북 파일을 utils 폴더 안에 두고 실행하세요.

In [None]:
!pip install ensemble_boxes

In [None]:
import pandas as pd
from ensemble_boxes import *
import numpy as np
from pycocotools.coco import COCO
import os
import cv2
import json
import skimage.io as io
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
import seaborn as sns

In [None]:
# Folder that holds the test images; its visible entries become the image list.
test_folder = '/opt/ml/input/data/medical/img/test/'
# os.listdir returns entries in arbitrary order, so hidden files (names starting
# with '.') can appear anywhere in the listing. Filter all of them out; this
# also works when the folder is empty.
get_img_files = [
    name for name in os.listdir(test_folder) if not name.startswith('.')
]

In [None]:
len(get_img_files) # sanity check: 100 is the expected count

## COCO format test.json 만들기

In [None]:
# Build a COCO-style index of the test images (no annotations) and save it as
# test.json. Each image id is its position in get_img_files.
coco_data = {
    "images": [],
    "annotations": [],
    "categories": [{"id": 1, "name": "text"}],  # may be left empty (single "text" class)
}

for i, test_file in enumerate(get_img_files):
    img_path = os.path.join(test_folder, test_file)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable / non-image file by returning None;
        # fail with the offending path instead of an AttributeError on .shape.
        raise ValueError(f"Could not read image file: {img_path}")
    # Only height and width are needed; the channel count is ignored.
    h, w = img.shape[:2]
    coco_image = {
        "id": i,
        "width": w,
        "height": h,
        "file_name": test_file,
        "license": 0,
        "flickr_url": None,
        "coco_url": None,
        "date_captured": None,
    }
    coco_data["images"].append(coco_image)

with open('/opt/ml/input/data/medical/ufo/test.json', 'w') as f:
    json.dump(coco_data, f)

## 앙상블 시작
앙상블할 json 파일들을 code/utils/ensemble/ 폴더에 넣어 두세요.

In [None]:
# Names of the entries under ./ensemble, ignoring hidden ones (leading '.').
submission_files = [
    entry for entry in os.listdir('./ensemble') if not entry.startswith('.')
]

In [None]:
submission_files         # inspect the JSON files that will be ensembled

In [None]:
# Fuse the word boxes of every submission with Weighted Boxes Fusion (WBF) and
# write the merged result in the same {"images": {file: {"words": ...}}} layout.
annotation = '/opt/ml/input/data/medical/ufo/test.json'
coco = COCO(annotation)

iou_thr = 0.5            # TODO: tune the IoU threshold passed to WBF
skip_box_thr = 0.0001    # TODO: tune; WBF skips boxes whose score is below this value


def _points_to_norm_box(points, width, height):
    """Return the axis-aligned [xmin, ymin, xmax, ymax] of a polygon, divided by image size.

    Works for any number of vertices, not only 4-point quadrilaterals.
    """
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    return [min(xs) / width, min(ys) / height, max(xs) / width, max(ys) / height]


def _norm_box_to_points(box, width, height):
    """Return the [tl, tr, br, bl] corner points of a normalised box in pixel units."""
    # float() turns NumPy scalars into plain Python floats for json.dump.
    xmin = float(box[0]) * width
    ymin = float(box[1]) * height
    xmax = float(box[2]) * width
    ymax = float(box[3]) * height
    return [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]


# Parse every submission once up front instead of once per image.
submission_images = []
for submission in submission_files:
    with open(os.path.join('./ensemble', submission), 'r') as f:
        submission_images.append(json.load(f)['images'])

images_anno = {}
for i, test_file in enumerate(get_img_files):
    # Image ids in test.json are the positions in get_img_files.
    image_info = coco.loadImgs(i)[0]
    width, height = image_info['width'], image_info['height']

    boxes_list = []
    scores_list = []
    labels_list = []
    for images in submission_images:
        words = images[image_info['file_name']]["words"]
        # UFO polygon -> normalised PascalVOC-style box
        box_list = [
            _points_to_norm_box(word_data["points"], width, height)
            for word_data in words.values()
        ]
        boxes_list.append(box_list)
        # Every box gets score 1.0 and the single class label 1.
        scores_list.append([1.0] * len(box_list))
        labels_list.append([1] * len(box_list))

    # Start empty so an image never inherits the previous image's words and the
    # name is always defined, even when there are no submissions.
    prediction_words = {}
    if boxes_list:
        boxes, scores, labels = weighted_boxes_fusion(
            boxes_list, scores_list, labels_list,
            iou_thr=iou_thr, skip_box_thr=skip_box_thr,
        )
        # Normalised box -> UFO 4-point polygon
        prediction_words = {
            idx: dict(points=_norm_box_to_points(box, width, height))
            for idx, box in enumerate(boxes)
        }
    images_anno[image_info['file_name']] = dict(words=prediction_words)

images_anno = {'images': images_anno}
# Ensemble result, read back by the visualisation cell.
with open('result.json', 'w') as f:
    json.dump(images_anno, f)

# Submission file: the same JSON payload, indented, saved under a .csv name.
with open('result.csv', 'w') as f:
    json.dump(images_anno, f, indent=4)

## 앙상블 결과 출력해보기

In [None]:
# Load the ensembled result and build, per image, the list of boxes to draw.
root_path = '/opt/ml/input/data/medical/img/test'
anno_root = './result.json'

with open(anno_root, 'r') as f:
    # Despite the name, this holds whatever anno_root points at (the ensemble result).
    train_json = json.load(f)
    images = train_json['images']
    # Columns are image file names; the single row 'words' holds each image's word dict.
    images_df = pd.DataFrame.from_dict(images)

image_id = sorted(list(images_df))
# Paths are derived from the JSON keys so they follow the same sorted order as image_id.
fnames = [os.path.join(root_path, i) for i in image_id]

bboxes = []
for index, img_id in enumerate(image_id):
    words = images_df[img_id].loc['words']
    # One entry per word; 'id' is the index of the image, shared by all its words.
    bboxes.append([
        {'image_id': img_id, 'id': index, 'bbox': word['points']}
        for word in words.values()
    ])

# File names without extension; splitext handles extensions of any length
# (e.g. '.jpeg'), unlike slicing off the last four characters.
img_name_list = [os.path.splitext(os.path.basename(path))[0] for path in fnames]

def showimg(idx):
    """Plot image ``idx`` twice: untouched on the left, with its boxes on the right.

    Args:
        idx: Position in the module-level ``fnames``, ``bboxes`` and
            ``img_name_list`` lists.
    """
    fig, ax = plt.subplots(1, 2, dpi=512)
    img = io.imread(fnames[idx])

    ax[0].set_title('original', fontsize=7)
    ax[1].set_title(f"{img_name_list[idx]}", fontsize=7)

    # Axis cosmetics are applied once per figure, so they also take effect for
    # images that have no boxes at all.
    for axis in ax:
        axis.imshow(img)
        axis.set_xticks([])
        axis.set_yticks([])
        for pos in ['right', 'top', 'bottom', 'left']:
            axis.spines[pos].set_visible(False)

    # Draw each box's points as an unfilled closed polygon on the right panel.
    for ann in bboxes[idx]:
        ax[1].add_patch(patches.Polygon(
            np.array(ann['bbox']),
            closed=True,
            edgecolor='orange',
            fill=False,
            linewidth=0.3,
        ))

In [None]:
# Preview images 30-39 of the sorted result list. The upper bound is clamped to
# the number of available images so a smaller set does not raise IndexError.
for i in range(30, min(40, len(fnames))):
    showimg(i)