In [2]:
# Apply Weighted Boxes Fusion (WBF)

# !pip3 install ensemble_boxes
'''
https://github.com/ZFTurbo/Weighted-Boxes-Fusion
'''

In [3]:
import pandas as pd
from ensemble_boxes import *
import numpy as np
from pycocotools.coco import COCO
from tqdm import tqdm

In [6]:
# Inputs of the ensemble: one submission CSV per model.

### Example ###
# Put the paths of the submission CSV files to ensemble here.
submission_files = [
    './submissions/htc.csv',
    './submissions/cascade_rcnn.csv',
]

# Read each file into its own DataFrame, keeping the order of the list above.
submission_df = list(map(pd.read_csv, submission_files))

In [18]:
# The first submission decides which images are ensembled and in which order.
image_ids = submission_df[0]['image_id'].tolist()
# Sanity check: this notebook expects 4871 test images.
assert len(image_ids) == 4871, f'expected 4871 test images, got {len(image_ids)}'

# Every submission must contain a row for each of these ids; otherwise the
# per-image lookup in the fusion loop fails part-way through the run.
# Extra ids in a submission are tolerated (they are simply never looked up).
for path, df in zip(submission_files, submission_df):
    missing_ids = set(image_ids) - set(df['image_id'])
    assert not missing_ids, f'{path} is missing {len(missing_ids)} image ids'

In [12]:
# Annotation file of the test set, loaded through the COCO API. The image
# records it holds supply the width/height used when fusing boxes.
annotation = '../../dataset/test.json' ### put the path to test.json here ###
coco = COCO(annotation)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [16]:
# Fuse the per-model predictions of every image with Weighted Boxes Fusion.

IOU_THR = 0.55  # IoU threshold passed to weighted_boxes_fusion


def _parse_prediction_string(prediction_string, width, height):
    """Split one PredictionString into normalized boxes, scores and labels.

    The string is read as whitespace-separated records of six values each:
    label, score and four box coordinates. The coordinates are divided by
    width, height, width, height in that order (presumably
    x_min y_min x_max y_max -- confirm against the submission format).

    Args:
        prediction_string: raw cell value; it is passed through ``str()`` first,
            so a missing value (NaN) becomes the single token ``'nan'``.
        width: image width used to normalize the 1st and 3rd coordinate.
        height: image height used to normalize the 2nd and 4th coordinate.

    Returns:
        ``(boxes, scores, labels)`` as plain Python lists, or ``None`` when the
        string holds fewer than two tokens (i.e. no predictions).

    Raises:
        ValueError: if the number of tokens is not a multiple of six.
    """
    tokens = str(prediction_string).split()
    if len(tokens) < 2:
        return None

    records = np.reshape(tokens, (-1, 6))
    labels = [int(value) for value in records[:, 0].tolist()]
    scores = [float(value) for value in records[:, 1].tolist()]
    boxes = [
        [float(x1) / width, float(y1) / height, float(x2) / width, float(y2) / height]
        for x1, y1, x2, y2 in records[:, 2:6].tolist()
    ]
    return boxes, scores, labels


# Index the COCO image records by file name so that an image's size is found by
# identity instead of by its position in the CSV.
image_info_by_name = {
    info['file_name']: info for info in coco.loadImgs(coco.getImgIds())
}

# One image_id -> PredictionString mapping per submission, built once so the
# loop does not rescan every DataFrame for every image. keep='first' matches
# taking the first row when an image_id appears more than once.
predictions_by_model = [
    df.drop_duplicates(subset='image_id', keep='first')
      .set_index('image_id')['PredictionString']
      .to_dict()
    for df in submission_df
]

prediction_strings = []
file_names = []

for i, image_id in tqdm(enumerate(image_ids), total=len(image_ids)):
    image_info = image_info_by_name.get(image_id)
    if image_info is None:
        # image_id is not a file name known to the annotation file: fall back to
        # treating the row position as the COCO image id.
        image_info = coco.loadImgs(i)[0]
    width, height = image_info['width'], image_info['height']

    boxes_list = []
    scores_list = []
    labels_list = []

    for model_idx, predictions in enumerate(predictions_by_model):
        if image_id not in predictions:
            raise KeyError(
                'image_id {!r} not found in submission #{}'.format(image_id, model_idx)
            )
        parsed = _parse_prediction_string(predictions[image_id], width, height)
        if parsed is None:
            # This model predicted nothing for the image. Skipping it shifts the
            # list positions, which is fine as long as no per-model weights are
            # passed to the fusion call below.
            continue
        model_boxes, model_scores, model_labels = parsed
        boxes_list.append(model_boxes)
        scores_list.append(model_scores)
        labels_list.append(model_labels)

    prediction_string = ''
    if boxes_list:
        # ensemble_boxes also offers nms, soft_nms and non_maximum_weighted,
        # which take the same three lists plus iou_thr, if another fusion
        # strategy is wanted.
        boxes, scores, labels = weighted_boxes_fusion(
            boxes_list, scores_list, labels_list, iou_thr=IOU_THR
        )

        # Scale the fused boxes back to pixels; every record ends with a space,
        # so the string keeps the "label score x y x y " layout.
        fused_records = []
        for box, score, label in zip(boxes, scores, labels):
            fields = (
                int(label),
                score,
                box[0] * width,
                box[1] * height,
                box[2] * width,
                box[3] * height,
            )
            fused_records.append(' '.join(map(str, fields)) + ' ')
        prediction_string = ''.join(fused_records)

    prediction_strings.append(prediction_string)
    file_names.append(image_id)

100%|██████████| 4871/4871 [00:32<00:00, 149.31it/s]


In [17]:
# Assemble the ensembled predictions under the two column names the input
# submissions are read by (PredictionString, image_id).
submission = pd.DataFrame({
    'PredictionString': prediction_strings,
    'image_id': file_names,
})
# index=False keeps the row index out of the file; without it pandas writes an
# extra unnamed leading column into the CSV.
submission.to_csv('submission_ensemble.csv', index=False)

submission.head()

Unnamed: 0,PredictionString,image_id
0,7 0.994391918182373 215.9879608154297 57.12092...,test/0000.jpg
1,5 0.866894543170929 127.87580108642578 0.0 498...,test/0001.jpg
2,1 0.9592076539993286 86.98426818847656 280.238...,test/0002.jpg
3,9 0.7666659951210022 21.942455291748047 98.228...,test/0003.jpg
4,1 0.8139079809188843 192.29458618164062 318.74...,test/0004.jpg
