In [9]:
# Weighted Boxes Fusion 적용

!pip3 install ensemble_boxes
'''
https://github.com/ZFTurbo/Weighted-Boxes-Fusion
'''

[0m

'\nhttps://github.com/ZFTurbo/Weighted-Boxes-Fusion\n'

In [10]:
import os

import pandas as pd
from ensemble_boxes import *
import numpy as np
from pycocotools.coco import COCO
from tqdm import tqdm

In [11]:
# CSV submissions to ensemble.

# ---- Paths of the per-model submission.csv files (edit this list) ----
submission_files = [
    'results/cascade_resnext/submission.csv',
    'results/cascade_resnext_newdata/submission.csv',
    'results/swin_retina_100/submission.csv',
]
# -----------------------------------------------------------------------

# One DataFrame per submission file, in the same order as `submission_files`.
submission_df = list(map(pd.read_csv, submission_files))

In [12]:
# The first submission defines the reference list (and order) of test images.
image_ids = submission_df[0]['image_id'].tolist()
assert len(image_ids) == 4871, f"expected 4871 test images, found {len(image_ids)}"

# Every other submission is looked up by these ids later on, so fail early with a
# readable message if one of them is missing a row for any reference image.
for _path, _frame in zip(submission_files, submission_df):
    _missing = set(image_ids) - set(_frame['image_id'])
    assert not _missing, (
        f"{_path} has no row for {len(_missing)} image id(s), e.g. {list(_missing)[:3]}"
    )

In [13]:
# Path to the COCO-format annotation file of the test set (point this at your test.json).
annotation = 'dataset/test.json'

# Parse the annotation file and build the COCO lookup index.
coco = COCO(annotation_file=annotation)

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [14]:
# Fuse the per-model predictions of every test image into one PredictionString.
#
# Inputs (defined in earlier cells): `image_ids`, `submission_df`, `coco`.
# Outputs: `prediction_strings` and `file_names`, two parallel lists with one entry per image.

# IoU threshold passed to the fusion method.
iou_thr = 0.55

# Index the COCO image records by file name, so each submission row gets the size of its
# own image instead of relying on the CSV row order matching the COCO image ids.
# NOTE(review): assumes the submission `image_id` column holds the COCO `file_name`
# (e.g. 'test/0000.jpg'); when no record matches, the original positional lookup is used.
image_info_by_name = {info.get('file_name'): info for info in coco.imgs.values()}

# Pre-index each submission by image id once, instead of scanning the whole DataFrame for
# every image. setdefault keeps the first row of a duplicated id, as the scan did.
prediction_lookups = []
for df in submission_df:
    lookup = {}
    for row_image_id, row_prediction in zip(df['image_id'], df['PredictionString']):
        lookup.setdefault(row_image_id, row_prediction)
    prediction_lookups.append(lookup)

prediction_strings = []
file_names = []

for i, image_id in tqdm(enumerate(image_ids), total=len(image_ids)):
    image_info = image_info_by_name.get(image_id)
    if image_info is None:
        image_info = coco.loadImgs(i)[0]
    width = image_info['width']
    height = image_info['height']

    # One entry per model that has predictions for this image.
    boxes_list = []
    scores_list = []
    labels_list = []

    for lookup in prediction_lookups:
        predict_string = lookup[image_id]
        predict_list = str(predict_string).split()

        # Zero tokens is an empty string; a single token is what a missing value looks like
        # after str() (e.g. NaN becomes 'nan'). Either way this model has no boxes for the
        # image, so it is left out of the ensemble for this image.
        if len(predict_list) <= 1:
            continue

        # Each prediction is 6 tokens: label, score, then 4 box coordinates.
        predict_array = np.reshape(predict_list, (-1, 6))

        # ensemble_boxes requires box coordinates in [0, 1], so normalize by the image size
        # (coordinates 0 and 2 by width, 1 and 3 by height).
        box_list = [
            [float(c0) / width, float(c1) / height, float(c2) / width, float(c3) / height]
            for c0, c1, c2, c3 in predict_array[:, 2:6].tolist()
        ]

        boxes_list.append(box_list)
        scores_list.append([float(score) for score in predict_array[:, 1].tolist()])
        labels_list.append([int(label) for label in predict_array[:, 0].tolist()])

    prediction_string = ''
    if len(boxes_list):
        # ensemble_boxes also provides the following methods (plain NMS, soft-NMS, ...):
        # boxes, scores, labels = nms(boxes_list, scores_list, labels_list, iou_thr=iou_thr)
        # boxes, scores, labels = soft_nms(boxes_list, scores_list, labels_list, iou_thr=iou_thr)
        # boxes, scores, labels = non_maximum_weighted(boxes_list, scores_list, labels_list, iou_thr=iou_thr)
        boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, iou_thr=iou_thr)

        # Scale the fused boxes back to pixels. Every box contributes
        # "label score c0 c1 c2 c3 " (note the trailing space).
        fused_entries = []
        for box, score, label in zip(boxes, scores, labels):
            fields = [
                str(int(label)),
                str(score),
                str(box[0] * width),
                str(box[1] * height),
                str(box[2] * width),
                str(box[3] * height),
            ]
            fused_entries.append(' '.join(fields) + ' ')
        prediction_string = ''.join(fused_entries)

    prediction_strings.append(prediction_string)
    file_names.append(image_id)

100%|██████████| 4871/4871 [00:45<00:00, 106.67it/s]


In [15]:
# Assemble the fused predictions into the submission table (one row per image).
submission = pd.DataFrame({
    'PredictionString': prediction_strings,
    'image_id': file_names,
})

# ---- Output path of the ensemble result (edit as needed) ----
output_path = 'results/ensemble/datasetensemble_cascaderesnext_swinretina.csv'
# --------------------------------------------------------------

# to_csv does not create missing directories, so make sure the target folder exists.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
submission.to_csv(output_path, index=False)

submission.head()

Unnamed: 0,PredictionString,image_id
0,7 0.6534777879714966 604.9644165039062 510.360...,test/0000.jpg
1,3 0.547940731048584 342.7675476074219 249.6631...,test/0001.jpg
2,1 0.6294417977333069 79.40958404541016 275.403...,test/0002.jpg
3,9 0.5086538791656494 130.72122192382812 220.60...,test/0003.jpg
4,1 0.5214035113652548 192.64695739746094 268.41...,test/0004.jpg
