In [2]:
# ensemble_boxes가 설치 되지 않았을 경우만 실행 해 주세요!
!pip install ensemble_boxes

Collecting ensemble_boxes
  Downloading ensemble_boxes-1.0.9-py3-none-any.whl.metadata (728 bytes)
Downloading ensemble_boxes-1.0.9-py3-none-any.whl (23 kB)
Installing collected packages: ensemble_boxes
Successfully installed ensemble_boxes-1.0.9


In [3]:
from ensemble_boxes import nms, weighted_boxes_fusion
from pycocotools.coco import COCO
import numpy as np
import pandas as pd


In [4]:
# Names of the submission CSV files to ensemble.
# Each entry is appended verbatim to '../submission/' and handed to pd.read_csv,
# so write the file name exactly as it exists on disk.
root = [
    'output_1',
    'output_2',
    'output_3',
]

In [None]:
# Build the path of every submission file and load each one into a DataFrame.
submission_files = []
for name in root:
    submission_files.append('../submission/' + name)
print(submission_files)

submission_df = []
for path in submission_files:
    submission_df.append(pd.read_csv(path))

In [None]:
# The image ids of the first submission define which images are ensembled, and in what order.
image_ids = submission_df[0]['image_id'].to_list()

In [None]:
# COCO-format annotation file used to look up the width/height of each test image.
annotation = '../dataset/test.json'
coco = COCO(annotation_file=annotation)

# 앙상블
### :참고사항:
### ensemble_type 변수를 통해서 사용할 Ensemble 기법을 선택해주세요
    ├── NMS
    ├── WBF
    └── MIX(NMS -> WBF)



In [None]:
# Ensemble method: one of 'nms', 'wbf', 'mix'
# (mix = NMS followed by WBF; the original author marked it as work in progress).
ensemble_type = 'nms'

# Tunable IoU thresholds.
nms_iou_thr = 0.4  # IoU threshold used by NMS
wbf_iou_thr = 0.5  # IoU threshold used by WBF

# One weight per submission file; every model gets the same weight here.
# The values may be changed, or set `weights = None` if weighting is not needed.
weights = [1 for _df in submission_df]

# Accumulators filled by the ensemble cell: one entry per image.
prediction_strings = []
file_names = []

In [None]:

def _parse_predictions(predict_string, image_width, image_height):
    """Parse one PredictionString into normalized boxes, scores and labels.

    The string is a whitespace-separated sequence of
    ``label score x1 y1 x2 y2`` groups (six tokens per box).

    Args:
        predict_string: Raw cell value from the 'PredictionString' column.
        image_width: Image width in pixels, used to normalize x coordinates.
        image_height: Image height in pixels, used to normalize y coordinates.

    Returns:
        ``(boxes, scores, labels)`` with box coordinates divided by the image
        size, or ``None`` when the string holds no prediction.

    Raises:
        ValueError: If the number of tokens is not a multiple of six.
    """
    tokens = str(predict_string).split()
    # An empty string yields no tokens; a missing value (NaN) stringifies to
    # the single token 'nan'. Both mean "this model predicted nothing".
    if len(tokens) <= 1:
        return None

    rows = np.reshape(tokens, (-1, 6)).tolist()
    boxes = [
        [
            float(x1) / image_width,
            float(y1) / image_height,
            float(x2) / image_width,
            float(y2) / image_height,
        ]
        for _, _, x1, y1, x2, y2 in rows
    ]
    scores = [float(row[1]) for row in rows]
    # int(float(...)) accepts both '3' and '3.0', so files whose labels were
    # written as floats can be ensembled again.
    labels = [int(float(row[0])) for row in rows]
    return boxes, scores, labels


def _fuse_predictions(boxes_list, scores_list, labels_list, model_weights, method,
                      nms_thr, wbf_thr):
    """Fuse the per-model predictions of one image with the selected method.

    Args:
        boxes_list, scores_list, labels_list: One list per contributing model.
        model_weights: Weights aligned with ``boxes_list``, or ``None``.
        method: 'nms', 'wbf' or 'mix' (NMS followed by WBF).
        nms_thr: IoU threshold for NMS.
        wbf_thr: IoU threshold for WBF.

    Returns:
        ``(boxes, scores, labels)`` as returned by ensemble_boxes.
    """
    if method == 'nms':
        return nms(boxes_list, scores_list, labels_list, iou_thr=nms_thr)

    if method == 'wbf':
        return weighted_boxes_fusion(
            boxes_list, scores_list, labels_list,
            weights=model_weights, iou_thr=wbf_thr,
        )

    # 'mix': run NMS first, then WBF on the surviving boxes.
    boxes_nms, scores_nms, labels_nms = nms(
        boxes_list, scores_list, labels_list, iou_thr=nms_thr
    )
    if not len(boxes_nms):
        return boxes_nms, scores_nms, labels_nms
    # NMS returns one flat set of boxes, while WBF expects one list per model.
    # Wrap the NMS output as a single "model"; per-model weights no longer apply.
    return weighted_boxes_fusion(
        [boxes_nms], [scores_nms], [labels_nms],
        weights=None, iou_thr=wbf_thr,
    )


def _format_predictions(boxes, scores, labels, image_width, image_height):
    """Serialize fused boxes back into the PredictionString format.

    Coordinates are scaled back to pixels. Every token, including the last,
    is followed by a single space. Labels are written as integers because WBF
    returns them as floats.
    """
    tokens = []
    for box, score, label in zip(boxes, scores, labels):
        tokens.extend([
            str(int(label)),
            str(score),
            str(box[0] * image_width),
            str(box[1] * image_height),
            str(box[2] * image_width),
            str(box[3] * image_height),
        ])
    return ''.join(token + ' ' for token in tokens)


if ensemble_type not in ('nms', 'wbf', 'mix'):
    raise ValueError(
        "ensemble_type must be one of 'nms', 'wbf', 'mix', got %r" % (ensemble_type,)
    )
if weights is not None and len(weights) != len(submission_df):
    raise ValueError(
        'weights must contain one value per submission file (%d), got %d'
        % (len(submission_df), len(weights))
    )

# Reset the accumulators so that re-running this cell does not duplicate rows.
prediction_strings = []
file_names = []

# Collect the boxes of every submission file image by image and fuse them.
for i, image_id in enumerate(image_ids):
    # NOTE(review): the positional index is used as the COCO image id; this assumes
    # the ids in test.json are 0..N-1 in the same order as the submission rows - confirm.
    image_info = coco.loadImgs(i)[0]
    image_width = image_info['width']
    image_height = image_info['height']

    boxes_list = []
    scores_list = []
    labels_list = []
    # Weights of the models that contributed boxes for this image; kept aligned with
    # boxes_list because models without predictions are skipped.
    model_weights = [] if weights is not None else None

    for model_idx, df in enumerate(submission_df):
        predict_string = df[df['image_id'] == image_id]['PredictionString'].tolist()[0]
        parsed = _parse_predictions(predict_string, image_width, image_height)
        if parsed is None:
            continue

        box_list, score_list, label_list = parsed
        boxes_list.append(box_list)
        scores_list.append(score_list)
        labels_list.append(label_list)
        if model_weights is not None:
            model_weights.append(weights[model_idx])

    # Fuse only when at least one model predicted something for this image.
    prediction_string = ''
    if boxes_list:
        boxes, scores, labels = _fuse_predictions(
            boxes_list, scores_list, labels_list, model_weights,
            ensemble_type, nms_iou_thr, wbf_iou_thr,
        )
        prediction_string = _format_predictions(
            boxes, scores, labels, image_width, image_height
        )

    prediction_strings.append(prediction_string)
    file_names.append(image_id)


In [None]:
# Assemble the ensembled predictions and write them out as a submission CSV.
# NOTE(review): the inputs are read from '../submission/' but the result is written
# under './submission/' - confirm that this directory is intended and exists.
submission = pd.DataFrame({
    'PredictionString': prediction_strings,
    'image_id': file_names,
})
submission.to_csv('./submission/submission_ensemble.csv', index=None)
submission.head()