In [15]:
import os

import pandas as pd
import tqdm
from ensemble_boxes import nms, soft_nms
from ensemble_boxes import weighted_boxes_fusion as wbf
from tqdm.notebook import tqdm as notebook_tqdm

# Bounding-box conversion helper
def xywh2xyxy(bbox, img_size=1024):
    """Convert an ``[x, y, width, height]`` box to normalized corner form.

    Args:
        bbox: Indexable whose first four items are x_min, y_min, width
            and height.
        img_size: Divisor applied to every coordinate (default 1024).

    Returns:
        ``[x_min, y_min, x_max, y_max]`` with each value divided by
        ``img_size``.
    """
    left, top = bbox[0], bbox[1]
    right = left + bbox[2]
    bottom = top + bbox[3]
    return [coord / img_size for coord in (left, top, right, bottom)]

# Ensemble dispatch helper
def apply_ensemble_method(ensemble, bbox_list, scores_list, labels_list, weights=None, iou_thr=0.5, skip_box_thr=0.0001, sigma=0.5):
    """Combine per-model detections with the selected ensemble function.

    Args:
        ensemble: Method name: ``'nms'``, ``'soft_nms'`` or ``'wbf'``.
        bbox_list: One list of boxes per model.
        scores_list: One list of scores per model.
        labels_list: One list of labels per model.
        weights: One weight per model; when ``None`` every entry of
            ``bbox_list`` gets weight 1.
        iou_thr: IoU threshold forwarded to the selected function.
        skip_box_thr: Forwarded as ``thresh`` to ``soft_nms`` and as
            ``skip_box_thr`` to ``wbf``; not used for ``'nms'``.
        sigma: Forwarded to ``soft_nms`` only.

    Returns:
        The ``(boxes, scores, labels)`` triple produced by the selected
        function.

    Raises:
        ValueError: If ``ensemble`` is not one of the supported names.
    """
    # Equal weighting is the fallback when the caller supplies none.
    model_weights = weights if weights is not None else [1] * len(bbox_list)
    detections = (bbox_list, scores_list, labels_list)

    if ensemble == 'nms':
        fused = nms(*detections, weights=model_weights, iou_thr=iou_thr)
    elif ensemble == 'soft_nms':
        fused = soft_nms(*detections, weights=model_weights, iou_thr=iou_thr, sigma=sigma, thresh=skip_box_thr)
    elif ensemble == 'wbf':
        fused = wbf(*detections, weights=model_weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    else:
        raise ValueError(f"Unknown ensemble method: {ensemble}")

    fused_boxes, fused_scores, fused_labels = fused
    return fused_boxes, fused_scores, fused_labels


# Script: read every model's prediction JSON from `base_dir`, fuse the
# detections image by image with the selected ensemble method, and write a
# submission CSV with `PredictionString` and `image_id` columns.
print("start")
base_dir = './ensemble_json_data/'
output_path = './submission/ensemble_submission_test_07.csv'
img_size = 1024

# Collect every JSON file in the directory. The listing is sorted because
# os.listdir() returns entries in arbitrary order, and a file's position
# decides which entry of `weights` applies to it.
all_files = os.listdir(base_dir)
json_files = sorted(file for file in all_files if file.endswith('.json'))
if not json_files:
    raise FileNotFoundError(f"No .json prediction files found in {base_dir}")

# Build the full paths.
json_file_paths = [os.path.join(base_dir, json_file) for json_file in json_files]

# Read each JSON file and prepare bbox / score / label lists per image.
grouped_dfs = []
for json_file_path in json_file_paths:
    df = pd.read_json(json_file_path)
    # Normalize with the same img_size that is used to scale boxes back below.
    df['bbox'] = df['bbox'].apply(lambda bbox: xywh2xyxy(bbox, img_size))
    grouped_df = df.groupby('image_id').agg(list).reset_index()
    grouped_dfs.append(grouped_df)

print("pd.read_json 완료")

# Ensemble method: one of 'nms', 'soft_nms', 'wbf'.
ensemble_method = 'nms'

# Per-model weights and ensemble thresholds.
weights = [1] * len(grouped_dfs)
iou_thr = 0.3
skip_box_thr = 0.0001  # forwarded as the score threshold for soft_nms / wbf
sigma = 0.1  # forwarded to soft_nms only

# Look predictions up by image_id instead of by row position: a model with no
# detections for some image has no row for it after the groupby, so positional
# indexing would silently fuse boxes that belong to different images.
predictions_by_model = [
    grouped_df.set_index('image_id').to_dict('index') for grouped_df in grouped_dfs
]
image_ids = sorted(set().union(*predictions_by_model))

img_bboxs = []
img_scores = []
img_labels = []

# Run the selected ensemble method for every image.
for image_id in notebook_tqdm(image_ids):
    # A model without detections for this image contributes empty lists, so
    # the model order (and therefore `weights`) stays aligned.
    # NOTE(review): confirm the installed ensemble_boxes version accepts empty
    # per-model lists for nms / soft_nms.
    per_model = [predictions.get(image_id, {}) for predictions in predictions_by_model]
    labels_list = [entry.get('category_id', []) for entry in per_model]
    scores_list = [entry.get('score', []) for entry in per_model]
    bbox_list = [entry.get('bbox', []) for entry in per_model]

    boxes, scores, labels = apply_ensemble_method(ensemble_method, bbox_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr, sigma=sigma)

    # Scale the normalized boxes back to pixel coordinates.
    img_bboxs.append((boxes*img_size).tolist())
    img_scores.append(scores.tolist())
    img_labels.append(labels.tolist())

print("각 이미지의wbf 완료")

# Build the PredictionString for every image: "label score x1 y1 x2 y2 "
# repeated per box (each box is followed by a single space).
prediction_strings = []
for fused_boxes, fused_scores, fused_labels in notebook_tqdm(
        list(zip(img_bboxs, img_scores, img_labels))):
    prediction_strings.append(''.join(
        f"{int(label)} {score} {' '.join(map(str, box))} "
        for label, score, box in zip(fused_labels, fused_scores, fused_boxes)
    ))

# Name each row after its real image_id rather than its position, so a gap in
# the ids cannot shift the file names.
# NOTE(review): assumes image_id is the integer used in the test file name
# (test/0000.jpg, ...) -- confirm against the dataset.
file_names = [f'test/{int(image_id):04d}.jpg' for image_id in image_ids]

# Save the result, creating the output directory first so a missing folder
# does not discard the finished work.
submission = pd.DataFrame()
submission['PredictionString'] = prediction_strings
submission['image_id'] = file_names
os.makedirs(os.path.dirname(output_path), exist_ok=True)
submission.to_csv(output_path, index=False)

print("Ensemble submission saved successfully.")


start
pd.read_json 완료


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/4871 [00:00<?, ?it/s]

각 이미지의wbf 완료


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/4871 [00:00<?, ?it/s]

Ensemble submission saved successfully.
