In [None]:
import os
import glob
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
from pycocotools.coco import COCO
from ensemble_boxes import weighted_boxes_fusion

In [None]:
# Base directory that the data and submission paths below are joined onto.
# It is a relative path, so it resolves against the notebook's current working directory.
working_dir = '../'
# Disabled alternative kept for reference: rebuild working_dir from os.getcwd().
# NOTE(review): the find() below looks like it expects working_dir to be a directory
# name that occurs inside the cwd path, not '../' — confirm before re-enabling.
# dir_len = len(working_dir)

# path = os.getcwd().replace('\\', '/')
# index = path.find(working_dir)
# working_dir = path[:index + dir_len + 1]
# working_dir

In [None]:
# Annotation file that is handed to the COCO loader further down.
TEST_PATH = os.path.join(working_dir, 'data/coco/test.json')
# Directory that is globbed for per-model '*.csv' files and that also receives
# the ensembled 'submission.csv' at the end of the notebook.
SUBMISSION_PATH = os.path.join(working_dir, 'submissions')

In [None]:
# Collect the per-model submission CSVs that will be ensembled.
# - Sorted, because glob returns matches in arbitrary order and the model order
#   should be reproducible from run to run.
# - 'submission.csv' is skipped: the last cell of this notebook writes the
#   ensembled result under that name into this same directory, so without the
#   filter a re-run would fuse the previous ensemble output as if it were one
#   more model.
submission_files = sorted(
    path
    for path in glob.glob(os.path.join(SUBMISSION_PATH, '*.csv'))
    if os.path.basename(path) != 'submission.csv'
)
submission_files

In [None]:
# Fail fast with a readable message: with no input files the notebook would
# otherwise only break later, with an IndexError on submission_dfs[0].
if not submission_files:
    raise FileNotFoundError(f"No submission CSV files found in {SUBMISSION_PATH!r}")

# One DataFrame per model, in the same order as submission_files.
submission_dfs = [pd.read_csv(path) for path in submission_files]

In [None]:
# Load the test-set annotation file through the COCO API.
# `annotation` stays bound to the path that was loaded.
annotation = TEST_PATH
coco = COCO(annotation_file=annotation)

In [None]:
# Bare file name (directory part stripped) of every image record,
# in the order returned by coco.getImgIds().
file_names = [
    os.path.basename(img_record['file_name'])
    for img_record in coco.loadImgs(coco.getImgIds())
]

In [None]:
# Coordinate columns, chosen by substring match on the first submission's header.
# The same lists are used to index every submission DataFrame in the fusion loop.
# NOTE(review): any column whose name merely contains 'x' / 'y' is picked up too.
_first_columns = list(submission_dfs[0].columns)
X_COLS = [name for name in _first_columns if 'x' in name]
Y_COLS = [name for name in _first_columns if 'y' in name]

In [None]:
# IoU threshold handed to weighted_boxes_fusion as `iou_thr`.
IOU_THRESHOLD = 0.85

# Row-aligned accumulators: one entry per fused box, across all images.
final_file_names = []
final_class_ids = []
final_confidences = []
final_x_mins = []
final_y_mins = []
final_x_maxes = []
final_y_maxes = []

# Index every model's predictions by file name once, instead of re-scanning the
# whole DataFrame with a boolean mask for each image.
preds_by_model = [
    {name: rows for name, rows in submission_df.groupby('file_name')}
    for submission_df in submission_dfs
]

# Iterate over the image records themselves. Looking an image up by its loop
# position (coco.loadImgs(i)) is only right when the ids are exactly 0..N-1 in
# getImgIds() order; otherwise it raises KeyError or picks up another image's
# width/height for the normalisation below.
for img_info in tqdm(coco.loadImgs(coco.getImgIds())):
    file_name = os.path.basename(img_info['file_name'])
    width, height = img_info['width'], img_info['height']

    # Per-image inputs for the fusion call: one entry per contributing model.
    bboxes = []
    scores = []
    labels = []

    for preds_by_file in preds_by_model:
        model_pred_df = preds_by_file.get(file_name)

        # This model has no rows for the image: it is left out of the fusion call.
        if model_pred_df is None:
            continue

        # bboxes: extent of the coordinate columns, divided by the image size
        # so every box is expressed as a fraction of width / height.
        model_bboxes = np.column_stack(
            (
                model_pred_df[X_COLS].min(axis=1).to_numpy() / width,
                model_pred_df[Y_COLS].min(axis=1).to_numpy() / height,
                model_pred_df[X_COLS].max(axis=1).to_numpy() / width,
                model_pred_df[Y_COLS].max(axis=1).to_numpy() / height,
            )
        )
        bboxes.append(model_bboxes)

        # scores
        scores.append(model_pred_df['confidence'].tolist())

        # labels
        labels.append(model_pred_df['class_id'].tolist())

    if len(bboxes):
        ens_bboxes, ens_scores, ens_labels = weighted_boxes_fusion(
            bboxes,
            scores,
            labels,
            iou_thr=IOU_THRESHOLD,
        )

        for bbox, score, label in zip(ens_bboxes, ens_scores, ens_labels):
            x_min, y_min, x_max, y_max = bbox

            final_file_names.append(file_name)
            final_class_ids.append(label)
            final_confidences.append(score)
            # Undo the normalisation: back to pixel coordinates of this image.
            final_x_mins.append(x_min * width)
            final_y_mins.append(y_min * height)
            final_x_maxes.append(x_max * width)
            final_y_maxes.append(y_max * height)

In [None]:
# Only the header of the sample submission is needed, to reproduce its column
# order. Reading zero rows means a sample file that contains example rows no
# longer causes a length mismatch when the fused predictions are filled in.
sample_columns = pd.read_csv(
    os.path.join(working_dir, 'data/sample_submission.csv'),
    nrows=0,
).columns.tolist()

# Four corner points of each axis-aligned fused box:
# point1 = (x_min, y_min), point2 = (x_max, y_min),
# point3 = (x_max, y_max), point4 = (x_min, y_max).
ensembled_columns = {
    'file_name': final_file_names,
    'class_id': final_class_ids,
    'confidence': final_confidences,
    'point1_x': final_x_mins,
    'point1_y': final_y_mins,
    'point2_x': final_x_maxes,
    'point2_y': final_y_mins,
    'point3_x': final_x_maxes,
    'point3_y': final_y_maxes,
    'point4_x': final_x_mins,
    'point4_y': final_y_maxes,
}

# Sample columns first (in file order), then any produced column the sample lacks.
column_order = sample_columns + [
    name for name in ensembled_columns if name not in sample_columns
]

ensembled_results = (
    pd.DataFrame(ensembled_columns)
    .reindex(columns=column_order)
    .sort_values(by=['confidence'], ascending=False)
    # The fusion step hands labels back as floats; store them as integers.
    .astype({'class_id': 'int'})
)

In [None]:
# Write the ensembled predictions without the DataFrame index.
# NOTE: the target directory is the same one that is globbed for '*.csv' model
# inputs, so keep this file out of the inputs when the notebook is re-run.
output_path = os.path.join(SUBMISSION_PATH, 'submission.csv')
ensembled_results.to_csv(output_path, index=False)