In [1]:
import os
import glob
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
from pycocotools.coco import COCO
from ensemble_boxes import weighted_boxes_fusion

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
working_dir = 'synthesis-car-od'
dir_len = len(working_dir)

path = os.getcwd().replace('\\', '/')
index = path.find(working_dir)
working_dir = path[:index + dir_len + 1]
working_dir

'/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/'

In [3]:
TEST_PATH = os.path.join(working_dir, 'data/coco/test.json')
SUBMISSION_PATH = os.path.join(working_dir, 'examples/ensemble')

In [4]:
submission_files = glob.glob(os.path.join(SUBMISSION_PATH, '*.csv'))
submission_files

['/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/examples/ensemble/9300_swinL_cascade_train_all_epoch_10.csv',
 '/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/examples/ensemble/9027_swinB_cascade_train_all_epoch_10.csv']

In [5]:
submission_dfs = [pd.read_csv(file) for file in submission_files]

In [6]:
annotation = TEST_PATH
coco = COCO(annotation)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [7]:
file_names = list(map(lambda x : os.path.basename(x['file_name']), coco.loadImgs(coco.getImgIds())))

In [8]:
X_COLS = [x for x in submission_dfs[0].columns if 'x' in x]
Y_COLS = [y for y in submission_dfs[0].columns if 'y' in y]

In [9]:
x_mins = submission_dfs[0][X_COLS].min(axis=1).to_numpy().reshape(-1, 1)
y_mins = submission_dfs[0][Y_COLS].min(axis=1).to_numpy().reshape(-1, 1)
x_maxes = submission_dfs[0][X_COLS].max(axis=1).to_numpy().reshape(-1, 1)
y_maxes = submission_dfs[0][Y_COLS].max(axis=1).to_numpy().reshape(-1, 1)

In [10]:
IOU_THRESHOLD = 0.7

final_file_names = []
final_class_ids = []
final_confidences = []
final_x_mins = []
final_y_mins = []
final_x_maxes = []
final_y_maxes = []

for i, file_name in enumerate(tqdm(file_names)):
    bboxes = []
    scores = []
    labels = []    
    img_info = coco.loadImgs(i)[0]

    for submission_df in submission_dfs:
        model_pred_df = submission_df.loc[submission_df['file_name'] == file_name]

        if len(model_pred_df) == 0:
            continue

        # bboxes
        x_mins = model_pred_df[X_COLS].min(axis=1).to_numpy().reshape(-1, 1) / img_info['width']
        y_mins = model_pred_df[Y_COLS].min(axis=1).to_numpy().reshape(-1, 1) / img_info['height']
        x_maxes = model_pred_df[X_COLS].max(axis=1).to_numpy().reshape(-1, 1) / img_info['width']
        y_maxes = model_pred_df[Y_COLS].max(axis=1).to_numpy().reshape(-1, 1) / img_info['height']

        model_bboxes = np.concatenate(
            (
                x_mins,
                y_mins,
                x_maxes,
                y_maxes,
            ),
            axis=1
        )
        bboxes.append(model_bboxes)
        
        # scores
        model_scores = model_pred_df['confidence'].tolist()
        scores.append(model_scores)

        # labels
        model_labels = model_pred_df['class_id'].tolist()
        labels.append(model_labels)

    if len(bboxes):
        ens_bboxes, ens_scores, ens_labels = weighted_boxes_fusion(
            bboxes, 
            scores, 
            labels, 
            iou_thr=IOU_THRESHOLD,
        )

        for bbox, score, label in zip(ens_bboxes, ens_scores, ens_labels):
            x_min, y_min, x_max, y_max = bbox

            final_file_names.append(file_name)
            final_class_ids.append(label)
            final_confidences.append(score)
            final_x_mins.append(x_min * img_info['width'])
            final_y_mins.append(y_min * img_info['height'])
            final_x_maxes.append(x_max * img_info['width'])
            final_y_maxes.append(y_max * img_info['height'])

100%|██████████| 3400/3400 [01:38<00:00, 34.56it/s]


In [11]:
ensembled_results = pd.read_csv(os.path.join(working_dir, 'data/sample_submission.csv'))
ensembled_results['file_name'] = final_file_names
ensembled_results['class_id'] = final_class_ids
ensembled_results['confidence'] = final_confidences
ensembled_results['point1_x'] = final_x_mins
ensembled_results['point1_y'] = final_y_mins
ensembled_results['point2_x'] = final_x_maxes
ensembled_results['point2_y'] = final_y_mins
ensembled_results['point3_x'] = final_x_maxes
ensembled_results['point3_y'] = final_y_maxes
ensembled_results['point4_x'] = final_x_mins
ensembled_results['point4_y'] = final_y_maxes

ensembled_results.sort_values(by=['confidence'], ascending=False, inplace=True)
ensembled_results['class_id'] = ensembled_results['class_id'].astype('int')

Unnamed: 0,file_name,class_id,confidence,point1_x,point1_y,point2_x,point2_y,point3_x,point3_y,point4_x,point4_y
256072,123112633.png,13,9.999983e-01,1182.576714,278.442489,1447.655411,278.442489,1447.655411,523.666989,1182.576714,523.666989
221143,115649679.png,6,9.999972e-01,456.573000,533.210996,743.528996,533.210996,743.528996,798.544500,456.573000,798.544500
74583,093209660.png,13,9.999965e-01,818.163815,300.172598,1070.555420,300.172598,1070.555420,552.816861,818.163815,552.816861
310936,132929363.png,13,9.999962e-01,1230.807152,307.702793,1508.352242,307.702793,1508.352242,558.436432,1230.807152,558.436432
216120,115308646.png,13,9.999962e-01,512.879333,743.259215,794.095573,743.259215,794.095573,1018.178988,512.879333,1018.178988
...,...,...,...,...,...,...,...,...,...,...,...
326727,134346207.png,5,5.792714e-17,388.875370,1079.285800,426.941860,1079.285800,426.941860,1079.801600,388.875370,1079.801600
326728,134346207.png,21,5.634831e-17,17.268740,0.000000,158.907820,0.000000,158.907820,166.689800,17.268740,166.689800
326729,134346207.png,6,5.623729e-17,1514.514400,31.973350,1543.287700,31.973350,1543.287700,59.762283,1514.514400,59.762283
326730,134346207.png,8,5.455032e-17,388.875370,1079.285800,426.941860,1079.285800,426.941860,1079.801600,388.875370,1079.801600


In [12]:
os.makedirs(os.path.join(SUBMISSION_PATH, 'submit/'), exist_ok=True)
ensembled_results.to_csv(os.path.join(SUBMISSION_PATH, f'submit/WBF_ensemble_{IOU_THRESHOLD}.csv'), index=False)