In [31]:
import warnings
warnings.filterwarnings(action='ignore')

In [15]:
import os
import glob
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
from pycocotools.coco import COCO
from ensemble_boxes import weighted_boxes_fusion

In [16]:
working_dir = 'synthesis-car-od'
dir_len = len(working_dir)

path = os.getcwd().replace('\\', '/')
index = path.find(working_dir)
working_dir = path[:index + dir_len + 1]
working_dir

'/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/'

In [17]:
TEST_PATH = os.path.join(working_dir, 'data/coco/test.json')
SUBMISSION_PATH = os.path.join(working_dir, 'examples/ensemble')

In [18]:
os.listdir(SUBMISSION_PATH)

['requirements.txt',
 'submit',
 'bc_WBF_ensemble.ipynb',
 'WBF_ensemble.ipynb',
 'ensemble.ipynb',
 '9254_swinL_cascade_train_all_epoch_20.csv',
 '9027_swinB_cascade_train_all_epoch_10.csv']

In [19]:
submission_files = glob.glob(os.path.join(SUBMISSION_PATH, '*.csv'))
submission_files

['/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/examples/ensemble/9254_swinL_cascade_train_all_epoch_20.csv',
 '/Users/a16/Desktop/JJ/self_study/projects/synthesis-car-od/examples/ensemble/9027_swinB_cascade_train_all_epoch_10.csv']

In [20]:
dfs = [pd.read_csv(file) for file in submission_files]
df_concat = pd.concat([*dfs])

In [21]:
len(df_concat)

680000

In [22]:
annotation = TEST_PATH
coco = COCO(annotation)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [23]:
file_names = list(map(lambda x : os.path.basename(x['file_name']), coco.loadImgs(coco.getImgIds())))
print(len(file_names))
file_names[:5]

3400


['064442001.png',
 '064507368.png',
 '065131036.png',
 '065147868.png',
 '065203472.png']

In [24]:
coco.loadImgs(coco.getImgIds(np.arange(5)))

[{'id': 0,
  'width': 1920,
  'height': 1080,
  'file_name': 'test/064442001.png',
  'date_captured': '2023-05-07 06:16:48'},
 {'id': 1,
  'width': 1920,
  'height': 1080,
  'file_name': 'test/064507368.png',
  'date_captured': '2023-05-07 06:16:48'},
 {'id': 2,
  'width': 1920,
  'height': 1080,
  'file_name': 'test/065131036.png',
  'date_captured': '2023-05-07 06:16:18'},
 {'id': 3,
  'width': 1920,
  'height': 1080,
  'file_name': 'test/065147868.png',
  'date_captured': '2023-05-07 06:16:48'},
 {'id': 4,
  'width': 1920,
  'height': 1080,
  'file_name': 'test/065203472.png',
  'date_captured': '2023-05-07 06:16:30'}]

In [25]:
ensembled_results = pd.read_csv(os.path.join(working_dir, 'data/sample_submission.csv'))
ensembled_results

Unnamed: 0,file_name,class_id,confidence,point1_x,point1_y,point2_x,point2_y,point3_x,point3_y,point4_x,point4_y


In [26]:
df_concat.columns

Index(['file_name', 'class_id', 'confidence', 'point1_x', 'point1_y',
       'point2_x', 'point2_y', 'point3_x', 'point3_y', 'point4_x', 'point4_y'],
      dtype='object')

In [27]:
PASCAL_VOC = ['x_min', 'y_min', 'x_max', 'y_max']

X_COLS = [x for x in df_concat.columns if 'x' in x]
Y_COLS = [y for y in df_concat.columns if 'y' in y]
print(X_COLS)
print(Y_COLS)

['point1_x', 'point2_x', 'point3_x', 'point4_x']
['point1_y', 'point2_y', 'point3_y', 'point4_y']


In [36]:
all_file_names = []
all_bboxes = []
all_scores = []
all_labels = []

for id, file_name in enumerate(tqdm(file_names)):
    bboxes = []
    scores = []
    labels = []

    image_info = coco.loadImgs(id)[0]

    pred_df = df_concat.loc[df_concat['file_name'] == file_name]

    if len(pred_df) == 0:
        continue
    
    # bboxes
    pred_df['x_min'] = pred_df[X_COLS].min(axis=1).tolist()
    pred_df['y_min'] = pred_df[Y_COLS].min(axis=1).tolist()
    pred_df['x_max'] = pred_df[X_COLS].max(axis=1).tolist()
    pred_df['y_max'] = pred_df[Y_COLS].max(axis=1).tolist()

    bboxes.append(pred_df[PASCAL_VOC].values.tolist())

    # scores
    scores.append(pred_df['confidence'].tolist())

    # labels
    labels.append(pred_df['class_id'].tolist())

    if bboxes:
        ens_bboxes, ens_scores, ens_labels = weighted_boxes_fusion(
            bboxes,
            scores,
            labels,
            iou_thr=0.65,
        )

        # file_name : test 파일 이름
        # class_id : 검출한 객체 id
        # confidence : 검출한 객체의 정확도(0~1)
        # point1_x : 검출한 객체 좌상단 x좌표 == x_min
        # point1_y : 검출한 객체 좌상단 y좌표 == y_min
        # point2_x : 검출한 객체 우상단 x좌표 == x_max
        # point2_y : 검출한 객체 우상단 y좌표 == y_min
        # point3_x : 검출한 객체 우하단 x좌표 == x_max
        # point3_y : 검출한 객체 우하단 y좌표 == y_max
        # point4_x : 검출한 객체 좌하단 x좌표 == x_min
        # point4_y : 검출한 객체 좌하단 y좌표 == y_max

        if len(ens_bboxes) != 0:

            all_file_names.extend([file_name] * len(ens_bboxes))
            all_bboxes.extend(ens_bboxes)
            all_labels.extend(ens_labels)
            all_scores.extend(ens_scores)

            # x_mins = list(map(lambda x : x[0], ens_bboxes))
            # y_mins = list(map(lambda x : x[1], ens_bboxes))
            # x_maxes = list(map(lambda x : x[2], ens_bboxes))
            # y_maxs = list(map(lambda x : x[3], ens_bboxes))

            # ensembled_results['file_name'] = file_name
            # ensembled_results['class_id'] = ens_labels
            # ensembled_results['confidence'] = ens_scores
            # ensembled_results['point1_x'] = x_mins
            # ensembled_results['point1_y'] = y_mins
            # ensembled_results['point2_x'] = x_maxes
            # ensembled_results['point2_y'] = y_mins
            # ensembled_results['point3_x'] = x_maxes
            # ensembled_results['point3_y'] = y_maxs
            # ensembled_results['point4_x'] = x_mins
            # ensembled_results['point4_y'] = y_maxs

100%|██████████| 3400/3400 [01:29<00:00, 38.14it/s]


In [40]:
all_scores

[3.3425527e-13,
 5.9515855e-14,
 3.0212642e-14,
 2.766926e-14,
 3.8364093e-13,
 2.744374e-13,
 1.1740088e-13,
 7.349591e-14,
 1.7085207e-14,
 1.0572269e-14,
 8.2510676e-10,
 6.1133737e-10,
 5.2946314e-10,
 8.251914e-11,
 3.080376e-11,
 2.0524582e-11,
 6.331498e-12,
 5.7690523e-12,
 2.8902577e-12,
 2.3870849e-12,
 1.9054747e-12,
 1.8392072e-12,
 1.24546e-12,
 1.1822992e-12,
 8.014605e-13,
 8.0142533e-13,
 4.4370903e-13,
 4.178211596150838e-13,
 4.1386247e-13,
 3.201682e-13,
 1.6737158e-13,
 1.3459591e-13,
 1.06630816e-13,
 9.5722814e-14,
 9.364908e-14,
 8.532638e-14,
 5.3756604e-14,
 1.5878886e-13,
 4.3104358e-14,
 3.4669995e-12,
 6.6862084e-12,
 3.0781658e-12,
 2.309643e-12,
 2.48123e-11,
 1.4227133e-11,
 1.1585013e-11,
 1.3993017e-12,
 1.2070928e-12,
 1.1452266e-12,
 8.8157964e-13,
 8.376676e-13,
 6.1967516e-13,
 4.9793053e-13,
 3.8733705e-13,
 3.4782181e-13,
 2.1129158e-13,
 1.949069e-13,
 1.4402872e-13,
 5.0448012e-08,
 4.3991555e-08,
 3.653642e-08,
 6.4352013e-09,
 5.770568e-09,
 3

In [None]:
os.makedirs(os.path.join(SUBMISSION_PATH, 'submit/'), exist_ok=True)
ensembled_results.to_csv(os.path.join(SUBMISSION_PATH, 'submit/sample_WBF_ensemble.csv'), index=False)