# Load packages

In [None]:
import os, glob, pandas as pd, json, numpy as np
import matplotlib.pyplot as plt

from utils.eval_utils import (
    polygonann2objdetect,
    coordinates_to_masks,
    get_oid_dict_gt,
    make_gt_dict_from_dataframe,
    subm_to_pred_df,
    get_oid_dict_pred,
    get_oid_metrics
)

# Load Ground Truth Data
Description: Build the ground-truth data in dictionary format.

In [None]:
# Directory configuration: all inputs live under ./data
data_dir = './data'
data_dir_eval = os.path.join(data_dir, 'k3_datasets')
data_dir_kaggle = os.path.join(data_dir, 'kaggle')
# Folder holding the winning teams' final submission CSVs.
# NOTE: this used to be assigned twice in a row; only the last value was ever
# in effect, so the dead first assignment was dropped and this one kept as-is.
submission_dir = os.path.join(data_dir_eval, 'kaggle-3-competition-dataset/winning_teams_final_submissions/')

# Load the host solution file (ground truth) as a dataframe
df_solution = pd.read_csv(os.path.join(data_dir_eval, 'kaggle-3-competition-dataset', 'host_solution_file.csv'))
# Convert the solution dataframe to the format expected by get_oid_dict_gt
df_gt_obj = make_gt_dict_from_dataframe(df_solution)
# Build the ground-truth dictionaries (the cells below treat the keys as image ids)
gt_dicts = get_oid_dict_gt(df_gt_obj)
print('GT dictionary created')

In [None]:
# Report how many images the ground truth covers (one gt_dicts entry per image)
print("# of images:", len(gt_dicts))

In [None]:
# Peek at the first 10 keys of gt_dicts, i.e. the first 10 image ids
print("first 10 image_ids: ", [*gt_dicts][:10])

In [None]:
# Use the first image id as a running example and list the fields stored for it
sample_id = [*gt_dicts][0]
print(
    "Components of gt_dicts for each sample: ",
    gt_dicts[sample_id].keys(),
)

In [None]:
# groundtruth_boxes of the sample image - shape is (n_objects, bbox)
sample_boxes = gt_dicts[sample_id]['groundtruth_boxes']
sample_boxes.shape

In [None]:
# groundtruth_classes of the sample image - every object is a blood_vessel,
# so the set of distinct labels is expected to hold a single entry
sample_classes = gt_dicts[sample_id]['groundtruth_classes']
len(sample_classes), set(sample_classes)

In [None]:
# groundtruth_group_of and groundtruth_image_classes are dummy fields:
# this dataset has no hierarchical class structure.

# groundtruth_instance_masks has shape (n_objects, img_height, img_width),
# i.e. one mask per instance.
sample_instance_mask = gt_dicts[sample_id]['groundtruth_instance_masks']
print(sample_instance_mask.shape)
# Prepend an all-zero layer as "background" so that index 0 of the stack is
# not an instance (presumably masks are 0 outside their instance, so pixels
# covered by no instance resolve to 0 in the argmax - confirm).
background = np.zeros((1,) + tuple(sample_instance_mask.shape[1:]))
sample_instance_mask_with_background = np.concatenate(
    (background, sample_instance_mask), axis=0
)
# Collapse the stack along the instance axis into one index image and show it
plt.imshow(sample_instance_mask_with_background.argmax(axis=0))

# Calculate mAP


In [None]:
# Generate IoU thresholds for COCO mAP: 0.50 to 0.95 in steps of 0.05.
# Our target IoU (0.6) is already among them.
list_iou = np.arange(0.5, 1, 0.05)
# Result container: metrics_by_iou[threshold][submission file name] -> metrics
metrics_by_iou = {
    threshold: {}
    for threshold in list_iou
}
# Collect the submission files; glob returns them in arbitrary order, so sort
# to make the processing (and dict insertion) order reproducible.
list_files_submission = sorted(glob.glob(os.path.join(submission_dir, '*.csv')))
# Ids that exist in the ground truth; predictions for other ids are ignored
list_valid_ids = sorted(df_solution['id'].values)
# For each submission file, calculate the mAP
for file_submission in list_files_submission:
    print(f'Calculating mAPs using {file_submission}')
    # The file name stands in for the team name; it does not depend on the
    # threshold, so compute it once per file instead of once per threshold.
    filename = os.path.basename(file_submission)
    # Load submission file
    df_submission = pd.read_csv(file_submission)
    # Keep only rows for valid images. isin is vectorised, replacing a
    # per-row linear scan of list_valid_ids.
    df_submission = df_submission.loc[df_submission['id'].isin(list_valid_ids)]
    # Convert submission to prediction dataframe
    df_pred = subm_to_pred_df(df_submission)
    # Convert prediction dataframe to dictionary format
    pred_dicts = get_oid_dict_pred(df_pred)
    print('Prediction dictionary loaded... Calculating mAPs')
    # Get performance for every IoU threshold.
    # NOTE(review): the loop below assumes dict_metrics is keyed by the same
    # threshold values as list_iou - confirm against get_oid_metrics.
    dict_metrics = get_oid_metrics(gt_dicts, pred_dicts, list_iou)
    for threshold, metrics in dict_metrics.items():
        metrics_by_iou[threshold][filename] = metrics

# Patch-wise mAP