In [None]:
import json
from datasetinsights.stats.calculation import (
    convert_coco_annotations_to_df,
    get_bbox_heatmap,
    get_bbox_per_img_dict,
    get_bbox_relative_size_list,
    get_visible_keypoints_dict,
)
from datasetinsights.stats.visualization.constants import (
    COCO_KEYPOINTS,
    COCO_SKELETON,
)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from matplotlib import collections as mc
from matplotlib import cm, colors
from seaborn.distributions import _DistributionPlotter as db

In [None]:
# Global plot styling: seaborn "ticks" theme with the "colorblind" palette.
sns.set_theme(
    style="ticks",
    font_scale=1.25,
    color_codes=True,
    palette="colorblind",
    font="sans-serif",
    context="paper",
    rc={"lines.linewidth": 1.5},
)

# Per-dataset colours, indexed by a dataset's position in the comparison
# lists. Each colormap is sampled at 20 evenly spaced points and two samples
# are picked from each. The samples get their own names instead of `colors`,
# which would overwrite the `matplotlib.colors` module imported at the top of
# the notebook.
twilight_samples = plt.get_cmap("twilight_shifted_r")(np.linspace(0, 1, 20))
bupu_samples = plt.get_cmap("BuPu")(np.linspace(0, 1, 20))
COLORS = [
    twilight_samples[5],
    twilight_samples[15],
    bupu_samples[15],
    bupu_samples[5],
]

In [None]:
# Paths to the two COCO annotation JSON files being compared; fill these in
# before running the rest of the notebook.
json_file_a = ""
json_file_b = ""

# Convert both annotation files, keeping the A-then-B order that the
# comparison plots use to label datasets 0 and 1.
coco_df_list = [
    convert_coco_annotations_to_df(annotation_path)
    for annotation_path in (json_file_a, json_file_b)
]
coco_a, coco_b = coco_df_list

In [None]:
# Preview the first rows of dataset B as a quick sanity check of the conversion.
coco_b.head()

In [None]:
def plot_bbox_heatmap(bbox_heatmap):
    """Show channel 0 of a bounding-box heatmap, divided by its maximum value.

    Args:
        bbox_heatmap: Heatmap indexed as ``[row, col, channel]`` that supports
            ``.max()`` and division (presumably the numpy array returned by
            ``get_bbox_heatmap`` -- TODO confirm). Only channel 0 is drawn.
    """
    peak = bbox_heatmap.max()
    # Dividing an all-zero heatmap by its maximum would fill it with NaNs;
    # leave it as-is so it renders as a flat image instead.
    bbox_heatmap_norm = bbox_heatmap / peak if peak != 0 else bbox_heatmap

    fig, ax = plt.subplots(dpi=100, figsize=(8, 8))
    pcm = ax.imshow(bbox_heatmap_norm[:, :, 0], cmap="plasma")
    # Attach the colour scale so the normalised values can be read off the
    # figure rather than guessed from the colormap.
    fig.colorbar(pcm, ax=ax)

    plt.show()

In [None]:
# Heatmap for dataset A only; pass coco_b instead to inspect the other dataset.
bbox_heatmap = get_bbox_heatmap(coco_a)
plot_bbox_heatmap(bbox_heatmap)

In [None]:
def process_bbox_size(coco_data_list):
    """Compute a normalised 50-bin histogram of bbox sizes for each dataset.

    Args:
        coco_data_list: Iterable of sized sequences of relative bounding-box
            sizes, one sequence per dataset. Values outside ``[0, 0.05]`` lie
            outside the histogram range and are not counted.

    Returns:
        A list with one ``(fractions, bin_edges, None)`` tuple per dataset.
        ``fractions[k]`` is the share of that dataset's boxes that fall in bin
        ``k`` and ``bin_edges`` holds the 51 bin edges. The tuple keeps the
        ``(n, bins, patches)`` layout of ``plt.hist`` so index-based callers
        keep working; the third slot is always ``None`` because nothing is
        drawn.
    """
    data_hist_list = []
    for coco_data in coco_data_list:
        num_boxes = len(coco_data)
        # Weight every box by 1/N so bin heights are fractions of the dataset.
        # An empty dataset gets no weights (an all-zero histogram) instead of
        # raising ZeroDivisionError.
        weights = np.full(num_boxes, 1 / num_boxes) if num_boxes else None
        # Bin directly with numpy: no throwaway figure is drawn on (and then
        # closed in) the pyplot state just to obtain the counts.
        fractions, bin_edges = np.histogram(
            coco_data, bins=50, range=(0, 0.05), weights=weights
        )
        data_hist_list.append((fractions.astype(float), bin_edges, None))
    return data_hist_list

def compare_bbox_size_distribution(coco_data_list):
    """Plot the relative bbox-size distribution of each dataset as a dashed line.

    Args:
        coco_data_list: Per-dataset sequences of relative bounding-box sizes,
            passed straight to ``process_bbox_size``. Each dataset is labelled
            in the legend by its index in this list.
    """
    data_hist_list = process_bbox_size(coco_data_list)

    fig, ax = plt.subplots(1, 1, dpi=100)
    fig.set_size_inches(16, 9)
    for i, data_hist in enumerate(data_hist_list):
        fractions, bin_edges = data_hist[0], data_hist[1]
        ax.plot(
            bin_edges[1:],  # one point per bin, placed at the bin's right edge
            fractions,
            marker="o",
            linestyle="dashed",
            label=i,
            # Wrap around so having more datasets than colours cannot raise.
            color=COLORS[i % len(COLORS)],
            linewidth=4,
            markeredgewidth=4,
        )

    ax.set_xlabel("Bounding Box relative size in an image", fontsize=32)
    ax.set_ylabel("Bounding Box Probability", fontsize=32)
    ax.yaxis.set_tick_params(labelsize=32)
    ax.xaxis.set_tick_params(labelsize=32)
    # One legend built from the plotted lines. A second, hand-assembled
    # legend would only be replaced by this call, so none is created.
    ax.legend(fontsize=32)

    fig.tight_layout()
    plt.show()

In [None]:
# Relative bbox sizes per dataset, in coco_df_list order. Stored under its own
# name so that running cells out of order cannot feed another cell's data
# (of a different kind) into this plot.
bbox_size_data = [get_bbox_relative_size_list(df) for df in coco_df_list]
compare_bbox_size_distribution(bbox_size_data)

In [None]:
def compare_keypoint_probability_in_bbox(coco_data_list):
    """Draw grouped bars of per-keypoint probabilities, one bar set per dataset.

    Args:
        coco_data_list: Non-empty list of dicts, one per dataset, mapping an
            underscore-separated keypoint name to the value plotted as its
            probability. Tick labels come from the first dict's keys and bars
            are matched to ticks by position, so every dict is assumed to
            hold the same keys in the same order -- TODO confirm against
            ``get_visible_keypoints_dict``.
    """
    # Turn "some_key_name" into "Some Key Name" for the tick labels.
    labels = [
        " ".join(word.capitalize() for word in key.split("_"))
        for key in coco_data_list[0].keys()
    ]

    x = 2 * np.arange(len(labels))  # the label locations
    width = 0.35  # the width of the bars
    num_datasets = len(coco_data_list)

    fig, ax = plt.subplots(dpi=100)
    fig.set_size_inches(16, 9)
    for i, coco_data in enumerate(coco_data_list):
        # Shift each dataset so the whole group of bars is centred on its tick.
        offset = (i - (num_datasets - 1) / 2) * width
        ax.bar(
            x + offset,
            # Materialise the dict view into a plain sequence of heights.
            list(coco_data.values()),
            width,
            label=i,
            # Wrap around so having more datasets than colours cannot raise.
            color=COLORS[i % len(COLORS)],
        )

    ax.set_ylabel('Probability of a keypoint in a bounding box', fontsize=24)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=24, fontweight="bold", rotation=60)
    ax.yaxis.set_tick_params(labelsize=24)
    # Anchor the legend above the axes so it does not cover the bars.
    ax.legend(fontsize=15, bbox_to_anchor=(0.20, 1.15))

    fig.tight_layout()
    plt.show()

In [None]:
# Per-dataset keypoint dicts, in coco_df_list order. Stored under its own name
# so that running cells out of order cannot feed another cell's data (of a
# different kind) into this plot.
keypoint_visibility_data = [
    get_visible_keypoints_dict(df.keypoints.values.tolist()) for df in coco_df_list
]
compare_keypoint_probability_in_bbox(keypoint_visibility_data)

In [None]:
def compare_bbox_num_distribution(coco_data_list):
    """Draw grouped bars of the boxes-per-image distribution for each dataset.

    Args:
        coco_data_list: List of dicts, one per dataset, mapping a number of
            bounding boxes per image to the value plotted as the probability
            of such images. Each dataset is labelled in the legend by its
            index in this list.
    """
    # One row per dataset, one column per box count, columns in sorted order.
    # A count that never occurs in a dataset is plotted as 0 rather than NaN.
    bbox_num_df = pd.DataFrame(list(coco_data_list))
    bbox_num_df = bbox_num_df.reindex(sorted(bbox_num_df.columns), axis=1).fillna(0)

    labels = bbox_num_df.columns.values.tolist()

    x = 2 * np.arange(len(labels))  # the label locations
    width = 0.35  # the width of the bars
    num_datasets = len(bbox_num_df)

    fig, ax = plt.subplots(dpi=100)
    fig.set_size_inches(18, 10)
    for i in range(num_datasets):
        # Shift each dataset so the whole group of bars is centred on its tick.
        offset = (i - (num_datasets - 1) / 2) * width
        ax.bar(
            x + offset,
            bbox_num_df.iloc[i].values.tolist(),
            width,
            label=i,
            # Wrap around so having more datasets than colours cannot raise.
            color=COLORS[i % len(COLORS)],
        )

    ax.set_xlabel('Number of bounding boxes in a single image', fontsize=32)
    ax.set_ylabel('Probability of images in dataset', fontsize=32)
    ax.yaxis.set_tick_params(labelsize=32)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, fontsize=32)
    ax.legend(fontsize=32)

    fig.tight_layout()
    plt.show()
    

In [None]:
# Boxes-per-image dicts per dataset, in coco_df_list order. Stored under its
# own name so that running cells out of order cannot feed another cell's data
# (of a different kind) into this plot.
bbox_num_data = [get_bbox_per_img_dict(df) for df in coco_df_list]
compare_bbox_num_distribution(bbox_num_data)