In [23]:
import os
import json
import warnings 
warnings.filterwarnings('ignore')
import cv2
import numpy as np
import pandas as pd
from pycocotools.coco import COCO
# Visualization libraries
import matplotlib.pyplot as plt
# sns.set() applies seaborn's default theme; the rcParams line below then
# switches the axes grid back off for subsequent figures.
import seaborn as sns; sns.set()
plt.rcParams['axes.grid'] = False

In [24]:
%matplotlib inline

dataset_path  = '/opt/ml/segmentation/input/data'
anns_file_path = dataset_path + '/' + 'train_all.json'

# Read annotations
with open(anns_file_path, 'r') as f:
    dataset = json.loads(f.read())

categories = dataset['categories']

anns_full = dataset['annotations']

imgs = dataset['images']
nr_cats = len(categories)+1
nr_annotations = len(anns_full)
nr_images = len(imgs)

# Load categories and super categories.
# Position 0 of `super_cat_names` is the background entry, so real
# supercategories are numbered from 1 in `super_cat_ids`.
cat_names = [cat_it['name'] for cat_it in categories]
super_cat_names = ['Background']
super_cat_ids = {}
for cat_it in categories:
    super_cat_name = cat_it['supercategory']
    # Register each supercategory once, the first time it is seen. A membership
    # test (rather than a comparison with the previous entry) keeps names and
    # ids unique even when categories of the same supercategory are not listed
    # next to each other.
    if super_cat_name not in super_cat_ids:
        super_cat_ids[super_cat_name] = len(super_cat_names)
        super_cat_names.append(super_cat_name)

# Count includes the background entry.
nr_super_cats = len(super_cat_names)

print('Number of super categories:', nr_super_cats)
print('Number of categories:', nr_cats)
print('Number of annotations:', nr_annotations)
print('Number of images:', nr_images)

Number of super categories: 11
Number of categories: 11
Number of annotations: 26240
Number of images: 3272


In [25]:
# Count annotations and accumulate annotated area, indexed by category id.
# NOTE: the area histogram is an integer array, so a fractional `area` value
# is truncated when it is added.
cat_histogram_area = np.zeros(nr_cats, dtype=int)
cat_histogram_num = np.zeros(nr_cats, dtype=int)

for ann in anns_full:
    cat_histogram_num[ann['category_id']] += 1
    cat_histogram_area[ann['category_id']] += ann['area']

# Background (index 0): every pixel not covered by an annotation, and one
# "occurrence" per image. Both are derived from the loaded image records
# instead of hard-coded dataset constants, so they stay correct if the number
# or size of the images changes.
total_pixels = sum(img['width'] * img['height'] for img in imgs)
cat_histogram_area[0] = total_pixels - np.sum(cat_histogram_area)
cat_histogram_num[0] = nr_images

# Convert to DataFrame
# NOTE(review): rows are labelled with `super_cat_names`; this presumes exactly
# one supercategory per category id, in id order -- confirm for this dataset.
df = pd.DataFrame({'Categories': super_cat_names, 'Area': cat_histogram_area, 'Number of annotations': cat_histogram_num})

In [26]:
# Sum of the per-category areas (background included at index 0).
total_area = cat_histogram_area.sum()

In [27]:
# Inverse-frequency weights: total / per-class amount, so classes with less
# area or fewer annotations receive larger values.
# The weights are computed from the raw histograms rather than from the
# current contents of `df`, so re-running this cell reproduces the same table
# instead of inverting the already-inverted columns.
# NOTE: a class with zero area/annotations yields an infinite weight here.
df['Area'] = total_area / cat_histogram_area
df['Number of annotations'] = np.sum(cat_histogram_num) / cat_histogram_num
df

Unnamed: 0,Categories,Area,Number of annotations
0,Background,1.442613,9.01956
1,General trash,44.250517,10.608196
2,Paper,10.824152,3.169584
3,Paper pack,135.594632,44.783005
4,Metal,109.535488,52.512456
5,Glass,125.836443,48.380328
6,Plastic,34.887742,9.550809
7,Styrofoam,65.938979,21.974684
8,Plastic bag,8.482837,3.861311
9,Battery,2262.371497,468.444444


In [28]:
# Normalize each weight column so that it sums to 1.
# The column total is computed once per column (vectorized division) instead
# of being recomputed for every row inside a per-element lambda.
df['Area'] = df['Area'] / df['Area'].sum()
df['Number of annotations'] = df['Number of annotations'] / df['Number of annotations'].sum()
df


Unnamed: 0,Categories,Area,Number of annotations
0,Background,0.000482,0.01075
1,General trash,0.014796,0.012643
2,Paper,0.003619,0.003778
3,Paper pack,0.045339,0.053374
4,Metal,0.036626,0.062586
5,Glass,0.042076,0.057662
6,Plastic,0.011665,0.011383
7,Styrofoam,0.022048,0.02619
8,Plastic bag,0.002836,0.004602
9,Battery,0.756474,0.558311


In [29]:
# Smooth the normalized weights by blending them with a uniform distribution
# over all classes:
#     smoothed = 0.3 * weight + 0.7 / nr_cats
# The uniform share is divided by `nr_cats` (not a literal class count) so the
# formula follows the dataset, and each column is computed as its own
# vectorized expression rather than through a shared scratch buffer.
weight_share = 0.3
uniform_share = 0.7
df['Smooth weight Area'] = weight_share * df['Area'] + uniform_share / nr_cats
df['Smooth weight Num'] = weight_share * df['Number of annotations'] + uniform_share / nr_cats

In [30]:
df

Unnamed: 0,Categories,Area,Number of annotations,Smooth weight Area,Smooth weight Num
0,Background,0.000482,0.01075,0.063781,0.066861
1,General trash,0.014796,0.012643,0.068075,0.067429
2,Paper,0.003619,0.003778,0.064722,0.06477
3,Paper pack,0.045339,0.053374,0.077238,0.079649
4,Metal,0.036626,0.062586,0.074624,0.082412
5,Glass,0.042076,0.057662,0.076259,0.080935
6,Plastic,0.011665,0.011383,0.067136,0.067051
7,Styrofoam,0.022048,0.02619,0.070251,0.071493
8,Plastic bag,0.002836,0.004602,0.064487,0.065017
9,Battery,0.756474,0.558311,0.290578,0.23113
