In [1]:
import pandas as pd
import numpy as np
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.patches as patches
import numpy as np
import seaborn as sns
import os

In [2]:
# Root folder of the dataset and the split sub-folders scanned for annotations.
data_dir = "/opt/ml/yolo_data"
sub_data_dir = [
    "test",
    "train",
    "val",
]

In [3]:
# Class id -> human-readable class name.
labels = {
    0: 'Single Helmet',
    1: 'Single No Helmet',
    2: 'Shared Helmet',
    3: 'Shared No Helmet',
}

# Class id -> colour name, keyed by the same ids as `labels`.
label_colors = {
    0: 'black',
    1: 'darkcyan',
    2: 'sienna',
    3: 'gray',
}

In [4]:
# Walk the "temp" folder of every split and collect one record per annotation file.
# Each record lists, in order: file path, image height, image width, image area,
# number of boxes, then per-box dicts (keyed by the box index within the file)
# for coordinates, area, centre, class, tags, height and width.
# Box geometry is divided by the image size, so it is expressed as a fraction
# of the image width/height.
annotation=[]
for sub_path in sub_data_dir:
    ann_dir_path = os.path.join(data_dir, sub_path, "temp")
    for (dirpath, dirnames, filenames) in os.walk(ann_dir_path):
        for filename in filenames:
            ann_path = os.path.join(dirpath, filename)
            # The context manager closes the handle even if json.load raises;
            # a bare open() here would leak one file descriptor per file.
            with open(ann_path) as f:
                data = json.load(f)
            img_height = data['size']['height']
            img_width = data['size']['width']
            img_area = img_height*img_width
            box_num = len(data['objects'])
            bbox_coordinate = dict()
            bbox_area = dict()
            bbox_center = dict()
            bbox_class = dict()
            bbox_tags = dict()
            bbox_width = dict()
            bbox_height = dict()
            for idx, obj in enumerate(data['objects']):
                # Known tag names default to None; any tag present on the object
                # overwrites (or adds) its entry.
                tags = {
                    "orientation": None,
                    "helmet": None,
                    "alone": None,
                    "etc": None
                }
                for tag in obj['tags']:
                    tags[tag['name']] = tag['value']
                bbox_tags[idx] = tags
                # presumably exterior[0] is the top-left corner and exterior[1]
                # the bottom-right one — TODO confirm; swapped corners would give
                # negative widths/heights.
                exterior = obj['points']['exterior']
                x_min = exterior[0][0] / img_width
                y_min = exterior[0][1] / img_height
                x_max = exterior[1][0] / img_width
                y_max = exterior[1][1] / img_height
                bbox_width[idx] = x_max-x_min
                bbox_height[idx] = y_max-y_min
                bbox_coordinate[idx] = x_min,y_min,x_max,y_max
                bbox_area[idx] = bbox_width[idx]*bbox_height[idx]
                bbox_center[idx] = (x_min+x_max)/2,(y_min+y_max)/2
                bbox_class[idx] = int(obj['classTitle'])
            annotation.append([
                ann_path,
                img_height,
                img_width,
                img_area,
                box_num,
                bbox_coordinate,
                bbox_area,
                bbox_center,
                bbox_class,
                bbox_tags,
                bbox_height,
                bbox_width])
                
            

In [5]:
# Column headers, in the same order as the fields of each annotation record.
annotation_columns = [
    'Image dir',
    'Image Height',
    'Image Width',
    'Image Area',
    'Box Num',
    'Box Coordinate',
    'Box Area',
    'Box Center',
    'Box Class',
    'Box Tag',
    'Box Height',
    'Box Width',
]
# One row per annotation file.
df = pd.DataFrame(annotation, columns=annotation_columns)

In [6]:
from collections import Counter

# Count boxes per class id over all images; keys are the class ids as strings.
cat_num = Counter()
for box_classes in df['Box Class']:
    for label in box_classes.values():
        # Increment by key. Counter.update(str(label)) would iterate the string
        # and count each character, splitting a class id such as 10 into
        # separate counts for '1' and '0'.
        cat_num[str(label)] += 1


In [7]:
# Turn the class counter into a one-column table indexed by class id,
# with the rows ordered by that index.
df_cat_num = pd.DataFrame.from_dict(
    cat_num,
    orient='index',
    columns=['Number of box'],
)
df_cat_num = df_cat_num.sort_index()

In [8]:
# Display the per-class box counts.
df_cat_num

Unnamed: 0,Number of box
0,569
1,727
2,17
3,128


In [9]:
# Sum of the 'Number of box' column (the per-class box counts at this point).
total_number = df_cat_num['Number of box'].sum()

In [10]:
# Display the total number of boxes over all classes.
total_number

1441

In [11]:
# Replace each class count with total_number / count (inverse class frequency).
# NOTE(review): this overwrites 'Number of box' in place, so the column no longer
# holds counts afterwards and re-running the cell changes the values again.
inverse_frequency = df_cat_num['Number of box'].rdiv(total_number)
df_cat_num['Number of box'] = inverse_frequency
df_cat_num

Unnamed: 0,Number of box
0,2.532513
1,1.982118
2,84.764706
3,11.257812


In [12]:
# Blend each value of the 'Number of box' column (inverse class frequency at this
# point) with a constant term: 0.3 * value + 0.7 / num_classes.
# The class count is taken from the table instead of being hard-coded to 4, so
# the cell also works when the dataset has a different number of classes.
# NOTE(review): 0.7 / num_classes presumably stands for a uniform share per
# class — confirm that the divisor is meant to be the class count.
num_classes = len(df_cat_num)
zeros = np.zeros(num_classes)
for idx, num in enumerate(df_cat_num['Number of box']):
    print(num, idx)
    zeros[idx] = 0.3*num + 0.7/num_classes
print(zeros)
df_cat_num['Smooth weight'] = zeros

2.532513181019332 0
1.9821182943603852 1
84.76470588235294 2
11.2578125 3
[ 0.93475395  0.76963549 25.60441176  3.55234375]


In [13]:
# Sum of the smoothed weights.
norm = np.sum(df_cat_num['Smooth weight'])
norm

30.861144957319798

In [14]:
# Divide every smoothed weight by `norm` and store the result in a new column.
df_cat_num['Smooth weight norm'] = df_cat_num['Smooth weight'].div(norm)

In [15]:
# Display the table with the 'Smooth weight' and 'Smooth weight norm' columns added.
df_cat_num

Unnamed: 0,Number of box,Smooth weight,Smooth weight norm
0,2.532513,0.934754,0.030289
1,1.982118,0.769635,0.024939
2,84.764706,25.604412,0.829665
3,11.257812,3.552344,0.115107
