In [1]:
import json
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
def extract_categories(annot_fpath, categories):
    """Extract the records belonging to selected categories from an annotation file.

    The file is expected to be JSON with top-level "categories",
    "annotations" and "images" lists (the layout used by the
    `_annotations.coco.json` files loaded in this notebook).

    Parameters
    ----------
    annot_fpath : str or path-like
        Path to the annotation JSON file.
    categories : collection of str
        Category names to keep.

    Returns
    -------
    cat_subset : list of dict
        Category records whose "name" is in `categories`.
    annot_subset : list of dict
        Annotation records whose "category_id" is the id of a kept category.
    img_subset : list of dict
        Image records whose "id" is referenced by at least one kept
        annotation.

    All three lists keep the order in which the records appear in the file.
    """
    # Context manager: the handle is closed even if JSON parsing fails.
    with open(annot_fpath, encoding="utf-8") as f:
        body = json.load(f)

    # detailed info of the categories of interest, plus their ids as a set
    # so the per-annotation membership test below is O(1)
    cat_subset = [c for c in body["categories"] if c["name"] in categories]
    cat_ids = {c["id"] for c in cat_subset}

    # annotations of the categories of interest and the images they refer to
    annot_subset = []
    img_ids = set()
    for annot in tqdm(body["annotations"]):
        if annot["category_id"] in cat_ids:
            annot_subset.append(annot)
            img_ids.add(annot["image_id"])

    # images labeled with at least one category of interest
    img_subset = [img for img in body["images"] if img["id"] in img_ids]
    return cat_subset, annot_subset, img_subset

### ppe_v3
https://universe.roboflow.com/factory-2qr1r/ppe_v3

In [3]:
# Annotation file (COCO JSON) for the train split of the ppe_v3 export.
in_fpath1 = "ppe_v3/ppe_v3.v5i.coco/train/_annotations.coco.json"

In [4]:
# Category names to extract from ppe_v3; the same list is reused for the
# ppe8-0mhax dataset further down.
cat_names1 = [
    'head_helmet',
    'head_nohelmet',
    'person',
    'vest'
]

In [5]:
# Selected category records, their annotations and the annotated images
# from the ppe_v3 train split.
ppev3_cat, ppev3_annot, ppev3_img = extract_categories(in_fpath1, cat_names1)

  0%|          | 0/19660 [00:00<?, ?it/s]

### ppe8-0mhax
https://universe.roboflow.com/ppe8/ppe8-0mhax

In [6]:
# Annotation files (COCO JSON) of the ppe8-0mhax export: train and valid splits.
in_fpath2 = "ppe8-0mhax/ppe8.v1-raw.coco/train/_annotations.coco.json"
in_fpath3 = "ppe8-0mhax/ppe8.v1-raw.coco/valid/_annotations.coco.json"

In [7]:
# The category names are exactly the same as for the ppe_v3 dataset, so
# cat_names1 is reused. The category records are kept from the train split
# only; the valid split contributes annotations and images.
ppe8_cat, ppe8_annot1, ppe8_img1 = extract_categories(in_fpath2, cat_names1)
_, ppe8_annot2, ppe8_img2 = extract_categories(in_fpath3, cat_names1)

  0%|          | 0/869 [00:00<?, ?it/s]

  0%|          | 0/426 [00:00<?, ?it/s]

### personal-protective-equipment-combined-model
https://universe.roboflow.com/roboflow-universe-projects/personal-protective-equipment-combined-model

In [8]:
# Annotation files (COCO JSON) of the combined-model export:
# train, valid and test splits.
in_fpath4 = "personal-protective-equipment-combined-model/Personal Protective Equipment - Combined Model.v4-resize640_allclasses_noaugs.coco/train/_annotations.coco.json"
in_fpath5 = "personal-protective-equipment-combined-model/Personal Protective Equipment - Combined Model.v4-resize640_allclasses_noaugs.coco/valid/_annotations.coco.json"
in_fpath6 = "personal-protective-equipment-combined-model/Personal Protective Equipment - Combined Model.v4-resize640_allclasses_noaugs.coco/test/_annotations.coco.json"


In [9]:
# Category names to extract from the combined-model dataset.
cat_names2 = [
    "Hardhat",
    "NO-Hardhat",
    "NO-Safety Vest",
    "Person",
    "Safety Vest"
]

In [10]:
# Category records are kept from the train split only; every split
# contributes its own annotations and images.
ppecm_cat, ppecm_annot1, ppecm_img1 = extract_categories(in_fpath4, cat_names2)
_, ppecm_annot2, ppecm_img2 = extract_categories(in_fpath5, cat_names2)
_, ppecm_annot3, ppecm_img3 = extract_categories(in_fpath6, cat_names2)

  0%|          | 0/76398 [00:00<?, ?it/s]

  0%|          | 0/22077 [00:00<?, ?it/s]

  0%|          | 0/11086 [00:00<?, ?it/s]

### Construction PPEs Computer Vision Project
https://universe.roboflow.com/aast-ni1jb/construction-ppes

In [11]:
# Annotation files (COCO JSON) of the Construction PPEs export:
# train, valid and test splits.
in_fpath7 = "construction-ppes/Construction PPEs.v6i.coco/train/_annotations.coco.json"
in_fpath8 = "construction-ppes/Construction PPEs.v6i.coco/valid/_annotations.coco.json"
in_fpath9 = "construction-ppes/Construction PPEs.v6i.coco/test/_annotations.coco.json"

In [12]:
# Category names to extract from the Construction PPEs dataset.
cat_names3 = [
    "Helmet",
    "Person",
    "Vest",
    "chest",
    "head"
]

In [13]:
# Category records are kept from the train split only; every split
# contributes its own annotations and images.
cppecvp_cat, cppecvp_annot1, cppecvp_img1 = extract_categories(in_fpath7, cat_names3)
_, cppecvp_annot2, cppecvp_img2 = extract_categories(in_fpath8, cat_names3)
_, cppecvp_annot3, cppecvp_img3 = extract_categories(in_fpath9, cat_names3)

  0%|          | 0/92136 [00:00<?, ?it/s]

  0%|          | 0/25727 [00:00<?, ?it/s]

  0%|          | 0/13647 [00:00<?, ?it/s]

### Safety Vests Computer Vision Project
https://universe.roboflow.com/roboflow-universe-projects/safety-vests

*Covered by personal-protective-equipment-combined-model dataset*

In [14]:
# in_fpath10 = "safety-vests/Safety Vests.v6i.coco/train/_annotations.coco.json"
# in_fpath11 = "safety-vests/Safety Vests.v6i.coco/valid/_annotations.coco.json"
# in_fpath12 = "safety-vests/Safety Vests.v6i.coco/test/_annotations.coco.json"

In [15]:
# cat_names4 = [
#     "NO-Safety Vest",
#     "Safety Vest",
# ]

In [16]:
# svcvp_cat, svcvp_annot1, svcvp_img1 = extract_categories(annot_fpath=in_fpath10, categories=cat_names4)
# _, svcvp_annot2, svcvp_img2 = extract_categories(annot_fpath=in_fpath11, categories=cat_names4)
# _, svcvp_annot3, svcvp_img3 = extract_categories(annot_fpath=in_fpath12, categories=cat_names4)

### Hard Hat Workers
https://public.roboflow.com/object-detection/hard-hat-workers 

In [17]:
# Annotation files (COCO JSON) of the Hard Hat Workers export: train and test splits.
hhw_fpath1 = "hard-hat-workers/Hard Hat Workers.v2-raw.coco/train/_annotations.coco.json"
hhw_fpath2 = "hard-hat-workers/Hard Hat Workers.v2-raw.coco/test/_annotations.coco.json"

In [18]:
# Load the raw Hard Hat Workers annotation files (train and test) so their
# category distribution can be inspected before choosing category names.
# The context managers close each handle as soon as its JSON is parsed.
with open(hhw_fpath1, encoding="utf-8") as f1:
    body1 = json.load(f1)

with open(hhw_fpath2, encoding="utf-8") as f2:
    body2 = json.load(f2)

In [19]:
# category id -> category name, taken from the train split's category list
cat_dict = {entry["id"]: entry["name"] for entry in body1["categories"]}

print(cat_dict)

# number of annotations per category name over train and test together
(
    pd.DataFrame(body1["annotations"] + body2["annotations"])["category_id"]
    .replace(cat_dict)
    .value_counts()
)

{0: 'Workers', 1: 'head', 2: 'helmet', 3: 'person'}


helmet    19747
head       6677
person      615
Name: category_id, dtype: int64

In [20]:
# no annotations assigning images to the 'Workers' class

## Summary

In [21]:
# consistent categories: each dataset-specific name (alias) is mapped onto one
# shared label so that counts can be compared across datasets
_aliases_by_label = {
    'helmet': ['helmet', 'Helmet', 'head_helmet', 'Hardhat'],
    'head_nohelmet': ['head_nohelmet', 'head', 'NO-Hardhat', 'Head'],
    'vest': ['vest', 'Vest', 'Safety Vest'],
    'no_vest': ['NO-Safety Vest', 'chest'],
    'person': ['Person', 'person'],
}

# flatten to the alias -> shared label lookup
categories = {
    alias: label
    for label, aliases in _aliases_by_label.items()
    for alias in aliases
}

In [22]:
def summary_prep(cat, annot, name, categories=categories):
    """Count the annotations of one dataset per unified category.

    Parameters
    ----------
    cat : list of dict
        Category records with "id" and "name" keys.
    annot : list of dict
        Annotation records with a "category_id" key.
    name : str
        Label of the single column in the returned frame.
    categories : dict, optional
        Maps dataset-specific category names to unified names. Names that
        are not keys of this mapping are counted under their own name.

    Returns
    -------
    pandas.DataFrame
        One column called `name`, indexed by category name, holding the
        number of annotations per category. Empty if `annot` is empty.
    """
    if len(annot) == 0:
        # pd.DataFrame([]) has no "category_id" column and would raise a
        # KeyError; return an empty column so the dataset can still take
        # part in a concatenated summary.
        return pd.DataFrame({name: pd.Series(dtype="int64")})

    cat_dict = {item["id"]: item["name"] for item in cat}

    summ = (
        pd.DataFrame(annot)["category_id"]
        .replace(cat_dict)      # category id -> dataset-specific name
        .replace(categories)    # dataset-specific name -> unified name
        .value_counts()
    )
    # Name the column explicitly. The Series returned by value_counts() is
    # called "category_id" on pandas < 2.0 but "count" on pandas >= 2.0, so
    # renaming a "category_id" column silently does nothing on newer pandas.
    # rename_axis(None) keeps the index unnamed on both versions.
    return summ.rename_axis(None).to_frame(name=name)

In [23]:
# Hard Hat Workers ('Workers' is left out: no annotation uses that category)
cat_names5 = ['head', 'helmet', 'person']

# category records are kept from the train split only
hhw_cat, hhw_annot1, hhw_img1 = extract_categories(hhw_fpath1, cat_names5)
_, hhw_annot2, hhw_img2 = extract_categories(hhw_fpath2, cat_names5)

# counts are reported for train and test together
hhw_annot = hhw_annot1 + hhw_annot2
hhw_summ = summary_prep(cat=hhw_cat, annot=hhw_annot, name="hhw", categories=categories)

  0%|          | 0/20231 [00:00<?, ?it/s]

  0%|          | 0/6808 [00:00<?, ?it/s]

In [24]:
# Step 1: merge the per-split annotation lists of the multi-split datasets
# (ppe_v3 has a single split, so ppev3_annot is used as is).
ppe8_annot = ppe8_annot1 + ppe8_annot2                           # ppe8-0mhax
ppecm_annot = ppecm_annot1 + ppecm_annot2 + ppecm_annot3         # personal-protective-equipment-combined-model
cppecvp_annot = cppecvp_annot1 + cppecvp_annot2 + cppecvp_annot3  # construction PPEs computer vision project

# Step 2: one single-column summary of unified category counts per dataset.
ppev3_summ = summary_prep(cat=ppev3_cat, annot=ppev3_annot, name="ppev3", categories=categories)
ppe8_summ = summary_prep(cat=ppe8_cat, annot=ppe8_annot, name="ppe8", categories=categories)
ppecm_summ = summary_prep(cat=ppecm_cat, annot=ppecm_annot, name="ppecm", categories=categories)
cppecvp_summ = summary_prep(cat=cppecvp_cat, annot=cppecvp_annot, name="cppecvp", categories=categories)

# # safety vests computer vision project
# svcvp_annot = svcvp_annot1 + svcvp_annot2 + svcvp_annot3
# svcvp_summ = summary_prep(cat=svcvp_cat, annot=svcvp_annot, name="svcvp", categories=categories)

In [25]:
# Put the per-dataset columns side by side; a category missing from a dataset
# becomes 0. TOTAL is the row sum over all dataset columns.
summary = (
    pd.concat(
        [
            hhw_summ,
            ppecm_summ,
            cppecvp_summ,
            ppev3_summ,
            # svcvp_summ,
            ppe8_summ,
        ],
        axis=1,
    )
    .fillna(0)
    .assign(TOTAL=lambda counts: counts.sum(axis=1))
)

summary

Unnamed: 0,hhw,ppecm,cppecvp,ppev3,ppe8,TOTAL
helmet,19747.0,42428,24440,3567.0,195.0,90377.0
head_nohelmet,6677.0,12965,639,1406.0,103.0,21790.0
person,615.0,1449,33603,5098.0,277.0,41042.0
vest,0.0,6448,21152,1798.0,23.0,29421.0
no_vest,0.0,2022,803,0.0,0.0,2825.0
