In [1]:
from google.colab import drive
drive.mount("/content/drive",force_remount=True)

Mounted at /content/drive


In [2]:
!ln -s "/content/drive/MyDrive/ai" "/projdir"

In [4]:
!unzip -q '/projdir/dataset/hard_hat_detection.zip' -d "data"

In [5]:
import os
import pathlib
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt

In [6]:
len(os.listdir("data/images"))

5000

In [7]:
!pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.12.0-py2.py3-none-any.whl (9.2 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.12.0


In [8]:
import xmltodict

In [9]:
object_dict = {
    "helmet":1,
    "head":0
}

In [62]:
def PASCAL_to_YOLO(label):
  lbl = xmltodict.parse(label)
  width = int(lbl['annotation']['size']['width'])
  height = int(lbl['annotation']['size']['height'])
  objects = list(lbl['annotation']['object'])
  yolo_labels = []
  for obj in objects:
    cls = obj['name']
    if cls not in object_dict.keys():
      continue
    obj_class = object_dict[obj['name']]
    xmin = int(obj['bndbox']['xmin'])
    ymin = int(obj['bndbox']['ymin'])
    xmax = int(obj['bndbox']['xmax'])
    ymax = int(obj['bndbox']['ymax'])
    img_wd = (xmax-xmin)
    img_ht = (ymax-ymin)
    img_x = (xmin+xmax)//2
    img_y = (ymin+ymax)//2
    yolo_labels.append(f"{obj_class} {img_x/width} {img_y/height} {img_wd/width} {img_ht/height}\n")
  return yolo_labels

In [14]:
with open("/content/data/annotations/hard_hat_workers0.xml","r") as f:
  lbl = f.read()
  print(PASCAL_to_YOLO(lbl))

['1 0.9134615384615384 0.3485576923076923 0.11298076923076923 0.14182692307692307', '1 0.05048076923076923 0.39663461538461536 0.08413461538461539 0.09134615384615384', '1 0.6346153846153846 0.3798076923076923 0.052884615384615384 0.09134615384615384', '1 0.7475961538461539 0.3918269230769231 0.055288461538461536 0.08653846153846154', '1 0.30528846153846156 0.39663461538461536 0.052884615384615384 0.06971153846153846', '1 0.21634615384615385 0.39663461538461536 0.04807692307692308 0.06971153846153846', '0 0.17307692307692307 0.3798076923076923 0.05048076923076923 0.0673076923076923', '0 0.8004807692307693 0.38221153846153844 0.055288461538461536 0.0889423076923077', '0 0.4423076923076923 0.4110576923076923 0.04567307692307692 0.07211538461538461', '0 0.5552884615384616 0.39903846153846156 0.04326923076923077 0.07451923076923077', '0 0.5 0.38221153846153844 0.038461538461538464 0.06490384615384616', '1 0.25240384615384615 0.3605769230769231 0.03365384615384615 0.04807692307692308', '0 0

In [26]:
annotations_path = pathlib.Path("data/annotations").glob("*.xml")
images_path = pathlib.Path("data/images").glob("*.png")

In [27]:
annotations = list(map(lambda x:str(x),annotations_path))
images = list(map(lambda x:str(x),images_path))

In [29]:
len(annotations),len(images)

(5000, 5000)

In [30]:
annotations.sort()
images.sort()

In [31]:
annotations[:5],images[:5]

(['data/annotations/hard_hat_workers0.xml',
  'data/annotations/hard_hat_workers1.xml',
  'data/annotations/hard_hat_workers10.xml',
  'data/annotations/hard_hat_workers100.xml',
  'data/annotations/hard_hat_workers1000.xml'],
 ['data/images/hard_hat_workers0.png',
  'data/images/hard_hat_workers1.png',
  'data/images/hard_hat_workers10.png',
  'data/images/hard_hat_workers100.png',
  'data/images/hard_hat_workers1000.png'])

In [32]:
import shutil

In [57]:
os.mkdir("obj")

In [58]:
def process_file(image_file,label_file):
  shutil.copy(image_file,"obj")
  file_name = image_file.split("/")[-1].split(".")[0]
  with open(label_file,"r") as f:
    label = f.read()
  with open("obj/"+file_name+".txt","w") as f:
    f.writelines(PASCAL_to_YOLO(label))

In [63]:
for idx,(image_file,label_file) in enumerate(zip(images,annotations)):
  if idx%500 == 0 and idx!=0:
    print(f"{idx+1} files processed Successfully")
  try:
    process_file(image_file,label_file)
  except Exception as e:
    pass

501 files processed Successfully
1001 files processed Successfully
1501 files processed Successfully
2001 files processed Successfully
2501 files processed Successfully
3001 files processed Successfully
3501 files processed Successfully
4001 files processed Successfully
4501 files processed Successfully


In [68]:
len(os.listdir("obj"))

10000

In [None]:
! zip -r "obj.zip" "obj"

In [74]:
! cp "obj.zip" "/projdir/dataset/obj.zip"