# **PROJECT: YOLOv8 PPE DETECTION**

## **DATA PREPROCESSING**
The images will be renamed and gathered into a single folder to be uploaded to Roboflow.

### **Count Files and Images**

In [1]:
import os

# Count jpg images function
def count_jpg_images(root_dir):
    """Print a per-directory tally of .jpg images and files under ``root_dir``.

    Walks ``root_dir`` recursively and prints one line per directory with the
    number of ``.jpg`` files (extension matched case-insensitively) and the
    number of files that directory directly contains, followed by the grand
    totals.

    Args:
        root_dir: Directory to scan, with or without a trailing separator.

    Returns:
        int: Total number of ``.jpg`` images found under ``root_dir``.
    """
    total_images = 0
    total_files = 0
    print('Images(jpg)\tFiles\tPath')
    print(30*'-')
    for root, _dirs, files in os.walk(root_dir):
        # relpath yields '.' for root_dir itself and a clean relative path for
        # sub-directories whether or not root_dir ends with a separator
        dir_path = os.path.relpath(root, root_dir)
        files_count = len(files)
        # Compare the lower-cased name so '.JPG' files are counted as well
        images_count = sum(1 for f in files if f.lower().endswith('.jpg'))

        total_images += images_count
        total_files += files_count
        print(f'{images_count:05} images from {files_count:05} files in {dir_path}')

    print(30*'-')
    print(f'{total_images:05} total images from {total_files:05} total files')

    # Callers bind the result (e.g. ``raw_images = count_jpg_images(...)``),
    # so hand back the image total instead of an implicit None
    return total_images

**Count Raw Images**

In [2]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# Raw data path: ../data/raw/ relative to the working directory
# (the trailing slash is part of the string passed to count_jpg_images)
data_raw_path = os.path.join(current_dir, '../data/raw/')
raw_images = count_jpg_images(data_raw_path)



Images(jpg)	Files	Path
------------------------------
00000 images from 00002 files in .
00042 images from 00042 files in hijab_dataset
00070 images from 00070 files in harness_unsafe_dataset
00067 images from 00067 files in cap_dataset
00115 images from 00116 files in google_harness
00024 images from 00024 files in try_dataset
00241 images from 00241 files in google_miscellaneous
00077 images from 00077 files in full_body_harness_dataset
00500 images from 00500 files in jyfyfi_dataset
00100 images from 00100 files in ppe3_dataset
00146 images from 00146 files in detect_worker_dataset
00108 images from 00108 files in ppe_v1_1_dataset
00029 images from 00029 files in hairnet_detection_dataset
00543 images from 00543 files in face_detect_dataset
00047 images from 00047 files in harness_belt_dataset
00138 images from 00138 files in vest_dataset
00329 images from 00329 files in k3_dataset
00043 images from 00043 files in goggle_mask_dataset
00090 images from 00090 files in ppe_safety_datas

**Count Interim Images**

In [3]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# Interim data path: ../data/interim/ relative to the working directory
# (the trailing slash is part of the string passed to count_jpg_images)
data_interim_path = os.path.join(current_dir, '../data/interim/')
interim_images = count_jpg_images(data_interim_path)

Images(jpg)	Files	Path
------------------------------
00000 images from 00001 files in .
07968 images from 07968 files in 01_2024_11_21
00000 images from 00000 files in 03_2024_11_21
00000 images from 00000 files in 02_2024_11_21
------------------------------
07968 total images from 07969 total files


**Count Captures Images**

In [71]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# Captures data path: ../data/captures/ relative to the working directory.
# Dedicated names are used so this cell does not overwrite the interim
# path/result variables.
data_captures_path = os.path.join(current_dir, '../data/captures/')
captures_images = count_jpg_images(data_captures_path)

Images(jpg)	Files	Path
------------------------------
00000 images from 00000 files in .
00000 images from 00000 files in 2024_11_04
00403 images from 00403 files in 2024_10_31
00000 images from 00000 files in 2024_11_05
------------------------------
00403 total images from 00403 total files


**Count External Images**

In [72]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# External data path: ../data/external/ relative to the working directory.
# Dedicated names are used so this cell does not overwrite the interim
# path/result variables.
data_external_path = os.path.join(current_dir, '../data/external/')
external_images = count_jpg_images(data_external_path)

Images(jpg)	Files	Path
------------------------------
00000 images from 00001 files in .
00000 images from 00003 files in helmet
00000 images from 00001 files in helmet/train
02043 images from 02043 files in helmet/train/images
00000 images from 02043 files in helmet/train/labels
00000 images from 00001 files in helmet/valid
00195 images from 00195 files in helmet/valid/images
00000 images from 00195 files in helmet/valid/labels
00000 images from 00000 files in helmet/test
00097 images from 00097 files in helmet/test/images
00000 images from 00097 files in helmet/test/labels
------------------------------
02335 total images from 04676 total files


**Count Processed Images**

In [73]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# Processed data path: ../data/processed/ relative to the working directory.
# Dedicated names are used so this cell does not overwrite the interim
# path/result variables.
data_processed_path = os.path.join(current_dir, '../data/processed/')
processed_images = count_jpg_images(data_processed_path)

Images(jpg)	Files	Path
------------------------------
00000 images from 00001 files in .
------------------------------
00000 total images from 00001 total files


## **Image Requirements**

In [80]:
import cv2

# Verify image dimensions
def verify_dimensions(root_dir, dim_h=640, dim_w=640):
  """Report every .jpg under ``root_dir`` whose size is not ``dim_h`` x ``dim_w``.

  Walks ``root_dir`` recursively, reads each ``.jpg`` file (extension matched
  case-insensitively) with ``cv2.imread`` and prints one line per image whose
  height or width differs from the desired dimensions. Files that cannot be
  read are reported and skipped instead of aborting the scan.

  Args:
    root_dir: Directory to scan, with or without a trailing separator.
    dim_h: Desired image height in pixels.
    dim_w: Desired image width in pixels.

  Returns:
    int: Number of readable images whose dimensions differ from the target.
  """
  # Counters of misshapen and unreadable images
  total_misshapen = 0
  total_unreadable = 0

  # Loop in all directories
  for root, _dirs, files in os.walk(root_dir):
    # relpath yields '.' for root_dir itself and a clean relative path for
    # sub-directories whether or not root_dir ends with a separator
    dir_path = os.path.relpath(root, root_dir)

    # Loop in all files
    for file in files:

      # Only images are processed; lower-case so '.JPG' is checked too
      if not file.lower().endswith('.jpg'):
        continue

      # Read the image
      image = cv2.imread(os.path.join(root, file))

      # cv2.imread returns None (it does not raise) when the file cannot be
      # read, so guard before touching .shape
      if image is None:
        total_unreadable += 1
        print(f'unreadable image {file} in {dir_path}')
        continue

      # Image dimensions
      (h, w) = image.shape[:2]

      # Verify dimensions
      if h != dim_h or w != dim_w:
        total_misshapen += 1
        print(f'{h} x {w} image {file} in {dir_path}')

  print(30*'-')
  print(f'{total_misshapen} images misshapen')
  if total_unreadable:
    print(f'{total_unreadable} images unreadable')

  return total_misshapen

**Verify dimensions in interim data**

In [84]:
# Working directory of the kernel; the data path below is resolved relative to it
current_dir = os.getcwd()

# Check the dimensions of the .jpg images under the interim data folder
# Interim data path: ../data/interim/ relative to the working directory
data_interim_path = os.path.join(current_dir, '../data/interim/')
# NOTE(review): `interim_images` is the same name the interim count cell binds
# its result to, so this assignment replaces that value - confirm this is intended
interim_images = verify_dimensions(data_interim_path)

350 x 347 image 61PZ6lOsBYL._AC_UY350_.jpg in google_harness
1390 x 866 image securite-des-travaux-en-hauteur-sur-la-construction-protection-du-travailleur-tout-en-travaillant-sur-la-construction-d-un-balcon-ky45jy.jpg in google_harness
408 x 612 image istockphoto-1302299491-612x612.jpg in google_harness
240 x 429 image 240_F_611094510_wZVBknAB7hdg6mH6uBWyaSXic5RvHB0r.jpg in google_harness
360 x 540 image 360_F_874938291_THitIjGG5joSfT15ijDbJSypVnGpxOAf.jpg in google_harness
408 x 612 image istockphoto-1302299541-612x612.jpg in google_harness
1000 x 613 image 61nWTBsiUyL._AC_UF1000,1000_QL80_.jpg in google_harness
240 x 319 image 240_F_643614441_15zQOA25sIUgpYvsBPxcw3B8tuegIX4s.jpg in google_harness
333 x 250 image 009-Seg.-Altura-B.jpg in google_harness
1000 x 996 image 61SF6QfW5lL._AC_UF1000,1000_QL80_.jpg in google_harness
1500 x 1500 image 71vcWqvB2UL.jpg in google_harness
894 x 894 image 71G7TKDVD9S._AC_UF894,1000_QL80_.jpg in google_harness
350 x 425 image 81KVU8y6bPL._AC_UY350_.

In [None]:
# Install
$ pip install roboflow

# Authenticate
$ roboflow authenticate

# Import
$ roboflow import -w deeplearning-cwudo -p yolo_ppe_detection /path/to/data
       