## Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

from google.colab import drive
import os
import shutil
import random

import xml.etree.ElementTree as ET

## Config

In [2]:
# Source folder on Google Drive holding the raw .png images and .xml annotations.
# Uses the same "MyDrive" mount spelling as every other path in this notebook
# (the previous value spelled it "My Drive").
data_loc = "/content/drive/MyDrive/data_storage/mask_detection/"
# Root folder that receives the train / validation / test splits.
output_loc = "/content/drive/MyDrive/projects/Face-Mask-Detection/data"

## Process Datasets
### Create folder directories

In [3]:
# Build the images / labels_xml / labels sub-folders for every split
for folder in ["train", "test", "validation"]:
  for subfolder in ["images", "labels_xml", "labels"]:
    os.makedirs(os.path.join(output_loc, folder, subfolder), exist_ok=True)

### Get the file names

In [None]:
# List the dataset folder once and split it into images and annotations
dataset_files = sorted(os.listdir(data_loc))
images = [file for file in dataset_files if file.endswith(".png")]
labels = [file for file in dataset_files if file.endswith(".xml")]

# Sorting alone does not guarantee that images[i] and labels[i] belong together:
# a single missing or extra file would shift every later pair. Since the two
# lists are zipped positionally further down, check that their base names agree.
image_stems = [os.path.splitext(file)[0] for file in images]
label_stems = [os.path.splitext(file)[0] for file in labels]
if image_stems != label_stems:
    unpaired = sorted(set(image_stems) ^ set(label_stems))
    raise ValueError(
        f"Images and labels do not pair up; unmatched base names: {unpaired}"
    )

print(f"Total images: {len(images)}")
print(f"Total labels: {len(labels)}")

Total images: 853
Total labels: 853


### Shuffle and split the data

In [None]:
# Pair every image with its annotation and shuffle the pairs reproducibly
random.seed(42)
data = list(zip(images, labels))
random.shuffle(data)

# 70% of the pairs go to train, 20% to validation, the remainder to test
train_size = int(0.7 * len(data))
val_size = int(0.2 * len(data))
val_end = train_size + val_size

# Slice the shuffled list into three consecutive, non-overlapping parts
train_data, val_data, test_data = (
    data[:train_size],
    data[train_size:val_end],
    data[val_end:],
)

print(f"Train size: {len(train_data)}")
print(f"Validation size: {len(val_data)}")
print(f"Test size: {len(test_data)}")

Train size: 597
Validation size: 170
Test size: 86


### Copy data files over to the new directory

In [None]:
# Define function to copy the files
def copy_data(data, folder):
  """Copy every (image, annotation) pair of one split into its output folders.

  Args:
    data: Iterable of (image filename, XML filename) pairs found in ``data_loc``.
    folder: Name of the split folder under ``output_loc``.
  """
  # Destination folders are the same for the whole split, so resolve them once
  images_dir = os.path.join(output_loc, f'{folder}/images')
  xml_dir = os.path.join(output_loc, f'{folder}/labels_xml')

  for img, label in data:
    img_src = os.path.join(data_loc, img)
    label_src = os.path.join(data_loc, label)
    shutil.copy(img_src, os.path.join(images_dir, img))
    shutil.copy(label_src, os.path.join(xml_dir, label))

# Copy each split into the folder that carries its name
for split_data, split_folder in [
    (train_data, "train"),
    (val_data, "validation"),
    (test_data, "test"),
]:
  copy_data(split_data, split_folder)

### Convert XML to YOLO format
#### Config

In [33]:
# Annotation class name -> YOLO class id (ids follow the order of the tuple)
class_mapping = {
    name: class_id
    for class_id, name in enumerate(
        ("without_mask", "with_mask", "mask_weared_incorrect")
    )
}

# Train-split locations: XML annotations are read from the first folder,
# YOLO TXT labels are written to the second
annotations_dir = '/content/drive/MyDrive/projects/Face-Mask-Detection/data/train/labels_xml'
output_dir = '/content/drive/MyDrive/projects/Face-Mask-Detection/data/train/labels'


def convert_to_yolo(xml_file, output_dir):
  """Convert one VOC-style XML annotation file into a YOLO label file.

  NOTE(review): a later cell of this notebook defines ``convert_to_yolo`` again;
  when the cells run top to bottom that later definition is the one in effect.

  Args:
    xml_file: Path of the XML annotation. It must contain a ``size`` element
      with ``width``/``height`` and may contain ``object`` elements, each with
      a ``name`` and a ``bndbox`` (``xmin``, ``ymin``, ``xmax``, ``ymax``).
    output_dir: Folder that receives ``<xml base name>.txt``. Created when
      missing.

  Each output line is ``class_id x_center y_center width height``, where the box
  values are divided by the image width/height. Objects whose name is missing
  from ``class_mapping`` are reported and skipped.
  """
  # Parse the XML file
  root = ET.parse(xml_file).getroot()

  # Get image dimensions (float() also accepts integer text such as "400")
  size = root.find('size')
  img_width = float(size.find('width').text)
  img_height = float(size.find('height').text)

  # Collect every label line before touching the disk, so a malformed object
  # cannot leave a half-written label file behind
  yolo_lines = []
  for obj in root.findall('object'):
    class_name = obj.find('name').text
    if class_name not in class_mapping:
      print(f"Class {class_name} not in mapping. Skipping.")
      continue
    class_id = class_mapping[class_name]

    # Get bounding box coordinates; float() tolerates values such as "12.0"
    bndbox = obj.find('bndbox')
    xmin, ymin, xmax, ymax = (
        float(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )

    # Convert corner coordinates to normalised centre / size
    x_center = ((xmin + xmax) / 2) / img_width
    y_center = ((ymin + ymax) / 2) / img_height
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height

    yolo_lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

  # Swap only the final extension: str.replace would rewrite every ".xml"
  # inside the name and leave names such as "a.XML" untouched
  stem = os.path.splitext(os.path.basename(xml_file))[0]
  txt_filename = os.path.join(output_dir, stem + '.txt')

  # An empty output_dir means "current directory", which needs no creation
  if output_dir:
    os.makedirs(output_dir, exist_ok=True)

  # The file is written even without objects, giving an empty label file
  with open(txt_filename, 'w') as txt_file:
    txt_file.writelines(yolo_lines)

  print(f"Converted {xml_file} to {txt_filename}")


In [36]:
import os
import xml.etree.ElementTree as ET

# Annotation class name -> integer class id written to the YOLO label files
class_mapping = dict(without_mask=0, with_mask=1, mask_weared_incorrect=2)

# Root folder that holds the train / validation / test splits
output_loc = '/content/drive/MyDrive/projects/Face-Mask-Detection/data'

def convert_to_yolo(xml_file, output_dir):
    """Convert one VOC-style XML annotation file into a YOLO label file.

    Args:
        xml_file: Path of the XML annotation. It must contain a ``size``
            element with ``width``/``height`` and may contain ``object``
            elements, each with a ``name`` and a ``bndbox`` (``xmin``,
            ``ymin``, ``xmax``, ``ymax``).
        output_dir: Folder that receives ``<xml base name>.txt``. Created
            when missing.

    Each output line is ``class_id x_center y_center width height``, where the
    box values are divided by the image width/height. Objects whose name is
    missing from ``class_mapping`` are reported and skipped. A file is written
    even when no object is kept, which yields an empty label file.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Parse the XML file
    root = ET.parse(xml_file).getroot()

    # Get image dimensions (float() also accepts integer text such as "400")
    size = root.find('size')
    img_width = float(size.find('width').text)
    img_height = float(size.find('height').text)

    # Build all label lines first so that a malformed object raises before
    # the label file is opened, instead of leaving a truncated file behind
    yolo_lines = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name not in class_mapping:
            print(f"Class {class_name} not in mapping. Skipping.")
            continue
        class_id = class_mapping[class_name]

        # Get bounding box coordinates; float() tolerates values like "12.0"
        bndbox = obj.find('bndbox')
        xmin = float(bndbox.find('xmin').text)
        ymin = float(bndbox.find('ymin').text)
        xmax = float(bndbox.find('xmax').text)
        ymax = float(bndbox.find('ymax').text)

        # Convert corner coordinates to normalised centre / size
        x_center = ((xmin + xmax) / 2) / img_width
        y_center = ((ymin + ymax) / 2) / img_height
        width = (xmax - xmin) / img_width
        height = (ymax - ymin) / img_height

        yolo_lines.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

    # Swap only the final extension: str.replace would rewrite every ".xml"
    # inside the name and leave names such as "a.XML" untouched
    stem = os.path.splitext(os.path.basename(xml_file))[0]
    txt_filename = os.path.join(output_dir, stem + '.txt')

    # Write to the YOLO format text file
    with open(txt_filename, 'w') as txt_file:
        txt_file.writelines(yolo_lines)

#### Process data

In [37]:
# Convert the annotations of every split into YOLO label files
for folder in ["train", "validation", "test"]:
    annotations_dir = os.path.join(output_loc, f'{folder}/labels_xml')
    output_dir = os.path.join(output_loc, f'{folder}/labels')

    for xml_file in os.listdir(annotations_dir):
        # Anything that is not an XML annotation is left alone
        if not xml_file.endswith('.xml'):
            continue
        xml_path = os.path.join(annotations_dir, xml_file)
        convert_to_yolo(xml_path, output_dir)

    print(f"Conversion completed for {folder}. Labels saved to {output_dir}.")

Conversion completed for train. Labels saved to /content/drive/MyDrive/projects/Face-Mask-Detection/data/train/labels.
Conversion completed for validation. Labels saved to /content/drive/MyDrive/projects/Face-Mask-Detection/data/validation/labels.
