# Detection and Classification of People With and Without Masks in Images
## Preprocessing

In [None]:
# Import Packages
import os
import re
import pandas as pd
from bs4 import BeautifulSoup as soup
from PIL import Image

In [None]:
# Annotation Extraction Function
def Extract(path):
    """Parse one XML annotation file into a list of per-object label dicts.

    The image index is the first run of digits in the file *name* (not in the
    whole path), so digits in parent directory names cannot be mistaken for it.

    Args:
        path: Path to an XML annotation file containing a ``<size>`` element
            (with ``width``/``height``/``depth``) and zero or more
            ``<object>`` elements (with ``name``/``xmin``/``xmax``/``ymin``/
            ``ymax``).

    Returns:
        A list with one dict per ``<object>`` element, each holding the keys
        ``index``, ``width``, ``height``, ``depth``, ``nObjects``, ``label``,
        ``xmin``, ``xmax``, ``ymin`` and ``ymax``. The list is empty when the
        annotation has no objects.

    Raises:
        ValueError: If the file name contains no digits to use as the index.
    """
    # Derive the index from the file name only; searching the full path would
    # pick up digits from directory names first.
    fileName = os.path.basename(path)
    match = re.search(r'\d+', fileName)
    if match is None:
        raise ValueError("No numeric index in annotation file name: " + repr(path))
    index = int(match.group())

    # Safely open annotation and read it with Beautiful Soup
    with open(path) as annotation:
        bsXML = soup(annotation.read(), "xml")

    size = bsXML.find('size')
    objects = bsXML.find_all('object')

    # Image-level metadata is identical for every object in the file; key
    # order is kept so the resulting DataFrame columns stay in the same order.
    shared = {
        'index': index,
        'width': int(size.find('width').get_text()),
        'height': int(size.find('height').get_text()),
        'depth': int(size.find('depth').get_text()),
        'nObjects': len(objects),
    }

    # Stores labels in a list of dictionaries, one per annotated object
    faces = []
    for face in objects:
        labels = dict(shared)
        labels['label'] = face.find('name').get_text()
        for key in ('xmin', 'xmax', 'ymin', 'ymax'):
            labels[key] = int(face.find(key).get_text())
        faces.append(labels)
    return faces

In [None]:
# Collect Samples
# Build an (index, path) table of every regular file in the image directory;
# the index is the first run of digits in the file name.
imagePath = "./Data/images/"
imageList = []
for f in os.listdir(imagePath):
    fullPath = os.path.join(imagePath, f)
    if not os.path.isfile(fullPath):
        continue  # skip sub-directories and other non-file entries
    imageList.append([int(re.search(r'\d+', f).group()), fullPath])
images = pd.DataFrame(imageList, columns=['index', 'path']).set_index('index')

In [None]:
# Collect Labels
# Run Extract on every entry of the annotation directory and flatten the
# per-file lists of label dicts into one table indexed by image index.
annotationPath = "./Data/annotations/"
labelList = []
for f in os.listdir(annotationPath):
    labelList.extend(Extract(os.path.join(annotationPath, f)))
labels = pd.DataFrame(labelList).set_index('index')

In [None]:
# Find and remove images with more than 20 labels or incorrectly worn mask labels
# An image is excluded entirely as soon as any one of its labels matches.
incorrectMask = labels['label'] == 'mask_weared_incorrect'
tooCrowded = labels['nObjects'] > 20
exclude = labels[incorrectMask | tooCrowded].index.unique()
# Drop the flagged image indices from both tables so they stay in sync
for frame in (labels, images):
    frame.drop(exclude, inplace=True)

In [None]:
# Crop every labeled face and save it into the Train & Validation directories
# Output layout: ./Data/<train|validation>/<with_mask|without_mask>/<index>_<n>.jpg
# Series.items() is used because Series.iteritems() was removed in pandas 2.0.
for idx, path in images['path'].items():
    # Images with an index below 600 form the training set, the rest validation
    t = "train" if idx < 600 else "validation"
    # Double brackets keep the result a DataFrame even for a single label row
    l = labels.loc[[idx]]
    with Image.open(path) as im:
        # Convert once per image; the conversion does not depend on the face
        imRGB = im.convert('RGB')
        for i, face in enumerate(l.itertuples(index=False)):
            tindex = str(idx)+"_"+str(i)
            # Crop box order is (left, upper, right, lower)
            c = imRGB.crop((face.xmin, face.ymin, face.xmax, face.ymax))
            # Anything that is not 'with_mask' is stored as 'without_mask'
            classDir = "with_mask" if face.label == 'with_mask' else "without_mask"
            savePath = "./Data/"+t+"/"+classDir+"/"+tindex+".jpg"
            # Create the target directory on first use so save() cannot fail
            # just because the folder tree has not been prepared by hand
            os.makedirs(os.path.dirname(savePath), exist_ok=True)
            c.save(savePath)