# Detection and Classification of People With and Without Masks in Images
## Preprocessing

In [1]:
# Import Packages
import os
import re
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as soup
from PIL import Image
import cv2

In [2]:
# Annotation Extraction Function
def Extract(path):
    """Read one XML annotation file and return one label row per object.

    The image index is parsed from the first run of digits in the file
    name (not the full path), so digits in directory names cannot be
    mistaken for the index.

    Parameters
    ----------
    path : str
        Path to the XML annotation file. The file is expected to contain
        a <size> element (with width, height, depth) and zero or more
        <object> elements (each with name, xmin, xmax, ymin, ymax).

    Returns
    -------
    list of dict
        One dictionary per <object>, with keys 'index', 'width',
        'height', 'depth', 'nObjects', 'label', 'xmin', 'xmax', 'ymin'
        and 'ymax'. The list is empty when the file has no <object>.

    Raises
    ------
    ValueError
        If the file name contains no digits to use as the index.
    """
    # Only look at the file name: a digit anywhere in a parent directory
    # would otherwise be picked up as the image index.
    fileName = os.path.basename(path)
    match = re.search(r'\d+', fileName)
    if match is None:
        raise ValueError("No numeric index found in annotation file name: " + repr(fileName))
    index = int(match.group())

    # Safely open annotation and parse it with Beautiful Soup
    with open(path) as annotation:
        bsXML = soup(annotation.read(), "xml")

    # Image metadata shared by every object in this annotation
    size = bsXML.find('size')
    width = int(size.find('width').get_text())
    height = int(size.find('height').get_text())
    depth = int(size.find('depth').get_text())

    # One dictionary per annotated object; key order fixes the column
    # order of any DataFrame built from these rows.
    objects = bsXML.find_all('object')
    n = len(objects)
    faces = []
    for face in objects:
        faces.append({
            'index': index,
            'width': width,
            'height': height,
            'depth': depth,
            'nObjects': n,
            'label': face.find('name').get_text(),
            'xmin': int(face.find('xmin').get_text()),
            'xmax': int(face.find('xmax').get_text()),
            'ymin': int(face.find('ymin').get_text()),
            'ymax': int(face.find('ymax').get_text()),
        })
    return faces

In [3]:
# Collect Samples
# Build a table of image paths keyed by the number embedded in each file name.
imagePath = "../Data/images/"
imageList = []
for fileName in os.listdir(imagePath):
    fullPath = os.path.join(imagePath, fileName)
    # Skip sub-directories and anything else that is not a regular file
    if not os.path.isfile(fullPath):
        continue
    imageIndex = int(re.search(r'\d+', fileName).group())
    imageList.append([imageIndex, fullPath])
images = pd.DataFrame(imageList, columns=['index', 'path']).set_index('index')

In [4]:
# Collect Labels
# Every entry in the annotation directory is parsed; each annotated object
# becomes one row, indexed by the image number it belongs to.
annotationPath = "../Data/annotations/"
labelList = []
for annotationFile in os.listdir(annotationPath):
    labelList.extend(Extract(os.path.join(annotationPath, annotationFile)))
labels = pd.DataFrame(labelList).set_index('index')

In [5]:
# Find and remove images with more than 20 labels or incorrectly worn mask labels
# A single offending label row excludes the whole image (all of its rows).
hasIncorrectMask = labels['label'] == 'mask_weared_incorrect'
hasTooManyObjects = labels['nObjects'] > 20
exclude = labels[hasIncorrectMask | hasTooManyObjects].index.unique()
for frame in (labels, images):
    frame.drop(exclude, inplace=True)

In [6]:
# Resize images
# Every remaining image is converted to RGB, resized to 400x400 and saved as
# a JPEG; resizedImages maps each image index to the path of its resized copy.
resizedI=[]
resize_path = "../Data/images400x400/maksssksksss"
# Make sure the output directory exists before the first save
os.makedirs(os.path.dirname(resize_path), exist_ok=True)
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
for idx, image in images['path'].items():
    p = resize_path+str(idx)+".jpg"
    with Image.open(image) as im:
        imRGB = im.convert('RGB')
        resize = imRGB.resize((400,400))
        resize.save(p)
    resizedI.append([idx,p])
resizedImages = pd.DataFrame(resizedI, columns=['index','path']).set_index('index')

In [7]:
# Resize labels
# Scale every bounding box by the same per-axis factor used to bring its
# image to 400x400, then truncate the coordinates to integers.
resizedLabels = labels.copy()
x_scale = 400/labels['width']
y_scale = 400/labels['height']
for column, scale in (('xmin', x_scale), ('xmax', x_scale),
                      ('ymin', y_scale), ('ymax', y_scale)):
    resizedLabels[column] = scale*labels[column]
resizedLabels = resizedLabels.astype(dict.fromkeys(['xmin', 'xmax', 'ymin', 'ymax'], 'int'))


In [8]:
# Extract and resize templates
# For every labelled object, crop its bounding box out of the original image
# and save both the raw crop and a 32x32 resized copy. Each template is named
# "<image index>_<position of the object among that image's labels>".
templates = []
templatesResized = []
templatePrefix = "../Data/templates/maksssksksss"
templateResizedPrefix = "../Data/templates32x32/maksssksksss"
# Make sure both output directories exist before the first save
os.makedirs(os.path.dirname(templatePrefix), exist_ok=True)
os.makedirs(os.path.dirname(templateResizedPrefix), exist_ok=True)
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
for idx, path in images['path'].items():
    with Image.open(path) as im:
        # The RGB conversion does not depend on the object, so do it once per image
        imRGB = im.convert('RGB')
        # Double brackets keep the result a DataFrame even for a single label row
        l = labels.loc[[idx]]
        for i, row in enumerate(l.itertuples(index=False)):
            tindex = str(idx)+"_"+str(i)
            c = imRGB.crop((row.xmin,row.ymin,row.xmax,row.ymax))
            resize = c.resize((32,32))
            tempPath = templatePrefix+tindex+".jpg"
            resizePath = templateResizedPrefix+tindex+".jpg"
            c.save(tempPath)
            resize.save(resizePath)
            templates.append([idx,i,row.label,tempPath])
            templatesResized.append([idx,i,row.label,resizePath])
templates = pd.DataFrame(templates,columns=['index','i','label','path']).set_index('index')
resizedTemplates = pd.DataFrame(templatesResized,columns=['index','i','label','path']).set_index('index')

In [9]:
# Train test split
# Each image is assigned to the training set with probability 0.8; labels and
# templates follow their image by dropping the rows of the opposite set.
split = np.random.rand(len(images)) < 0.8
trainImages, testImages = images[split], images[~split]
trainIndex, testIndex = trainImages.index, testImages.index

# The same boolean mask is applied positionally to the resized image table
trainImagesResized, testImagesResized = resizedImages[split], resizedImages[~split]

trainLabels = labels.drop(testIndex)
testLabels = labels.drop(trainIndex)

trainLabelsResized = resizedLabels.drop(testIndex)
testLabelsResized = resizedLabels.drop(trainIndex)

trainTemplates = templates.drop(testIndex)
testTemplates = templates.drop(trainIndex)

trainTemplatesResized = resizedTemplates.drop(testIndex)
testTemplatesResized = resizedTemplates.drop(trainIndex)

In [10]:
# Write Dataframes to CSV
# Each train/test table is written to its own file under processedPath.
processedPath = "../Data/processed/"

csvOutputs = [
    ("trainImages.csv", trainImages),
    ("testImages.csv", testImages),
    ("trainLabels.csv", trainLabels),
    ("testLabels.csv", testLabels),
    ("trainImagesResized.csv", trainImagesResized),
    ("testImagesResized.csv", testImagesResized),
    ("trainLabelsResized.csv", trainLabelsResized),
    ("testLabelsResized.csv", testLabelsResized),
    ("trainTemplates.csv", trainTemplates),
    ("testTemplates.csv", testTemplates),
    ("trainTemplatesResized.csv", trainTemplatesResized),
    ("testTemplatesResized.csv", testTemplatesResized),
]
for csvName, frame in csvOutputs:
    frame.to_csv(processedPath+csvName)