In [68]:
import pandas as pd
import os
import cv2
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET
from io import BytesIO
import dlib
from xmltodict import parse

In [69]:
# Dataset 3: root folder, and the sub-folder that holds its image files.
path_data3 = "../data/data3/"
path_im3 = path_data3 + "images/"

In [70]:
# Dataset 3: keep only the two face classes, attach each row's image as RGB,
# and export the result.
df = pd.read_csv(path_data3 + 'train.csv')
data3_df = df[df['classname'].isin(['face_with_mask', 'face_no_mask'])]


def _load_rgb3(row):
    """Read the image named by ``row['name']`` from ``path_im3`` and return it as RGB."""
    # NOTE(review): cv2.imread returns None for a missing/unreadable file, in
    # which case cvtColor raises - confirm every listed image exists.
    bgr = cv2.imread(os.path.join(path_im3, row['name']))
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


# One image per annotation row, aligned with data3_df through the shared index.
image3_df = data3_df.apply(_load_rgb3, axis=1)
data3_df = data3_df.merge(
    image3_df.rename('image'),
    left_index=True,
    right_index=True,
)
data3_df.to_csv('data3.csv')

In [71]:
# Dataset 2: image folder and the folder with the matching XML annotations.
path_im2 = "../data/data2/images/"
path_xml2 = "../data/data2/annotations/"

In [72]:
def getObjectProperties(filename, objectList):
    """Collect one record per ``<object>`` element of an annotation XML file.

    The document is streamed with ``ET.iterparse``. The most recent text seen
    for the ``filename``, ``name``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``
    tags is remembered, and each time an ``object`` element closes a snapshot
    of those values is appended to ``objectList``.

    Args:
        filename: Path of the XML file, or an already opened file object
            (anything ``ET.iterparse`` accepts as its source).
        objectList: List that receives the records; it is extended in place.

    Returns:
        The same ``objectList`` that was passed in. Every appended record is a
        dict using the keys ``name``, ``classname``, ``x1``, ``y1``, ``x2`` and
        ``y2`` (for the tags seen so far); the values are the raw tag text.
    """
    # XML tag -> key under which its text is stored in the record.
    tag_to_key = {
        'filename': 'name',
        'name': 'classname',
        'xmin': 'x1',
        'ymin': 'y1',
        'xmax': 'x2',
        'ymax': 'y2',
    }
    objectDict = {}
    # Only "end" events are requested: iterparse does not guarantee that
    # element.text is populated when a "start" event fires, so reading the
    # <filename> text on "start" could store None as the image name.
    for _event, element in ET.iterparse(filename, events=('end',)):
        key = tag_to_key.get(element.tag)
        if key is not None:
            objectDict[key] = element.text
        elif element.tag == 'object':
            # Store a copy so later updates do not alter records already kept.
            objectList.append(dict(objectDict))
    return objectList

In [73]:
# Parse every annotation file of dataset 2 into one flat list of records.
objectList = []
# os.listdir returns entries in arbitrary order; sorting keeps the resulting
# row order reproducible between runs and machines.
for filename in sorted(os.listdir(path_xml2)):
    # Skip anything that is not an annotation file (sub-folders, hidden or
    # system files) instead of handing it to the XML parser.
    if not filename.lower().endswith('.xml'):
        continue
    # Binary mode lets the XML parser apply the encoding declared in the file
    # rather than the platform's default text encoding.
    with open(os.path.join(path_xml2, filename), 'rb') as fd:
        getObjectProperties(fd, objectList)

In [74]:
# Dataset 2: tabulate the parsed annotations, attach each row's image as RGB,
# rename the class labels to the 'face_*' names, and export the result.
data2_df = pd.DataFrame.from_records(objectList)

# NOTE(review): cv2.imread returns None for a missing/unreadable file, in which
# case cvtColor raises - confirm every annotated image exists.
image2_df = data2_df.apply(
    lambda row: cv2.cvtColor(
        cv2.imread(os.path.join(path_im2, row['name'])),
        cv2.COLOR_BGR2RGB,
    ),
    axis=1,
)
data2_df = data2_df.merge(
    image2_df.rename('image'),
    left_index=True,
    right_index=True,
)

# Any class name other than the two listed here is left unchanged.
data2_df['classname'] = data2_df['classname'].replace(
    {'with_mask': 'face_with_mask', 'without_mask': 'face_no_mask'}
)
data2_df.to_csv('data2.csv')

In [75]:
# Dataset 1: root folder and the two per-class image folders beneath it.
path_data1 = "../data/data1/"
path_im1_with = path_data1 + "with_mask/"
path_im1_without = path_data1 + "without_mask/"

In [76]:
def getBoundaryBoxPoints(filename, objectList, net, label=None, conf_threshold=0.76):
    """Detect faces in one image and append a record per confident detection.

    The image is turned into a 300x300 blob, pushed through ``net``, and every
    detection whose confidence exceeds ``conf_threshold`` is converted into a
    bounding box in pixel coordinates, clamped to the image bounds.

    Args:
        filename: Path of the image file to read.
        objectList: List that receives the records; it is extended in place.
        net: Detector exposing ``setInput`` and ``forward``. Its output is read
            as ``faces[0, 0, i, 2]`` (confidence) and ``faces[0, 0, i, 3:7]``
            (box corners, scaled here by the image width and height).
        label: Value stored under ``classname`` in every record.
        conf_threshold: Only detections with a confidence strictly above this
            value are kept. Defaults to the previously hard-coded 0.76.

    Returns:
        None. Each appended record is a dict with the keys ``name``,
        ``classname``, ``x1``, ``y1``, ``x2``, ``y2`` and ``image`` (the image
        converted from BGR to RGB).
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread reports an unreadable or non-image file by returning None;
        # skip it with a warning instead of failing on `img.shape`.
        import warnings

        warnings.warn("Skipping unreadable image: {}".format(filename))
        return

    h, w = img.shape[:2]
    blob = cv2.dnn.blobFromImage(img, 1.0, (300, 300), (104.0, 117.0, 123.0))
    net.setInput(blob)
    faces = net.forward()

    # Values that are the same for every detection of this image.
    name = os.path.basename(filename)
    scale = np.array([w, h, w, h])

    for i in range(faces.shape[2]):
        confidence = faces[0, 0, i, 2]
        if confidence > conf_threshold:
            box = faces[0, 0, i, 3:7] * scale
            (x, y, x1, y1) = box.astype("int")
            # Clamp the box to the image so the coordinates are valid pixels.
            (startX, startY) = (max(0, x), max(0, y))
            (endX, endY) = (min(w - 1, x1), min(h - 1, y1))

            objectList.append({
                'name': name,
                'classname': label,
                'x1': startX,
                'y1': startY,
                'x2': endX,
                'y2': endY,
                # Converted per record so records never share one array.
                'image': cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
            })

In [77]:
# TODO (from the original author): change code later.
# Load the Caffe face detector stored under the dataset-1 folder, then run it
# over both class folders, labelling each detection after its folder.
configFile = "models/deploy.prototxt"
modelFile = "models/res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNetFromCaffe(path_data1 + configFile, path_data1 + modelFile)

objectList = []
labelled_folders = (
    (path_im1_with, 'face_with_mask'),
    (path_im1_without, 'face_no_mask'),
)
for folder, label in labelled_folders:
    for filename in os.listdir(folder):
        getBoundaryBoxPoints(folder + filename, objectList, net, label)

In [78]:
# Tabulate the detections gathered for dataset 1 and export them.
# NOTE(review): the 'image' column holds arrays; to_csv stores their text
# form, which numpy abbreviates for large arrays - confirm this is intended.
data1_df = pd.DataFrame.from_records(data=objectList)
data1_df.to_csv(path_or_buf='data1.csv')

In [79]:
# Combine the three per-dataset CSV exports into a single table.
# NOTE(review): the per-dataset files are written with their index, so each
# frame read back here carries it as an extra "Unnamed: 0" column.
data1, data2, data3 = (
    pd.read_csv(csv_name, header=0)
    for csv_name in ("data1.csv", "data2.csv", "data3.csv")
)
merged_data = pd.concat(
    [data1, data2, data3],
    ignore_index=True,
    sort=False,
)
merged_data.to_csv('all_data.csv')

In [80]:
# Sanity check: True if any cell of the merged table is missing.
merged_data.isna().to_numpy().any()

False