# Script to detect faces using dlib.

### Importing libraries

In [9]:
import gc
import os
import sys
import math
import glob
import tqdm
import random
import numpy as np
from tqdm import tqdm
from time import sleep

In [10]:
import pandas as pd
import xml.etree.cElementTree as ET

In [11]:
import cv2
import dlib
from imutils import face_utils
from skimage.feature import hog
from skimage import data,exposure

### Defining paths

In [12]:
# Machine-specific absolute path; going by its name, a single flat folder holding the frames of all videos.
# NOTE(review): adjust this path when running on another machine.
path_folder_all_frames = '/media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_all_in_one'

In [13]:
# Machine-specific absolute path; passed below as the root folder that is expected to contain one sub-folder of frames per video.
# NOTE(review): adjust this path when running on another machine.
path_folder = '/media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise'

## Write an XML file, given a folder of images and (optionally) a detector

### Generating an xml file

#### From folder structure 
    folder_data
        folder_video_0
            video_0_frame_1.jpg
            video_0_frame_2.jpg
            video_0_frame_3.jpg
        folder_video_1
            video_1_frame_1.jpg
            video_1_frame_2.jpg
            video_1_frame_3.jpg
        .
        .
        .

In [4]:
def _collectFaceBoxes(path_frame, detector, isCNN=False):
    """
    Reads one frame and returns the detected faces as a list of (x, y, w, h) tuples.

    Returns None when the file cannot be decoded as an image.
    """
    image = cv2.imread(path_frame)
    # cv2.imread reports an unreadable file by returning None rather than raising
    if image is None:
        return None
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    detections = detector(gray, 1)
    if isCNN:
        # the CNN detector wraps each rectangle in an object exposing it as .rect
        rects = [detection.rect for detection in detections]
    else:
        rects = detections
    return [face_utils.rect_to_bb(rect) for rect in rects]


def writeXMLForDetectedFaces(path_folder_of_folders_of_images, detector,predictor, output_file_name = 'output',isCNN=False):
    """
    Detects faces in every frame of every video folder and writes the boxes to XML files.

    path_folder_of_folders_of_images: folder whose sub-folders each hold the frames of one video
    detector: callable invoked as detector(gray_image, 1) returning the face detections
    predictor: landmark predictor; kept for backward compatibility only. The landmarks were
               never stored in the XML, so the predictor is no longer invoked.
    output_file_name: prefix of the written files; folder number N is saved as <prefix>N.xml
    isCNN: set to True when the detector returns objects carrying the rectangle in .rect

    One XML tree is shared by all folders, so the file written after folder N also contains the
    images of folders 0..N-1. Folders and frames are visited in sorted order so that the
    numbering is reproducible between runs. Frames that cannot be read or processed are
    reported and skipped instead of aborting the rest of their folder. A KeyboardInterrupt
    stops the processing without writing the folder that was in progress.
    """
    root = ET.Element("dataset")
    ET.SubElement(root, "name").text = "Labelled faces"
    ET.SubElement(root, "comment").text = "These are labelled images from Bagamoyo"
    images = ET.SubElement(root, "images")

    list_folder_images = sorted(glob.glob(path_folder_of_folders_of_images + '/*'))
    print("Number of folders found in the folder are: ", len(list_folder_images))
    for vid_no,vid_folder in enumerate(list_folder_images):
        list_path_frames = sorted(glob.glob(vid_folder+'/*'))
        print("Number of images found in the folder number {}, {} are: ".format(vid_no,vid_folder), len(list_path_frames))
        try:
            for path_frame in list_path_frames:
                try:
                    boxes = _collectFaceBoxes(path_frame, detector, isCNN)
                except Exception as err:
                    # a single bad frame must not discard the remaining frames of the folder
                    print("skipping {}: {}".format(path_frame, err))
                    continue
                if boxes is None:
                    print("skipping unreadable image {}".format(path_frame))
                    continue
                # the node is added only after a successful detection, so a failed frame
                # is not recorded as an image without faces
                image_node = ET.SubElement(images, "image", file=os.path.basename(path_frame))
                for (x, y, w, h) in boxes:
                    ET.SubElement(image_node, "box", height=str(h), left=str(x), top=str(y), width=str(w))
            dest_name = output_file_name+str(vid_no)+'.xml'
            ET.ElementTree(root).write(dest_name)
            print("xml written to {}".format(dest_name))
        except KeyboardInterrupt:
            break
        except (IOError, OSError) as err:
            # report the failed write and carry on with the next folder
            print("could not write xml for folder number {}: {}".format(vid_no, err))
            continue

In [5]:
def saveXMLForHOGDetector(path_folder_of_folders_of_images,output_file_name='output',path_shape_predictor='shape_predictor_68_face_landmarks.dat'):
    """
    Writes the face XML files for a folder of video folders using dlib's frontal face detector.

    path_folder_of_folders_of_images: folder whose sub-folders each hold the frames of one video
    output_file_name: prefix handed to writeXMLForDetectedFaces for the written XML files
    path_shape_predictor: path of the landmark model file; the default keeps the previous
                          behaviour of loading it relative to the current working directory
    """
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(path_shape_predictor)
    writeXMLForDetectedFaces(path_folder_of_folders_of_images=path_folder_of_folders_of_images, detector=detector,predictor=predictor,output_file_name = output_file_name)

In [47]:
def saveXMLForCNNDetector(path_folder_of_folders_of_images,output_file_name='output',path_cnn_model='mmod_human_face_detector.dat',path_shape_predictor='shape_predictor_68_face_landmarks.dat'):
    """
    Writes the face XML files for a folder of video folders using dlib's CNN face detector.

    path_folder_of_folders_of_images: folder whose sub-folders each hold the frames of one video
    output_file_name: prefix handed to writeXMLForDetectedFaces for the written XML files
    path_cnn_model: path of the CNN detector model file
    path_shape_predictor: path of the landmark model file

    Both model paths default to the file names that were previously hard-coded, i.e. they are
    resolved relative to the current working directory. Pass explicit paths when the model
    files live elsewhere; dlib raises if a model file cannot be opened.
    """
    detector = dlib.cnn_face_detection_model_v1(path_cnn_model)
    predictor = dlib.shape_predictor(path_shape_predictor)
    writeXMLForDetectedFaces(path_folder_of_folders_of_images=path_folder_of_folders_of_images, detector=detector,predictor=predictor,output_file_name = output_file_name,isCNN=True)

In [48]:
# Run the HOG-based detection over every video folder; the XML files get the prefix 'hog_output'
if __name__ == "__main__":
    saveXMLForHOGDetector(
        path_folder_of_folders_of_images=path_folder,
        output_file_name='hog_output',
    )

('Number of folders found in the folder are: ', 524)
('Number of images found in the folder number 0, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_00-male-surprise_20180516_091529_219393302 (5-29-2018 10-29-50 AM) are: ', 104)
xml written to hog_output0.xml
('Number of images found in the folder number 1, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_00-male-surprise_20180516_091541_1754978908 (5-27-2018 5-58-50 AM) are: ', 107)
xml written to hog_output1.xml
('Number of images found in the folder number 2, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_00-male-surprise_20180516_091551_1272027070 (5-29-2018 10-32-30 AM) are: ', 134)
xml written to hog_output2.xml
('Number of images found in the folder number 3, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_00-male-surprise_20180516_091601_315735604 (5-27-2018 5-40-04 AM) are: ', 172)
xml written to hog

xml written to hog_output33.xml
('Number of images found in the folder number 34, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180516_104052_1280076545 (5-29-2018 10-05-58 AM) are: ', 300)
xml written to hog_output34.xml
('Number of images found in the folder number 35, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180516_104413_411561700 (5-27-2018 6-00-56 AM) are: ', 127)
xml written to hog_output35.xml
('Number of images found in the folder number 36, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180516_104906_1866004401 (5-27-2018 5-33-22 AM) are: ', 112)
xml written to hog_output36.xml
('Number of images found in the folder number 37, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180516_104912_855688121 (5-27-2018 6-41-56 AM) are: ', 28)
xml written to hog_output37.xm

xml written to hog_output67.xml
('Number of images found in the folder number 68, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180517_111312_437693398 (5-27-2018 5-29-36 AM) are: ', 131)
xml written to hog_output68.xml
('Number of images found in the folder number 69, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180517_111514_642995551 (5-29-2018 10-31-40 AM) are: ', 135)
xml written to hog_output69.xml
('Number of images found in the folder number 70, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180517_111724_1970497648 (5-27-2018 5-58-14 AM) are: ', 145)
xml written to hog_output70.xml
('Number of images found in the folder number 71, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_03-female-delight_20180517_111924_558997555 (5-29-2018 10-05-48 AM) are: ', 148)
xml written to hog_output71.x

xml written to hog_output101.xml
('Number of images found in the folder number 102, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_06-female-confusion_20180517_111249_1387834407 (5-27-2018 7-25-12 AM) are: ', 119)
xml written to hog_output102.xml
('Number of images found in the folder number 103, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_06-female-confusion_20180517_111452_215187933 (5-27-2018 7-26-30 AM) are: ', 97)
xml written to hog_output103.xml
('Number of images found in the folder number 104, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_06-female-confusion_20180517_111701_2052176380 (5-29-2018 10-28-16 AM) are: ', 168)
xml written to hog_output104.xml
('Number of images found in the folder number 105, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_06-female-confusion_20180517_111901_1604592638 (5-27-2018 6-03-18 AM) are: ', 121)
xml written t

xml written to hog_output135.xml
('Number of images found in the folder number 136, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_10-male-confusion_20180517_050147_1758874434 (5-27-2018 5-34-44 AM) are: ', 153)
xml written to hog_output136.xml
('Number of images found in the folder number 137, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_10-male-confusion_20180517_050417_1704274614 (5-27-2018 7-24-42 AM) are: ', 168)
xml written to hog_output137.xml
('Number of images found in the folder number 138, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_10-male-confusion_20180517_050650_461318670 (5-29-2018 10-06-22 AM) are: ', 124)
xml written to hog_output138.xml
('Number of images found in the folder number 139, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_10-male-confusion_20180517_050909_470363800 (5-27-2018 7-25-32 AM) are: ', 134)
xml written to hog_ou

xml written to hog_output167.xml
('Number of images found in the folder number 168, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180516_091216_1511931664 (5-27-2018 5-37-28 AM) are: ', 258)
xml written to hog_output168.xml
('Number of images found in the folder number 169, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180516_091657_1119402126 (5-27-2018 5-59-02 AM) are: ', 612)
xml written to hog_output169.xml
('Number of images found in the folder number 170, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180516_102535_304843369 (5-27-2018 5-29-58 AM) are: ', 225)
xml written to hog_output170.xml
('Number of images found in the folder number 171, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180516_102633_735716527 (5-27-2018 6-03-02 AM) are: ', 108)
xml written to

xml written to hog_output201.xml
('Number of images found in the folder number 202, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180517_050804_1876713609 (5-27-2018 5-39-16 AM) are: ', 186)
xml written to hog_output202.xml
('Number of images found in the folder number 203, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180517_051006_925186803 (5-27-2018 6-26-04 AM) are: ', 140)
xml written to hog_output203.xml
('Number of images found in the folder number 204, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180517_111152_1911056236 (5-27-2018 5-45-26 AM) are: ', 321)
xml written to hog_output204.xml
('Number of images found in the folder number 205, /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_frames_folder_wise/VIDEO_1526475571181 bored_20180517_111359_1727095365 (5-29-2018 10-06-28 AM) are: ', 215)
xml written 

In [49]:
# Run the CNN-based detection over every video folder; the XML files get the prefix 'cnn_output'
saveXMLForCNNDetector(
    path_folder_of_folders_of_images=path_folder,
    output_file_name='cnn_output',
)

RuntimeError: Unable to open mmod_human_face_detector.dat for reading.

In [12]:
# Load dlib's CNN face detector from a model file given relative to the current working directory.
# NOTE(review): the preceding cell's output reports that this same file name could not be opened
# for reading -- confirm the model file is present before relying on this cell.
cnn_face_detector = dlib.cnn_face_detection_model_v1('mmod_human_face_detector.dat')

In [None]:
# NOTE(review): writeXMLForDetectedFaces requires path_folder_of_folders_of_images, detector and
# predictor, so this call raises a TypeError as written. The cell has no execution count;
# presumably an unfinished draft -- supply the arguments or remove the cell.
writeXMLForDetectedFaces()

### Writing XML for histogram-normalised images

In [14]:
# Folder of the histogram-normalised frames (machine-specific absolute path).
# The trailing slash matters: file names are appended to this string by plain concatenation below.
path_histogram_images = '/media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised/'

In [15]:
# Take the name of every .jpg in the all-in-one frames folder and point it at the
# file of the same name inside the histogram-normalised folder
paths_original_frames = glob.glob(path_folder_all_frames+'/*.jpg')
file_names = [
    path_histogram_images+os.path.basename(path_original_frame)
    for path_original_frame in paths_original_frames
]

In [16]:
# Folder that receives the XML checkpoints of the histogram-normalised run (machine-specific absolute path).
# The trailing slash matters: the XML file name is appended by plain string concatenation.
path_output_xml = "/media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/"

In [17]:
# Module-level detector and landmark predictor used by the histogram-normalised run below.
# The landmark model file name is relative, so it is resolved against the current working directory.
args={"shape_predictor":"shape_predictor_68_face_landmarks.dat"}
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])

In [None]:
# Detect faces in the histogram-normalised frames and save the boxes as XML checkpoints.
# NOTE(review): frames with an index up to RESUME_AFTER_INDEX are skipped, presumably because
# an earlier run already covered them -- set it to -1 to process every frame.
RESUME_AFTER_INDEX = 18233
# A checkpoint holding everything collected so far is written at each multiple of this index.
CHECKPOINT_EVERY = 1000

list_path_frames = file_names
print("Number of images found are:{} ".format(len(list_path_frames)))

# One XML tree is shared by all frames, so each checkpoint contains all earlier results of this run.
root = ET.Element("dataset")
ET.SubElement(root, "name").text = "Labelled faces"
ET.SubElement(root, "comment").text = "These are labelled images from Bagamoyo"
images = ET.SubElement(root, "images")

last_index = None        # index of the last frame that was handled
last_checkpoint = None   # index at which the most recent XML file was written
for j,path_frame in enumerate(list_path_frames):
    if j <= RESUME_AFTER_INDEX:
        continue
    try:
        # the image is handed to the detector exactly as read; the grayscale conversion
        # had been disabled for this run and stays disabled
        image = cv2.imread(path_frame)
        if image is None:
            # cv2.imread returns None for a missing or undecodable file
            print("skipping unreadable image {}".format(path_frame))
        else:
            rects = detector(image,1)
            # the node is added only after a successful detection, so a failed frame
            # is not recorded as an image without faces
            image_node = ET.SubElement(images, "image", file=os.path.basename(path_frame))
            for rect in rects:
                (x, y, w, h) = face_utils.rect_to_bb(rect)
                ET.SubElement(image_node, "box", height=str(h), left=str(x), top=str(y), width=str(w))
        last_index = j
        if j % CHECKPOINT_EVERY == 0:
            dest_name = path_output_xml+str(j)+'.xml'
            ET.ElementTree(root).write(dest_name)
            last_checkpoint = j
            print("xml written to {}".format(dest_name))
    except KeyboardInterrupt:
        break
    except Exception as err:
        # report the problem instead of hiding it, then carry on with the next frame
        print("skipping {}: {}".format(path_frame, err))
        continue

# Save the frames handled since the last checkpoint; without this the tail of the run
# (and anything collected before an interrupt) would never reach disk.
if last_index is not None and last_index != last_checkpoint:
    dest_name = path_output_xml+str(last_index)+'.xml'
    ET.ElementTree(root).write(dest_name)
    print("xml written to {}".format(dest_name))

Number of images found are:44492 
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/19000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/20000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/21000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/22000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/23000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/24000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/25000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/26000.xml
xml written to /media/amogh/Stuff/CMU/datasets/bagamoyo_data/bagamoyo_histogram_normalised_output/2700

In [49]:
! ls /media/amogh/Stuff/CMU/datasets/bagamoyo_histogram_normalised_output/

ls: cannot access '/media/amogh/Stuff/CMU/datasets/bagamoyo_histogram_normalised_output/': No such file or directory
