# Importing libraries

In [1]:
import os
import sys
import math
import glob
import tqdm
import random
import numpy as np
from tqdm import tqdm
from time import sleep
from shutil import copy

In [2]:
import pandas as pd
import xml.etree.cElementTree as ET

In [3]:
import cv2
import dlib
from imutils import face_utils
from skimage.feature import hog
from skimage import data,exposure

In [4]:
path_folder_bagamoyo_data = '/media/amogh/Stuff/CMU/datasets/bagamoyo_data/'

In [5]:
path_folder_subject_videos = path_folder_bagamoyo_data + 'bagamoyo_videos/subject_sorted_videos/'

In [6]:
path_folder_framewise_videos = path_folder_bagamoyo_data + 'bagamoyo_frames_folder_wise/'

In [7]:
list_path_subject_folders = glob.glob(path_folder_subject_videos+'/*/')
list_path_subject_folders

[]

In [8]:
path_folder_data = 'faces/'

Subjects to use for training and testing

In [9]:
list_training_subjects = []
list_testing_subjects = []

### Save data (frames)

In [10]:
def getListSubjectVideos(path_folder_subject_videos, subject_num):
    """
    Returns the path to a particular subject's videos given the path to folder of subject videos and subject number
    """
    path_subject_videos = path_folder_subject_videos + str(subject_num) +'/'
    list_subject_videos = glob.glob(path_subject_videos + '/*')
    return list_subject_videos

In [11]:
def getPathFolderFrames(base_name_video, path_folder_frames_wise):
    """
    Returns the path of the folder with frames given the name of the video.
    Eg- VIDEO_00-male-surprise_20180517_050935_780615161 is the videlo basename
    returns the path <path_to_bagamoyo_frames_folder_wise>/VIDEO_00-male-surprise_20180517_050935_780615161 (5-29-2018 10-05-16 AM)
    """
    list_folders = glob.glob(path_folder_frames_wise + '/*')
    path_frames_folder = ''
    for folder in list_folders:
        if base_name_video in folder:
            path_frames_folder = folder
    return path_frames_folder    

In [12]:
def generateTrainTestData(list_train_test_subjects,path_folder_subject_videos, path_folder_data):
    """
    Copies the images from the train and test folders in the faces folder given the list with number of subjects.
    """
    for subject_num in list_train_test_subjects:
        list_subject_videos = getListSubjectVideos(path_folder_subject_videos,subject_num)
        #get images from the relevant folder and save in faces
        for video in tqdm(list_subject_videos):
            basename_folder_video = os.path.splitext(os.path.basename(video))[0]
            #get the folder with the frames
            path_folder_frames = getPathFolderFrames(basename_folder_video,path_folder_framewise_videos)
            list_path_frames = glob.glob(path_folder_frames + '/*.jpg')
            for path_frame in list_path_frames:
                print(path_frame, path_folder_data)
                copy(path_frame, path_folder_data)

### Generate the training.xml and testing.xml

Aim- 
training subjects
 - get video names of these subjects
    - get frame names under these subjects
    - get boxes under these frames (from data xml(generate for all videos in a folder))
        - put in training boxes.
        - put in test boxes.        

Generate the xml for complete data from csv using the script written earlier.
Given the subject numbers, take those videos, get all frames in them, get all elements in them and make training and test xml before starting the script.

In [13]:
def generateDataXML(list_subject_num,path_data_xml,path_output,subsample=True,num_subsample=2000):
    """
    Given a list of subject numbers, path of the xml with bounding box labels, and the path of the output xml file, subsample is to take a random sample of all the images optionally.
    """
    
    #parsing data and creating dictionary 'image_file_name.jpg : image_node'
    tree = ET.parse(path_data_xml)
    root = tree.getroot()
    dic_file_to_image = {}
    for image in root.iter('image'):
        dic_file_to_image[image.attrib['file']] = image
#     print((dic_file_to_image.keys()))
    
    #Writing an XML
    root = ET.Element("dataset")
    name = ET.SubElement(root, "name").text = "Labelled faces"
    comment = ET.SubElement(root, "comment").text = "These are labelled images from Bagamoyo"
    images = ET.SubElement(root, "images")
    
    #get the list of images
    for subject_num in list_subject_num:
        list_subject_videos = getListSubjectVideos(path_folder_subject_videos,subject_num)
        #get images from the relevant folder and save in faces
        for video in tqdm(list_subject_videos):
            basename_folder_video = os.path.splitext(os.path.basename(video))[0]
            #get the folder with the frames
            path_folder_frames = getPathFolderFrames(basename_folder_video,path_folder_framewise_videos)
            list_path_frames = glob.glob(path_folder_frames + '/*.jpg')
            print("{} frames present".format(len(list_path_frames)))
            i=0
            for path_frame in list_path_frames:
#                 get the frame tag in xml, if present append.
                image_file_name = os.path.basename(path_frame)
                if image_file_name in dic_file_to_image:
                    im_node = dic_file_to_image[image_file_name]
                    images.append(im_node)
                    pass
                else:
                    i+=1
            print("{} frames with no detection".format(i))
    num_images = len(images)
    print(num_images)
    if subsample == True:
        num_subsample = min(num_subsample,num_images)
        index_ignore = random.sample(list(range(num_images)), num_images-num_subsample)
        print(len(index_ignore))
        list_images = list(images)
        for j, image in enumerate(list_images):
    #         print j, len(images)
            if j in index_ignore:
                images.remove(image)
    print(len(images))

    #write the XML file
    tree = ET.ElementTree(root)
    dest_name = path_output
    tree.write(dest_name)
    print("file written at: ", dest_name)

### Train 200, test 300

In [15]:
generateDataXML([1,2,3,4,5,6],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/training.xml',num_subsample=200)

generateDataXML([7,8,9],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/testing.xml',num_subsample=300)

  4%|▍         | 1/24 [00:00<00:03,  5.85it/s]

186 frames present
19 frames with no detection
119 frames present
0 frames with no detection


 21%|██        | 5/24 [00:00<00:02,  8.65it/s]

136 frames present
14 frames with no detection
142 frames present
8 frames with no detection
257 frames present
136 frames with no detection


 29%|██▉       | 7/24 [00:00<00:01,  9.21it/s]

150 frames present
4 frames with no detection
135 frames present
0 frames with no detection


 38%|███▊      | 9/24 [00:01<00:02,  5.91it/s]

100 frames present
99 frames with no detection
420 frames present
315 frames with no detection
134 frames present
4 frames with no detection


 54%|█████▍    | 13/24 [00:01<00:01,  7.26it/s]

143 frames present
2 frames with no detection
140 frames present
3 frames with no detection
172 frames present
7 frames with no detection
233 frames present


 62%|██████▎   | 15/24 [00:01<00:01,  7.62it/s]

11 frames with no detection
149 frames present
19 frames with no detection
131 frames present
1 frames with no detection


 79%|███████▉  | 19/24 [00:02<00:00,  8.41it/s]

161 frames present
2 frames with no detection
129 frames present
0 frames with no detection
248 frames present
131 frames with no detection
105 frames present
3 frames with no detection


 88%|████████▊ | 21/24 [00:02<00:00,  8.68it/s]

141 frames present
1 frames with no detection
155 frames present
1 frames with no detection


100%|██████████| 24/24 [00:02<00:00,  8.81it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

137 frames present
1 frames with no detection
176 frames present
5 frames with no detection


 13%|█▎        | 2/15 [00:00<00:01,  8.06it/s]

145 frames present
15 frames with no detection
699 frames present
531 frames with no detection
130 frames present
2 frames with no detection


 33%|███▎      | 5/15 [00:00<00:00, 12.46it/s]

111 frames present
2 frames with no detection
101 frames present
99 frames with no detection
124 frames present
0 frames with no detection


 60%|██████    | 9/15 [00:00<00:00, 12.44it/s]

42 frames present
42 frames with no detection
118 frames present
0 frames with no detection
85 frames present
85 frames with no detection


 73%|███████▎  | 11/15 [00:00<00:00, 12.56it/s]

127 frames present
3 frames with no detection
110 frames present
2 frames with no detection
239 frames present
122 frames with no detection


100%|██████████| 15/15 [00:01<00:00, 12.96it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

84 frames present
84 frames with no detection
96 frames present
96 frames with no detection
126 frames present
0 frames with no detection


 15%|█▌        | 2/13 [00:00<00:00, 13.01it/s]

195 frames present
12 frames with no detection
132 frames present
5 frames with no detection
138 frames present
10 frames with no detection


 38%|███▊      | 5/13 [00:00<00:00, 12.98it/s]

117 frames present
3 frames with no detection
168 frames present
12 frames with no detection


 62%|██████▏   | 8/13 [00:00<00:00, 11.86it/s]

263 frames present
204 frames with no detection
165 frames present
3 frames with no detection
147 frames present
4 frames with no detection


 77%|███████▋  | 10/13 [00:00<00:00, 10.20it/s]

305 frames present
155 frames with no detection
148 frames present
6 frames with no detection


 92%|█████████▏| 12/13 [00:01<00:00,  9.98it/s]

122 frames present
2 frames with no detection
226 frames present
117 frames with no detection


100%|██████████| 13/13 [00:01<00:00,  9.86it/s]
  0%|          | 0/13 [00:00<?, ?it/s]

123 frames present
9 frames with no detection
124 frames present
0 frames with no detection


 15%|█▌        | 2/13 [00:00<00:00, 12.15it/s]

149 frames present
11 frames with no detection
145 frames present
0 frames with no detection
608 frames present


 38%|███▊      | 5/13 [00:00<00:00, 10.57it/s]

467 frames with no detection
153 frames present
0 frames with no detection
46 frames present
46 frames with no detection


 62%|██████▏   | 8/13 [00:00<00:00, 10.92it/s]

132 frames present
7 frames with no detection
165 frames present
0 frames with no detection
148 frames present
2 frames with no detection


 92%|█████████▏| 12/13 [00:01<00:00, 11.25it/s]

128 frames present
0 frames with no detection
135 frames present
0 frames with no detection
116 frames present
0 frames with no detection


100%|██████████| 13/13 [00:01<00:00, 11.12it/s]
  7%|▋         | 1/14 [00:00<00:01,  8.92it/s]

137 frames present
4 frames with no detection
128 frames present
0 frames with no detection


 21%|██▏       | 3/14 [00:00<00:01,  9.62it/s]

106 frames present
0 frames with no detection
197 frames present
11 frames with no detection
130 frames present
1 frames with no detection


 50%|█████     | 7/14 [00:00<00:00, 11.49it/s]

152 frames present
4 frames with no detection
40 frames present
40 frames with no detection
227 frames present
6 frames with no detection


 71%|███████▏  | 10/14 [00:00<00:00, 10.93it/s]

388 frames present
10 frames with no detection
212 frames present
6 frames with no detection
118 frames present
1 frames with no detection


 86%|████████▌ | 12/14 [00:01<00:00, 10.81it/s]

138 frames present
0 frames with no detection
171 frames present
16 frames with no detection
141 frames present
5 frames with no detection


100%|██████████| 14/14 [00:01<00:00, 11.52it/s]
  7%|▋         | 1/14 [00:00<00:01,  8.88it/s]

130 frames present
14 frames with no detection
164 frames present
0 frames with no detection


 21%|██▏       | 3/14 [00:00<00:01,  8.41it/s]

205 frames present
7 frames with no detection
499 frames present
376 frames with no detection


 43%|████▎     | 6/14 [00:00<00:00,  9.53it/s]

657 frames present
504 frames with no detection
139 frames present
0 frames with no detection
142 frames present
15 frames with no detection


 57%|█████▋    | 8/14 [00:00<00:00,  9.82it/s]

139 frames present
6 frames with no detection
129 frames present
3 frames with no detection
116 frames present
8 frames with no detection


 86%|████████▌ | 12/14 [00:01<00:00, 10.61it/s]

134 frames present
4 frames with no detection
168 frames present
20 frames with no detection
114 frames present
0 frames with no detection


100%|██████████| 14/14 [00:01<00:00, 10.69it/s]


125 frames present
2 frames with no detection
117 frames present
0 frames with no detection
11890
11690
200


  0%|          | 0/11 [00:00<?, ?it/s]

('file written at: ', 'faces/training.xml')


  9%|▉         | 1/11 [00:00<00:01,  5.97it/s]

150 frames present
4 frames with no detection
0 frames present
0 frames with no detection
115 frames present
15 frames with no detection


 36%|███▋      | 4/11 [00:00<00:01,  4.67it/s]

143 frames present
10 frames with no detection


 64%|██████▎   | 7/11 [00:01<00:00,  4.33it/s]

320 frames present
163 frames with no detection
118 frames present
4 frames with no detection
105 frames present
7 frames with no detection


 91%|█████████ | 10/11 [00:01<00:00,  5.40it/s]

425 frames present
327 frames with no detection
86 frames present
86 frames with no detection
181 frames present
1 frames with no detection


100%|██████████| 11/11 [00:01<00:00,  5.59it/s]
  0%|          | 0/24 [00:00<?, ?it/s]

113 frames present
0 frames with no detection
226 frames present
6 frames with no detection


 17%|█▋        | 4/24 [00:00<00:01, 12.64it/s]

146 frames present
0 frames with no detection
140 frames present
0 frames with no detection
95 frames present
95 frames with no detection


 25%|██▌       | 6/24 [00:00<00:01, 12.89it/s]

108 frames present
0 frames with no detection
117 frames present
0 frames with no detection
67 frames present
67 frames with no detection


 42%|████▏     | 10/24 [00:00<00:01, 12.92it/s]

522 frames present
391 frames with no detection
81 frames present
81 frames with no detection
137 frames present
0 frames with no detection


 50%|█████     | 12/24 [00:00<00:00, 13.22it/s]

78 frames present
78 frames with no detection
119 frames present
7 frames with no detection
161 frames present
3 frames with no detection


 71%|███████   | 17/24 [00:01<00:00, 13.15it/s]

216 frames present
111 frames with no detection
109 frames present
2 frames with no detection
77 frames present
77 frames with no detection
116 frames present
0 frames with no detection


 79%|███████▉  | 19/24 [00:01<00:00, 12.83it/s]

72 frames present
72 frames with no detection
115 frames present
10 frames with no detection
80 frames present
80 frames with no detection


 96%|█████████▌| 23/24 [00:01<00:00, 13.42it/s]

149 frames present
4 frames with no detection
81 frames present
81 frames with no detection
151 frames present
5 frames with no detection


100%|██████████| 24/24 [00:01<00:00, 12.86it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

399 frames present
302 frames with no detection
138 frames present
0 frames with no detection


 40%|████      | 4/10 [00:00<00:00, 14.42it/s]

111 frames present
0 frames with no detection
102 frames present
99 frames with no detection
24 frames present
24 frames with no detection
176 frames present
4 frames with no detection


 80%|████████  | 8/10 [00:00<00:00, 15.24it/s]

117 frames present
0 frames with no detection
87 frames present
87 frames with no detection
93 frames present
93 frames with no detection


100%|██████████| 10/10 [00:00<00:00, 14.40it/s]


106 frames present
0 frames with no detection
130 frames present
3 frames with no detection
4003
3703
300
('file written at: ', 'faces/testing.xml')


In [17]:
!python train_dlib_detector.py faces/

Training with C: 5
Training with epsilon: 0.01
Training using 4 threads.
Training with sliding window 80 pixels wide by 80 pixels tall.
Training on both left and right flipped versions of images.
objective:     150.76
objective gap: 150.755
risk:          30.151
risk gap:      30.151
num planes:    3
iter:          1

objective:     44.5491
objective gap: 44.462
risk:          8.89245
risk gap:      8.89241
num planes:    4
iter:          2

objective:     67.3976
objective gap: 67.2146
risk:          13.443
risk gap:      13.4429
num planes:    5
iter:          3

objective:     36.3016
objective gap: 36.0886
risk:          7.2178
risk gap:      7.21772
num planes:    6
iter:          4

objective:     38.8748
objective gap: 38.5198
risk:          7.70409
risk gap:      7.70396
num planes:    7
iter:          5

objective:     12.1702
objective gap: 11.7724
risk:          2.35463
risk gap:      2.35449
num planes:    8
iter:          6

objective:     18.9005
objective gap: 18.4646
ri

objective:     3.08205
objective gap: 0.0662297
risk:          0.351396
risk gap:      0.0132459
num planes:    27
iter:          63

objective:     3.08566
objective gap: 0.068823
risk:          0.351212
risk gap:      0.0137646
num planes:    27
iter:          64

objective:     3.07711
objective gap: 0.0577248
risk:          0.351373
risk gap:      0.011545
num planes:    28
iter:          65

objective:     3.0749
objective gap: 0.0536946
risk:          0.35017
risk gap:      0.0107389
num planes:    28
iter:          66

objective:     3.07315
objective gap: 0.0496515
risk:          0.349139
risk gap:      0.00993029
num planes:    28
iter:          67

objective:     3.07453
objective gap: 0.0492334
risk:          0.349604
risk gap:      0.00984667
num planes:    28
iter:          68

objective:     3.52006
objective gap: 0.493729
risk:          0.438943
risk gap:      0.0987457
num planes:    29
iter:          69

objective:     3.36534
objective gap: 0.325505
risk:          0.3

In [18]:
generateDataXML([1,2,3,7,8,9],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/training.xml',num_subsample=200)

generateDataXML([4,5,6],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/testing.xml',num_subsample=300)

100%|██████████| 24/24 [00:00<00:00, 383.82it/s]
100%|██████████| 15/15 [00:00<00:00, 239.50it/s]
100%|██████████| 13/13 [00:00<00:00, 203.62it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

186 frames present
19 frames with no detection
119 frames present
0 frames with no detection
136 frames present
14 frames with no detection
142 frames present
8 frames with no detection
257 frames present
136 frames with no detection
150 frames present
4 frames with no detection
135 frames present
0 frames with no detection
100 frames present
99 frames with no detection
420 frames present
315 frames with no detection
134 frames present
4 frames with no detection
143 frames present
2 frames with no detection
140 frames present
3 frames with no detection
172 frames present
7 frames with no detection
233 frames present
11 frames with no detection
149 frames present
19 frames with no detection
131 frames present
1 frames with no detection
161 frames present
2 frames with no detection
129 frames present
0 frames with no detection
248 frames present
131 frames with no detection
105 frames present
3 frames with no detection
141 frames present
1 frames with no detection
155 frames present
1 fr

100%|██████████| 11/11 [00:00<00:00, 175.63it/s]
100%|██████████| 24/24 [00:00<00:00, 379.99it/s]
100%|██████████| 10/10 [00:00<00:00, 329.45it/s]


150 frames present
4 frames with no detection
0 frames present
0 frames with no detection
115 frames present
15 frames with no detection
143 frames present
10 frames with no detection
320 frames present
163 frames with no detection
118 frames present
4 frames with no detection
105 frames present
7 frames with no detection
425 frames present
327 frames with no detection
86 frames present
86 frames with no detection
181 frames present
1 frames with no detection
113 frames present
0 frames with no detection
226 frames present
6 frames with no detection
146 frames present
0 frames with no detection
140 frames present
0 frames with no detection
95 frames present
95 frames with no detection
108 frames present
0 frames with no detection
117 frames present
0 frames with no detection
67 frames present
67 frames with no detection
522 frames present
391 frames with no detection
81 frames present
81 frames with no detection
137 frames present
0 frames with no detection
78 frames present
78 frames 

100%|██████████| 13/13 [00:00<00:00, 418.96it/s]
100%|██████████| 14/14 [00:00<00:00, 443.01it/s]
  0%|          | 0/14 [00:00<?, ?it/s]

('file written at: ', 'faces/training.xml')
124 frames present
0 frames with no detection
149 frames present
11 frames with no detection
145 frames present
0 frames with no detection
608 frames present
467 frames with no detection
153 frames present
0 frames with no detection
46 frames present
46 frames with no detection
132 frames present
7 frames with no detection
165 frames present
0 frames with no detection
148 frames present
2 frames with no detection
128 frames present
0 frames with no detection
135 frames present
0 frames with no detection
116 frames present
0 frames with no detection
137 frames present
4 frames with no detection
128 frames present
0 frames with no detection
106 frames present
0 frames with no detection
197 frames present
11 frames with no detection
130 frames present
1 frames with no detection
152 frames present
4 frames with no detection
40 frames present
40 frames with no detection
227 frames present
6 frames with no detection
388 frames present
10 frames wit

100%|██████████| 14/14 [00:00<00:00, 222.89it/s]


5716
5416
300
('file written at: ', 'faces/testing.xml')


In [19]:
!python train_dlib_detector.py faces/

Training with C: 5
Training with epsilon: 0.01
Training using 4 threads.
Training with sliding window 79 pixels wide by 81 pixels tall.
Training on both left and right flipped versions of images.
objective:     152.664
objective gap: 152.659
risk:          30.5317
risk gap:      30.5317
num planes:    3
iter:          1

objective:     45.4223
objective gap: 45.3193
risk:          9.06389
risk gap:      9.06385
num planes:    4
iter:          2

objective:     75.4265
objective gap: 75.2136
risk:          15.0428
risk gap:      15.0427
num planes:    5
iter:          3

objective:     39.521
objective gap: 39.2651
risk:          7.85311
risk gap:      7.85302
num planes:    6
iter:          4

objective:     57.4875
objective gap: 57.0654
risk:          11.4132
risk gap:      11.4131
num planes:    7
iter:          5

objective:     16.5313
objective gap: 16.0529
risk:          3.21077
risk gap:      3.21059
num planes:    8
iter:          6

objective:     22.897
objective gap: 22.367

objective:     2.85611
objective gap: 0.0536315
risk:          0.28846
risk gap:      0.0107263
num planes:    28
iter:          63

objective:     2.85562
objective gap: 0.0517032
risk:          0.289447
risk gap:      0.0103406
num planes:    29
iter:          64

objective:     2.84321
objective gap: 0.0383863
risk:          0.285858
risk gap:      0.00767725
num planes:    30
iter:          65

objective:     2.84619
objective gap: 0.0395715
risk:          0.28773
risk gap:      0.00791429
num planes:    30
iter:          66

objective:     3.93161
objective gap: 1.12396
risk:          0.50516
risk gap:      0.224792
num planes:    30
iter:          67

objective:     3.36415
objective gap: 0.54763
risk:          0.380706
risk gap:      0.109526
num planes:    30
iter:          68

objective:     3.3121
objective gap: 0.468933
risk:          0.361682
risk gap:      0.0937866
num planes:    30
iter:          69

objective:     3.18755
objective gap: 0.337312
risk:          0.327931


In [20]:
generateDataXML([4,5,6,7,8,9],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/training.xml',num_subsample=200)

generateDataXML([1,2,3],'/media/amogh/Stuff/CMU/datasets/bagamoyo_data/openface_outputs/library/openface_199_folders_75_confidence.xml','faces/testing.xml',num_subsample=300)

100%|██████████| 13/13 [00:00<00:00, 421.71it/s]
100%|██████████| 14/14 [00:00<00:00, 224.96it/s]
100%|██████████| 14/14 [00:00<00:00, 223.55it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

124 frames present
0 frames with no detection
149 frames present
11 frames with no detection
145 frames present
0 frames with no detection
608 frames present
467 frames with no detection
153 frames present
0 frames with no detection
46 frames present
46 frames with no detection
132 frames present
7 frames with no detection
165 frames present
0 frames with no detection
148 frames present
2 frames with no detection
128 frames present
0 frames with no detection
135 frames present
0 frames with no detection
116 frames present
0 frames with no detection
137 frames present
4 frames with no detection
128 frames present
0 frames with no detection
106 frames present
0 frames with no detection
197 frames present
11 frames with no detection
130 frames present
1 frames with no detection
152 frames present
4 frames with no detection
40 frames present
40 frames with no detection
227 frames present
6 frames with no detection
388 frames present
10 frames with no detection
212 frames present
6 frames w

100%|██████████| 11/11 [00:00<00:00, 174.11it/s]
100%|██████████| 24/24 [00:00<00:00, 386.32it/s]
100%|██████████| 10/10 [00:00<00:00, 328.51it/s]


226 frames present
6 frames with no detection
146 frames present
0 frames with no detection
140 frames present
0 frames with no detection
95 frames present
95 frames with no detection
108 frames present
0 frames with no detection
117 frames present
0 frames with no detection
67 frames present
67 frames with no detection
522 frames present
391 frames with no detection
81 frames present
81 frames with no detection
137 frames present
0 frames with no detection
78 frames present
78 frames with no detection
119 frames present
7 frames with no detection
161 frames present
3 frames with no detection
216 frames present
111 frames with no detection
109 frames present
2 frames with no detection
77 frames present
77 frames with no detection
116 frames present
0 frames with no detection
72 frames present
72 frames with no detection
115 frames present
10 frames with no detection
80 frames present
80 frames with no detection
149 frames present
4 frames with no detection
81 frames present
81 frames w

100%|██████████| 24/24 [00:00<00:00, 379.28it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

('file written at: ', 'faces/training.xml')
186 frames present
19 frames with no detection
119 frames present
0 frames with no detection
136 frames present
14 frames with no detection
142 frames present
8 frames with no detection
257 frames present
136 frames with no detection
150 frames present
4 frames with no detection
135 frames present
0 frames with no detection
100 frames present
99 frames with no detection
420 frames present
315 frames with no detection
134 frames present
4 frames with no detection
143 frames present
2 frames with no detection
140 frames present
3 frames with no detection
172 frames present
7 frames with no detection
233 frames present
11 frames with no detection
149 frames present
19 frames with no detection
131 frames present
1 frames with no detection
161 frames present
2 frames with no detection
129 frames present
0 frames with no detection
248 frames present
131 frames with no detection
105 frames present
3 frames with no detection
141 frames present
1 fram

100%|██████████| 15/15 [00:00<00:00, 485.20it/s]
100%|██████████| 13/13 [00:00<00:00, 411.59it/s]


126 frames present
0 frames with no detection
195 frames present
12 frames with no detection
132 frames present
5 frames with no detection
138 frames present
10 frames with no detection
117 frames present
3 frames with no detection
168 frames present
12 frames with no detection
263 frames present
204 frames with no detection
165 frames present
3 frames with no detection
147 frames present
4 frames with no detection
305 frames present
155 frames with no detection
148 frames present
6 frames with no detection
122 frames present
2 frames with no detection
226 frames present
117 frames with no detection
123 frames present
9 frames with no detection
6174
5874
300
('file written at: ', 'faces/testing.xml')


In [21]:
!python train_dlib_detector.py faces/

Training with C: 5
Training with epsilon: 0.01
Training using 4 threads.
Training with sliding window 80 pixels wide by 80 pixels tall.
Training on both left and right flipped versions of images.
objective:     149.824
objective gap: 149.819
risk:          29.9639
risk gap:      29.9639
num planes:    3
iter:          1

objective:     44.0061
objective gap: 43.9102
risk:          8.78208
risk gap:      8.78204
num planes:    4
iter:          2

objective:     69.342
objective gap: 69.1551
risk:          13.8311
risk gap:      13.831
num planes:    5
iter:          3

objective:     37.9795
objective gap: 37.7567
risk:          7.55142
risk gap:      7.55133
num planes:    6
iter:          4

objective:     48.3867
objective gap: 48.0253
risk:          9.60518
risk gap:      9.60506
num planes:    7
iter:          5

objective:     14.6084
objective gap: 14.1992
risk:          2.84
risk gap:      2.83985
num planes:    8
iter:          6

objective:     20.2266
objective gap: 19.7683
r

objective:     2.73798
objective gap: 0.0797228
risk:          0.263711
risk gap:      0.0159446
num planes:    30
iter:          63

objective:     2.73707
objective gap: 0.0757351
risk:          0.265392
risk gap:      0.015147
num planes:    30
iter:          64

objective:     2.73619
objective gap: 0.0721747
risk:          0.263978
risk gap:      0.0144349
num planes:    29
iter:          65

objective:     2.73549
objective gap: 0.0682016
risk:          0.261763
risk gap:      0.0136403
num planes:    29
iter:          66

objective:     2.72325
objective gap: 0.0546159
risk:          0.260946
risk gap:      0.0109232
num planes:    28
iter:          67

objective:     2.72166
objective gap: 0.0509474
risk:          0.259692
risk gap:      0.0101895
num planes:    29
iter:          68

objective:     2.72345
objective gap: 0.0503301
risk:          0.260217
risk gap:      0.010066
num planes:    29
iter:          69

objective:     2.72374
objective gap: 0.0491583
risk:          0

Train 500, test 3000

### Testing with default detector

In [29]:
def getPathFrames(path_file_xml):
    tree = ET.parse(path_file_xml)
    root = tree.getroot()
    list_path_frames = []
    for image in tqdm(root.iter('image')):
        name_file = image.attrib['file']
        list_path_frames.append(name_file)
    print(list_path_frames)
    return list_path_frames

In [31]:
def testDefault(list_path_frames,path_folder):
    args={"shape_predictor":"shape_predictor_68_face_landmarks.dat"}
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(args["shape_predictor"])
    print("Number of images found are:{} ".format(len(list_path_frames)))
    # Writing an XML
    root = ET.Element("dataset")
    name = ET.SubElement(root, "name").text = "Labelled faces"
    comment = ET.SubElement(root, "comment").text = "These are labelled images from Bagamoyo"
    images = ET.SubElement(root, "images")
    for j,path_frame in tqdm(enumerate(list_path_frames)):
    #     print(i,path_frame)
        try:
        #add image to images in XML
            path_frame_new = path_folder + path_frame
            image_node = ET.SubElement(images, "image", file=path_frame_new.split('/')[-1])
        #                 print(path_frame)
    #                 try:
    #         os.path.e
            gray=cv2.imread(path_frame_new)
        #         gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)

        #         if(isCNN):
        #             dets = detector(gray,1)
        #             rects = dlib.rectangles()
        #             rects.extend([d.rect for d in dets])
        #         else:
    #         print(gray)
            rects = detector(gray,1)
    #         print(rects)
            for i,rect in enumerate(rects):                    
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                (x, y, w, h) = face_utils.rect_to_bb(rect)
#                 print(x,y,w,h)
                #add box to image in XML
    #             print(x,y,w,h)
                box = ET.SubElement(image_node, "box", height=str(h), left=str(x), top=str(y), width=str(w))
            #write XML
    #         print(j%5)           
        except KeyboardInterrupt:
            break
        except:
            continue
    tree = ET.ElementTree(root)
    dest_name = 'out.xml'
    tree.write(dest_name)
    print("xml written to {}".format(dest_name)) 

In [36]:
l = getPathFrames('analysis/testing3.xml')

300it [00:00, 130311.85it/s]

['VIDEO_1526475571181 bored_20180517_050804_1876713609 014.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 021.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 048.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 078.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 105.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 118.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 126.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 141.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 142.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 145.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 156.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 157.jpg', 'VIDEO_1526475571181 bored_20180517_050804_1876713609 166.jpg', 'VIDEO_00-male-surprise_20180517_050935_780615161 037.jpg', 'VIDEO_00-male-surprise_20180517_050935_780615161 059.jpg', 'VIDEO_00-male-surprise_20180517_050935_7806151




In [37]:
testDefault(l,'faces/')

0it [00:00, ?it/s]

Number of images found are:300 


300it [06:21,  1.27s/it]

xml written to out.xml





## Getting statistics

In [58]:
def getStats(path_test_xml, path_out_xml):
    tree1 = ET.parse(path_test_xml)
    root1 = tree1.getroot()
    dic_test = {}
    for image in tqdm(root1.iter('image')):
        name_file = image.attrib['file']
        num_boxes = len(image)
        dic_test[name_file]=num_boxes
    tree2 = ET.parse(path_out_xml)
    root2 = tree2.getroot()
    dic_out = {}
    for image in tqdm(root1.iter('image')):
        name_file = image.attrib['file']
        num_boxes = len(image)
        dic_out[name_file]=num_boxes
    return dic_test, dic_out

In [68]:
a,b = getStats('analysis/testing1.xml','analysis/out1.xml')

300it [00:00, 181049.09it/s]
300it [00:00, 171242.68it/s]


In [69]:
arr1 = b.values()

In [70]:
sum(arr1)/300.0

0.99

In [71]:
c,d = getStats('analysis/testing3.xml','analysis/out3.xml')

300it [00:00, 54913.64it/s]
300it [00:00, 144431.96it/s]


In [72]:
arr1 = b.values()

In [73]:
sum(arr1)/300.0

0.99

In [74]:
e,f = getStats('analysis/testing2.xml','analysis/out2.xml')

300it [00:00, 61829.45it/s]
300it [00:00, 170847.41it/s]


In [75]:
arr1 = b.values()

In [76]:
sum(arr1)/300.0

0.99

In [None]:
dlib.test_simple_object_detector()

In [None]:
Take random sample of 10 frames and manually estimate

To determine epsilon (sensitivity analysis)
Manually perturb the location of face and see when face is being detected.
Cross Environmental. How general is it compared to the baseline.
mDo tracking.

The number of kids necessary to beat the baseline
1. histogram normalisation
2. tracking

accuracy take location into account

### Rest

In [18]:
def try1():
    tree = ET.parse('/home/amogh/cmu/dlib/examples/faces/training.xml')
    root = tree.getroot()
    dic = {}
    for image in root.iter('image'):
        dic[image.attrib['file']] = image
    print(dic)
try1()

{'2008_001322.jpg': <Element 'image' at 0x7ff7b77e4ab0>, '2007_007763.jpg': <Element 'image' at 0x7ff7b77e4e10>, '2008_002079.jpg': <Element 'image' at 0x7ff7b77e4030>, '2008_001009.jpg': <Element 'image' at 0x7ff7b77e4a20>}


### Training