# Analysing Face Detection on African data

### Importing Libraries

In [61]:
import gc
import os
import sys
import math
import glob
import tqdm
import random
import numpy as np
from tqdm import tqdm
from time import sleep

In [42]:
import pandas as pd
import xml.etree.ElementTree as ET

In [10]:
import cv2
import dlib
from imutils import face_utils
from skimage.feature import hog
from skimage import data,exposure

In [8]:
import matplotlib 
%matplotlib inline
import matplotlib.pyplot as plt

### Defining Paths

#### Path to Data Folder

In [15]:
path_folder_bagamoyo_data = '/media/amogh/Stuff/CMU/datasets/bagamoyo_data/'

#### Path to Frames

In [16]:
path_folder_all_frames = path_folder_bagamoyo_data + '/bagamoyo_frames_all_in_one/'

In [17]:
path_folder_wise_frames = path_folder_bagamoyo_data + '/bagamoyo_frames_folder_wise/'

#### Path to xml files

In [25]:
!ls

data-analysis.ipynb  train_dlib_detector.py  ZFace label analysis.ipynb
README.md	     training.xml


comment: will change

In [28]:
path_xml_file = 'training.xml'

## Things to be calculated

1. Absolute - Find the number of frames in total in which face is detected, and how many in each frame
2. Check continuity - a visualisation for seeing which frames in the continuity.
3. This can be done by writing a single script which does these things if an xml file is generated with the name of the image and the coordinates of the bounding boxes in it.

### Let's take an example of training.xml and try to get some visualisations. 

1. Once the relevant visualisations can be obtained, then just generate an XML from every face detector's output and run the same script.
2. Then you must be able to take these different plots and plot them in one.
3. Or when plotting one, you should be able to take multiple XML and plot them on a single axis.

#### Reading xml file

In [87]:
"VIDEO_06-female-confusion_20180516_091808_1375649000 046.jpg".rsplit(' ',1)

['VIDEO_06-female-confusion_20180516_091808_1375649000', '046.jpg']

In [88]:
def getDataframeFromXML(path_file_xml):
    """
    Returns the dataframe(columns- videoName,frameNo,faceNo,left,right,width,height) from given xml file path holding bounding boxes for each frame.
    
    Parameters
    ----------
    path_file_xml : path of the XML file.
    
    Returns
    -------
    Pandas dataframe
        Information about images and their boxes.
    """
    
    #make a new dataframe to store the data.
    df = pd.DataFrame(columns=['name_image','name_video','num_frame','num_box','left','top','width','height'])
    
    #parsing XML and populating dataframe
    tree = ET.parse(path_file_xml)
    root = tree.getroot()
    for image in tqdm(root.iter('image')):
        name_file = image.attrib['file']
        name_video = name_file.split('.')[0].rsplit(' ',1)[0]
        num_frame = (int)(name_file.split('.')[0].rsplit(' ',1)[1])
        
        #if no box, box attributes are np.nan
        if (len(image) == 0):
            row_data = [name_file, name_video, num_frame, np.nan, np.nan, np.nan, np.nan, np.nan]
            df.loc[len(df)] = row_data
        else:
            for box_num,box in enumerate(image):
                box_attribs = box.attrib
                row_data = [name_file, name_video, num_frame, box_num+1, box_attribs['left'], box_attribs['top'],box_attribs['width'],box_attribs['height']]
                df.loc[len(df)] = row_data
    return df
    

In [89]:
df_zface = getDataframeFromXML(path_xml_file)

44492it [06:46, 109.35it/s]


In [93]:
df_zface

Unnamed: 0,name_image,name_video,num_frame,num_box,left,top,width,height
0,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,1,1,1044,987,412,447
1,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,2,,,,,
2,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,3,,,,,
3,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,4,,,,,
4,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,5,,,,,
5,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,6,,,,,
6,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,7,,,,,
7,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,8,,,,,
8,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,9,,,,,
9,VIDEO_00-male-surprise_20180516_091529_2193933...,VIDEO_00-male-surprise_20180516_091529_219393302,10,,,,,


In [92]:
set(df_zfacedcsds['name_video'])  

{'VIDEO_00-male-surprise_20180516_091529_219393302',
 'VIDEO_00-male-surprise_20180516_091541_1754978908',
 'VIDEO_00-male-surprise_20180516_091551_1272027070',
 'VIDEO_00-male-surprise_20180516_091601_315735604',
 'VIDEO_00-male-surprise_20180516_091612_1350564397',
 'VIDEO_00-male-surprise_20180516_091622_222850491',
 'VIDEO_00-male-surprise_20180516_091859_1464474503',
 'VIDEO_00-male-surprise_20180516_091908_1711675951',
 'VIDEO_00-male-surprise_20180516_103058_1712496391',
 'VIDEO_00-male-surprise_20180516_103111_732090686',
 'VIDEO_00-male-surprise_20180516_103333_484753936',
 'VIDEO_00-male-surprise_20180516_103627_470608290',
 'VIDEO_00-male-surprise_20180516_103859_1577732647',
 'VIDEO_00-male-surprise_20180516_104130_28129801',
 'VIDEO_00-male-surprise_20180516_104436_2035574348',
 'VIDEO_00-male-surprise_20180516_104931_1469741845',
 'VIDEO_00-male-surprise_20180516_105241_358096433',
 'VIDEO_00-male-surprise_20180516_105557_1402434414',
 'VIDEO_00-male-surprise_20180516_105