In [6]:
import os
import glob
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
import tensorflow_datasets as tfds

## WIDER Faces Dataset

The most diverse and largest face detection dataset.

Source: http://shuoyang1213.me/WIDERFACE/

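The WIDER FACE dataset ships its annotations as plain-text files and its images as JPEGs grouped into one folder per event category. In an annotation file, each image entry consists of the image path, the number of faces, and one line per face in the form `x1 y1 w h blur expression illumination invalid occlusion pose`. (Note: the field list that follows appears to be carried over from the IMDB Faces notebook; it describes that dataset's metadata rather than the WIDER FACE annotation files.)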
**dob**: date of birth (Matlab serial date number)  
**photo_taken**: year when the photo was taken  
**full_path**: path to file  
**gender**: 0 for female and 1 for male, NaN if unknown  
**name**: name of the celebrity  
**face_location**: location of the face (bounding box)  
**face_score**: detector score (the higher the better). Inf implies that no face was found in the image and the face_location then just returns the entire image  
**second_face_score**: detector score of the face with the second highest score. This is useful to ignore images with more than one face. second_face_score is NaN if no second face was detected.  
**celeb_names**: list of all celebrity names  
**celeb_id**: index of celebrity name  

In [2]:
# `data` is meant to be a symlink to an external drive, created with
# 'mklink .../data <datadir>'.
# Show the working directory first, then what it contains.
for listing in (os.getcwd(), os.listdir()):
    print(listing)

C:\Users\nick\Desktop\image\face_detection
['.ipynb_checkpoints', 'create_tfds_WiderFace.ipynb', 'data']


In [4]:
# Root folder that holds the WIDER FACE archives and their extracted contents.
# Set the WIDER_FACE_DIR environment variable to point the notebook at another
# location; without it the original local path is used.
datadir = os.environ.get('WIDER_FACE_DIR', 'D:\\face_detection\\WIDERFaces')
os.listdir(datadir)

['WIDER_val.zip',
 'WIDER_test.zip',
 'WIDER_train.zip',
 'WIDER_train',
 'WIDER_val',
 'WIDER_test',
 'wider_face_split.zip',
 'wider_face_split']

In [5]:
# Every sub-folder of the training image directory is one event category.
train_category_names = os.listdir(os.path.join(datadir, 'WIDER_train', 'images'))
print('Discrete Categories:\n')
print(train_category_names)

Discrete Categories:

['0--Parade', '1--Handshaking', '10--People_Marching', '11--Meeting', '12--Group', '13--Interview', '14--Traffic', '15--Stock_Market', '16--Award_Ceremony', '17--Ceremony', '18--Concerts', '19--Couple', '2--Demonstration', '20--Family_Group', '21--Festival', '22--Picnic', '23--Shoppers', '24--Soldier_Firing', '25--Soldier_Patrol', '26--Soldier_Drilling', '27--Spa', '28--Sports_Fan', '29--Students_Schoolkids', '3--Riot', '30--Surgeons', '31--Waiter_Waitress', '32--Worker_Laborer', '33--Running', '34--Baseball', '35--Basketball', '36--Football', '37--Soccer', '38--Tennis', '39--Ice_Skating', '4--Dancing', '40--Gymnastics', '41--Swimming', '42--Car_Racing', '43--Row_Boat', '44--Aerobics', '45--Balloonist', '46--Jockey', '47--Matador_Bullfighter', '48--Parachutist_Paratrooper', '49--Greeting', '5--Car_Accident', '50--Celebration_Or_Party', '51--Dresses', '52--Photographers', '53--Raid', '54--Rescue', '55--Sports_Coach_Trainer', '56--Voter', '57--Angler', '58--Hockey',

In [6]:
# Recursively collect every .jpg below each split folder, in the order
# train / validation / test.
train_images, validation_images, test_images = (
    glob.glob(os.path.join(datadir, split_folder, '**', '*.jpg'), recursive=True)
    for split_folder in ('WIDER_train', 'WIDER_val', 'WIDER_test')
)

print(f'{len(train_images)} training images')
print(f'{len(validation_images)} validation images')
print(f'{len(test_images)} test images')
train_images[:5]

12880 training images
3226 validation images
16097 test images


['D:\\face_detection\\WIDERFaces\\WIDER_train\\images\\0--Parade\\0_Parade_marchingband_1_100.jpg',
 'D:\\face_detection\\WIDERFaces\\WIDER_train\\images\\0--Parade\\0_Parade_marchingband_1_1015.jpg',
 'D:\\face_detection\\WIDERFaces\\WIDER_train\\images\\0--Parade\\0_Parade_marchingband_1_1018.jpg',
 'D:\\face_detection\\WIDERFaces\\WIDER_train\\images\\0--Parade\\0_Parade_marchingband_1_1022.jpg',
 'D:\\face_detection\\WIDERFaces\\WIDER_train\\images\\0--Parade\\0_Parade_marchingband_1_1030.jpg']

In [7]:
# See the IMDB Faces example for a .mat-based annotation file.
# The test set is skipped here since it isn't labeled.
annotation_dir = os.path.join(datadir, 'wider_face_split')

# Read both ground-truth files fully into memory as raw text.
with open(os.path.join(annotation_dir, 'wider_face_train_bbx_gt.txt'), 'r') as train_file:
    meta_train = train_file.read()

with open(os.path.join(annotation_dir, 'wider_face_val_bbx_gt.txt'), 'r') as val_file:
    meta_val = val_file.read()

In [15]:
# Break the raw annotation text into lines. The guard keeps the cell safe to
# re-run: once `meta_train` is already a list there is nothing left to split.
if isinstance(meta_train, str):
    meta_train = meta_train.split('\n')
meta_train[:5]

['0--Parade/0_Parade_marchingband_1_849.jpg',
 '1',
 '449 330 122 149 0 0 0 0 0 0 ',
 '0--Parade/0_Parade_Parade_0_904.jpg',
 '1']

In [19]:
# Chunkify: group the flat annotation lines into one list per image,
# laid out as [image path, face count, annotation line, ...].
split_train = []
for line in meta_train:
    if not line.strip():
        # Skip empty strings (such as the one split('\n') leaves behind when
        # the text ends with a newline); they carry no annotation data.
        continue
    if '.jpg' in line:
        # An image path opens a new chunk. Creating the chunk here, instead of
        # flushing the previous one, guarantees the last image is kept too.
        split_train.append([line])
    elif split_train:
        # Anything else belongs to the most recently opened image.
        split_train[-1].append(line)

In [27]:
# Convert every chunk ([image path, face count, annotation line, ...]) into
# {'image/filename': <path>, 'faces': [<one dict per face>]}.
formatted_ann = []
for meta in split_train:
    out = {'image/filename': meta[0], 'faces': []}
    # Trust the declared face count rather than "every remaining line".
    # NOTE(review): the WIDER ground-truth files reportedly keep an all-zero
    # placeholder line for images with 0 faces; honouring the count keeps such
    # a line from becoming a bogus face entry. Confirm against the readme.
    num_faces = int(meta[1]) if len(meta) > 1 else 0
    for ann in meta[2:2 + num_faces]:
        fields = ann.split()
        if len(fields) < 10:
            raise ValueError(
                'Malformed annotation for {}: {!r}'.format(meta[0], ann))
        # Field order: x1 y1 w h blur expression illumination invalid occlusion pose
        values = [int(field) for field in fields[:10]]
        out['faces'].append({
            'bbox': values[:4],  # (x1, y1, w, h) in pixels
            'blur': values[4],
            'expression': values[5],
            'illumination': values[6],
            'invalid': values[7],
            'occlusion': values[8],
            'pose': values[9],
        })
    formatted_ann.append(out)

## Define the Dataset with `GeneratorBasedBuilder`

Most datasets subclass `tfds.core.GeneratorBasedBuilder`, which is a subclass of `tfds.core.DatasetBuilder` that simplifies defining a dataset. It works well for datasets that can be generated on a single machine. Its subclasses implement:

* `_info`: builds the DatasetInfo object describing the dataset


* `_split_generators`: downloads the source data and defines the dataset splits


* `_generate_examples`: yields (key, example) tuples in the dataset from the source data

In this exercise, you will use the `GeneratorBasedBuilder`.

In [36]:
# Sanity check: string-valued box fields turn into float32 scalars that
# support ordinary arithmetic.
xmin, ymin, bbox_width, bbox_height = np.array(['1', '2', '.5', '.75']).astype('float32')
xmin + bbox_height

1.75

In [None]:
# Sample face line from the ground-truth file, kept for reference. Field order
# as consumed by the formatting cell above:
#   x1 y1 w h blur expression illumination invalid occlusion pose
# 449 330 122 149 0 0 0 0 0 0

In [7]:
# Tokenise an annotation line that carries landmark data after the box:
# tokens 0-3 are the box, tokens 4-18 are reshaped into five rows of three.
ann = "449 330 122 149 488.906 373.643 0.0 542.089 376.442 0.0 515.031 412.83 0.0 485.174 425.893 0.0 538.357 431.491 0.0 0.82".split()
np.asarray(ann[4:19], dtype=np.float32).reshape(5, 3)

array([[488.906, 373.643,   0.   ],
       [542.089, 376.442,   0.   ],
       [515.031, 412.83 ,   0.   ],
       [485.174, 425.893,   0.   ],
       [538.357, 431.491,   0.   ]], dtype=float32)

In [None]:
%%writefile tensorflow_datasets/object_detection/wider_face.py

# coding=utf-8
# Copyright 2019 The TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""WIDER FACE dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import re
import numpy as np

import tensorflow as tf
import tensorflow_datasets as tfds

_PROJECT_URL = 'http://shuoyang1213.me/WIDERFACE/'

# Image archives are hosted on Google Drive. The `uc?export=download&id=...`
# form serves the file itself; the `file/d/<id>/view?usp=sharing` form serves
# an HTML viewer page, which cannot be extracted as a zip archive.
_WIDER_TRAIN_URL = ('https://drive.google.com/uc?export=download&'
                    'id=0B6eKvaijfFUDQUUwd21EckhUbWs')

_WIDER_VAL_URL = ('https://drive.google.com/uc?export=download&'
                  'id=0B6eKvaijfFUDd3dIRmpvSk8tLUk')

_WIDER_TEST_URL = ('https://drive.google.com/uc?export=download&'
                   'id=0B6eKvaijfFUDbW4tdGpaYjgzZkU')

# Annotation archive (wider_face_split.zip). Google Drive alternative:
#   https://drive.google.com/uc?export=download&id=1sAl2oml7hK6aZRdgRjqQJsjV5CEr7nl4
_WIDER_ANNOT_URL = ('http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/bbx_annotation/wider_face_split.zip')


_CITATION = """
@inproceedings{yang2016wider,
	Author = {Yang, Shuo and Luo, Ping and Loy, Chen Change and Tang, Xiaoou},
	Booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
	Title = {WIDER FACE: A Face Detection Benchmark},
	Year = {2016}}
"""

# Single description for the dataset. An earlier, unrelated description that
# was overwritten by this assignment has been removed.
_DESCRIPTION = """
WIDER FACE dataset is a face detection benchmark dataset, of which images are 
selected from the publicly available WIDER dataset. We choose 32,203 images and 
label 393,703 faces with a high degree of variability in scale, pose and 
occlusion as depicted in the sample images. WIDER FACE dataset is organized 
based on 61 event classes. For each event class, we randomly select 40%/10%/50% 
data as training, validation and testing sets. We adopt the same evaluation 
metric employed in the PASCAL VOC dataset. Similar to MALF and Caltech datasets,
we do not release bounding box ground truth for the test images. Users are 
required to submit final prediction files, which we shall proceed to evaluate.
"""


class WiderFace(tfds.core.GeneratorBasedBuilder):
    """WIDER FACE face-detection dataset builder.

    Generates the `train` and `validation` splits. The test split is not
    generated because no bounding-box ground truth is released for it.
    """

    VERSION = tfds.core.Version("0.1.1")

    def _info(self):
        """Returns the `DatasetInfo` describing one example of the dataset."""
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            # Feature reference:
            # https://www.tensorflow.org/datasets/api_docs/python/tfds/features
            features=tfds.features.FeaturesDict({
                'image': tfds.features.Image(encoding_format='jpeg'),
                'image/filename': tfds.features.Text(),
                # One entry per annotated face in the image.
                'faces': tfds.features.Sequence({
                    'bbox': tfds.features.BBoxFeature(),
                    'blur': tf.uint8,
                    'expression': tf.bool,
                    'illumination': tf.bool,
                    'occlusion': tf.uint8,
                    'pose': tf.bool,
                    'invalid': tf.bool
                })
            }),
            homepage=_PROJECT_URL,
            citation=_CITATION)

    def _split_generators(self, dl_manager):
        """Downloads and extracts the archives and declares the splits.

        Args:
          dl_manager: download manager used to fetch and extract the archives.

        Returns:
          List of `SplitGenerator`s for the train and validation splits.
        """
        extracted_dirs = dl_manager.download_and_extract({
            'wider_train': _WIDER_TRAIN_URL,
            'wider_val': _WIDER_VAL_URL,
            'wider_annot': _WIDER_ANNOT_URL
        })

        # The test archive is neither downloaded nor exposed as a split: it
        # has no labels to build examples from.
        return [
            tfds.core.SplitGenerator(
                name=tfds.Split.TRAIN,
                gen_kwargs={
                    'split': 'train',
                    'extracted_dirs': extracted_dirs
                }),
            tfds.core.SplitGenerator(
                name=tfds.Split.VALIDATION,
                gen_kwargs={
                    'split': 'val',
                    'extracted_dirs': extracted_dirs
                })
        ]

    def _get_bounding_box_values(self, bbox_annotations, img_width, img_height):
        """Converts a pixel-space box into normalized corner coordinates.

        Args:
          bbox_annotations: sequence whose first four items are
            (x1, y1, w, h) in pixels; numbers or numeric strings.
          img_width: image width in pixels.
          img_height: image height in pixels.

        Returns:
          Tuple (ymin, xmin, ymax, xmax) of floats in [0, 1].
        """
        xmin, ymin, wbox, hbox = (float(value) for value in bbox_annotations[:4])

        # Clip to the image first so that boxes reaching past the border
        # still normalize into [0, 1].
        ymax = np.clip(ymin + hbox, a_min=0, a_max=img_height)
        xmax = np.clip(xmin + wbox, a_min=0, a_max=img_width)
        ymin = np.clip(ymin, a_min=0, a_max=img_height)
        xmin = np.clip(xmin, a_min=0, a_max=img_width)

        return (float(ymin / img_height),
                float(xmin / img_width),
                float(ymax / img_height),
                float(xmax / img_width))

    def _get_image_shape(self, image_path):
        """Reads and decodes an image and returns its (height, width).

        Args:
          image_path: path of the image file.

        Returns:
          The first two dimensions of the decoded image: height, then width.
        """
        image = tf.io.read_file(image_path)
        image = tf.image.decode_image(image, channels=3)
        shape = image.shape[:2]
        return shape

    def _parse_annotation_file(self, filename):
        """Parses a `wider_face_*_bbx_gt.txt` ground-truth file.

        The file is a sequence of entries made of an image path line, a face
        count line, and one line per face with ten integer fields:
        x1 y1 w h blur expression illumination invalid occlusion pose.

        Args:
          filename: path of the annotation file.

        Returns:
          List with one dict per image:
          {'image/filename': str, 'faces': [face, ...]}, where each face is a
          dict with 'bbox' ([x1, y1, w, h] ints) and the int attributes
          'blur', 'expression', 'illumination', 'invalid', 'occlusion', 'pose'.

        Raises:
          ValueError: if an entry has fewer face lines than it declares or a
            face line has fewer than ten fields.
        """
        with tf.io.gfile.GFile(filename, 'r') as f:
            lines = f.read().split('\n')

        # Chunkify: one list per image, opened by its '.jpg' path line.
        chunks = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            if '.jpg' in line:
                chunks.append([line])
            elif chunks:
                chunks[-1].append(line)

        formatted_annotations = []
        for chunk in chunks:
            image_filename = chunk[0]
            # Only the declared number of lines are faces; any further line
            # in the chunk (e.g. a placeholder for a face-less image) is
            # ignored.
            num_faces = int(chunk[1]) if len(chunk) > 1 else 0
            face_lines = chunk[2:2 + num_faces]
            if len(face_lines) != num_faces:
                raise ValueError(
                    'Expected {} face lines for {}, found {}'.format(
                        num_faces, image_filename, len(face_lines)))

            faces = []
            for face_line in face_lines:
                fields = face_line.split()
                if len(fields) < 10:
                    raise ValueError(
                        'Cannot parse annotation for {}: {!r}'.format(
                            image_filename, face_line))
                values = [int(field) for field in fields[:10]]
                faces.append({
                    'bbox': values[:4],  # (x1, y1, w, h)
                    'blur': values[4],
                    'expression': values[5],
                    'illumination': values[6],
                    'invalid': values[7],
                    'occlusion': values[8],
                    'pose': values[9],
                })
            formatted_annotations.append(
                {'image/filename': image_filename, 'faces': faces})
        return formatted_annotations

    def _generate_examples(self, split, extracted_dirs):
        """Yields (key, example) pairs for one split.

        Args:
          split: split identifier used in archive and file names
            ('train' or 'val').
          extracted_dirs: dict of extraction directories keyed by
            'wider_<split>' and 'wider_annot', as built in
            `_split_generators`.

        Yields:
          Tuples of (image filename, feature dict matching `_info`).
        """
        image_dir = os.path.join(
            extracted_dirs['wider_' + split], 'WIDER_' + split, 'images')
        annotation_path = os.path.join(
            extracted_dirs['wider_annot'], 'wider_face_split',
            'wider_face_{}_bbx_gt.txt'.format(split))

        for ann in self._parse_annotation_file(annotation_path):
            # The filename includes its category sub-directory
            # (ex: 0--Parade/0_Parade_marchingband_1_5.jpg).
            image_filename = ann['image/filename']
            img_path = os.path.join(image_dir, *image_filename.split('/'))

            faces = []
            if ann['faces']:
                # The image is decoded only when there are boxes to normalize.
                img_height, img_width = (
                    int(dim) for dim in self._get_image_shape(img_path))
                for face in ann['faces']:
                    ymin, xmin, ymax, xmax = self._get_bounding_box_values(
                        face['bbox'], img_width, img_height)
                    faces.append({
                        'bbox': tfds.features.BBox(ymin=ymin,
                                                   xmin=xmin,
                                                   ymax=ymax,
                                                   xmax=xmax),
                        'blur': face['blur'],
                        'expression': face['expression'],
                        'illumination': face['illumination'],
                        'invalid': face['invalid'],
                        'occlusion': face['occlusion'],
                        'pose': face['pose'],
                    })

            record = {
                'image': img_path,
                'image/filename': image_filename,
                'faces': faces,
            }
            # The filename is unique within a split, so it serves as the key.
            yield image_filename, record
