In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

!pip install -q tensorflow-gpu==2.0.0-rc1
import random

import numpy as np
import tensorflow as tf

from tensorflow.keras import datasets, layers, models

In [0]:
# Mount Google Drive into the Colab runtime; the annotation file, the images
# and the cached .npy arrays used by this notebook are all read from and
# written to paths under '/content/drive/My Drive/Colab Notebooks/'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Dataset Preprocessing

Extract face-labelled image crops (the positive class) from the WIDER FACE dataset.

Extract "other"-labelled (non-face) image crops (the negative class) from the VOC2007 dataset.

In [0]:
# using wider face dataset
# http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html
import cv2

# Read the WIDER FACE ground-truth file into a list holding one entry per
# line of the file, with trailing whitespace (including the newline) removed.
annotation_path = '/content/drive/My Drive/Colab Notebooks/wider_face_train_bbx_gt.txt'
with open(annotation_path) as annotation_file:
    annotation = [raw_line.rstrip() for raw_line in annotation_file]

def crop_face(index, bbox):
  """Crop one face region out of a WIDER FACE image and resize it to 112x112.

  Args:
    index: Position in the module-level `annotation` list of the line that
      holds the image path (relative to the images directory).
    bbox: Sequence `[sx, sy, ex, ey]` of integer pixel coordinates
      (left, top, right, bottom); the right/bottom edges are exclusive.

  Returns:
    The cropped region as a 112x112x3 array, in the BGR channel order that
    `cv2.imread` produces.

  Raises:
    FileNotFoundError: If the image cannot be read from disk.
    ValueError: If the box selects no pixels of the image.
  """
  image_path = '/content/drive/My Drive/Colab Notebooks/images/' + annotation[index]
  img = cv2.imread(image_path, cv2.IMREAD_COLOR)
  # cv2.imread reports failure by returning None instead of raising, which
  # would otherwise surface as an opaque TypeError on the slicing below.
  if img is None:
    raise FileNotFoundError('Could not read image: ' + image_path)
  crop = img[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
  # An empty slice (degenerate or out-of-image box) cannot be resized.
  if crop.size == 0:
    raise ValueError('Empty crop for box ' + str(list(bbox)) + ' in ' + image_path)
  crop = cv2.resize(crop, dsize=(112, 112), interpolation=cv2.INTER_LINEAR)
  return crop

# Collect up to `max_num` usable face crops from the WIDER FACE annotations.
#
# Annotation layout, one record per image:
#   <relative image path>
#   <number of faces N>
#   N lines of: x1 y1 w h blur expression illumination invalid occlusion pose
# A record with N == 0 is still followed by one placeholder box line.
num_image = 0    # number of crops stored in face_image so far
i = 0            # index of the current record's image-path line
max_num = 10000

face_image = np.zeros((max_num, 112, 112, 3), dtype=np.uint8)
# Stop either when the buffer is full or when the annotation list runs out,
# so a dataset with fewer qualifying faces ends cleanly instead of raising
# an IndexError.
while num_image < max_num and i + 1 < len(annotation):
  num_face = int(annotation[i + 1])
  for j in range(num_face):
    if num_image >= max_num:
      break
    bbox = annotation[i + 2 + j].split()

    # Reject faces that are hard to recognise. Each attribute has its own
    # column; the "invalid" flag lives in column 7, not in column 6.
    rejected = (
        int(bbox[4]) in (1, 2)    # normal or heavy blur
        or int(bbox[6]) == 1      # extreme illumination
        or int(bbox[7]) == 1      # invalid image
        or int(bbox[8]) == 2      # heavy occlusion
        or int(bbox[9]) == 1      # atypical pose
    )
    if rejected:
      continue

    # appropriate face: convert (x, y, w, h) into corner coordinates
    sx = int(bbox[0])
    sy = int(bbox[1])
    ex = sx + int(bbox[2])
    ey = sy + int(bbox[3])
    # Keep only faces taller than 60 px; a zero-width box has nothing to crop.
    if int(bbox[3]) > 60 and int(bbox[2]) > 0:
      face_image[num_image] = crop_face(i, [sx, sy, ex, ey])
      num_image = num_image + 1

  # Skip the path line, the count line and the box lines (a record with zero
  # faces still carries one placeholder box line).
  i = i + 2 + max(num_face, 1)

# Drop unused slots so every row of face_image is a real crop and the
# reported count matches what was actually extracted.
face_image = face_image[:num_image]

print("[Extracted " + str(len(face_image))+" face images from wider face dataset]")

[Extracted 10000 face images from wider face dataset]


In [0]:
# Cache the extracted face crops on Drive. np.save appends the '.npy'
# extension to the given path, which is why the load cell reads '...save.npy'.
np.save('/content/drive/My Drive/Colab Notebooks/wide_face_image_save', face_image)

In [0]:
# Reload the cached face crops; on a fresh runtime this replaces re-running
# the WIDER FACE extraction cell.
face_image = np.load('/content/drive/My Drive/Colab Notebooks/wide_face_image_save.npy')

In [0]:
# load Voc2007 dataset
# NOTE(review): 'voc' is requested without an explicit config/version; it is
# assumed to resolve to VOC2007 as the comment above states -- confirm.
import tensorflow_datasets as tfds
voc_builder = tfds.builder('voc')
# Downloads and prepares the dataset if that has not been done already.
voc_builder.download_and_prepare()
# Only the 'test' split is requested; its object crops are used further down
# as the negative ("not face") training examples.
voc_ds = voc_builder.as_dataset(split='test')

In [0]:
# get object image from voc2007
# Every annotated object that is not a person is cropped, resized to 112x112
# and collected as a negative ("not face") example.
not_face_image = []
for example in tfds.as_numpy(voc_ds):
  image, objects = example['image'], example['objects']
  img_h, img_w = image.shape[0], image.shape[1]
  for label, box in zip(objects['label'], objects['bbox']):
    # Skip people: label 14 is 'person' in the VOC label list (VOC has no
    # 'face' class), and person boxes are the ones likely to contain faces.
    if label == 14:
      continue
    # Boxes are stored normalised as [ymin, xmin, ymax, xmax]; scale them to
    # pixel coordinates of this image.
    sx = int(box[1] * img_w)
    sy = int(box[0] * img_h)
    ex = int(box[3] * img_w)
    ey = int(box[2] * img_h)
    crop = image[sy:ey, sx:ex, :]
    # Truncation to int can collapse a very thin box to zero pixels, which
    # cv2.resize cannot handle; skip such boxes.
    if crop.size == 0:
      continue
    crop = cv2.resize(crop, dsize=(112, 112), interpolation=cv2.INTER_LINEAR)
    # Convert RGB to BGR so the channel order matches the face crops, which
    # are read with cv2.imread.
    crop =  cv2.cvtColor(crop, cv2.COLOR_RGB2BGR)
    not_face_image.append(crop)

print("[Extracted " + str(len(not_face_image)) + " object images from VOC2007 Dataset]")

[Extracted 9749 object images from VOC2007 Dataset]


In [0]:
# convert list to numpy array
# All crops were resized to 112x112x3, so the list stacks into one 4-D array.
not_face_image = np.asarray(not_face_image)
# Cache the negatives on Drive (np.save appends the '.npy' extension).
np.save('/content/drive/My Drive/Colab Notebooks/not_face_image_save', not_face_image)

In [0]:
# Reload the cached negative crops; on a fresh runtime this replaces
# re-running the VOC extraction cells.
not_face_image = np.load('/content/drive/My Drive/Colab Notebooks/not_face_image_save.npy')

In [0]:
# final training data

# make label (1: face, 0: other)
face_label = np.ones((face_image.shape[0], 1), dtype=int)
not_face_label = np.zeros((not_face_image.shape[0], 1), dtype=int)

# concatenate two data
final_train_image = np.vstack((face_image, not_face_image))
final_train_label = np.vstack((face_label, not_face_label))

# shuffle dataset
# One random permutation of the row indices is applied to both arrays, which
# keeps every image paired with its label and avoids building a Python list
# of per-sample [image, label] pairs.
shuffle_order = np.random.permutation(final_train_image.shape[0])

x_train = tf.convert_to_tensor(final_train_image[shuffle_order], dtype=tf.float32)
y_train = tf.convert_to_tensor(final_train_label[shuffle_order].astype(np.float32), dtype=tf.float32)

# Scale the pixel values by 1/255 before training.
x_train = x_train / 255.0

Training Model:
VGG16 with Batch Normalization

In [0]:
# VGG16 architecture
# Five convolutional stages, each conv layer preceded by batch normalisation,
# followed by three classifier layers (fc1 is implemented as a 7x7 conv).

# Keyword arguments shared by every convolution in the network.
conv_options = dict(activation='relu', kernel_initializer='he_normal', kernel_regularizer='l2')

# (stage number, filters, conv layers in the stage, max-pool after the stage?)
# Stage 1 is not pooled, so the four 2x2 pools reduce 112x112 to 7x7.
stage_specs = [
  (1, 64, 2, False),
  (2, 128, 2, True),
  (3, 256, 3, True),
  (4, 512, 3, True),
  (5, 512, 3, True),
]

model = models.Sequential()

# Only the very first layer declares the input shape.
bn_options = dict(input_shape=(112, 112, 3))
for stage, filters, depth, pooled in stage_specs:
  for position in range(1, depth + 1):
    model.add(layers.BatchNormalization(**bn_options))
    bn_options = {}
    model.add(layers.Conv2D(filters, (3, 3), padding='same',
                            name='conv{}_{}'.format(stage, position), **conv_options))
  if pooled:
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same',
                                  name='pool{}'.format(stage)))

# fc1 (512x7x7 -> 4096): a 7x7 convolution without padding covers the whole
# 7x7 feature map, so it acts as a fully connected layer.
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(4096, (7, 7), name='fc1', **conv_options))
model.add(layers.Flatten())
model.add(layers.Dropout(rate=0.2))

# fc2 (4096 -> 4096)
model.add(layers.BatchNormalization())
model.add(layers.Dense(4096, activation='relu', name='fc2'))
model.add(layers.Dropout(rate=0.2))

# fc3 (4096 -> 2): softmax over the two classes (0: other, 1: face)
model.add(layers.BatchNormalization())
model.add(layers.Dense(2, activation='softmax', name='fc3'))
model.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_16 (Batc (None, 112, 112, 3)       12        
_________________________________________________________________
conv1_1 (Conv2D)             (None, 112, 112, 64)      1792      
_________________________________________________________________
batch_normalization_17 (Batc (None, 112, 112, 64)      256       
_________________________________________________________________
conv1_2 (Conv2D)             (None, 112, 112, 64)      36928     
_________________________________________________________________
batch_normalization_18 (Batc (None, 112, 112, 64)      256       
_________________________________________________________________
conv2_1 (Conv2D)             (None, 112, 112, 128)     73856     
_________________________________________________________________
batch_normalization_19 (Batc (None, 112, 112, 128)    

In [0]:
# model train
# The labels hold a single class id per sample (0: other, 1: face) while the
# network ends in a 2-way softmax, hence sparse categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# No validation data is passed, so only training metrics are reported.
model.fit(x_train, y_train, epochs=5)

Train on 19749 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fe711809588>

Model Evaluation

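The test dataset consists only of face images, so the accuracy reported below measures how many faces are recognised as faces; it says nothing about how non-face images are classified.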

In [0]:
# Load the pre-saved evaluation images and labels from Drive.
# NOTE(review): how these arrays were produced is not shown in this notebook;
# presumably 112x112x3 BGR crops matching the training data -- confirm.
test_images = np.load('/content/drive/My Drive/Colab Notebooks/test_images_save.npy')
test_labels = np.load('/content/drive/My Drive/Colab Notebooks/test_labels_save.npy')

In [0]:
# Turn the evaluation arrays into float32 tensors; the images are scaled by
# 1/255, the same scaling that is applied to x_train.
y_test = tf.convert_to_tensor(test_labels, dtype=tf.float32)
x_test = tf.convert_to_tensor(test_images, dtype=tf.float32) / 255.0

In [0]:
# model evaluate
# Returns [loss, accuracy], matching the loss and metric given to compile().
# NOTE(review): the reported loss presumably also contains the L2 penalty of
# the regularised conv layers, so it is not the pure cross-entropy -- confirm.
model.evaluate(x_test, y_test, verbose=2)

100/1 - 4s - loss: 4.8659 - accuracy: 1.0000


[4.863677768707276, 1.0]

In [0]:
# Softmax outputs for the first ten test images; with the label convention
# used for training (0: other, 1: face) the columns are [P(other), P(face)].
model.predict(x_test[:10])

array([[0.4394248 , 0.5605751 ],
       [0.44259417, 0.5574058 ],
       [0.44719857, 0.55280143],
       [0.4590391 , 0.5409609 ],
       [0.44431838, 0.55568165],
       [0.44096082, 0.5590392 ],
       [0.4571382 , 0.54286176],
       [0.46420243, 0.5357976 ],
       [0.44153726, 0.5584627 ],
       [0.44923243, 0.5507676 ]], dtype=float32)

Save Model

In [0]:
# Persist the trained network twice: the weights alone (checkpoint files with
# the given path prefix) and the complete model in a single HDF5 file.
model.save_weights('/content/drive/My Drive/Colab Notebooks/vgg16_face_checkpoint')
model.save('/content/drive/My Drive/Colab Notebooks/vgg16_face.h5')