In [1]:
import os
import zipfile

import numpy as np
from PIL import Image, ImageDraw
from scipy.io import savemat, loadmat

import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam

In [2]:
# Root folder holding the dataset archives, the extracted images and the generated .mat files.
# Keep the trailing slash: paths in this notebook are built from it by plain string concatenation.
# NOTE(review): the path looks like a mounted Google Drive in Colab -- adjust when running elsewhere.
dataset_dir = "/content/drive/MyDrive/dataset/"
# List the folder to confirm which dataset files are present.
!ls $dataset_dir

train.mat  wider_face_split  WIDER_train  y_train.mat
val.mat    WIDER_test	     WIDER_val	  y_val.mat


**Extract the zipped dataset:**  
Dataset: [WIDER FACE](http://shuoyang1213.me/WIDERFACE/)

In [None]:
# Archives to unpack. "<name>.zip" is expected to extract into the folder "<name>" inside
# dataset_dir (the later cells read WIDER_<set>/images/ and wider_face_split/ from there).
archives = [
    "wider_face_split",
    # "WIDER_test",    # uncomment to unpack the test images as well
    "WIDER_train",
    "WIDER_val",
]

for name in archives:
    target = os.path.join(dataset_dir, name)

    # Skip archives that were already unpacked so the cell can be re-run safely,
    # even after the .zip files have been removed. Delete the folder to force a re-extract.
    if os.path.isdir(target):
        continue

    # The context manager closes the archive handle once extraction is done.
    with zipfile.ZipFile(target + ".zip") as archive:
        archive.extractall(dataset_dir)

**Prepare images to be used as inputs of the network:**

In [None]:
def preparation(set_name, image_size=128):
    """Build the network inputs for one WIDER FACE split and save them to ``<set_name>.mat``.

    Every image is converted to grayscale, cropped to the largest centered square,
    resized to ``image_size`` x ``image_size`` and scaled to the range [0, 1].
    The face boxes (x, y, w, h) are shifted and scaled into the coordinate frame
    of the resized image and truncated to integers.

    Parameters
    ----------
    set_name : str
        Split name used in the dataset paths, e.g. "train" or "val".
    image_size : int, optional
        Side length in pixels of the square network input (default 128).

    Notes
    -----
    The saved file holds the keys "event_list", "file_list", "face_list"
    (converted boxes) and "input_list" (one array of shape
    (n_images, image_size, image_size, 1) per event).
    """
    images_path = os.path.join(dataset_dir, "WIDER_" + set_name, "images")
    split_file = os.path.join(dataset_dir, "wider_face_split", "wider_face_" + set_name + ".mat")

    dataset = loadmat(split_file, squeeze_me=True)
    event_list = dataset["event_list"]
    file_list = dataset["file_list"]
    face_list = dataset["face_bbx_list"]

    n_events = len(event_list)

    # Events contain different numbers of images, so the per-event arrays are ragged.
    # An explicit object array is required: recent NumPy versions refuse to build
    # one implicitly from np.array(list_of_arrays).
    input_list = np.empty(n_events, dtype=object)

    for i in range(n_events):
        print(i, event_list[i])
        event_inputs = []

        for j in range(len(file_list[i])):
            image_file = os.path.join(images_path, event_list[i], file_list[i][j] + ".jpg")
            with Image.open(image_file) as raw:
                img = raw.convert('L')    # convert to grayscale

            # Work on a float copy shaped (n_faces, 4); squeeze_me collapses a single box to shape (4,).
            faces = np.array(face_list[i][j], dtype=float).reshape(-1, 4)

            # crop the largest square in the center of the image
            m = min(img.size)
            x = (img.width - m) // 2
            y = (img.height - m) // 2
            img = img.crop((x, y, (img.width + m)//2, (img.height + m)//2))

            # resize() always yields the requested size; thumbnail() never enlarges,
            # which would break the fixed-shape reshape below for small images.
            img = img.resize((image_size, image_size))

            # change coordinates of faces to match the new size image
            faces[:, 0] -= x
            faces[:, 1] -= y
            face_list[i][j] = (faces * (image_size / m)).astype(int)

            event_inputs.append(np.array(img) / 255)    # convert values of the pixels to a number between zero and one

        input_list[i] = np.array(event_inputs).reshape(len(event_inputs), image_size, image_size, 1)

    savemat(
        os.path.join(dataset_dir, set_name + ".mat"),
        {"event_list":event_list, "file_list":file_list, "face_list":face_list, "input_list":input_list},
    )


In [None]:
# Build and save train.mat (inputs and converted face boxes) from the WIDER_train images.
preparation("train")

In [None]:
# Build and save val.mat (inputs and converted face boxes) from the WIDER_val images.
preparation("val")

**Generate the target outputs as black-and-white face masks:**

In [None]:
def y_generator(set_name, image_size=128):
    """Render the face boxes of one split as binary masks and save them to ``y_<set_name>.mat``.

    For every image a black ``image_size`` x ``image_size`` canvas is created and
    each face box (x, y, w, h) is drawn as a filled white rectangle. The mask is
    flattened and stored with values 0 (background) and 1 (face).

    Parameters
    ----------
    set_name : str
        Split name; boxes are read from ``<set_name>.mat`` (key "face_list").
    image_size : int, optional
        Side length of the mask in pixels (default 128). It must match the size
        the boxes in ``<set_name>.mat`` were scaled to.

    Notes
    -----
    The saved file holds the key "y_list": one array of shape
    (n_images, image_size * image_size) per event.
    """
    face_list = loadmat(os.path.join(dataset_dir, set_name + ".mat"), squeeze_me=True)["face_list"]

    # Events contain different numbers of images, so the result is ragged and needs
    # an explicit object array (recent NumPy versions will not create one implicitly).
    y_list = np.empty(len(face_list), dtype=object)

    for i, event_faces in enumerate(face_list):
        masks = []

        for faces in event_faces:
            img = Image.new('L', (image_size, image_size))
            draw = ImageDraw.Draw(img)

            # reshape(-1, 4) handles a single box that squeeze_me collapsed to shape (4,)
            # as well as an image without any box.
            for f in np.reshape(faces, (-1, 4)):
                left, top, box_w, box_h = (int(v) for v in f)
                draw.rectangle((left, top, left + box_w, top + box_h), fill="white", width=0)

            masks.append(np.array(img).flatten() // 255)    # 255 -> 1, 0 -> 0

        y_list[i] = np.array(masks)

    savemat(os.path.join(dataset_dir, "y_" + set_name + ".mat"), {"y_list":y_list})

In [None]:
# Build and save y_train.mat (target masks) from the face boxes stored in train.mat.
y_generator("train")

In [None]:
# Build and save y_val.mat (target masks) from the face boxes stored in val.mat.
y_generator("val")

.  
.  


**Load the training and validation inputs and outputs:**

In [3]:
# Each .mat file stores its data under a single key; indexing the loaded value with [0]
# yields the sequence of per-event arrays that the training loop indexes.
x_train, y_train, x_val, y_val = (
    loadmat(dataset_dir + mat_file)[key][0]
    for mat_file, key in (
        ("train.mat", "input_list"),
        ("y_train.mat", "y_list"),
        ("val.mat", "input_list"),
        ("y_val.mat", "y_list"),
    )
)

**Build and compile the model:**

In [4]:
img_shape = (128, 128, 1)

# Three convolution / max-pooling stages followed by a fully connected head that
# emits one value per pixel of the flattened mask (128 * 128 = 16384 outputs).
# Only the first layer declares the input shape; later layers infer theirs.
model = Sequential([
    Conv2D(filters=36, kernel_size=7, activation="relu", input_shape=img_shape),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=54, kernel_size=5, activation="relu"),
    MaxPooling2D(pool_size=2),
    Conv2D(filters=81, kernel_size=3, activation="relu"),
    MaxPooling2D(pool_size=2),
    Flatten(),
    Dense(2048, activation="relu"),
    Dropout(0.5),
    Dense(1024, activation="relu"),
    Dropout(0.5),
    Dense(512, activation="relu"),
    Dropout(0.5),
    Dense(img_shape[0] * img_shape[1], activation="softmax")
])

# Alternatives tried earlier: "categorical_crossentropy" and "poisson" losses,
# and SGD(learning_rate=0.01, momentum=0.9) as the optimizer.
# NOTE(review): the targets are 0/1 masks rather than probability distributions, so
# softmax + KL divergence and the "accuracy" metric may not reflect mask quality --
# consider sigmoid outputs with binary cross-entropy; confirm the intent.
model.compile(
    loss = "kullback_leibler_divergence",
    optimizer = Adam(learning_rate=0.001),
    metrics = ["accuracy"]
)

**Summary:**

In [5]:
# Print every layer with its output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 122, 122, 36)      1800      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 61, 61, 36)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 57, 57, 54)        48654     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 28, 28, 54)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 81)        39447     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 81)        0         
_________________________________________________________________
flatten (Flatten)            (None, 13689)             0

**Train the model:**

In [None]:
N_EPOCHS = 3

# The data is stored as one array per event, so the model is fitted event by event.
# With epochs=epoch+1 and initial_epoch=epoch every fit() call runs exactly one pass
# over its event while Keras reports the correct overall epoch number.
for epoch in range(N_EPOCHS):

    for i in range(len(x_train)):

        # Event i of the training split is validated against event i of the validation split.
        model.fit(
            np.asarray(x_train[i]), np.asarray(y_train[i]),
            batch_size=64, epochs=epoch+1, initial_epoch=epoch, verbose=2,
            validation_data=(np.asarray(x_val[i]), np.asarray(y_val[i])),
        )
