# Face recognition with deep learning
- Andrew J. Graves
- 04/19/21
- Run on Google Colab with GPUs

# Load Packages

In [1]:
# For VGG-Face transfer learning framework
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip install keras_applications

# Import modules
from google.colab import drive
from tensorflow.math import exp
from tensorflow.random import set_seed
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model as KerasModel
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.resnet import preprocess_input
from keras_vggface.vggface import VGGFace

Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-xm04vz5r
  Running command git clone -q https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-xm04vz5r
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25l[?25hdone
  Created wheel for keras-vggface: filename=keras_vggface-0.6-cp37-none-any.whl size=8312 sha256=70b573127e3de8be2c64e1f61a3928afce1933c7b2d20be06041e707b30db90d
  Stored in directory: /tmp/pip-ephem-wheel-cache-7xtr2otx/wheels/36/07/46/06c25ce8e9cd396dabe151ea1d8a2bc28dafcb11321c1f3a6d
Successfully built keras-vggface
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.6
Collecting keras_applications
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
[K     |███████

The next cell assumes the DeepFake database (deepfake_database.zip) is located within the main directory of your Google Drive, and that you are working on Google Colab. If you need to download the data, the link is [here](https://e.pcloud.link/publink/show?code=XZnsxkZkEAgI1OgQIJHLnNl9ErhV4vpHuV0).

In [2]:
# Assumes deepfake database is located within your Google Drive
# (mounts the Drive at /content/drive so the zip can be read from MyDrive)
drive.mount('/content/drive')

# Copy the zipped data from your Google Drive into the current working directory
!cp "/content/drive/MyDrive/deepfake_database.zip" .
# Unzip the copied data quietly (-q); 'N' is piped to unzip's stdin,
# presumably to decline any overwrite prompt on a re-run -- TODO confirm
!echo 'N' | unzip -q deepfake_database.zip
# Remove the zipped file from Google Colab
!rm deepfake_database.zip

# Download the DeepFake weights for MesoInception-4
# (saved as MesoInception_DF.h5 in the current working directory)
!wget -O MesoInception_DF.h5 https://github.com/DariusAf/MesoNet/blob/master/weights/MesoInception_DF.h5?raw=true

Mounted at /content/drive
--2021-04-18 18:01:44--  https://github.com/DariusAf/MesoNet/blob/master/weights/MesoInception_DF.h5?raw=true
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/DariusAf/MesoNet/raw/master/weights/MesoInception_DF.h5 [following]
--2021-04-18 18:01:44--  https://github.com/DariusAf/MesoNet/raw/master/weights/MesoInception_DF.h5
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/DariusAf/MesoNet/master/weights/MesoInception_DF.h5 [following]
--2021-04-18 18:01:44--  https://raw.githubusercontent.com/DariusAf/MesoNet/master/weights/MesoInception_DF.h5
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercont

Here is the code from *classifiers.py*, borrowed from the [MesoNet repository](https://github.com/DariusAf/MesoNet). We are only including their best-performing model, the MesoInception-4.

In [3]:
# See https://github.com/DariusAf/MesoNet/blob/master/classifiers.py

# We do not need these modules to run our model,
# but they are used by MesoInception-4
from tensorflow.keras.layers import Flatten, Conv2D, MaxPooling2D, \
    BatchNormalization, Dropout, Reshape, Concatenate, LeakyReLU, Lambda

# Side length (in pixels) of the square, 3-channel input that MesoInception4 is built for
IMGWIDTH = 256

class Classifier:
    def __init__():
        self.model = 0
    
    def predict(self, x):
        return self.model.predict(x)
    
    def fit(self, x, y):
        return self.model.train_on_batch(x, y)
    
    def get_accuracy(self, x, y):
        return self.model.test_on_batch(x, y)
    
    def load(self, path):
        self.model.load_weights(path)

class MesoInception4(Classifier):
    """MesoInception-4 network (architecture taken from MesoNet's classifiers.py).

    Builds the Keras model and compiles it with Adam, mean squared error
    loss and an accuracy metric.

    Args:
        learning_rate: Step size passed to the Adam optimizer.
    """

    def __init__(self, learning_rate = 0.001):
        self.model = self.init_model()
        # `learning_rate` is the supported keyword; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer = Adam(learning_rate = learning_rate)
        self.model.compile(optimizer = optimizer, loss='mean_squared_error', 
                           metrics=['accuracy'])
    
    def InceptionLayer(self, a, b, c, d):
        """Return a callable that applies a four-branch inception block.

        Args:
            a: Number of filters of the plain 1x1 branch.
            b: Number of filters of the 1x1 -> 3x3 branch.
            c: Number of filters of the 1x1 -> 3x3 (dilation 2) branch.
            d: Number of filters of the 1x1 -> 3x3 (dilation 3) branch.

        Returns:
            A function mapping a tensor `x` to the concatenation of the four
            branch outputs along the last axis.
        """
        def func(x):
            # Branch 1: 1x1 convolution only
            x1 = Conv2D(a, (1, 1), padding='same', activation='relu')(x)
            
            # Branch 2: 1x1 followed by a regular 3x3 convolution
            x2 = Conv2D(b, (1, 1), padding='same', activation='relu')(x)
            x2 = Conv2D(b, (3, 3), padding='same', activation='relu')(x2)
            
            # Branch 3: 1x1 followed by a 3x3 convolution with dilation rate 2
            x3 = Conv2D(c, (1, 1), padding='same', activation='relu')(x)
            x3 = Conv2D(c, (3, 3), dilation_rate=2, strides=1, 
                        padding='same', activation='relu')(x3)
            
            # Branch 4: 1x1 followed by a 3x3 convolution with dilation rate 3
            x4 = Conv2D(d, (1, 1), padding='same', activation='relu')(x)
            x4 = Conv2D(d, (3, 3), dilation_rate=3, strides=1, 
                        padding='same', activation='relu')(x4)

            # Stack the branch outputs along the last axis
            y = Concatenate(axis = -1)([x1, x2, x3, x4])
            
            return y
        return func
    
    def init_model(self):
        """Assemble and return the (uncompiled) MesoInception-4 Keras model.

        The input is an (IMGWIDTH, IMGWIDTH, 3) tensor; the output is a
        single sigmoid unit.
        """
        x = Input(shape = (IMGWIDTH, IMGWIDTH, 3))
        
        # Two inception blocks, each followed by batch norm and 2x2 pooling
        x1 = self.InceptionLayer(1, 4, 4, 2)(x)
        x1 = BatchNormalization()(x1)
        x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)
        
        x2 = self.InceptionLayer(2, 4, 4, 2)(x1)
        x2 = BatchNormalization()(x2)
        x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)        
        
        # Two plain 5x5 convolution stages; the last one pools by 4x4
        x3 = Conv2D(16, (5, 5), padding='same', activation='relu')(x2)
        x3 = BatchNormalization()(x3)
        x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)
        
        x4 = Conv2D(16, (5, 5), padding='same', activation='relu')(x3)
        x4 = BatchNormalization()(x4)
        x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)
        
        # Classification head: dropout -> Dense(16) + LeakyReLU -> dropout -> sigmoid
        y = Flatten()(x4)
        y = Dropout(0.5)(y)
        y = Dense(16)(y)
        y = LeakyReLU(alpha=0.1)(y)
        y = Dropout(0.5)(y)
        y = Dense(1, activation='sigmoid')(y)

        return KerasModel(inputs=x, outputs=y)

For convenience we are not going to refit the MesoInception-4, but instead will attempt to replicate their accuracy metrics using their openly available model weights to generate predictions on the validation (test) set.

In [4]:
# Specify base directory for database
base_dir = '/content/deepfake_database/deepfake_database/'

# Build the data generator (pixel values are rescaled by 1/255)
meso_gen = ImageDataGenerator(rescale=1./255)
meso_test = meso_gen.flow_from_directory(
        f'{base_dir}validation/',
        # Reuse IMGWIDTH so the images always match the model's input shape
        target_size=(IMGWIDTH, IMGWIDTH),
        class_mode='binary')

# Instantiate the MesoInception4-model
meso_inception4 = MesoInception4()
# Endow the model with pre-trained weights
meso_inception4.load('/content/MesoInception_DF.h5')
# Evaluate on the validation (test) set; evaluate() returns [loss, accuracy]
# because the model was compiled with a single 'accuracy' metric
_, meso_acc = meso_inception4.model.evaluate(meso_test, 
                                             verbose=0)
# Print accuracy results (close but not exact replication of paper)
print(f'\nMeso-Inception4 Accuracy: {meso_acc}')

Found 7104 images belonging to 2 classes.

Meso-Inception4 Accuracy: 0.913147509098053


Now we will build our own model class named *TransferLearning*, which will take a different strategy than the original authors. Rather than trying to develop our own convolutional neural network architecture, we will stand on the shoulders of giants and modify pre-trained weights of an existing framework. Specifically, we will use the [Visual Geometry Group](https://www.robots.ox.ac.uk/~vgg/software/vgg_face/) (VGG)-Face weights trained with a ResNet 50 architecture. These weights could prove to be useful for discriminating real from fake faces, given the weights were trained on various face images. We will modify the weights of the upper layers with a low learning rate to appropriately adapt learning to our current task. We will also use cross-entropy as our loss function rather than mean squared error. We will handle all rescaling/pre-processing within the model itself, rather than within the image generator.

In [5]:
class TransferLearning(Classifier):
    """Binary classifier that fine-tunes a VGG-Face ResNet-50 backbone.

    The backbone's pooled output feeds a single sigmoid unit, and the model
    is compiled with Adam, binary cross-entropy and an accuracy metric.

    NOTE: `init_model` reads the module-level name `img_size`, so that
    variable must be defined before this class is instantiated.

    Args:
        learning_rate: Step size passed to the Adam optimizer.
        eps: Value passed as Adam's `epsilon` argument.
    """

    def __init__(self, learning_rate=1e-5, eps=1e-9):
        self.model = self.init_model()
        # Specify low learning rate and low epsilon.
        # `learning_rate` is the supported keyword; `lr` is a deprecated
        # alias that newer Keras releases no longer accept.
        optimizer = Adam(learning_rate=learning_rate, epsilon=eps)
        # Use binary crossentropy instead of mean squared error
        self.model.compile(optimizer=optimizer, 
                           loss='binary_crossentropy', 
                           metrics=['accuracy'])
    
    def init_model(self): 
        """Build and return the (uncompiled) transfer-learning Keras model."""
        
        # Use VGGFace weights for transfer learning
        base_model = VGGFace(model='resnet50', weights='vggface', 
                             pooling='avg', include_top=False)

        # Freeze the layers before this index; later layers stay trainable
        layer_idx = 100
        for layer in base_model.layers[:layer_idx]:
            # BatchNormalization layers are skipped, i.e. left trainable
            if layer.__class__.__name__ != 'BatchNormalization':
                layer.trainable = False

        # Specify input dimensions (`img_size` is a module-level variable)
        x = Input(shape=(img_size, img_size, 3))
        # Preprocess for ResNet 50
        preproc = preprocess_input(x)
        # Feed preprocessed inputs into ResNet 50
        res_net = base_model(preproc)

        # Apply sigmoid layer on output
        y = Dense(1, activation='sigmoid')(res_net)

        return KerasModel(inputs=x, outputs=y)

# Build a learning rate schedule
def lr_schedule(epoch, lr):
    """Keep the learning rate for the first two epochs, then decay it exponentially.

    Args:
        epoch: Zero-based index of the epoch about to start.
        lr: Current learning rate.

    Returns:
        `lr` unchanged while `epoch < 2`; otherwise `lr * exp(-0.2)` as a
        plain Python float.
    """
    # Imported locally so the schedule does not depend on tensorflow.math.exp,
    # which returns a tf.Tensor; LearningRateScheduler expects a float.
    import math

    if epoch < 2:
        return lr
    # Exponentially decay the learning rate
    return lr * math.exp(-0.2)
# Wrap the schedule function in a Keras callback
lr_sched = LearningRateScheduler(schedule=lr_schedule)

# Early stopping: number of epochs without improvement that are tolerated
patience = 5
# Watch validation accuracy; the best weights are not restored afterwards
early_stop = EarlyStopping(
    restore_best_weights=False,
    patience=patience,
    monitor='val_accuracy',
)

Next, we train our own model to see how its performance compares with MesoInception-4. 

First, we will find the best number of epochs through early stopping by splitting the training set into two halves: one used for fitting and one used for validation. We expect the validation accuracy in this case to be lower, due to the reduced size of the training set. However, it could provide a reasonable estimate of the optimal number of epochs for the entire training set.

In [6]:
# Set seed
set_seed(42)

# Specify size of batches and size of images for ResNet50
batch_size = 32
img_size = 224

# Instantiate the validation data generator (split in half)
val_gen = ImageDataGenerator(validation_split=0.5)

# Get half of the training set
train_half = val_gen.flow_from_directory(
        f'{base_dir}train:test/',
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='binary',
        subset='training')

# The other half comprises the validation set
val = val_gen.flow_from_directory(
        f'{base_dir}train:test/',
        target_size=(img_size, img_size),
        batch_size=batch_size,
        class_mode='binary',
        subset='validation')

# Instantiate the transfer learning model
tl_val = TransferLearning()
# Fit the transfer learning model and keep the per-epoch metrics
history = tl_val.model.fit(train_half, validation_data=val, epochs=30,
             callbacks=[lr_sched, early_stop],
             verbose=0)
# Extract the optimal number of epochs: the (1-based) epoch with the first
# maximum of the validation accuracy. This equals
# `early_stop.stopped_epoch - patience + 1` whenever early stopping fires,
# but stays valid when training runs all 30 epochs (stopped_epoch is then 0,
# which would have produced a negative epoch count).
val_acc_per_epoch = history.history['val_accuracy']
n_epochs = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1

Found 6177 images belonging to 2 classes.
Found 6176 images belonging to 2 classes.
Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_notop_resnet50.h5


Now we will fit the model to the entire training set using the number of epochs found by the previous validation callback. Then we will evaluate the performance of the model on the validation (test) set.

In [7]:
# Options shared by the training and test directory iterators
flow_opts = dict(
    class_mode='binary',
    batch_size=batch_size,
    target_size=(img_size, img_size),
)

# Plain generator: no split and no rescaling configured here
gen = ImageDataGenerator()

# Iterator over the complete training set
train = gen.flow_from_directory(f'{base_dir}train:test/', **flow_opts)

# Iterator over the complete test set
test = gen.flow_from_directory(f'{base_dir}validation/', **flow_opts)

# Fresh transfer learning model, trained on all training images for the
# number of epochs selected by the early-stopping run
tl = TransferLearning()
tl.model.fit(train, epochs=n_epochs, callbacks=[lr_sched])

# evaluate() returns [loss, accuracy]; only the accuracy is reported
_, tl_acc = tl.model.evaluate(test)
# Print accuracy results (beats original results)
print(f'\nOur Transfer Learning Framework Accuracy: {tl_acc}')

Found 12353 images belonging to 2 classes.
Found 7104 images belonging to 2 classes.
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6

Our Transfer Learning Framework Accuracy: 0.9369369149208069


In their paper, the original authors reported $91.7\%$ for their best accuracy on the individual DeepFake images using their MesoInception-4 model. We were able to *nearly* replicate their findings using their MesoInception-4 pre-trained weights. For fairness, we will compare our results to their published accuracy metrics, rather than what we replicated locally (which was worse than the published score).

Our transfer learning framework outperforms their published metric by several percentage points by achieving $\approx 93.7\%$ (to be fair, our model has many more parameters than theirs does!). We believe this to be the case simply because transfer learning is a powerful framework when your available data is much smaller than the data on which benchmark models were trained.