# Deepfake image detection with VGGFace

In [24]:
# import 
import os

import cv2
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn import metrics
from sklearn.manifold import TSNE

from keras_vggface.vggface import VGGFace
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Dropout
from keras.callbacks import ReduceLROnPlateau

from keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator


In [30]:
# Dataset root plus the loader settings shared by all three splits.
base_path = 'archive/real_vs_fake/real-vs-fake/'
batch_size = 64
image_size = (224,224)

# One generator serves every split; its only transform is the 1/255 rescale.
# NOTE(review): VGGFace weights are normally paired with
# keras_vggface.utils.preprocess_input rather than a plain rescale - confirm
# that this input scaling is intended.
image_gen = ImageDataGenerator(rescale=1./255.)

# Training split: binary labels, default shuffling.
train_flow = image_gen.flow_from_directory(
    directory=base_path + 'train/',
    target_size=image_size,
    class_mode='binary',
    batch_size=batch_size,
)

# Validation split: same settings as training.
valid_flow = image_gen.flow_from_directory(
    directory=base_path + 'valid/',
    target_size=image_size,
    class_mode='binary',
    batch_size=batch_size,
)

# Test split: one image per batch and no shuffling.
test_flow = image_gen.flow_from_directory(
    directory=base_path + 'test/',
    target_size=image_size,
    class_mode='binary',
    batch_size=1,
    shuffle=False,
)


Found 100000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


In [26]:
# VGGFace backbone built with include_top=False for 224x224 RGB inputs.
vgg_model = VGGFace(include_top=False, input_shape = (224,224,3))

# New head on top of 'pool5': flatten -> 2048-unit ReLU layer -> single
# sigmoid unit for the binary decision.
last_layer = vgg_model.get_layer('pool5').output
fc1 = Dense(2048, activation='relu', name='fc1')(
    Flatten(name='flatten')(last_layer)
)
dense2 = Dense(1, activation='sigmoid', name='dense2')(fc1)

# Final model: backbone input through to the sigmoid output.
custom_vgg_model = Model(inputs=vgg_model.input, outputs=dense2)

In [27]:
# Print the layer-by-layer summary (layer type, output shape, parameter count) of the assembled model.
custom_vgg_model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 224, 224, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 224, 224, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 112, 112, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 112, 112, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 112, 112, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 56, 56, 128)      

In [31]:
# Configure training: binary cross-entropy loss, Adam with a 2e-4 learning
# rate, and accuracy tracked under the 'acc' key.
custom_vgg_model.compile(
    optimizer=Adam(2e-4),
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [None]:
# Derive the step counts from the generators instead of hard-coded sample
# counts, so an epoch covers the whole split. Floor division drops the final
# partial batch; max(1, ...) keeps at least one step for tiny datasets.
train_steps = max(1, train_flow.samples // batch_size)
valid_steps = max(1, valid_flow.samples // batch_size)

# fit: Model.fit accepts generators directly; fit_generator is deprecated.
history = custom_vgg_model.fit(
    train_flow,
    epochs = 5,
    steps_per_epoch = train_steps,
    validation_data = valid_flow,
    validation_steps = valid_steps
)

Epoch 1/5
Epoch 2/5

In [None]:
#custom_vgg_model.save('vggface_v1.h5')

In [None]:
# Training curves: one figure per metric, training series first and the
# validation series second. Each row is
# (train key, validation key, train label, validation label, title, y-axis label).
curve_specs = (
    ('acc', 'val_acc', 'accuracy', 'validation accuracy', 'accuracy', 'Accuracy'),
    ('loss', 'val_loss', "loss", "validation loss", 'loss', 'Loss'),
)

for train_key, valid_key, train_label, valid_label, title, y_label in curve_specs:
    plt.plot(history.history[train_key], label = train_label)
    plt.plot(history.history[valid_key], label = valid_label)
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(loc='lower left')
    plt.show()

In [None]:
# Evaluate the trained model on a separate hold-out directory.
test_datagen = ImageDataGenerator(rescale=1. / 255)
testpath = 'testdata/test/'

# Evaluation does not need shuffling, so the images are read in directory order.
eval_generator = test_datagen.flow_from_directory(testpath,target_size=image_size,
                                                  batch_size=1,shuffle=False,class_mode="binary")
eval_generator.reset()
print(len(eval_generator))

# Model.evaluate accepts generators directly (evaluate_generator is deprecated).
# len(eval_generator) is already an integer batch count, so it is passed as-is
# rather than through np.ceil, which would turn it into a float.
x = custom_vgg_model.evaluate(eval_generator, steps=len(eval_generator), verbose=1)

# x follows the compile order: [loss, accuracy].
print('Test loss:' , x[0])
print('Test accuracy:',x[1])

In [None]:
# Integer class labels recorded by the test generator.
y_test = test_flow.classes
# Model outputs (sigmoid scores) for every batch produced by the test generator.
y_pred = custom_vgg_model.predict(x=test_flow)

In [None]:
# Threshold-free ranking metrics computed on the raw scores.
roc_auc = metrics.roc_auc_score(y_test, y_pred)
print("ROC-AUC Score:", roc_auc)
avg_precision = metrics.average_precision_score(y_test, y_pred)
print("AP Score:", avg_precision)
print()

# Per-class precision/recall/F1 after thresholding the scores at 0.5.
predicted_labels = y_pred > 0.5
print(metrics.classification_report(y_test, predicted_labels))

In [None]:
# t-SNE projection of the learned 'fc1' features for a sample of test images.

# Feature extractor: expose the 2048-unit 'fc1' activations. The final 'dense2'
# layer is a single sigmoid unit, which gives t-SNE nothing to embed.
intermediate_layer_model = Model(inputs=custom_vgg_model.input,
                                 outputs=custom_vgg_model.get_layer('fc1').output)

# Read from the test split so that its sub-directories are the classes;
# base_path itself holds train/, valid/ and test/, which would otherwise be
# treated as three classes. Shuffling (with a fixed seed) makes the sample mix
# both classes, since the directory listing is grouped class by class.
tsne_eval_generator = test_datagen.flow_from_directory(base_path + 'test/',target_size=image_size,
                                                  batch_size=1,shuffle=True,seed=42,class_mode="binary")

# Number of images to embed, capped by what the split actually contains.
n_tsne_samples = min(270, tsne_eval_generator.samples)

# Pull batches manually so each feature vector stays paired with its label
# even though the generator is shuffled.
feature_batches, label_batches = [], []
for _ in range(n_tsne_samples):
    batch_images, batch_labels = next(tsne_eval_generator)
    feature_batches.append(np.asarray(intermediate_layer_model.predict_on_batch(batch_images)))
    label_batches.append(batch_labels)

pred = np.concatenate(feature_batches)
cls = np.concatenate(label_batches).astype(int)
print(pred.shape)

# Fixed random_state so the embedding is reproducible across runs.
features = TSNE(n_components=2, random_state=42).fit_transform(pred)
print(features.shape)

# Legend names come from the generator's own directory -> index mapping rather
# than being hard-coded, so they cannot disagree with the label encoding.
index_to_name = {index: name for name, index in tsne_eval_generator.class_indices.items()}

plt.figure()
for class_index, marker in ((0, 'ro'), (1, 'bo')):
    mask = cls == class_index
    plt.plot(features[mask, 0], features[mask, 1], marker,
             label=index_to_name.get(class_index, str(class_index)))
plt.title('t-SNE of fc1 features')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.legend(loc='upper right')
plt.show()