# PART 1: Preparations and Data Preprocessing

In this notebook, I will build a CNN-based network that is able to detect whether images of faces have been photoshopped or not. First, let's load the necessary packages.

In [0]:
# This needs to be executed beforehand if keras_vggface is not installed yet.
pip install git+https://github.com/rcmalli/keras-vggface.git

In [0]:
import numpy as np
import glob
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.models import Sequential, Model
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications.vgg16 import VGG16, preprocess_input
from keras_vggface import utils
from keras_vggface.vggface import VGGFace
from google.colab import drive
from google.colab import files
# Seed NumPy's global random number generator.
# NOTE(review): this call only touches NumPy; the Keras backend presumably keeps its own RNG state, so weight
# initialisation and dropout may still differ between runs — confirm and seed the backend if needed.
np.random.seed(123)
# Mount Google Drive; every data and cache path used below lives under /content/drive.
drive.mount('/content/drive')

The data provided consists of pictures of faces that have, and pictures of faces that have not been photoshopped. A third set of pictures is given, for which we are asked to predict whether these pictures have been photoshopped or not.

Let's start with preprocessing the images by converting and storing them as multidimensional numpy arrays. As I am going to use different pretrained models later on, the image data needs to be preprocessed in different ways. The reason for this step is that images should be scaled in the same way as the training data that was used by the researchers when training the respective model. This includes whether images were rescaled to values between zero and one, or between minus one and one, or whether they just need to be centered using the mean values of the RGB channels of the image data used by the researchers. These differently preprocessed images are then stored in different numpy arrays and will be used when the respective pretrained model requires the specific preprocessing of the image data.

In [0]:
DATA_DIR = '/content/drive/My Drive/Colab Notebooks/data'
TEMP_DIR = '/content/drive/My Drive/Colab Notebooks/tempfiles'


def load_image_variants(*patterns):
    """Load all images matching the glob `patterns` in four preprocessing flavours.

    Patterns are processed in the order given; the files of each pattern are
    sorted so that the row order of the returned arrays is reproducible.

    Returns:
        (paths, raw, caffe, face_v1, face_v2) where `paths` is the list of
        file paths and the four arrays are stacked along a new first axis:
        the unprocessed pixels, the keras.applications.vgg16 preprocessing,
        and the keras_vggface preprocessing with version=1 and version=2.

    Raises:
        FileNotFoundError: if no file matches any of the patterns.
    """
    paths = [path for pattern in patterns for path in sorted(glob.glob(pattern))]
    if not paths:
        raise FileNotFoundError("no images matched: " + ", ".join(patterns))

    raw, caffe, face_v1, face_v2 = [], [], [], []
    for path in paths:
        pixels = img_to_array(load_img(path, target_size=(224, 224)))
        raw.append(pixels)
        # Keras documents that preprocess_input may overwrite its input array.
        # Every call therefore gets its own copy, so the raw pixels and the
        # other preprocessing variants are never computed from modified data.
        caffe.append(preprocess_input(pixels.copy()))
        face_v1.append(utils.preprocess_input(pixels.copy(), version=1))
        face_v2.append(utils.preprocess_input(pixels.copy(), version=2))
    return paths, np.stack(raw), np.stack(caffe), np.stack(face_v1), np.stack(face_v2)


# Labelled images: all real pictures first, then all fake pictures.
_, x, x0, x1, x2 = load_image_variants(DATA_DIR + '/real/*.jpg', DATA_DIR + '/fake/*.jpg')

np.save(TEMP_DIR + "/x", x)
np.save(TEMP_DIR + "/x0", x0)
np.save(TEMP_DIR + "/x1", x1)
np.save(TEMP_DIR + "/x2", x2)

# Unlabelled images for which predictions are requested.
pred_paths, x_pred, x_pred0, x_pred1, x_pred2 = load_image_variants(DATA_DIR + '/unknown/*.jpg')

# Keep only the file name up to its first dot, for however many files were found.
names = [path.split("/")[-1].split(".")[0] for path in pred_paths]

np.save(TEMP_DIR + "/x_pred", x_pred)
np.save(TEMP_DIR + "/x_pred0", x_pred0)
np.save(TEMP_DIR + "/x_pred1", x_pred1)
np.save(TEMP_DIR + "/x_pred2", x_pred2)
np.save("/content/drive/My Drive/Colab Notebooks/names", names)

To clarify, the mean values that the preprocessing functions from keras_vggface subtract from the original values are as follows (for the Red, Green, and Blue channel respectively), while no scaling is performed:

```
x1[..., 0] -= 93.5940
x1[..., 1] -= 104.7624
x1[..., 2] -= 129.1863

x2[..., 0] -= 91.4953
x2[..., 1] -= 103.8827
x2[..., 2] -= 131.0912
```
The resulting arrays have a shape of (1354, 224, 224, 3), which corresponds to the number of pictures given, the number of pixels in the horizontal and vertical dimension, and the three colour channels:

In [0]:
# Display the shape of the stacked raw image array.
x.shape

(1354, 224, 224, 3)

In [0]:
# Channel values of the first pixel of the first picture in the unpreprocessed array `x`.
x[0,0,0]

array([188., 173.,  78.], dtype=float32)

In [0]:
# Difference between the raw and the version-2 preprocessed values of the same pixel; this shows
# the per-channel amount that the preprocessing subtracted.
x[0,0,0]-x2[0,0,0]

array([ 91.4953, 103.8827, 131.0912], dtype=float32)

In [0]:
#for internal use
!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x.npy" "x.npy"
x = np.load("x.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x0.npy" "x0.npy"
x0 = np.load("x0.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x1.npy" "x1.npy"
x1 = np.load("x1.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x2.npy" "x2.npy"
x2 = np.load("x2.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_pred.npy" "x_pred.npy"
x_pred = np.load("x_pred.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_pred0.npy" "x_pred0.npy"
x_pred0 = np.load("x_pred0.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_pred1.npy" "x_pred1.npy"
x_pred1 = np.load("x_pred1.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_pred1.npy" "x_pred1.npy"
x_pred1 = np.load("x_pred1.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_pred2.npy" "x_pred2.npy"
x_pred2 = np.load("x_pred2.npy")

Lastly, the target array y is created in a straightforward manner.

In [0]:
# Target vector in the stacking order of the images: 891 zeros (not photoshopped) followed by 463 ones (photoshopped).
y = np.r_[np.zeros(891, dtype=int), np.ones(463, dtype=int)]

As the data is ordered by first listing the non-photoshopped and then listing the photoshopped pictures, it is important to always shuffle the data when creating the training and test set, while additional shuffling between epochs might also be an option.

# PART 2: Creating and Training Different Models

## PART 2.1: Building a Model from scratch

First, let's start with creating a simple CNN-based model from scratch, consisting of three convolutional layers. Before training, the image data is rescaled to values between zero and one, as usual learning rates might otherwise be too small to optimize the network's parameters.

In [0]:
x = np.divide(x, 255)

In [0]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, shuffle=True, random_state = 0)

In [0]:
input_shape = (224, 224, 3)

# Three conv -> ReLU -> 2x2 max-pooling stages with 32 filters each, followed by a small
# dense head with dropout and a single sigmoid output unit.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=input_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs; the held-out split doubles as validation data for the per-epoch metrics.
story = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the held-out images and compute the ROC curve of the predicted probabilities.
actual = y_test
predictions = model.predict(x_test)
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(actual, predictions)
roc_auc = metrics.auc(false_positive_rate, true_positive_rate)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
ax.legend(loc='lower right')
ax.plot([0,1],[0,1],'r--')  # diagonal of a random classifier, for reference
ax.set_xlim([-0.1,1.2])
ax.set_ylim([-0.1,1.2])
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')
plt.show()

In [None]:
# Depending on the Keras release, accuracy is logged as 'acc'/'val_acc' or as
# 'accuracy'/'val_accuracy'; pick whichever key this training run produced.
acc_key = 'acc' if 'acc' in story.history else 'accuracy'

fig, ax = plt.subplots()
ax.plot(story.history[acc_key])
ax.plot(story.history['val_' + acc_key])
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train_accuracy', 'test_accuracy'], loc='best')
plt.show()

## Part 2.2: Implementing Pretrained Models

Several networks that have already been trained on image data are publicly available. In total, I choose to implement four of them. Highly relevant for the application to face images should be the pretrained models developed for face recognition by the Visual Geometry Group from the University of Oxford. Here, the library "keras_vggface" (see the installation cell at the top of Part 1) makes it fairly easy to load and use several of their models, i.e. their pretrained models based on the VGG16, the ResNet50, and the SENet50 architecture. The respective papers can be found <a href='http://www.robots.ox.ac.uk/~vgg/publications/2015/Parkhi15/parkhi15.pdf'>here</a> and  <a href='https://arxiv.org/pdf/1710.08092.pdf'>here</a>. The ResNet50-based network, for example, has been trained on 3.3 million face images.

Furthermore, Keras itself provides several models that were pretrained on the ImageNet dataset. Although these models should be less relevant for this application due to the ImageNet dataset not being restricted to face images, I will implement the VGG16 model trained on the ImageNet dataset as a benchmark.

All four pretrained networks consist of two parts: The first part is always a sophisticated structure of convolutional layers, while the second one is a relatively simple multilayer perceptron used to classify the images. As the top networks were trained for a different purpose than detecting whether face images have been photoshopped or not, the second part of the network is rather uninteresting for me. For example, the ResNet50-based network was trained to classify to which of 8631 persons a picture belongs. Yet, in the spirit of transfer learning, the first part of the networks should be valuable. Despite not being interested in the classification of the 8631 individuals, one can use the first part of this network as a feature extractor for face pictures in general. After that, these extracted features can be used to train a simple network from scratch for another purpose, in this case the detection of photoshopped faces.

To work computationally efficient, I will first extract the features from the respective preprocessed numpy arrays using the different pretrained models and save these features in a separate numpy file. By doing so, I only need to extract the features of the images once and not for every epoch of training the top network. This decreases the training time needed for one epoch of the top network from minutes to a few seconds.

#### Feature extraction

First, let's extract the features using the different pretrained models by feeding the respective preprocessed image data to the network. As the include_top=False argument also excludes the last Flatten layer, I delete the top layers manually and keep the Flatten layer.

In [0]:
featuremodel = VGGFace(model='resnet50')
for i in range(0, 1):
    featuremodel.layers.pop()
x_resnet_faces = featuremodel.predict(x2)
np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces", x_resnet_faces)
dim_resnet = 8631



featuremodel = VGGFace(model='senet50')
for i in range(0, 1):
    featuremodel.layers.pop()
x_senet_faces = featuremodel.predict(x2)
np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_senet_faces", x_senet_faces)
dim_senet = 8631



featuremodel = VGGFace(model='vgg16')
for i in range(0, 6):
    featuremodel.layers.pop()
x_vgg16_faces = featuremodel.predict(x1)
np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_vgg16_faces", x_vgg16_faces)
dim_vgg16 = 2622



featuremodel = VGG16(weights='imagenet')
for i in range(0, 3):
    featuremodel.layers.pop()
    x_vgg16_imagenet = featuremodel.predict(x0)
np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_vgg16_imagenet", x_vgg16_imagenet)

In [0]:
#for internal use
!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces.npy" "x_resnet_faces.npy"
x_resnet_faces = np.load("x_resnet_faces.npy")
dim_resnet = 8631

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_senet_faces.npy" "x_senet_faces.npy"
x_senet_faces = np.load("x_senet_faces.npy")
dim_senet = 2622

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_vgg16_faces.npy" "x_vgg16_faces.npy"
x_vgg16_faces = np.load("x_vgg16_faces.npy")
dim_vgg16 = 2622

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_vgg16_imagenet.npy" "x_vgg16_imagenet.npy"
x_vgg16_imagenet = np.load("x_vgg16_imagenet.npy")
dim_vgg16 = 2622

In [0]:
# Pick the feature set (and its width) that feeds the top network, then split it 80/20
# with the same shuffling seed as before.
xx, input_dim = x_resnet_faces, dim_resnet
x_train, x_test, y_train, y_test = train_test_split(
    xx,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [0]:
# Fully connected top network on the extracted features: 1000 -> 100 -> 8 ReLU units with
# dropout after the first and the last hidden layer, and a sigmoid output unit.
model = Sequential([
    Dense(1000, activation='relu', input_dim = input_dim),
    Dropout(0.5),
    Dense(100, activation='relu'),
    Dense(8, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the top network for 10 epochs, reshuffling the training rows before every epoch.
story = model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
    shuffle=True,
)

In [None]:
# Score the held-out features and compute the ROC curve of the predicted probabilities.
actual = y_test
predictions = model.predict(x_test)
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(actual, predictions)
roc_auc = metrics.auc(false_positive_rate, true_positive_rate)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
ax.legend(loc='lower right')
ax.plot([0,1],[0,1],'r--')  # diagonal of a random classifier, for reference
ax.set_xlim([-0.1,1.2])
ax.set_ylim([-0.1,1.2])
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')
plt.show()

In [None]:
# Depending on the Keras release, accuracy is logged as 'acc'/'val_acc' or as
# 'accuracy'/'val_accuracy'; pick whichever key this training run produced.
acc_key = 'acc' if 'acc' in story.history else 'accuracy'

fig, ax = plt.subplots()
ax.set_title('Training and Testing Accuracy')
ax.plot(story.history[acc_key])
ax.plot(story.history['val_' + acc_key])
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train_accuracy', 'test_accuracy'], loc='best')
plt.show()

In [None]:
# Turn the sigmoid probabilities into class labels with a 0.5 threshold. Casting the raw
# probabilities to int would floor every value below 1.0 to class 0. `ravel()` flattens the
# (n, 1) prediction column for any test-set size instead of a hard-coded length.
predicted_classes = (model.predict(x_test) > 0.5).astype('int').ravel()

print("Confusion Matrix:")
print(metrics.confusion_matrix(y_test, predicted_classes, labels=range(2)))

## PART 2.3: Implementing Keras' ImageDataGenerator

To help a model to generalize better, one can use Keras' ImageDataGenerator to artificially augment the data by applying random transformations, such as zooming or slightly shifting the picture vertically or horizontally. After these transformations, the respective preprocessing function necessary for the respective pretrained model is applied. To not unnecessarily reduce the performance on the test data, only the preprocessing function is applied on pictures from the test set, without performing any random transformations.

Again, to be computationally efficient, the features from the newly generated pictures are extracted by the pretrained models and stored before training the top network. In the following, I will use the ResNet50 network from keras_vggface to extract the features from the generated images.

In [0]:
def preproc(x):
  """Preprocess an image array with keras_vggface's `preprocess_input`, using `version=2`.

  Used as the `preprocessing_function` of the image data generators below.
  """
  return utils.preprocess_input(x, version=2)

generator_train = ImageDataGenerator(preprocessing_function = preproc,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range = 0.1, 
                                   zoom_range = 0.1) 

generator_test = ImageDataGenerator(preprocessing_function = preproc)

set_train = generator_train.flow_from_directory('/content/drive/My Drive/Colab Notebooks/data1/train', target_size = (224, 224), batch_size = 1, class_mode= 'binary')

set_test = generator_test.flow_from_directory('/content/drive/My Drive/Colab Notebooks/data1/validation', target_size = (224, 224), batch_size = 1, class_mode = 'binary')

Found 1083 images belonging to 2 classes.
Found 276 images belonging to 2 classes.


In [0]:
model = VGGFace(model='resnet50')
for i in range(0, 1):
    model.layers.pop()

x_resnet_faces_generated_train = model.predict_generator(set_train, 2000)
x_resnet_faces_generated_test = model.predict_generator(set_test, 200)

np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces_generated_train", x_resnet_faces_generated_train)
np.save("/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces_generated_test", x_resnet_faces_generated_test)

In [0]:
#for internal use
!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces_generated_train.npy" "x_resnet_faces_generated_train.npy"
x_resnet_faces_generated_train = np.load("x_resnet_faces_generated_train.npy")

!cp "/content/drive/My Drive/Colab Notebooks/tempfiles/x_resnet_faces_generated_test.npy" "x_resnet_faces_generated_test.npy"
x_resnet_faces_generated_test = np.load("x_resnet_faces_generated_test.npy")

In [0]:
train_data = x_resnet_faces_generated_train
test_data = x_resnet_faces_generated_test
train_labels = np.array([0] * 1000 + [1] * 1000)
test_labels = np.array([0] * 100 + [1] * 100)

In [0]:
input_dim = 8631

model = Sequential()
model.add(Dense(1000, activation='relu', input_dim = input_dim))
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the top network on the features of the generated images for 10 epochs.
story = model.fit(
    train_data,
    train_labels,
    batch_size=64,
    epochs=10,
    validation_data=(test_data, test_labels),
)

In [None]:
# Evaluate on the validation features of THIS section. The original cell scored `x_test`/`y_test`,
# which still held the split from Part 2.2 and is not the data this model was validated on.
predictions = model.predict(test_data)
actual = test_labels
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(actual, predictions)
roc_auc = metrics.auc(false_positive_rate, true_positive_rate)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
ax.legend(loc='lower right')
ax.plot([0,1],[0,1],'r--')  # diagonal of a random classifier, for reference
ax.set_xlim([-0.1,1.2])
ax.set_ylim([-0.1,1.2])
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')
plt.show()

In [None]:
# Depending on the Keras release, accuracy is logged as 'acc'/'val_acc' or as
# 'accuracy'/'val_accuracy'; pick whichever key this training run produced.
acc_key = 'acc' if 'acc' in story.history else 'accuracy'

fig, ax = plt.subplots()
ax.plot(story.history[acc_key])
ax.plot(story.history['val_' + acc_key])
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train_accuracy', 'test_accuracy'], loc='best')
plt.show()

## Part 2.4: Finetuning Top Layers of Pretrained Models

Another way of implementing pretrained models is to allow the network to fine-tune the last block of convolutional layers, while freezing all the previous blocks. This is a compromise between tuning the entire pretrained model and only training the top network without touching the convolutional part at all. If I randomly initialize the weights of the top network and set the last convolutional block to be trainable, however, the large gradient updates from the top network would heavily influence the gradients for the convolutional part. To avoid this problem, I first train a "premodel" as the top network and then load the learned weights from this premodel as the initial weights for the fine-tuning model.

In the following, I will use the VGG16 network trained on face images for that. The model is again not able to generalize on the test set.

In [0]:
#premodel for the initial weights of the featuremodel

!cp "/content/drive/My Drive/Colab Notebooks/preproc10/x1_vgg16faces_features.npy" "x1_vgg16faces_features.npy"
x_vgg16_faces = np.load("x1_vgg16faces_features.npy")
dim_vgg16 = 2622

x_train, x_test, y_train, y_test = train_test_split(x_vgg16_faces, y, test_size = 0.2, shuffle=True, random_state = 0)

premodel = Sequential()
premodel.add(Dense(4096, activation='relu', input_dim = dim_vgg16))
premodel.add(Dropout(0.5))
premodel.add(Dense(4096, activation='relu'))
premodel.add(Dropout(0.5))
premodel.add(Dense(1000, activation='softmax'))
premodel.add(Dropout(0.5))
premodel.add(Dense(1, activation='softmax'))

premodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

premodel.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test), shuffle=True, verbose = 0)

In [0]:
# Fine-tuning works on the images themselves, so split the version-1 preprocessed array `x1`
# with the same test fraction and seed as before.
x_train, x_test, y_train, y_test = train_test_split(
    x1,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=0,
)

In [0]:
#featuremodel with initial weights of the top network from the premodel

featuremodel = VGGFace(model='vgg16')
# NOTE(review): popping from `layers` presumably does not shorten the tensor this model outputs
# (the feature width hard-coded elsewhere in this notebook, 2622, looks like the classifier
# width rather than the Flatten width). The loop is kept unchanged so that the stacked model
# stays compatible with the premodel weights loaded below — confirm, and if the top is to be
# removed for real, re-extract the premodel features with a truncated Model first.
for i in range(0, 6):
    featuremodel.layers.pop()

# Freeze the first 14 layers; only the layers after them are fine-tuned.
for layer in featuremodel.layers[:14]: #18
    layer.trainable = False

finemodel = Sequential()
finemodel.add(featuremodel)
finemodel.add(Dense(4096, activation='relu', input_dim = dim_vgg16))
finemodel.add(Dropout(0.5))
finemodel.add(Dense(4096, activation='relu'))
finemodel.add(Dropout(0.5))
finemodel.add(Dense(1000, activation='softmax'))
finemodel.add(Dropout(0.5))
# A softmax over a single unit always outputs exactly 1.0, so nothing could be learned;
# binary classification with binary_crossentropy needs a sigmoid output.
finemodel.add(Dense(1, activation='sigmoid'))

# Start the dense layers from the premodel weights (premodel layers 0, 2, 4, 6 are its Dense
# layers; in finemodel they are shifted by one because the feature model is layer 0).
finemodel.layers[1].set_weights(premodel.layers[0].get_weights())
finemodel.layers[3].set_weights(premodel.layers[2].get_weights())
finemodel.layers[5].set_weights(premodel.layers[4].get_weights())
finemodel.layers[7].set_weights(premodel.layers[6].get_weights())

finemodel.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Fine-tune for 5 epochs, validating on the held-out images after every epoch.
story = finemodel.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the held-out images with the fine-tuned model and compute the ROC curve.
actual = y_test
predictions = finemodel.predict(x_test)
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(actual, predictions)
roc_auc = metrics.auc(false_positive_rate, true_positive_rate)

fig, ax = plt.subplots()
ax.set_title('Receiver Operating Characteristic')
ax.plot(false_positive_rate, true_positive_rate, 'b', label='AUC = %0.2f'% roc_auc)
ax.legend(loc='lower right')
ax.plot([0,1],[0,1],'r--')  # diagonal of a random classifier, for reference
ax.set_xlim([-0.1,1.2])
ax.set_ylim([-0.1,1.2])
ax.set_ylabel('True Positive Rate')
ax.set_xlabel('False Positive Rate')
plt.show()

In [None]:
# Depending on the Keras release, accuracy is logged as 'acc'/'val_acc' or as
# 'accuracy'/'val_accuracy'; pick whichever key this training run produced.
acc_key = 'acc' if 'acc' in story.history else 'accuracy'

fig, ax = plt.subplots()
ax.set_title('Training and Testing Accuracy')
ax.plot(story.history[acc_key])
ax.plot(story.history['val_' + acc_key])
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train_accuracy', 'test_accuracy'], loc='best')
plt.show()