# Human-centered Machine Learning Project:
## <b>Explanation of locally important features for deepfake detection</b>

In [None]:
# Install VGGFace for Keras
# NOTE(review): the URL carries no tag or commit, so this installs whatever the
# repository's default branch currently points to. Consider pinning a
# commit/tag so the notebook stays reproducible.
%pip install git+https://github.com/rcmalli/keras-vggface.git

In [1]:
# LIBRARIES

# Library for data manipulation
import pandas as pd
import numpy as np

# Library for image manipulation
from PIL import Image
import cv2

# Library for data preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split   
from sklearn.preprocessing import StandardScaler

# Library for Ground Truth Generation
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Library for data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Library for general purpose
import os
import shutil
import zipfile
import random
import time

# Library for deep learning
import tensorflow as tf

# VGGFace (clone repository before running)
from keras_vggface.vggface import VGGFace

from keras import Model
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, Input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.utils import to_categorical

# Library for evaluation
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# EXTRACTION OF ZIP FILES

# Location of the downloaded dataset archive (machine-specific path)
real_fake_img_pth = "C:\\Users\\FDB\\Downloads\\dataset.zip"

# Unpack every member of the archive into the dataset folder; the context
# manager closes the archive once extraction is done.
with zipfile.ZipFile(real_fake_img_pth, mode='r') as archive:
    archive.extractall(path='C:\\Users\\FDB\\Downloads\\dataset')

In [3]:
# DATA PREPROCESSING

# Local paths
# Root folder of the extracted dataset. The loading code below reads the
# "train", "test" and "valid" subfolders of this path, each of which must
# contain a "fake" and a "real" folder of images.
real_fake_path = "C:\\Users\\FDB\\Downloads\\dataset\\real_vs_fake\\real-vs-fake" # USE YOUR LOCAL PATH

# Create a dataset of images
def create_dataset(path):
    """Load the ``fake`` and ``real`` image folders under ``path`` into arrays.

    Every file in ``<path>/fake`` and ``<path>/real`` is read with
    ``cv2.imread`` and resized to 224x224. Files that OpenCV cannot decode
    are skipped (and counted in a printed message) instead of crashing the
    whole load.

    Parameters
    ----------
    path : str
        Directory that contains a ``fake`` and a ``real`` subdirectory.

    Returns
    -------
    images : numpy.ndarray
        ``float32`` array of shape ``(n, 224, 224, 3)`` holding the resized
        images with their raw (unscaled) pixel values, in the channel order
        produced by ``cv2.imread`` (BGR).
    labels : numpy.ndarray
        ``int32`` array of shape ``(n,)``; ``0`` for fake, ``1`` for real.

    Raises
    ------
    FileNotFoundError
        If ``<path>/fake`` or ``<path>/real`` does not exist
        (raised by ``os.listdir``).
    """
    images = []
    labels = []
    skipped = 0

    # Label convention used by the rest of the notebook: fake -> 0, real -> 1.
    for class_name, label in (('fake', 0), ('real', 1)):
        class_dir = os.path.join(path, class_name)
        for file in os.listdir(class_dir):
            image = cv2.imread(os.path.join(class_dir, file))
            if image is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None rather than raising; cv2.resize would then fail.
                skipped += 1
                continue
            images.append(cv2.resize(image, (224, 224)))
            labels.append(label)

    if skipped:
        print(f"create_dataset: skipped {skipped} unreadable file(s) under {path}")

    # The reshape is a no-op when images were loaded; for empty folders it
    # keeps the (n, 224, 224, 3) layout instead of collapsing to shape (0,).
    images = np.array(images, dtype = 'float32').reshape(-1, 224, 224, 3)
    labels = np.array(labels, dtype = 'int32')

    return images, labels

# Load the three dataset splits; each result is an (images, labels) pair.
# The generator is consumed in order, so the folders are read as
# train -> test -> valid.
train_set, test_set, val_set = (
    create_dataset(os.path.join(real_fake_path, split_name))
    for split_name in ("train", "test", "valid")
)

In [None]:
# Rescaling the images and splitting each set into features / labels.
# create_dataset returns (images, labels) tuples, and tuples do not support
# item assignment (`train_set[0] = ...` raises TypeError), so the rescaled
# arrays are bound to new names instead of being written back into the tuples.
X_train, y_train = train_set[0] / 255, train_set[1]
X_test, y_test = test_set[0] / 255, test_set[1]
X_val, y_val = val_set[0] / 255, val_set[1]

# Encoding the labels as one-hot vectors (0 = fake, 1 = real)
y_train = to_categorical(y_train, num_classes = 2)
y_test = to_categorical(y_test, num_classes = 2)
y_val = to_categorical(y_val, num_classes = 2)

In [10]:
# VGG FACE MODEL

# Custom parameters
nb_class = 2
hidden_dim = 512


def build_custom_vgg_model(nb_class, hidden_dim, input_shape=(224, 224, 3)):
    """Build a VGGFace backbone with a new, trainable classification head.

    The head (flatten -> fc6 -> fc7 -> fc8) is attached to the output of the
    backbone's ``pool5`` layer. Every layer except the last three
    (``fc6``, ``fc7``, ``fc8``) is frozen, so only the new dense layers train.

    Parameters
    ----------
    nb_class : int
        Number of output classes (units of the final ``fc8`` layer).
    hidden_dim : int
        Number of units in each of the two hidden dense layers.
    input_shape : tuple, optional
        Shape of one input image, passed to ``VGGFace``.

    Returns
    -------
    keras.Model
        The uncompiled model.
    """
    vgg_model = VGGFace(include_top=False, input_shape=input_shape, pooling='avg') # pooling: None, avg or max
    last_layer = vgg_model.get_layer('pool5').output
    x = Flatten(name='flatten')(last_layer)
    x = Dense(hidden_dim, activation='relu', name='fc6')(x)
    x = Dense(hidden_dim, activation='relu', name='fc7')(x)
    # The labels are one-hot vectors over mutually exclusive classes, so the
    # output is a softmax distribution rather than independent sigmoids.
    out = Dense(nb_class, activation='softmax', name='fc8')(x)
    model = Model(vgg_model.input, out)

    # Freeze everything except the three dense layers added above.
    for layer in model.layers[:-3]:
        layer.trainable = False

    return model


custom_vgg_model = build_custom_vgg_model(nb_class, hidden_dim)

# 'accuracy' is the only metric to request here: when validation_data is
# passed to fit(), Keras reports the validation counterpart as 'val_accuracy'
# on its own. The previous single string 'accuracy, val_accuracy' is not a
# valid metric name.
custom_vgg_model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# MODEL TRAINING

# Callbacks, both driven by the validation loss:
# - stop once it has not improved for 10 epochs and roll back to the best weights
# - cut the learning rate to 20% after 5 epochs without improvement
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    verbose=1,
    min_delta=0.0001,
)

# Fit on the training set, validating on the held-out validation set
history = custom_vgg_model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr],
)

NameError: name 'EarlyStopping' is not defined

In [9]:
# Print the layer-by-layer summary (layer type, output shape, parameter count)
# of the custom model. Requires the model-definition cell to have been run.
custom_vgg_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1_1 (Conv2D)            (None, 224, 224, 64)      1792      
                                                                 
 conv1_2 (Conv2D)            (None, 224, 224, 64)      36928     
                                                                 
 pool1 (MaxPooling2D)        (None, 112, 112, 64)      0         
                                                                 
 conv2_1 (Conv2D)            (None, 112, 112, 128)     73856     
                                                                 
 conv2_2 (Conv2D)            (None, 112, 112, 128)     147584    
                                                                 
 pool2 (MaxPooling2D)        (None, 56, 56, 128)       0   

In [None]:
# VISUALIZATION OF THE RESULTS

def plot_history_metric(history, metric, title, ylabel):
    """Plot one training metric and its validation counterpart per epoch.

    Parameters
    ----------
    history : object
        The object returned by ``model.fit``; its ``history`` dict must hold
        the keys ``metric`` and ``'val_' + metric``.
    metric : str
        Name of the training metric, e.g. ``'accuracy'`` or ``'loss'``.
    title : str
        Figure title.
    ylabel : str
        Label of the y axis.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label='Train')
    # The second curve comes from validation_data, not from the test set,
    # so it is labelled 'Validation'.
    ax.plot(history.history['val_' + metric], label='Validation')
    ax.set(title=title, xlabel='Epoch', ylabel=ylabel)
    ax.legend(loc='upper left')
    plt.show()


# Plotting the training and validation accuracy
plot_history_metric(history, 'accuracy', 'Model Accuracy', 'Accuracy')

# Plotting the training and validation loss
plot_history_metric(history, 'loss', 'Model Loss', 'Loss')

In [None]:
# EVALUATION OF THE MODEL

# Predictions: one score per class for every test image; the predicted class
# is the index of the highest score.
predictions = custom_vgg_model.predict(X_test)
y_pred = np.argmax(predictions, axis=-1)

# y_test was one-hot encoded with to_categorical, so the true class indices
# are recovered with argmax. (test_set is a plain (images, labels) tuple and
# has no `.classes` attribute.)
y_true = np.argmax(y_test, axis=-1)

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print(cm)

# Classification report (label 0 = fake, label 1 = real)
print(classification_report(y_true, y_pred, target_names = ['fake', 'real']))

In [None]:
# SAVING THE MODEL

# Local paths
# NOTE(review): absolute, machine-specific path — adjust before running on
# another machine.
model_path = "C:\\Users\\FDB\\Downloads\\model.h5"

# Saving the model
# Writes the trained custom model to model_path.
custom_vgg_model.save(model_path)