In [1]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from keras import backend as K
# os.environ["CUDA_VISIBLE_DEVICES"] = '2'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

from keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Activation, Conv2D, MaxPooling2D, BatchNormalization, Lambda, Dropout
from keras.layers import SeparableConv2D, Add
from keras.models import Model, load_model, model_from_json
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical
from sklearn import metrics
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
from scipy.optimize import brentq
from scipy.interpolate import interp1d
import glob
from PIL import Image
from tqdm import tqdm, trange
import random
from keras.applications import Xception

Using TensorFlow backend.


In [2]:
nb_classes = 2  # number of classes
img_width, img_height = 64, 64  # change based on the shape/structure of your images
batch_size = 32  # try 4, 8, 16, 32, 64, 128, 256 dependent on CPU/GPU memory capacity (powers of 2 values).
nb_epoch = 300  # number of iteration the algorithm gets trained.

# Dir

In [3]:
train_dir = '/home/www/fake_detection/task2/128x128/train'
validation_dir = '/home/www/fake_detection/task2/128x128/validation'
test50_dir = '/home/www/fake_detection/task2/128x128/test_50'

# Xception

In [4]:
img_input = Input(shape=(img_height, img_width, 3))

# layer 1 #
x = Conv2D(filters=32, kernel_size=(3, 3), strides=2, padding='valid', use_bias=False)(img_input)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# layer 2 #
x = Conv2D(filters=64, kernel_size=(3, 3), padding='valid', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# skip layer 1 #
res = Conv2D(filters=128, kernel_size=(1, 1), strides=2, padding='same', use_bias=False)(x)
res = BatchNormalization()(res)

# layer 3 #
x = SeparableConv2D(filters=128, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)

# layer 4 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=128, kernel_size=(3,3), strides=1, padding='same', use_bias=False)(x)
x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
x = Add()([x, res])

# skip layer 2 #
res = Conv2D(filters=256, kernel_size=(1, 1), strides=2, padding='same', use_bias=False)(x)
res = BatchNormalization()(res)

# layer 5 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=256, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)

# layer 6 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=256, kernel_size=(3,3), strides=1, padding='same', use_bias=False)(x)
x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
x = Add()([x, res])

# skip layer 3 #
res = Conv2D(filters=728, kernel_size=(1, 1), strides=2, padding='same', use_bias=False)(x)
res = BatchNormalization()(res)

# layer 7 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=728, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)

# layer 8 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=728, kernel_size=(3,3), strides=1, padding='same', use_bias=False)(x)
x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
x = Add()([x, res])

# ======== middle flow ========= #
for i in range(8):
    # layer 9, 10, 11, 12, 13, 14, 15, 16, 17 #
    res = x
    
    x = Activation('relu')(x)
    x = SeparableConv2D(filters=728, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)
    
    x = Activation('relu')(x)
    x = SeparableConv2D(filters=728, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)    

    x = Activation('relu')(x)
    x = SeparableConv2D(filters=728, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
    x = BatchNormalization()(x)

    x = Add()([x, res])    

# ======== exit flow ========== #
# skip layer 4 #
res = Conv2D(filters=1024, kernel_size=(1, 1), strides=2, padding='same', use_bias=False)(x)
res = BatchNormalization()(res)

# layer 18 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=728, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)

# layer 19 #
x = Activation('relu')(x)
x = SeparableConv2D(filters=1024, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
x = Add()([x, res])

# layer 20 #
x = SeparableConv2D(filters=1536, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# layer 21 #
x = SeparableConv2D(filters=2048, kernel_size=(3, 3), strides=1, padding='same', use_bias=False)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

x = GlobalAveragePooling2D()(x)
output = Dense(units=2, activation='softmax')(x)

model = Model(img_input, output)
model.summary()


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 31, 31, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 31, 31, 32)   128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 31, 31, 32)   0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [5]:
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

print(len(model.trainable_weights))

150


# Data generator

In [6]:
train_datagen = ImageDataGenerator(rotation_range=0.0, 
                                   shear_range=0,
                                   zoom_range=0,
                                   width_shift_range=0,
                                   height_shift_range=0,
                                   horizontal_flip=False,
                                   rescale=1./255,)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(train_dir,
                                                        target_size=(img_height, img_width),
                                                        batch_size=batch_size,
                                                        shuffle=True,
                                                        class_mode='categorical')

validation_generator = train_datagen.flow_from_directory(validation_dir,
                                                        target_size=(img_height, img_width),
                                                        batch_size=batch_size,
                                                        shuffle=False,
                                                        class_mode='categorical')

test50_generator = test_datagen.flow_from_directory(test50_dir,
                                                  target_size=(img_height, img_width),
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

Found 128404 images belonging to 2 classes.
Found 32100 images belonging to 2 classes.
Found 37566 images belonging to 2 classes.


In [7]:
callback_list = [EarlyStopping(monitor='val_accuracy', patience=10),
                 ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3)]
history = model.fit_generator(train_generator,
                            steps_per_epoch=200,
                            epochs=100,
                            validation_data=validation_generator,
                            validation_steps=len(validation_generator),
                            callbacks=callback_list)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


In [8]:
model.save('/home/www/fake_detection/model/celeba_xception.h5')

In [9]:
model = load_model('/home/www/fake_detection/model/celeba_xception.h5')

In [10]:
output = model.predict_generator(test50_generator, steps=len(test50_generator), verbose=1)
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
print(test50_generator.class_indices)
print(output)

{'0_real': 0, '1_fake': 1}
[[0.981 0.019]
 [0.992 0.008]
 [0.978 0.022]
 ...
 [0.107 0.893]
 [0.045 0.955]
 [0.045 0.955]]


In [11]:
output_score50 = []
output_class50 = []
answer_class50 = []
answer_class50_1 =[]

for i in trange(len(test50_generator)):
    output50 = model.predict_on_batch(test50_generator[i][0])
    output_score50.append(output50)
    answer_class50.append(test50_generator[i][1])
    
output_score50 = np.concatenate(output_score50)
answer_class50 = np.concatenate(answer_class50)

output_class50 = np.argmax(output_score50, axis=1)
answer_class50_1 = np.argmax(answer_class50, axis=1)

print(output_class50)
print(answer_class50_1)

100%|██████████| 1174/1174 [02:01<00:00,  9.70it/s]

[0 0 0 ... 1 1 1]
[0 0 0 ... 1 1 1]





In [12]:
cm50 = confusion_matrix(answer_class50_1, output_class50)
report50 = classification_report(answer_class50_1, output_class50)

recall50 = cm50[0][0] / (cm50[0][0] + cm50[0][1])
fallout50 = cm50[1][0] / (cm50[1][0] + cm50[1][1])

fpr50, tpr50, thresholds50 = roc_curve(answer_class50_1, output_score50[:, 1], pos_label=1.)
eer50 = brentq(lambda x : 1. - x - interp1d(fpr50, tpr50)(x), 0., 1.)
thresh50 = interp1d(fpr50, thresholds50)(eer50)

print(report50)
print(cm50)
print("AUROC: %f" %(roc_auc_score(answer_class50_1, output_score50[:, 1])))
print(thresh50)
print('test_acc: ', len(output_class50[np.equal(output_class50, answer_class50_1)]) / len(output_class50))

              precision    recall  f1-score   support

           0       0.91      0.84      0.88     18788
           1       0.85      0.92      0.88     18778

    accuracy                           0.88     37566
   macro avg       0.88      0.88      0.88     37566
weighted avg       0.88      0.88      0.88     37566

[[15796  2992]
 [ 1503 17275]]
AUROC: 0.950685
0.5635902753897427
test_acc:  0.8803439280200182
