In [0]:
import os
import cv2
import json
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from imutils import paths


In [0]:
input_shape = (299, 299, 3)
BATCH_SIZE = 32

trainPath = os.path.sep.join(['dataset/dfdc' ,"training"])
valPath = os.path.sep.join(['dataset/dfdc' ,"validation"])
testPath = os.path.sep.join(['dataset/dfdc', "evaluation"])

# determine the total number of image paths in training, validation,
# and testing directories
totalTrain = len(list(paths.list_images(trainPath)))
totalVal = len(list(paths.list_images(valPath)))
totalTest = len(list(paths.list_images(testPath)))

# initialize the training data augmentation object
trainAug = ImageDataGenerator(
	rotation_range=3,
	zoom_range=0.05,
	width_shift_range=0.1,
	height_shift_range=0.1,
	shear_range=0.05,
	horizontal_flip=False,
	fill_mode="nearest")

# initialize the validation/testing data augmentation object (which
# we'll be adding mean subtraction to)
valAug = ImageDataGenerator()

# define the ImageNet mean subtraction (in RGB order) and set the
# the mean subtraction value for each of the data augmentation
# objects
mean = np.array([123.68, 116.779, 103.939], dtype="float32")
trainAug.mean = mean
valAug.mean = mean

# def to prevent from bad input images
def my_gen(gen):
  i = 0
  while True:
    try:
        data, labels = next(gen)
        yield data, labels
    except:
        print(i)
        i += 1
        pass


# initialize the training generator
trainGen = trainAug.flow_from_directory(
	trainPath,
	class_mode="binary",
	target_size=(299, 299),
	color_mode="rgb",
	shuffle=True,
	batch_size=BATCH_SIZE)

# initialize the validation generator
valGen = valAug.flow_from_directory(
	valPath,
	class_mode="binary",
	target_size=(299, 299),
	color_mode="rgb",
	shuffle=False,
	batch_size=BATCH_SIZE)

# initialize the testing generator
testGen = valAug.flow_from_directory(
	testPath,
	class_mode="binary",
	target_size=(299, 299),
	color_mode="rgb",
	shuffle=False,
	batch_size=BATCH_SIZE)

labels = (trainGen.class_indices)
print(labels)

Found 149553 images belonging to 2 classes.
Found 18691 images belonging to 2 classes.
Found 18705 images belonging to 2 classes.
{'FAKE': 0, 'REAL': 1}


In [0]:
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import InputLayer
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
# from keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# googleNet_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=input_shape)


hidden_dim = 256
hidden_dim1 = 128
googleNet_model = InceptionResNetV2( include_top=False, weights='imagenet', input_shape=(299, 299, 3))

# version 1
# last_layer = googleNet_model.get_layer('conv_7b_ac').output
# x = Flatten(name='flatten')(last_layer)
# x = Dense(hidden_dim, activation='relu', name='fc6')(x)
# x = Dense(hidden_dim1, activation='relu', name='fc7')(x)
# x = Dropout(0.2)(x)
# out = Dense(1, activation='sigmoid', name='fc8')(x)
# custom_googleNet_model = Model(googleNet_model.input, out)

# for layer in googleNet_model.layers:
# 	layer.trainable = False
# for layer in googleNet_model.layers[45:]:
# 	layer.trainable = True

#version 2
googleNet_model.trainable = True
custom_googleNet_model = Sequential()
custom_googleNet_model.add(googleNet_model)
custom_googleNet_model.add(GlobalAveragePooling2D())
custom_googleNet_model.add(Dense(units=1, activation='sigmoid'))


custom_googleNet_model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.001, amsgrad=False),
              metrics=['accuracy'])
custom_googleNet_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_resnet_v2 (Model)  (None, 8, 8, 1536)        54336736  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1536)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 1537      
Total params: 54,338,273
Trainable params: 54,277,729
Non-trainable params: 60,544
_________________________________________________________________


In [0]:
#Currently not used
early_stopping = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=2,
                               verbose=0, mode='auto')
checkpoint = ModelCheckpoint('drive/My Drive/inception_resnet_f.model', monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]


# history = model.fit(X_train, Y_train, batch_size = BATCH_SIZE, epochs = EPOCHS, validation_data = (X_val, Y_val), verbose = 1)
H = custom_googleNet_model.fit_generator(
	my_gen(trainGen),
	steps_per_epoch=totalTrain // BATCH_SIZE,
	validation_data=valGen,
	validation_steps=totalVal // BATCH_SIZE,
	epochs=25, verbose = 1, callbacks=callbacks_list)

Epoch 1/25
 583/4673 [==>...........................] - ETA: 55:38 - loss: 0.5849 - acc: 0.67820
 584/4673 [==>...........................] - ETA: 15:21 - loss: 0.2830 - acc: 0.8719
Epoch 00001: loss improved from inf to 0.37513, saving model to drive/My Drive/inception_resnet_f.model
Epoch 2/25
 564/4673 [==>...........................] - ETA: 48:33 - loss: 0.2697 - acc: 0.87983
 746/4673 [===>..........................] - ETA: 46:26 - loss: 0.2689 - acc: 0.88154
 584/4673 [==>...........................] - ETA: 14:33 - loss: 0.2476 - acc: 0.8881
Epoch 00002: loss improved from 0.37513 to 0.25957, saving model to drive/My Drive/inception_resnet_f.model
Epoch 3/25
 703/4673 [===>..........................] - ETA: 46:58 - loss: 0.2331 - acc: 0.90126
 584/4673 [==>...........................] - ETA: 14:29 - loss: 0.2418 - acc: 0.8915
Epoch 00003: loss improved from 0.25957 to 0.23172, saving model to drive/My Drive/inception_resnet_f.model
Epoch 4/25
 584/4673 [==>.......................

In [0]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Sat Mar 28 06:25:16 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P0    34W / 250W |    865MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------