In [23]:
import os
import cv2
import json
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import scipy as scpy
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
input_shape = (128, 128, 3)
data_dir = '/home/ubuntu/deepfake-detection/dataset'

real_data = [f for f in os.listdir(data_dir+'/real') if f.endswith('.png')]
fake_data = [f for f in os.listdir(data_dir+'/fake') if f.endswith('.png')]

X = []
Y = []

for img in real_data:
    X.append(img_to_array(load_img(data_dir+'/real/'+img)).flatten() / 255.0)
    Y.append(1)
for img in fake_data:
    X.append(img_to_array(load_img(data_dir+'/fake/'+img)).flatten() / 255.0)
    Y.append(0)

Y_val_org = Y

#Normalization
X = np.array(X)
Y = to_categorical(Y, 2)

#Reshape
X = X.reshape(-1, 128, 128, 3)

#Train-Test split
#on interation 9 the test size was moved to 0.1 from 0.2
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.1, random_state=5)


In [21]:
print(['X_train shape: ', X_train.shape])
print(['Y_train shape: ', Y_train.shape])
print(['X_val shape: ', X_val.shape])
print(['Y_val shape: ', Y_val.shape])
nFake = len(fake_data)
nReal = len(real_data)
print('Number of Fake Images: ' + str(nFake))
print('Number of Real Images: ' + str(nReal))
print('Ratio of Real to Fake Images: ' + str(nReal/nFake))
print('Ratio of Real to Total Images: ' + str(nReal/(nFake+nReal)))

['X_train shape: ', (39720, 128, 128, 3)]
['Y_train shape: ', (39720, 2)]
['X_val shape: ', (4414, 128, 128, 3)]
['Y_val shape: ', (4414, 2)]
Number of Fake Images: 35563
Number of Real Images: 8571
Ratio of Real to Fake Images: 0.24100891375868178
Ratio of Real to Total Images: 0.19420401504508997


In [25]:
scpy.stats.describe(X)

DescribeResult(nobs=44134, minmax=(array([[[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       ...,

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]]], dtype=float32), array([[[1., 1., 1.],
        [1., 1., 1.],
        [1