In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator, DirectoryIterator

#from sklearn.model_selection import cross_val_score
#from sklearn.model_selection import KFold
import numpy as np
#import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
from pathlib import Path

# Helper functions
# from helper_functions import imgs_to_numpy

# Styles for miles
style.use('ggplot')

In [38]:
def GimmeImages(data_split, batch_size = 100):
    
    DATADIR = Path('data/') / 'real_vs_fake' / 'real-vs-fake' / data_split
    
    if data_split == "test":
        imgs = DirectoryIterator(DATADIR,
                             image_data_generator = ImageDataGenerator(rescale = 1./255),
                             color_mode = 'rgb', 
                             classes = ["real", "fake"],
                             target_size = (256, 256),
                             batch_size = batch_size,
                             class_mode = 'binary',
                             shuffle = False)
    else:
        imgs = DirectoryIterator(DATADIR,
                                 image_data_generator = ImageDataGenerator(rescale = 1./255,
                                                                           horizontal_flip = True,
                                                                           rotation_range = 20),
                                 color_mode = 'rgb', 
                                 classes = ["real", "fake"],
                                 target_size = (256, 256),
                                 batch_size = batch_size,
                                 class_mode = 'binary',
                                 shuffle = True)
    return imgs

In [39]:
train_imgs = GimmeImages('train')
val_imgs   = GimmeImages('valid')
test_imgs  = GimmeImages('test')

Found 100000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


In [62]:
model_deepfake = keras.Sequential([
  keras.Input(shape = (256, 256, 3)),
  layers.Conv2D(filters = 64, kernel_size = 4, strides = (1, 1), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D((4, 4)),
  layers.Conv2D(filters = 32, kernel_size = 4, strides = (1, 1), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D((4, 4)),
  layers.Conv2D(filters = 16, kernel_size = 4, strides = (1, 1), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D((4, 4)),
  layers.Flatten(),
  layers.Dense(units = 4096, activation = 'relu'),
  layers.Dropout(rate = 0.1),
  layers.Dense(units = 128, activation = 'relu'), 
  layers.Dense(units = 1, activation = 'softmax')
])

In [63]:
model_deepfake.compile(optimizer = SGD(lr = 1e-3), 
                       metrics = [keras.metrics.BinaryAccuracy(), 
                                  keras.metrics.Precision(), 
                                  keras.metrics.Recall()],
                       loss = keras.losses.BinaryCrossentropy())

In [64]:
model_deepfake.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 256, 256, 64)      3136      
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 64, 64, 32)        32800     
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 16, 16, 16)        8208      
_________________________________________________________________
max_pooling2d_31 (MaxPooling (None, 4, 4, 16)          0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 256)              

In [65]:
model_fit = model_deepfake.fit(train_imgs,
                               epochs = 1,
                               validation_data = val_imgs)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1000 steps, validate for 200 steps
 182/1000 [====>.........................] - ETA: 4:54 - loss: 7.6667 - binary_accuracy: 0.4972 - precision_11: 0.4972 - recall_11: 1.0000

KeyboardInterrupt: 

## James' Model

In [60]:
model = keras.Sequential([
  layers.Conv2D(16, 3, padding='same', activation='relu', input_shape=(256, 256, 3)),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='softmax'),
  layers.Dense(1)
])
model.compile(optimizer = SGD(lr = 1e-6), 
              metrics = [keras.metrics.BinaryAccuracy(), 
                   keras.metrics.Precision(), 
                   keras.metrics.Recall()],
              loss = keras.losses.BinaryCrossentropy())

In [61]:
model_fit = model.fit(train_imgs,
                      epochs = 1,
                      validation_data = val_imgs)

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1000 steps, validate for 200 steps

KeyboardInterrupt: 

## Need 2.4.1

In [14]:
tf.keras.preprocessing.image_dataset_from_directory(
    directory="data",
    labels="inferred",
    label_mode="int",
    class_names=None,
    color_mode="rgb",
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=None,
    validation_split=None,
    subset=None,
    interpolation="bilinear",
    follow_links=False,
)

AttributeError: module 'tensorflow_core.keras.preprocessing' has no attribute 'image_dataset_from_directory'

## Path attempts, no need currently

In [None]:
# Save images as numpy array
for dsplit in ('train', 'valid', 'test'):
    imgs_to_numpy(dsplit=dsplit)

In [None]:
# df['path'].tail()
img_path = 'train/fake/73ILM40K3Z.jpg'
img_idx = 99999
img = plt.imread(DATADIR / img_path)
X[img_idx, :, :, :] = img / 255.0

In [34]:
dsplit = 'train'

# Check that the split exists
if dsplit not in ('train', 'valid', 'test'):
    raise Exception('dsplit must be `train`, `test`, or `valid`')

# Load labeled dataframes
PATHDIR = Path('data')
DATADIR = Path('data/') / 'real_vs_fake' / 'real-vs-fake'
SAVEPATH = PATHDIR / 'data_array'

df = pd.read_csv(PATHDIR / f'{dsplit}.csv', header=0).drop(
    ['original_path', 'Unnamed: 0', 'label_str'], axis=1)

# Create containers for the image data
n = df.shape[0]
X = np.empty(shape=(n, 256, 256, 3))
y = df['label'].to_numpy()[np.newaxis].reshape(-1, 1)

In [None]:
# X[img_idx, :, :, :]
with open(SAVEPATH / f'X_{dsplit}.npy', 'wb') as file:
    np.save(file, X)

In [None]:
with open(SAVEPATH / f'y_{dsplit}.npy', 'wb') as file:
        np.save(file, y)

In [None]:
X = np.random.randn(n, 256, 256, 3)

In [None]:
# Load in the training data
PATHTRAIN = Path('data') / 'data_array'
with tf.device('/device:GPU:0'):
    with open(PATHTRAIN / 'X_train.npy', 'rb') as f:
        X = np.load(f)
    
    with open(PATHTRAIN / 'y_train.npy', 'rb') as f:
        y = np.load(f)
print(X.shape)
print(y.shape)

In [None]:
model_fit.history['binary_accuracy']

In [None]:
plt.plot(model_fit.history['binary_accuracy'], 'g.-', label = 'Binary Accuracy')
plt.plot(model_fit.history['val_binary_accuracy'], 'r.-', label = 'Val Binary Accuracy')
plt.legend()
plt.title('Binary Accuracy over Time')
plt.show()

In [None]:
plt.plot(model_fit.history['precision'], 'g.-', label = 'Precision')
plt.plot(model_fit.history['val_precision'], 'r.-', label = 'Val Precision')
plt.legend()
plt.title('Precision over Time')
plt.show()