In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import numpy as np
import numpy.random as rn
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

In [2]:
# Binary real-vs-fake image classifier: three Conv2D + MaxPooling2D stages
# (64 -> 32 -> 16 filters) over 256x256 RGB inputs, followed by a dense head
# that ends in a single output unit.
model_deepfake = keras.Sequential([
  layers.Conv2D(filters = 64, kernel_size = 4, strides = (1, 1), input_shape = (256, 256, 3), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(filters = 32, kernel_size = 4, strides = (1, 1), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(filters = 16, kernel_size = 4, strides = (1, 1), padding = 'same', activation = 'relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(units = 4096, activation = 'relu'),
  layers.Dropout(rate = 0.1),
  layers.Dense(units = 256, activation = 'relu'),
  # Sigmoid squashes the single output into [0, 1]. The model is compiled with
  # BinaryCrossentropy() and BinaryAccuracy() using their default settings,
  # which interpret predictions as probabilities rather than raw logits.
  layers.Dense(units = 1, activation = 'sigmoid')
])

In [4]:
# Configure training: Adam optimizer, binary cross-entropy loss, and binary
# accuracy as the reported metric. `metrics` is passed as a list, which is the
# form documented for Model.compile.
model_deepfake.compile(optimizer = 'adam',
                       loss = keras.losses.BinaryCrossentropy(),
                       metrics = [keras.metrics.BinaryAccuracy()])

In [5]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model_deepfake.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 64)      3136      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 32)      32800     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 16)        8208      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 32, 32, 16)        0         
_________________________________________________________________
flatten (Flatten)            (None, 16384)             0

In [6]:
# Randomly select labelled image records from one of the dataset splits
def get_images(n: int, dtype: str='train', seed: int=None):
    '''
    Return the label rows for n distinct, randomly selected images.

    Reads data/<dtype>.csv, drops the 'original_path', 'Unnamed: 0' and
    'label_str' columns, and returns exactly n of the remaining rows in the
    order they appear in the file.

    Parameters
    ----------
    n : int
        Number of rows to draw. Rows are drawn without replacement, so the
        result always has n rows.
    dtype : str
        Which split to read: 'train', 'valid', or 'test'.
    seed : int, optional
        When given, NumPy's global random generator is seeded with it before
        sampling so the draw is repeatable.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, keeping their original index labels.

    Raises
    ------
    ValueError
        If dtype is not one of the known splits, if n is negative, or if n
        exceeds the number of rows available for the split.

    Timings noted by the original author:
    Takes ~13 sec / 100 iter with n = 100
    Takes ~118 sec / 1000 iter with n = 100
    '''
    # Expected number of files per split; also serves as the list of valid splits
    files_per_split = {"train": 50000, "valid": 10000, "test": 10000}

    # Make sure the split name is valid
    if dtype not in files_per_split:
        raise ValueError("dtype argument must be train, valid, or test.")

    if n < 0:
        raise ValueError("n must be non-negative.")

    # Load the labeled dataframe for the split
    PATHDIR = Path('data')
    df = pd.read_csv(PATHDIR / f'{dtype}.csv', header = 0).drop(
        ['original_path', 'Unnamed: 0', 'label_str'], axis=1)

    # Make sure you don't want more pictures than we have. The CSV may hold
    # fewer rows than the nominal split size, so both limits are honoured.
    n_available = min(files_per_split[dtype], len(df))
    if n > n_available:
        raise ValueError(f'There are not {n} files in the {dtype} folder')

    # Set a seed if present
    if seed is not None:
        rn.seed(seed)

    # Sample row positions WITHOUT replacement: sampling with replacement can
    # pick the same row twice, which would silently return fewer than n rows.
    positions = rn.choice(len(df), size=n, replace=False)

    # Sorting the positions keeps the rows in their original file order
    return df.iloc[np.sort(positions)]

In [7]:
# Create a function to prepare random images for training
def prep_for_train(sample_df):
    """
    Load the images listed in sample_df into arrays the model can consume.

    Parameters
    ----------
    sample_df : pandas.DataFrame
        Must provide a 'label' column and a 'path' column. Each path is
        resolved relative to data/real_vs_fake/real-vs-fake.

    Returns
    -------
    X : numpy.ndarray
        float32 array of shape (n, 256, 256, 3) holding each image divided
        by 255.0, where n is the number of rows in sample_df.
    y : numpy.ndarray
        The 'label' column reshaped to a column vector of shape (n, 1).
    """
    # Save the labels
    labels = sample_df['label'].to_numpy()

    # Path to the data
    DATADIR = Path('data/') / 'real_vs_fake'/ 'real-vs-fake'

    # Allocate the image buffer. float32 halves the memory of NumPy's float64
    # default, which matters when loading a thousand 256x256x3 images at once.
    n = sample_df.shape[0]
    X = np.empty(shape=(n, 256, 256, 3), dtype=np.float32)

    # Load in the images to be trained on
    for img_idx, img_path in enumerate(sample_df['path']):
        # NOTE(review): the / 255.0 scaling assumes imread yields 0-255 pixel
        # values of shape (256, 256, 3) — confirm for the image format used.
        X[img_idx] = plt.imread(DATADIR / img_path) / 255.0

    return X, labels.reshape(-1, 1)
        

In [8]:
# Fit the model: request 1000 training records (seed 69) for the training
# arrays and 100 validation records (seed 12) for per-epoch validation.
X, y = prep_for_train(get_images(1000, seed=69))
model_fit = model_deepfake.fit(
    x=X,
    y=y,
    batch_size=50,
    epochs=5,
    validation_data=prep_for_train(get_images(100, dtype='valid', seed=12)),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
# NOTE(review): model_fit is the value returned by model_deepfake.fit(...), so
# model1 aliases that fit result (presumably the training history — confirm
# against the Keras docs), not the trained model object itself.
model1 = model_fit

SyntaxError: invalid syntax (<ipython-input-6-f6b27b303abd>, line 1)

In [10]:
# Show which TensorFlow version this kernel is running.
print(tf.__version__)

2.4.1


In [11]:
# Print the local devices reported by device_lib.
# NOTE(review): device_lib is imported from the tensorflow.python namespace,
# which looks like a non-public module path — confirm it is supported.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17056259349394787453
]


In [12]:
# Ask TensorFlow for the physical devices of type 'GPU'.
tf.config.list_physical_devices('GPU')

[]

In [13]:
!pip uninstall tensorflow

Found existing installation: tensorflow 2.4.1
Uninstalling tensorflow-2.4.1:
  Would remove:
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/estimator_ckpt_converter
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/import_pb_to_tensorboard
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/saved_model_cli
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/tensorboard
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/tf_upgrade_v2
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/tflite_convert
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/toco
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/bin/toco_from_protos
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/lib/python3.8/site-packages/tensorflow-2.4.1.dist-info/*
    /home/dom/Documents/School/Stat430/deepFakeCNN/venv/lib/python3.8/site-packages/tensorflow/*
Proceed (y/n)? ^C
[31mERROR: Operation cancelled by user[0m


In [None]:
|