In [1]:
!pip install --upgrade pip



In [2]:
!pip install tensorflow pandas numpy



In [3]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import VGG16, MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input, BatchNormalization
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy, F1Score
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.train import latest_checkpoint

In [4]:
model_name = "mobilenetv3small"
img_size = 256 # assume same for both width and height
batch_size = 8

data_dir = "data"
output_dir = "output"
# assume there is a json file of the same name inside these data subdirs.

checkpoint_dir = "checkpoints"
checkpoint_path = f"{checkpoint_dir}/{model_name}/" + "cp-{epoch:04d}.weights.h5"

In [5]:
df_train: pd.DataFrame = pd.read_csv(f"{data_dir}/uniface-ff-train.csv")
df_val: pd.DataFrame = pd.read_csv(f"{data_dir}/uniface-ff-test.csv")

In [6]:
df_train['label'] = df_train['label'].astype('str')
df_val['label'] = df_val['label'].astype('str')

In [7]:
def validate_image_paths(df) -> int:

    total: int = df.shape[0]

    for path in df['filepath']:
        if not os.path.exists(os.path.join(os.getcwd(), path)):
            #print("failed to validate: ", path)
            continue
        total -= 1
        
    return total
print("total failed validations: ", validate_image_paths(df_train))

total failed validations:  11794


In [8]:
train_datagen = ImageDataGenerator(
    rescale=(1./255),
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[-0.5, 0.5],
    rotation_range=0.2,
    shear_range=0.2,
    fill_mode='nearest'
)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    df_train,
    x_col = "filepath",
    y_col = "label",
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    validate_filenames=True,
    verbose=1,
)
val_generator = val_datagen.flow_from_dataframe(
    df_val,
    x_col = "filepath",
    y_col = "label",
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='binary',
    validate_filenames=True
)

Found 22993 validated image filenames belonging to 1 classes.
Found 8944 validated image filenames belonging to 2 classes.




In [9]:
pretrained_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(img_size, img_size, 3)
)

pretrained_model.trainable = False

# technique to "stack" layers, starting with pretrain model's layers
inputs = Input(shape=(img_size, img_size, 3))

cl = pretrained_model(inputs, training=False)

cl = GlobalAveragePooling2D()(cl)
cl = Dense(1024, activation='relu')(cl)
cl = BatchNormalization()(cl)
cl = Dropout(0.4)(cl)
cl = Dense(1024, activation='relu')(cl)
cl = BatchNormalization()(cl)
cl = Dropout(0.4)(cl)

# this is the final layer; size must equal desired output size
outputs = Dense(1, activation='sigmoid')(cl)
model = Model(inputs, outputs)

model.summary(show_trainable=True)

  pretrained_model = MobileNetV2(


In [10]:
epochs = 10
learning_rate = 1e-6

model.compile(
    optimizer=Adam(learning_rate),
    loss=BinaryCrossentropy(from_logits=False),
    metrics=[BinaryAccuracy()]
)

In [11]:
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(os.getcwd(), checkpoint_path),
    save_weights_only=True,
    verbose=1
)

In [12]:
latest = latest_checkpoint(os.path.join(os.getcwd(), checkpoint_path))
print(latest)
#model.load_weights(latest)

None


In [13]:
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=(train_generator.samples // train_generator.batch_size),
    validation_data=val_generator,
    validation_steps=(val_generator.samples // val_generator.batch_size),
    callbacks=[checkpoint_callback],
    verbose=1
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step - binary_accuracy: 0.5174 - loss: 0.9797  
Epoch 1: saving model to D:\scout\Code\fakereal\models\checkpoints/mobilenetv3small/cp-0001.weights.h5
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m719s[0m 249ms/step - binary_accuracy: 0.5174 - loss: 0.9797 - val_binary_accuracy: 0.4942 - val_loss: 1.1121
Epoch 2/10
[1m   1/2874[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5:11[0m 108ms/step - binary_accuracy: 0.6250 - loss: 0.8070




Epoch 2: saving model to D:\scout\Code\fakereal\models\checkpoints/mobilenetv3small/cp-0002.weights.h5
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 36ms/step - binary_accuracy: 0.6250 - loss: 0.8070 - val_binary_accuracy: 0.4945 - val_loss: 1.1127
Epoch 3/10
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - binary_accuracy: 0.5372 - loss: 0.8970  
Epoch 3: saving model to D:\scout\Code\fakereal\models\checkpoints/mobilenetv3small/cp-0003.weights.h5
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m520s[0m 181ms/step - binary_accuracy: 0.5372 - loss: 0.8970 - val_binary_accuracy: 0.4934 - val_loss: 1.0382
Epoch 4/10
[1m   1/2874[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:46[0m 100ms/step - binary_accuracy: 0.3750 - loss: 0.9877
Epoch 4: saving model to D:\scout\Code\fakereal\models\checkpoints/mobilenetv3small/cp-0004.weights.h5
[1m2874/2874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 35ms/step - binary_acc

In [14]:
model.evaluate(val_generator, verbose=1)

[1m1118/1118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 102ms/step - binary_accuracy: 0.5019 - loss: 0.9852


[0.9963750243186951, 0.49452146887779236]

In [15]:
model.save(f"{output_dir}/deepfake-{model_name}.keras")