In [1]:
import os
import cv2
import shutil
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Lambda, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
import keras_tuner as kt

In [2]:
data_dir = os.path.join('data','FF++')
output_dir = os.path.join('data','FF++_split_frames')
img_size = (224,224)
batch_size = 32

# Load Datasets

In [3]:
train_raw = keras.utils.image_dataset_from_directory(
    os.path.join(output_dir, 'train'),
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary')

Found 1400 files belonging to 2 classes.


In [4]:
val_raw = keras.utils.image_dataset_from_directory(
    os.path.join(output_dir, 'val'),
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary')

Found 300 files belonging to 2 classes.


In [5]:
test_raw = keras.utils.image_dataset_from_directory(
    os.path.join(output_dir, 'test'),
    image_size=img_size,
    batch_size=batch_size,
    label_mode='binary', 
    shuffle=False)

Found 300 files belonging to 2 classes.


In [6]:
file_names = test_raw.file_paths
class_names = test_raw.class_names

In [7]:
# autotune automatically decides how many batches to pre-fetch based on cpu, gpu, memory etc.
train = train_raw.prefetch(tf.data.AUTOTUNE)
val = val_raw.prefetch(tf.data.AUTOTUNE)
test = test_raw.prefetch(tf.data.AUTOTUNE)

In [8]:
y_true = np.concatenate([np.array(y) for x,y in test_raw], axis=0)

In [9]:
class_names

['fake', 'real']

In [10]:
def get_video_predictions(file_names, class_names, y_true, y_pred):
    video_pred_df = {'video':[], 'actual_class':[], 'predicted_class':[]}
    for i, file in enumerate(file_names):
        dirname = os.path.dirname(file)
        f = os.path.splitext(os.path.basename(file))[0][:-2]
        if f not in video_pred_df['video']:
            video_pred_df['video'].append(f)
            video_pred_df['actual_class'].append(class_names[int(y_true[i][0])])
            idxs = [file_names.index(os.path.join(dirname,f+f"_{c}.jpg")) for c in range(5)]
            majority_pred = 1 if sum(int(y_pred[idx][0]) for idx in idxs)>2.5 else 0
            video_pred_df['predicted_class'].append(class_names[majority_pred])
    return pd.DataFrame(video_pred_df)

# Transfer Learning Model

## MobileNetV2

In [11]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
base_model.trainable = False

In [12]:
model = Sequential()
model.add(Input(shape=img_size+(3,)))
model.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model.add(base_model),
model.add(GlobalAveragePooling2D()),
model.add(Dropout(0.3)),
model.add(Dense(1, activation='sigmoid'))
model.summary()




In [13]:
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [14]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model.fit(train, validation_data=val,
          epochs=500, callbacks=[estop],
          verbose=1)

Epoch 1/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 588ms/step - accuracy: 0.5079 - loss: 0.7788 - val_accuracy: 0.6133 - val_loss: 0.6678
Epoch 2/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 520ms/step - accuracy: 0.5950 - loss: 0.6842 - val_accuracy: 0.6333 - val_loss: 0.6596
Epoch 3/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 537ms/step - accuracy: 0.6200 - loss: 0.6584 - val_accuracy: 0.6367 - val_loss: 0.6554
Epoch 4/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 528ms/step - accuracy: 0.6621 - loss: 0.6264 - val_accuracy: 0.6067 - val_loss: 0.6545
Epoch 5/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 521ms/step - accuracy: 0.6721 - loss: 0.6157 - val_accuracy: 0.6567 - val_loss: 0.6550
Epoch 6/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 513ms/step - accuracy: 0.6786 - loss: 0.6011 - val_accuracy: 0.6367 - val_loss: 0.6584
Epoch 7/500
[1m

<keras.src.callbacks.history.History at 0x285f5455df0>

In [16]:
test_loss, test_accuracy = model.evaluate(test, verbose=0)
print("Test Accuracy:", test_accuracy)
print("Test Loss:", test_loss)

Test Accuracy: 0.6100000143051147
Test Loss: 0.7111378908157349


In [17]:
predictions = model.predict(test, verbose=0)

In [18]:
y_pred = np.array([[1] if pred[0]>=0.5 else [0] for pred in predictions])

In [19]:
video_preds = get_video_predictions(file_names, class_names, y_true, y_pred)
video_preds

Unnamed: 0,video,actual_class,predicted_class
0,01_11__meeting_serious__9OM3VE0Y,fake,real
1,01_12__outside_talking_pan_laughing__TNI7KUZ6,fake,fake
2,02_03__walking_outside_cafe_disgusted__QH3Y0IG0,fake,fake
3,02_07__meeting_serious__1JCLEEBQ,fake,real
4,02_09__kitchen_pan__HIH8YA82,fake,fake
5,02_15__walking_and_outside_surprised__I8G2LWD1,fake,fake
6,03_06__podium_speech_happy__83ABVHC3,fake,real
7,03_07__walk_down_hall_angry__IFSURI9X,fake,fake
8,03_09__outside_talking_still_laughing__RCETIXYL,fake,fake
9,03_09__secret_conversation__RCETIXYL,fake,fake


In [20]:
accuracy = len(video_preds[video_preds['actual_class']==video_preds['predicted_class']])/len(video_preds)
print(accuracy)

0.6166666666666667


## MobileNetV2 Improved - Data Augmentation

In [21]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
base_model.trainable = False
# # unfreezing a few of the last layers
# base_model.trainable = True
# for layer in base_model.layers[:-30]:
#     layer.trainable = False

In [22]:
# adding data augmentation for make model more robust
data_augmentation = Sequential(
    [
        keras.layers.RandomFlip('horizontal'),
        keras.layers.RandomRotation(0.1),
        keras.layers.RandomZoom(0.1),
        keras.layers.RandomBrightness(factor=0.2)
    ]
)

In [23]:
model = Sequential()
model.add(Input(shape=img_size+(3,)))
model.add(data_augmentation)
model.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model.add(base_model),
model.add(GlobalAveragePooling2D()),
model.add(Dropout(0.3)),
model.add(Dense(1, activation='sigmoid'))
model.summary()

In [24]:
model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [25]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
# mcheckpt = ModelCheckpoint("best_baseline_mobilenetv2_model.keras", save_best_only=True)
# lrplateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)
# model.fit(train, validation_data=val,
#           epochs=500, callbacks=[estop, mcheckpt, lrplateau],
#           verbose=1)
model.fit(train, validation_data=val,
          epochs=500, callbacks=[estop],
          verbose=1)

Epoch 1/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 416ms/step - accuracy: 0.4914 - loss: 0.7923 - val_accuracy: 0.5900 - val_loss: 0.6693
Epoch 2/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 581ms/step - accuracy: 0.5671 - loss: 0.7179 - val_accuracy: 0.5800 - val_loss: 0.6634
Epoch 3/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 572ms/step - accuracy: 0.5493 - loss: 0.7352 - val_accuracy: 0.6233 - val_loss: 0.6575
Epoch 4/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 574ms/step - accuracy: 0.5700 - loss: 0.7176 - val_accuracy: 0.6100 - val_loss: 0.6574
Epoch 5/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 587ms/step - accuracy: 0.5807 - loss: 0.6884 - val_accuracy: 0.6533 - val_loss: 0.6482
Epoch 6/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 575ms/step - accuracy: 0.5857 - loss: 0.7002 - val_accuracy: 0.6533 - val_loss: 0.6467
Epoch 7/500
[1m

<keras.src.callbacks.history.History at 0x2858b4c3290>

In [26]:
test_loss, test_accuracy = model.evaluate(test, verbose=0)
print("Test Accuracy:", test_accuracy)
print("Test Loss:", test_loss)

Test Accuracy: 0.5666666626930237
Test Loss: 0.7065443396568298


## MobileNetV2 Improved - Last layers of base model unfrozen

In [27]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
# unfreezing a few of the last layers
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

In [28]:
# adding data augmentation for make model more robust
data_augmentation = Sequential(
    [
        keras.layers.RandomFlip('horizontal'),
        keras.layers.RandomRotation(0.1),
        keras.layers.RandomZoom(0.1),
        keras.layers.RandomBrightness(factor=0.2)
    ]
)

In [29]:
model = Sequential()
model.add(Input(shape=img_size+(3,)))
model.add(data_augmentation)
model.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model.add(base_model),
model.add(GlobalAveragePooling2D()),
model.add(Dropout(0.3)),
model.add(Dense(1, activation='sigmoid', kernel_regularizer=keras.regularizers.l2(1e-4)))
model.summary()

In [30]:
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'])

In [31]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
lrplateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)
model.fit(train, validation_data=val,
          epochs=500, callbacks=[estop, lrplateau],
          verbose=1)

Epoch 1/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 560ms/step - accuracy: 0.4929 - loss: 0.7872 - val_accuracy: 0.5033 - val_loss: 0.7483 - learning_rate: 1.0000e-05
Epoch 2/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 645ms/step - accuracy: 0.5400 - loss: 0.7317 - val_accuracy: 0.5000 - val_loss: 0.7539 - learning_rate: 1.0000e-05
Epoch 3/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 642ms/step - accuracy: 0.5507 - loss: 0.7155 - val_accuracy: 0.5033 - val_loss: 0.7634 - learning_rate: 1.0000e-05
Epoch 4/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 675ms/step - accuracy: 0.5657 - loss: 0.7084 - val_accuracy: 0.5033 - val_loss: 0.7652 - learning_rate: 2.0000e-06
Epoch 5/500
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 657ms/step - accuracy: 0.5657 - loss: 0.6959 - val_accuracy: 0.5067 - val_loss: 0.7627 - learning_rate: 2.0000e-06
Epoch 6/500
[1m44/44[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x2862c32fb30>

In [32]:
test_loss, test_accuracy = model.evaluate(test, verbose=0)
print("Test Accuracy:", test_accuracy)
print("Test Loss:", test_loss)

Test Accuracy: 0.5066666603088379
Test Loss: 0.7620180249214172


## Finding Best dropout, l2 regularization parameter, and learing rate 

In [27]:
def build_model(hp):
    base_model = keras.applications.MobileNetV2(
        input_shape=img_size+(3,),
        include_top=False,
        weights='imagenet')
    # unfreezing a few of the last layers
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False
    # adding data augmentation for make model more robust
    data_augmentation = Sequential(
        [
            keras.layers.RandomFlip('horizontal'),
            keras.layers.RandomRotation(0.1),
            keras.layers.RandomZoom(0.1),
            keras.layers.RandomBrightness(factor=0.2)
        ]
    )
    model = Sequential()
    model.add(Input(shape=img_size+(3,)))
    model.add(data_augmentation)
    model.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
    model.add(base_model),
    model.add(GlobalAveragePooling2D()),
    model.add(Dropout(hp.Float('dropout', min_value=0.2, max_value=0.6, step=0.1))),
    model.add(Dense(1, activation='sigmoid', 
                    kernel_regularizer=keras.regularizers.l2(
                        hp.Choice('l2_regularizer_param', values=[1e-6, 1e-5, 1e-4, 1e-3]))))

    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-6, 1e-5, 1e-4, 1e-3])),
        loss='binary_crossentropy',
        metrics=['accuracy'])

    return model

In [28]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=15,
    executions_per_trial=1,
    directory='tuning',
    project_name='params')

Reloading Tuner from tuning\params\tuner0.json


In [31]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
lrplateau = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2)
tuner.search(train, validation_data=val, epochs=500, callbacks=[estop, lrplateau])

In [32]:
best_model = tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [33]:
best_hps = tuner.get_best_hyperparameters(1)[0]
print(best_hps.values)

{'dropout': 0.30000000000000004, 'l2_regularizer_param': 0.001, 'learning_rate': 0.001}


In [34]:
best_model.compile(optimizer=Adam(learning_rate=best_hps['learning_rate']),
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

In [35]:
best_loss, best_accuracy = best_model.evaluate(test, verbose=0)
print('Best accuracy:', best_accuracy)
print('Best loss:', best_loss)

Best accuracy: 0.6566666960716248
Best loss: 1.7716784477233887


**Tuned MobileNetV2-based transfer learning model gave a (frame-level) test accuracy of 65.7%.**

In [36]:
best_model.save('artifacts/tuned_mobilenetv2_model.keras')

In [37]:
predictions = best_model.predict(test)
y_pred = np.array([[1] if pred[0]>=0.5 else [0] for pred in predictions])
video_preds = get_video_predictions(file_names, class_names, y_true, y_pred)
video_preds

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 406ms/step


Unnamed: 0,video,actual_class,predicted_class
0,01_11__meeting_serious__9OM3VE0Y,fake,fake
1,01_12__outside_talking_pan_laughing__TNI7KUZ6,fake,fake
2,02_03__walking_outside_cafe_disgusted__QH3Y0IG0,fake,fake
3,02_07__meeting_serious__1JCLEEBQ,fake,fake
4,02_09__kitchen_pan__HIH8YA82,fake,fake
5,02_15__walking_and_outside_surprised__I8G2LWD1,fake,fake
6,03_06__podium_speech_happy__83ABVHC3,fake,fake
7,03_07__walk_down_hall_angry__IFSURI9X,fake,real
8,03_09__outside_talking_still_laughing__RCETIXYL,fake,fake
9,03_09__secret_conversation__RCETIXYL,fake,fake


In [38]:
accuracy = len(video_preds[video_preds['actual_class']==video_preds['predicted_class']])/len(video_preds)
print(accuracy)

0.65


**The video-level test accuracy achieved is 65%.**