In [2]:
import os
import util
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout, Lambda, GlobalAveragePooling2D
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
import keras_tuner as kt
from sklearn.metrics import accuracy_score

In [3]:
base_dir = 'artifacts'
data = os.path.join(base_dir, 'split_3d_data.pkl')
batch_size = 32

# Load Data

In [4]:
X_train, X_val, X_test, y_train, y_val, y_test = util.load_split_3d_data(data)

In [5]:
X_train.shape, X_test.shape, X_val.shape

((280, 16, 224, 224, 3), (60, 16, 224, 224, 3), (60, 16, 224, 224, 3))

In [6]:
num_frames, img_size = X_train.shape[1], X_train.shape[2:4]
print(num_frames, img_size)

16 (224, 224)


In [7]:
y_train.shape, y_test.shape, y_val.shape

((280,), (60,), (60,))

## One-Hot Encoding of Target

In [8]:
labels = pd.factorize(y_val)[1]
print(labels)

['real' 'fake']


In [9]:
y_train, y_val, y_test = util.one_hot_encoding(num_categories=len(labels),
                                               split=3,
                                               y_train=pd.factorize(y_train)[0], 
                                               y_val=pd.factorize(y_val)[0], 
                                               y_test=pd.factorize(y_test)[0])

In [10]:
y_train.shape, y_test.shape, y_val.shape

((280, 2), (60, 2), (60, 2))

## Converting 3D to 2D Data

In [11]:
num_test = X_test.shape[0]

In [12]:
train_frames, val_frames, test_frames = util.convert_3d_to_2d(split=3,
                                                         train=(X_train, y_train),
                                                         val=(X_val,y_val),
                                                         test=(X_test, y_test))
X_train_frames, y_train_frames = train_frames
X_val_frames, y_val_frames = val_frames
X_test_frames, y_test_frames = test_frames

In [13]:
X_train_frames.shape, X_val_frames.shape, X_test_frames.shape

((4480, 224, 224, 3), (960, 224, 224, 3), (960, 224, 224, 3))

In [14]:
y_train_frames.shape, y_val_frames.shape, y_test_frames.shape

((4480, 2), (960, 2), (960, 2))

# Transfer Learning Model

## MobileNetV2 - Base Model Trainable=False

In [15]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
base_model.trainable = False

In [16]:
model1 = Sequential()
model1.add(Input(shape=img_size+(3,)))
model1.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model1.add(base_model),
model1.add(GlobalAveragePooling2D()),
model1.add(Dense(128, activation='relu'))
model1.add(Dropout(0.5)),
model1.add(Dense(2, activation='softmax'))
model1.summary()




In [17]:
model1.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [18]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model1.fit(X_train_frames, y_train_frames, 
           validation_data=(X_val_frames, y_val_frames),
           epochs=500, batch_size=batch_size,
           callbacks=[estop], verbose=1)

Epoch 1/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 277ms/step - accuracy: 0.6355 - loss: 0.6879 - val_accuracy: 0.6156 - val_loss: 0.6582
Epoch 2/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 268ms/step - accuracy: 0.7143 - loss: 0.5389 - val_accuracy: 0.6458 - val_loss: 0.7318
Epoch 3/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 261ms/step - accuracy: 0.7295 - loss: 0.5039 - val_accuracy: 0.6156 - val_loss: 0.7900
Epoch 4/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 263ms/step - accuracy: 0.7527 - loss: 0.4821 - val_accuracy: 0.6333 - val_loss: 0.7786
Epoch 5/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 265ms/step - accuracy: 0.7656 - loss: 0.4615 - val_accuracy: 0.6458 - val_loss: 0.8154
Epoch 6/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 266ms/step - accuracy: 0.7717 - loss: 0.4421 - val_accuracy: 0.5958 - val_loss: 0.8371
Epoc

<keras.src.callbacks.history.History at 0x1b46e0576b0>

In [19]:
test_loss1, test_accuracy1 = model1.evaluate(X_test_frames, y_test_frames, verbose=0)
print("Test Accuracy:", test_accuracy1)
print("Test Loss:", test_loss1)

Test Accuracy: 0.5145833492279053
Test Loss: 0.7304467558860779


In [23]:
frame_pred_probs = model1.predict(X_test_frames, verbose=0).reshape(num_test, num_frames, -1)
frame_pred_labels = np.argmax(frame_pred_probs, axis=2)
print(frame_pred_labels.shape)
video_pred_labels = (np.sum(frame_pred_labels, axis=1) > (num_frames/2)).astype(int)
print(video_pred_labels.shape)
video_test_accuracy1 = accuracy_score(np.argmax(y_test, axis=1), video_pred_labels)
print("Video-level Test accuracy:", video_test_accuracy1)

(60, 16)
(60,)
Video-level Test accuracy: 0.4666666666666667


## MobileNetV2 Improved - Data Augmentation

In [24]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
base_model.trainable = False
# # unfreezing a few of the last layers
# base_model.trainable = True
# for layer in base_model.layers[:-30]:
#     layer.trainable = False

In [25]:
# adding data augmentation for make model more robust
data_augmentation = Sequential([
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomBrightness(0.2),
    keras.layers.RandomContrast(0.2)
])

In [26]:
model2 = Sequential()
model2.add(Input(shape=img_size+(3,)))
model2.add(data_augmentation)
model2.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model2.add(base_model),
model2.add(GlobalAveragePooling2D()),
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5)),
model2.add(Dense(2, activation='softmax'))
model2.summary()

In [27]:
model2.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [28]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model2.fit(X_train_frames, y_train_frames, 
           validation_data=(X_val_frames, y_val_frames),
           epochs=500, batch_size=batch_size,
           callbacks=[estop], verbose=1)

Epoch 1/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 337ms/step - accuracy: 0.5571 - loss: 0.7242 - val_accuracy: 0.5292 - val_loss: 0.6938
Epoch 2/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 327ms/step - accuracy: 0.6344 - loss: 0.6419 - val_accuracy: 0.5854 - val_loss: 0.6870
Epoch 3/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 322ms/step - accuracy: 0.6614 - loss: 0.6117 - val_accuracy: 0.5750 - val_loss: 0.6849
Epoch 4/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 327ms/step - accuracy: 0.6681 - loss: 0.5977 - val_accuracy: 0.5740 - val_loss: 0.6891
Epoch 5/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 328ms/step - accuracy: 0.6875 - loss: 0.5744 - val_accuracy: 0.6073 - val_loss: 0.7139
Epoch 6/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 329ms/step - accuracy: 0.6817 - loss: 0.5699 - val_accuracy: 0.6146 - val_loss: 0.6732
Epoc

<keras.src.callbacks.history.History at 0x1b4913fbb00>

In [29]:
test_loss2, test_accuracy2 = model2.evaluate(X_test_frames, y_test_frames, verbose=0)
print("Test Accuracy:", test_accuracy2)
print("Test Loss:", test_loss2)

Test Accuracy: 0.5333333611488342
Test Loss: 0.7877933382987976


In [30]:
frame_pred_probs = model2.predict(X_test_frames, verbose=0).reshape(num_test, num_frames, -1)
frame_pred_labels = np.argmax(frame_pred_probs, axis=2)
print(frame_pred_labels.shape)
video_pred_labels = (np.sum(frame_pred_labels, axis=1) > (num_frames/2)).astype(int)
print(video_pred_labels.shape)
video_test_accuracy2 = accuracy_score(np.argmax(y_test, axis=1), video_pred_labels)
print("Video-level Test accuracy:", video_test_accuracy2)

(60, 16)
(60,)
Video-level Test accuracy: 0.5333333333333333


## MobileNetV2 Improved - Last layers of base model unfrozen & Data Augmentation

In [31]:
base_model = keras.applications.MobileNetV2(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet')
# unfreezing a few of the last layers
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

In [32]:
# adding data augmentation for make model more robust
data_augmentation = Sequential([
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomBrightness(0.2),
    keras.layers.RandomContrast(0.2)
])

In [34]:
model3 = Sequential()
model3.add(Input(shape=img_size+(3,)))
model3.add(data_augmentation)
model3.add(Lambda(keras.applications.mobilenet_v2.preprocess_input))
model3.add(base_model)
model3.add(GlobalAveragePooling2D())
model3.add(Dense(128, activation='relu'))
model3.add(Dropout(0.5))
model3.add(Dense(2, activation='softmax'))
model3.summary()

In [35]:
model3.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [36]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model3.fit(X_train_frames, y_train_frames, 
           validation_data=(X_val_frames, y_val_frames),
           epochs=500, batch_size=batch_size,
           callbacks=[estop], verbose=1)

Epoch 1/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 391ms/step - accuracy: 0.5295 - loss: 0.8435 - val_accuracy: 0.4948 - val_loss: 0.7671
Epoch 2/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 368ms/step - accuracy: 0.5685 - loss: 0.7329 - val_accuracy: 0.5156 - val_loss: 0.7658
Epoch 3/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 369ms/step - accuracy: 0.5973 - loss: 0.6910 - val_accuracy: 0.5344 - val_loss: 0.7504
Epoch 4/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 368ms/step - accuracy: 0.6408 - loss: 0.6457 - val_accuracy: 0.5531 - val_loss: 0.7511
Epoch 5/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 375ms/step - accuracy: 0.6413 - loss: 0.6296 - val_accuracy: 0.5677 - val_loss: 0.7431
Epoch 6/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 381ms/step - accuracy: 0.6750 - loss: 0.6033 - val_accuracy: 0.5708 - val_loss: 0.7389
Epoc

<keras.src.callbacks.history.History at 0x1b497587bf0>

In [37]:
test_loss3, test_accuracy3 = model3.evaluate(X_test_frames, y_test_frames, verbose=0)
print("Test Accuracy:", test_accuracy3)
print("Test Loss:", test_loss3)

Test Accuracy: 0.6229166388511658
Test Loss: 0.742871880531311


In [38]:
frame_pred_probs = model3.predict(X_test_frames, verbose=0).reshape(num_test, num_frames, -1)
frame_pred_labels = np.argmax(frame_pred_probs, axis=2)
print(frame_pred_labels.shape)
video_pred_labels = (np.sum(frame_pred_labels, axis=1) > (num_frames/2)).astype(int)
print(video_pred_labels.shape)
video_test_accuracy3 = accuracy_score(np.argmax(y_test, axis=1), video_pred_labels)
print("Video-level Test accuracy:", video_test_accuracy3)

(60, 16)
(60,)
Video-level Test accuracy: 0.6166666666666667


In [39]:
frame_acc = [test_accuracy1, test_accuracy2, test_accuracy3]
video_acc = [video_test_accuracy1, video_test_accuracy2, video_test_accuracy3]
i = np.argmax(frame_acc)
j = np.argmax(video_acc)
print(i,j)

2 2


In [40]:
models = [model1, model2, model3]
models[j].save('artifacts/mobilenetv2.keras')

## XceptionNet

In [14]:
base_model = keras.applications.Xception(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step 


In [15]:
model1 = Sequential(
    [
        Input(shape=img_size+(3,)),
        Lambda(keras.applications.xception.preprocess_input),
        base_model,
        GlobalAveragePooling2D(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax')
    ]
)
model1.summary()




In [17]:
model1.compile(optimizer=Adam(learning_rate=1e-5),
               loss='categorical_crossentropy', metrics=['accuracy'])
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model1.fit(X_train_frames, y_train_frames, 
           validation_data=(X_val_frames, y_val_frames),
           epochs=500, batch_size=batch_size,
           callbacks=[estop], verbose=1)

Epoch 1/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 1s/step - accuracy: 0.5065 - loss: 0.7461 - val_accuracy: 0.4917 - val_loss: 0.7093
Epoch 2/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 1s/step - accuracy: 0.5507 - loss: 0.7045 - val_accuracy: 0.5292 - val_loss: 0.7040
Epoch 3/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 1s/step - accuracy: 0.5850 - loss: 0.6739 - val_accuracy: 0.5427 - val_loss: 0.7001
Epoch 4/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 1s/step - accuracy: 0.6074 - loss: 0.6565 - val_accuracy: 0.5531 - val_loss: 0.7016
Epoch 5/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 1s/step - accuracy: 0.6283 - loss: 0.6414 - val_accuracy: 0.5562 - val_loss: 0.7022
Epoch 6/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 1s/step - accuracy: 0.6411 - loss: 0.6303 - val_accuracy: 0.5604 - val_loss: 0.7007
Epoch 7/500
[1m

<keras.src.callbacks.history.History at 0x13d1bc43cb0>

In [18]:
test_loss1, test_accuracy1 = model1.evaluate(X_test_frames, y_test_frames, verbose=0)
print("Test Accuracy:", test_accuracy1)
print("Test Loss:", test_loss1)

Test Accuracy: 0.5239583253860474
Test Loss: 0.7086097598075867


In [19]:
frame_pred_probs = model1.predict(X_test_frames, verbose=0).reshape(num_test, num_frames, -1)
frame_pred_labels = np.argmax(frame_pred_probs, axis=2)
print(frame_pred_labels.shape)
video_pred_labels = (np.sum(frame_pred_labels, axis=1) > (num_frames/2)).astype(int)
print(video_pred_labels.shape)
video_test_accuracy1 = accuracy_score(np.argmax(y_test, axis=1), video_pred_labels)
print("Video-level Test accuracy:", video_test_accuracy1)

(60, 16)
(60,)
Video-level Test accuracy: 0.5


## XceptionNet Improved - Last layers of base model unfrozen & Data Augmentation

In [20]:
base_model = keras.applications.Xception(
    input_shape=img_size+(3,),
    include_top=False,
    weights='imagenet'
)
# unfreezing a few of the last layers
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

In [21]:
# adding data augmentation for make model more robust
data_augmentation = Sequential([
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.1),
    keras.layers.RandomZoom(0.1),
    keras.layers.RandomBrightness(0.2),
    keras.layers.RandomContrast(0.2)
])

In [22]:
model2 = Sequential()
model2.add(Input(shape=img_size+(3,)))
model2.add(data_augmentation)
model2.add(Lambda(keras.applications.xception.preprocess_input))
model2.add(base_model)
model2.add(GlobalAveragePooling2D())
model2.add(Dense(128, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(2, activation='softmax'))
model2.summary()

In [23]:
model2.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [24]:
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
model2.fit(X_train_frames, y_train_frames, 
           validation_data=(X_val_frames, y_val_frames),
           epochs=500, batch_size=batch_size,
           callbacks=[estop], verbose=1)

Epoch 1/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 2s/step - accuracy: 0.5321 - loss: 0.6957 - val_accuracy: 0.5656 - val_loss: 0.7131
Epoch 2/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 2s/step - accuracy: 0.5855 - loss: 0.6744 - val_accuracy: 0.5396 - val_loss: 0.7187
Epoch 3/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 2s/step - accuracy: 0.6172 - loss: 0.6494 - val_accuracy: 0.5594 - val_loss: 0.7160
Epoch 4/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m216s[0m 2s/step - accuracy: 0.6542 - loss: 0.6234 - val_accuracy: 0.5562 - val_loss: 0.7197
Epoch 5/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 2s/step - accuracy: 0.6770 - loss: 0.5945 - val_accuracy: 0.5750 - val_loss: 0.7271
Epoch 6/500
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 2s/step - accuracy: 0.6931 - loss: 0.5765 - val_accuracy: 0.5927 - val_loss: 0.7265
Epoch 6: early s

<keras.src.callbacks.history.History at 0x13d1ee3b410>

In [25]:
test_loss2, test_accuracy2 = model2.evaluate(X_test_frames, y_test_frames, verbose=0)
print("Test Accuracy:", test_accuracy2)
print("Test Loss:", test_loss2)

Test Accuracy: 0.5260416865348816
Test Loss: 0.7080363631248474


In [26]:
frame_pred_probs = model2.predict(X_test_frames, verbose=0).reshape(num_test, num_frames, -1)
frame_pred_labels = np.argmax(frame_pred_probs, axis=2)
print(frame_pred_labels.shape)
video_pred_labels = (np.sum(frame_pred_labels, axis=1) > (num_frames/2)).astype(int)
print(video_pred_labels.shape)
video_test_accuracy2 = accuracy_score(np.argmax(y_test, axis=1), video_pred_labels)
print("Video-level Test accuracy:", video_test_accuracy2)

(60, 16)
(60,)
Video-level Test accuracy: 0.55


In [27]:
model2.save('artifacts/xceptionnet.keras')

**Best performance was given my MobileNetV2 with last 30 layers unfrozen and data augmentation, achieving ~62% accuracy, while XcpetionNet only achieved 55% accuracy under the same conditions.**