In [1]:
import os
import util
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Rescaling, Conv2D, Conv2DTranspose, Dense, GlobalAveragePooling2D, TimeDistributed, Flatten, GRU, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
import joblib

In [5]:
base_dir = 'other_artifacts'
data = os.path.join(base_dir, 'split_3d_data.pkl')
batch_size = 32

In [6]:
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# Load Data

In [7]:
X_train, X_val, X_test, y_train, y_val, y_test = util.load_split_3d_data(data)

In [8]:
X_train.shape, X_test.shape, X_val.shape

((280, 16, 224, 224, 3), (60, 16, 224, 224, 3), (60, 16, 224, 224, 3))

In [9]:
num_frames, img_size, num_channels = X_train.shape[1], X_train.shape[2:4], X_train.shape[-1]
print(num_frames, img_size, num_channels)
print((num_frames,)+img_size+(num_channels,))

16 (224, 224) 3
(16, 224, 224, 3)


In [10]:
y_train.shape, y_test.shape, y_val.shape

((280,), (60,), (60,))

In [11]:
num_train, num_val, num_test = X_train.shape[0], X_val.shape[0], X_test.shape[0]
num_train, num_val, num_test

(280, 60, 60)

In [12]:
X_train[0]

array([[[[ 10,  53, 121],
         [ 12,  55, 123],
         [ 11,  56, 124],
         ...,
         [187, 181, 184],
         [189, 180, 184],
         [188, 179, 183]],

        [[  9,  52, 120],
         [ 12,  55, 123],
         [ 11,  56, 124],
         ...,
         [187, 181, 184],
         [187, 181, 184],
         [185, 179, 182]],

        [[ 14,  57, 125],
         [ 12,  55, 123],
         [ 11,  56, 124],
         ...,
         [187, 181, 184],
         [185, 181, 184],
         [184, 180, 183]],

        ...,

        [[ 24, 112, 166],
         [ 28, 117, 169],
         [  5,  50, 111],
         ...,
         [155, 163, 167],
         [155, 163, 167],
         [155, 163, 167]],

        [[ 25, 111, 165],
         [ 30, 117, 169],
         [  2,  47, 108],
         ...,
         [155, 163, 167],
         [157, 163, 167],
         [157, 163, 167]],

        [[ 24, 110, 164],
         [ 31, 118, 170],
         [  3,  48, 109],
         ...,
         [155, 163, 167],
        

## Factorizing Target

In [13]:
labels = pd.factorize(y_val)[1]
print(labels)

['real' 'fake']


In [14]:
y_train, y_val, y_test = pd.factorize(y_train)[0], pd.factorize(y_val)[0], pd.factorize(y_test)[0]

In [15]:
y_train.shape, y_test.shape, y_val.shape

((280,), (60,), (60,))

## Extract Real only

In [16]:
real_train_idx = np.where(y_train==0)
print(real_train_idx)

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139], dtype=int64),)


In [17]:
X_real_train = X_train[real_train_idx]
X_real_train.shape

(140, 16, 224, 224, 3)

In [18]:
real_val_idx = np.where(y_val==0)
real_test_idx = np.where(y_test==0)
X_real_val = X_val[real_val_idx]
X_real_test = X_test[real_test_idx]
X_real_val.shape, X_real_test.shape

((30, 16, 224, 224, 3), (30, 16, 224, 224, 3))

# Conv2D Autoencoder

In [19]:
input_shape = (num_frames,)+img_size+(num_channels,)

In [21]:
conv2d_autoencoder = Sequential([
    # encoder
    Input(shape=(input_shape)),
    TimeDistributed(Rescaling(1./255.)),
    TimeDistributed(Conv2D(32, (3,3), strides=(2,2), padding='same', activation='relu')),
    TimeDistributed(Conv2D(64, (3,3), strides=(2,2), padding='same', activation='relu')),
    # decoder
    TimeDistributed(Conv2DTranspose(32, (3,3), strides=(2,2), padding='same', activation='relu')),
    TimeDistributed(Conv2DTranspose(3, (3,3), strides=(2,2), padding='same', activation='linear'))
])
conv2d_autoencoder.summary()

In [19]:
conv2d_autoencoder.compile(optimizer='adam', loss='mean_squared_error')
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
conv2d_autoencoder.fit(X_real_train, X_real_train, 
                       validation_data=(X_real_val, X_real_val),
                       epochs=500, batch_size=16,
                       callbacks=[estop], verbose=1)

Epoch 1/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3s/step - loss: 17711.5527 - val_loss: 16096.5811
Epoch 2/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 3s/step - loss: 17618.5840 - val_loss: 15860.4941
Epoch 3/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3s/step - loss: 16996.4824 - val_loss: 14603.4355
Epoch 4/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 3s/step - loss: 14437.4268 - val_loss: 10465.0479
Epoch 5/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 3s/step - loss: 8483.7451 - val_loss: 5042.9429
Epoch 6/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 3s/step - loss: 5246.4736 - val_loss: 3879.3337
Epoch 7/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 3s/step - loss: 3330.8989 - val_loss: 2550.2131
Epoch 8/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 3s/step - loss: 2333.6174 - val_loss: 1787.5150
Epoch 9/

<keras.src.callbacks.history.History at 0x18b4a872090>

In [20]:
test_loss = conv2d_autoencoder.evaluate(X_real_test, X_real_test, verbose=0)
print("2D Conv Time Distributed Autoencoder MSE:", test_loss)

2D Conv Time Distributed Autoencoder MSE: 62.94225311279297


In [21]:
joblib.dump(conv2d_autoencoder, 'artifacts/conv2d_autoencoder.keras')

['artifacts/conv2d_autoencoder.keras']

In [22]:
conv2d_autoencoder.layers

[<TimeDistributed name=time_distributed, built=True>,
 <TimeDistributed name=time_distributed_1, built=True>,
 <TimeDistributed name=time_distributed_2, built=True>,
 <TimeDistributed name=time_distributed_3, built=True>,
 <TimeDistributed name=time_distributed_4, built=True>]

In [23]:
conv2d_encoder = Sequential(conv2d_autoencoder.layers[:3])
conv2d_encoder.trainable=False

In [24]:
conv2d_encoder.summary()

In [44]:
gru_classifier = Sequential([
    Input(shape=input_shape),
    conv2d_encoder,
    TimeDistributed(GlobalAveragePooling2D()),
    TimeDistributed(Dropout(0.5)),
    TimeDistributed(Dense(256, activation='relu')),
    TimeDistributed(Dropout(0.5)),
    GRU(128, activation='tanh', return_sequences=True),
    GRU(128, activation='tanh', return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
gru_classifier.summary()

In [45]:
gru_classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
estop = EarlyStopping(monitor='val_loss', mode='min',
                      min_delta=1e-5, patience=5,
                      restore_best_weights=True, verbose=1)
gru_classifier.fit(X_train, y_train, 
                       validation_data=(X_val, y_val),
                       epochs=500, batch_size=32,
                       callbacks=[estop], verbose=1)

Epoch 1/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 638ms/step - accuracy: 0.4714 - loss: 0.8071 - val_accuracy: 0.5000 - val_loss: 0.7427
Epoch 2/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 369ms/step - accuracy: 0.4536 - loss: 0.8098 - val_accuracy: 0.5000 - val_loss: 0.7021
Epoch 3/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 371ms/step - accuracy: 0.5179 - loss: 0.7391 - val_accuracy: 0.5000 - val_loss: 0.7255
Epoch 4/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 383ms/step - accuracy: 0.5714 - loss: 0.7240 - val_accuracy: 0.5000 - val_loss: 0.7073
Epoch 5/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 365ms/step - accuracy: 0.4714 - loss: 0.7239 - val_accuracy: 0.5833 - val_loss: 0.6922
Epoch 6/500
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 365ms/step - accuracy: 0.4929 - loss: 0.7296 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 7/500
[1m9/9[0m [32m━━━━

<keras.src.callbacks.history.History at 0x18fd4b62840>

In [46]:
clf_test_loss, clf_test_accuracy = gru_classifier.evaluate(X_test, y_test, verbose=1)
print("Classifier Loss:", clf_test_loss)
print("Classifier Accuracy:", clf_test_accuracy)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 261ms/step - accuracy: 0.5333 - loss: 0.6939
Classifier Loss: 0.6938716769218445
Classifier Accuracy: 0.5333333611488342


* Not the best performance seen but better than baseline and the 3D CNN model.
* But training the autoencoder was time consuming and it still had quite a high mean squared error considering the original pixel values were all rescaled to be between 0 and 1.
* If the dataset was larger this setup could perhaps have given a better performance.