<a href="https://colab.research.google.com/github/iamimpeccable/Deepfake-Detection-/blob/main/MultimodalLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Multimodal Learning**

# **VGG-16 and GRU (Gated Recurrent Unit)**

In [None]:
import zipfile
import os
import numpy as np
import cv2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

# Step 1: Unpack the preprocessed-frame archive into the Colab workspace
archive_path = '/content/PreprocessedForVideo-20241024T231730Z-001.zip'
extract_root = '/content/dataset'
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_root)

# Class folders expected inside the extracted archive
preprocessed_folder1 = '/content/dataset/PreprocessedForVideo/Deepfakes'
preprocessed_folder2 = '/content/dataset/PreprocessedForVideo/original'

# Module-level accumulators that load_images_from_folder() appends to
image_data, labels = [], []

# Step 2: Function to load images from subfolders
def load_images_from_folder(folder, label):
    """Load every readable image under ``folder``'s subfolders into the global lists.

    Images are looked up one level down (``folder/<subfolder>/<image>``). Each
    one is resized to 224x224 (the VGG16 input size used below), converted from
    OpenCV's BGR channel order to RGB, and appended to the module-level
    ``image_data`` list; ``label`` is appended to ``labels`` alongside it.

    Args:
        folder: Directory whose immediate subdirectories contain image files.
        label: Class id recorded for every image loaded from ``folder``.

    Returns:
        None. Results accumulate in the module-level ``image_data`` and
        ``labels`` lists.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the sample
    # order (and hence the seeded train/test split) reproducible across runs.
    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)

        # Only subdirectories hold images; ignore stray files at this level.
        if not os.path.isdir(subfolder_path):
            continue

        for image_name in sorted(os.listdir(subfolder_path)):
            image_path = os.path.join(subfolder_path, image_name)

            # cv2.imread() returns None for files it cannot decode; skip those.
            image = cv2.imread(image_path)
            if image is None:
                continue

            image = cv2.resize(image, (224, 224))
            image_data.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            labels.append(label)

# Load images from both 'Deepfakes' and 'Original' folders
load_images_from_folder(preprocessed_folder1, 1)  # Deepfake class: 1
load_images_from_folder(preprocessed_folder2, 0)  # Original class: 0

# Step 3: Convert lists to numpy arrays.
# Build the image array directly as float32: dividing a uint8 array by 255.0
# would create a float64 copy of the whole dataset and double its memory use.
image_data = np.asarray(image_data, dtype=np.float32)
labels = np.array(labels)

# Normalize the images in place (scale pixel values between 0 and 1)
image_data /= 255.0

# One-hot encode the labels
labels = to_categorical(labels, num_classes=2)

# Step 4: Split the data into training and testing sets.
# NOTE(review): the split is per image. If each subfolder holds the frames of
# one video (TODO confirm), frames of the same video can land in both sets and
# inflate the test score; a group-aware split would avoid that.
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Step 5: Define the VGG16 + GRU Model
# Load pre-trained VGG16 without the top layer.
# NOTE(review): inputs are scaled to [0, 1] above, whereas Keras documents
# vgg16.preprocess_input as the preprocessing for these ImageNet weights;
# confirm the mismatch is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the VGG16 layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers
x = layers.Flatten()(base_model.output)
x = layers.Dense(512, activation='relu')(x)
# Reshape (batch, 512) -> (batch, 1, 512): every image becomes a one-step
# sequence, so the GRU below receives no context from neighbouring frames.
x = layers.Reshape((-1, 512))(x)

# GRU over the (single-step) sequence; returns only the final state
x = layers.GRU(128, return_sequences=False, activation='tanh')(x)

# Add Fully Connected Layers
x = layers.Dense(128, activation='relu')(x)
output = layers.Dense(2, activation='softmax')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=output)

# Step 6: Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 7: Model Summary
model.summary()

# Step 8: Train the model.
# The test split doubles as validation data, so the val_* metrics reported per
# epoch are computed on the same samples as the final evaluation below.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Step 9: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Step 10: Predict on the test set
y_pred = model.predict(X_test)

# Convert softmax outputs / one-hot targets back to class ids
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# Step 11: Classification Report
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))


Epoch 1/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 41ms/step - accuracy: 0.5339 - loss: 0.7121 - val_accuracy: 0.5319 - val_loss: 0.7126
Epoch 2/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.5871 - loss: 0.6595 - val_accuracy: 0.6487 - val_loss: 0.6000
Epoch 3/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.7688 - loss: 0.5038 - val_accuracy: 0.9290 - val_loss: 0.2473
Epoch 4/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.9551 - loss: 0.1426 - val_accuracy: 0.9407 - val_loss: 0.1514
Epoch 5/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.9743 - loss: 0.0712 - val_accuracy: 0.9677 - val_loss: 0.0905
Epoch 6/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.9755 - loss: 0.0697 - val_accuracy: 0.9569 - val_loss: 0.1494
Epoch 7/10
[1m140/140

## **Summary of results**
GRUs are simpler and faster than LSTMs, with fewer parameters, while still capturing temporal dependencies. They can be a drop-in replacement for LSTMs.


1.   The results show that the model was not able to generalize, either on the training data or on new, unseen data.
2.   Thus, the combination of VGG-16 and GRU does not work well for detecting deepfakes.



# **3D CNN AND LSTM**

In [None]:
import zipfile
import os
import numpy as np
import cv2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Input

# Step 1: Unpack the preprocessed-frame archive into the Colab workspace
archive_path = '/content/PreprocessedForVideo-20241024T231730Z-001.zip'
extract_root = '/content/dataset'
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_root)

# Class folders expected inside the extracted archive
preprocessed_folder1 = '/content/dataset/PreprocessedForVideo/Deepfakes'
preprocessed_folder2 = '/content/dataset/PreprocessedForVideo/original'

# Module-level accumulators that load_images_from_folder() appends to
image_data, labels = [], []

# Step 2: Function to load images from subfolders
def load_images_from_folder(folder, label):
    """Load every readable image under ``folder``'s subfolders into the global lists.

    Images are looked up one level down (``folder/<subfolder>/<image>``). Each
    one is resized to 64x64, converted from OpenCV's BGR channel order to RGB,
    and appended to the module-level ``image_data`` list; ``label`` is appended
    to ``labels`` alongside it.

    Args:
        folder: Directory whose immediate subdirectories contain image files.
        label: Class id recorded for every image loaded from ``folder``.

    Returns:
        None. Results accumulate in the module-level ``image_data`` and
        ``labels`` lists.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the sample
    # order (and hence the seeded train/test split) reproducible across runs.
    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)

        # Only subdirectories hold images; ignore stray files at this level.
        if not os.path.isdir(subfolder_path):
            continue

        for image_name in sorted(os.listdir(subfolder_path)):
            image_path = os.path.join(subfolder_path, image_name)

            # cv2.imread() returns None for files it cannot decode; skip those.
            image = cv2.imread(image_path)
            if image is None:
                continue

            # 64x64 matches the Input shape of the model defined further down.
            image = cv2.resize(image, (64, 64))
            image_data.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            labels.append(label)

# Load images from both 'Deepfakes' and 'Original' folders
load_images_from_folder(preprocessed_folder1, 1)  # Deepfake class: 1
load_images_from_folder(preprocessed_folder2, 0)  # Original class: 0

# Step 3: Convert lists to numpy arrays.
# Build the image array directly as float32: dividing a uint8 array by 255.0
# would create a float64 copy of the whole dataset and double its memory use.
image_data = np.asarray(image_data, dtype=np.float32)
labels = np.array(labels)

# Normalize the images in place (scale pixel values between 0 and 1)
image_data /= 255.0

# One-hot encode the labels
labels = to_categorical(labels, num_classes=2)

# Step 4: Split the data into training and testing sets.
# NOTE(review): the split is per image. If each subfolder holds the frames of
# one video (TODO confirm), frames of the same video can land in both sets and
# inflate the test score; a group-aware split would avoid that.
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Step 5: Define the 3D CNN + LSTM model

input_layer = Input(shape=(64, 64, 3))  # One RGB image per sample

# **3D CNN Layers for Feature Extraction**
# Add a trailing feature axis so Conv3D can run on a (64, 64, 3) volume. The
# third axis is the colour channel of a single image, not a time axis.
x = layers.Reshape((64, 64, 3, 1))(input_layer)

# Conv3D layers for feature extraction; each (2, 2, 1) pool halves height and
# width and leaves the colour axis at 3: 64 -> 32 -> 16 -> 8.
x = layers.Conv3D(32, (3, 3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling3D((2, 2, 1))(x)

x = layers.Conv3D(64, (3, 3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling3D((2, 2, 1))(x)

x = layers.Conv3D(128, (3, 3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling3D((2, 2, 1))(x)

# **Reshape the Output for LSTM**
# At this point the tensor is (batch, 8, 8, 3, 128). Flattening the first three
# axes gives a sequence of 8 * 8 * 3 = 192 steps with 128 features each, so
# the LSTM walks over positions inside one image rather than over video frames.
x = layers.Reshape((-1, 128))(x)

# **LSTM Layer**: returns only the final state of the 192-step sequence
x = layers.LSTM(128, return_sequences=False, activation='tanh')(x)

# **Fully Connected Layers (Dense Layers)**
x = layers.Dense(128, activation='relu')(x)
output_layer = layers.Dense(2, activation='softmax')(x)  # 2 classes: Deepfake (1) and Original (0)

# Step 6: Define the model
model = models.Model(inputs=input_layer, outputs=output_layer)

# Step 7: Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 8: Model Summary
model.summary()

# Step 9: Train the model.
# The test split doubles as validation data, so the val_* metrics reported per
# epoch are computed on the same samples as the final evaluation below.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Step 10: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Step 11: Predict on the test set
y_pred = model.predict(X_test)

# Convert softmax outputs / one-hot targets back to class ids
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# Step 12: Classification Report
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))


Epoch 1/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.5234 - loss: 0.6939 - val_accuracy: 0.5553 - val_loss: 0.6900
Epoch 2/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5280 - loss: 0.6923 - val_accuracy: 0.5553 - val_loss: 0.6926
Epoch 3/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.5244 - loss: 0.6927 - val_accuracy: 0.5553 - val_loss: 0.6861
Epoch 4/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5223 - loss: 0.6922 - val_accuracy: 0.5553 - val_loss: 0.6873
Epoch 5/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5151 - loss: 0.6932 - val_accuracy: 0.5553 - val_loss: 0.6886
Epoch 6/20
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.5276 - loss: 0.6921 - val_accuracy: 0.5553 - val_loss: 0.6884
Epoch 7/20
[1m140/140

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# **VGG-16 AND TCN**

In [None]:
import zipfile
import os
import numpy as np
import cv2
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model

# Step 1: Unpack the preprocessed-frame archive into the Colab workspace
archive_path = '/content/PreprocessedForVideo-20241024T231730Z-001.zip'
extract_root = '/content/dataset'
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_root)

# Class folders expected inside the extracted archive
preprocessed_folder1 = '/content/dataset/PreprocessedForVideo/Deepfakes'
preprocessed_folder2 = '/content/dataset/PreprocessedForVideo/original'

# Module-level accumulators that load_images_from_folder() appends to
image_data, labels = [], []

# Step 2: Function to load images from subfolders
def load_images_from_folder(folder, label):
    """Load every readable image under ``folder``'s subfolders into the global lists.

    Images are looked up one level down (``folder/<subfolder>/<image>``). Each
    one is resized to 224x224 (the VGG16 input size used below), converted from
    OpenCV's BGR channel order to RGB, and appended to the module-level
    ``image_data`` list; ``label`` is appended to ``labels`` alongside it.

    Args:
        folder: Directory whose immediate subdirectories contain image files.
        label: Class id recorded for every image loaded from ``folder``.

    Returns:
        None. Results accumulate in the module-level ``image_data`` and
        ``labels`` lists.
    """
    # os.listdir() yields entries in arbitrary order; sorting keeps the sample
    # order (and hence the seeded train/test split) reproducible across runs.
    for subfolder in sorted(os.listdir(folder)):
        subfolder_path = os.path.join(folder, subfolder)

        # Only subdirectories hold images; ignore stray files at this level.
        if not os.path.isdir(subfolder_path):
            continue

        for image_name in sorted(os.listdir(subfolder_path)):
            image_path = os.path.join(subfolder_path, image_name)

            # cv2.imread() returns None for files it cannot decode; skip those.
            image = cv2.imread(image_path)
            if image is None:
                continue

            image = cv2.resize(image, (224, 224))
            image_data.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            labels.append(label)

# Load images from both 'Deepfakes' and 'Original' folders
load_images_from_folder(preprocessed_folder1, 1)  # Deepfake class: 1
load_images_from_folder(preprocessed_folder2, 0)  # Original class: 0

# Step 3: Convert lists to numpy arrays.
# Build the image array directly as float32: dividing a uint8 array by 255.0
# would create a float64 copy of the whole dataset and double its memory use.
image_data = np.asarray(image_data, dtype=np.float32)
labels = np.array(labels)

# Normalize the images in place (scale pixel values between 0 and 1)
image_data /= 255.0

# One-hot encode the labels
labels = to_categorical(labels, num_classes=2)

# Step 4: Split the data into training and testing sets.
# NOTE(review): the split is per image. If each subfolder holds the frames of
# one video (TODO confirm), frames of the same video can land in both sets and
# inflate the test score; a group-aware split would avoid that.
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

# Step 5: Define the VGG16 + Temporal Convolutional Network (TCN) Model
# Load pre-trained VGG16 without the top layer.
# NOTE(review): inputs are scaled to [0, 1] above, whereas Keras documents
# vgg16.preprocess_input as the preprocessing for these ImageNet weights;
# confirm the mismatch is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the VGG16 layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Add custom layers
x = layers.Flatten()(base_model.output)
x = layers.Dense(512, activation='relu')(x)
# Reshape (batch, 512) -> (batch, 1, 512): every image becomes a one-step
# sequence, so the causal convolutions below see that step plus zero padding.
x = layers.Reshape((-1, 512))(x)

# Two causal Conv1D layers followed by averaging over the sequence axis
# (which has length 1 here)
x = layers.Conv1D(128, kernel_size=3, activation='relu', padding='causal')(x)
x = layers.Conv1D(128, kernel_size=3, activation='relu', padding='causal')(x)
x = layers.GlobalAveragePooling1D()(x)

# Add Fully Connected Layers
x = layers.Dense(128, activation='relu')(x)
output = layers.Dense(2, activation='softmax')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=output)

# Step 6: Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 7: Model Summary
model.summary()

# Step 8: Train the model.
# The test split doubles as validation data, so the val_* metrics reported per
# epoch are computed on the same samples as the final evaluation below.
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

# Step 9: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Step 10: Predict on the test set
y_pred = model.predict(X_test)

# Convert softmax outputs / one-hot targets back to class ids
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# Step 11: Classification Report
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))


Epoch 1/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 53ms/step - accuracy: 0.5703 - loss: 0.6981 - val_accuracy: 0.9290 - val_loss: 0.1784
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9045 - loss: 0.2389 - val_accuracy: 0.9686 - val_loss: 0.0917
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9758 - loss: 0.0771 - val_accuracy: 0.9919 - val_loss: 0.0273
Epoch 4/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9933 - loss: 0.0234 - val_accuracy: 0.9596 - val_loss: 0.1011
Epoch 5/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9854 - loss: 0.0400 - val_accuracy: 0.9910 - val_loss: 0.0193
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.9905 - loss: 0.0231
Test Loss: 0.019286101683974266
Test Accuracy: 0.9910152554512024
[1m35/35[0m 

The combination of VGG-16 and a Temporal Convolutional Network has achieved the
required accuracy, and the performance metrics show that the model has generalized well to the test data.

Therefore, of all the combinations implemented as part of Multimodal Learning, this one is the best suited to deepfake detection.


