# Building a Deepfake Detector using Deep Learning Models
This notebook demonstrates the development of a deepfake detection system using multiple pre-trained **CNN (Convolutional Neural Network)** models, such as **ResNet50**, **EfficientNetV2B0** and **Xception**, combined with **LSTM (Long Short-Term Memory)** networks for temporal analysis. The datasets used are **FaceForensics++**, **DFDC** and **Celeb-DF (v2)**. To keep the evaluation unbiased, **Celeb-DF (v2)** is held out entirely: its videos are reserved exclusively for testing and are never used during training or validation. `OpenCV` is used for video frame extraction and preprocessing, while `dlib` is used for face detection and cropping.

## GPU Configuration and Verification with TensorFlow
This section verifies that TensorFlow was built with CUDA support and can see a GPU, then enables memory growth on each GPU so that memory is allocated on demand rather than reserved up front.

In [1]:
import tensorflow as tf

# Report whether this TensorFlow build has CUDA support and which GPUs are visible.
print("TensorFlow CUDA Support:", tf.test.is_built_with_cuda())
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(physical_devices))

if physical_devices:
    try:
        for i, gpu in enumerate(physical_devices):
            # Memory growth must be configured before the GPU is first used;
            # TensorFlow raises RuntimeError otherwise, which is reported below.
            tf.config.experimental.set_memory_growth(gpu, True)
            # The details dict is not guaranteed to contain every key, so fall back
            # to the device's own name instead of failing with a KeyError.
            device_name = tf.config.experimental.get_device_details(gpu).get('device_name', gpu.name)
            print(f"Enabled memory growth for GPU {i}: {device_name}")
    except RuntimeError as e:
        print("Error enabling GPU memory growth:", e)
else:
    print("No GPU detected. Ensure proper GPU setup.")

TensorFlow CUDA Support: True
Num GPUs Available: 1
Enabled memory growth for GPU 0: NVIDIA GeForce 940MX


## Importing Libraries
Importing all necessary libraries at the top to ensure better organization, easy debugging and smooth execution of the entire pipeline.

In [2]:
import os
import re
import shutil
import sys
import time

import cv2
import dlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.applications import ResNet50, EfficientNetV2B0, Xception
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, Callback, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Dropout, LSTM, TimeDistributed, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## 3. Utility Functions and Custom Callbacks for Logging

In [3]:
# Learning Rate Logger
class LearningRateLogger(Callback):
    """Record the optimizer's learning rate after every epoch and export it to Excel.

    Args:
        writer: Open ``pd.ExcelWriter``. The 'Learning Rates' sheet is written to it
            when training ends; this callback does not close the writer.
    """

    def __init__(self, writer):
        super().__init__()  # let the Keras base class set up its own attributes
        self.writer = writer
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        # Read `learning_rate` rather than the legacy `lr` alias, which newer Keras
        # optimizers may not provide. `.numpy()` assumes a variable-backed learning
        # rate (as when ReduceLROnPlateau is used), not a LearningRateSchedule.
        lr = float(self.model.optimizer.learning_rate.numpy())
        # Keras epochs are 0-based; store them 1-based.
        self.logs.append({'epoch': epoch + 1, 'learning_rate': lr})

    def on_train_end(self, logs=None):
        df = pd.DataFrame(self.logs)
        df.to_excel(self.writer, sheet_name='Learning Rates', index=False)

# Precision, Recall, F1 Score Logger
class MetricsLogger(Callback):
    """Log precision, recall and F1 after every epoch and export them to Excel.

    The metrics are computed on a single batch pulled from ``validation_data`` at the
    end of each epoch, so they are a coarse estimate rather than a score over the
    whole validation set.

    Args:
        validation_data: Iterator yielding ``(X, y)`` batches; one batch is consumed
            per epoch via ``next()``.
        writer: Open ``pd.ExcelWriter``. The 'Precision-Recall-F1' sheet is written
            to it when training ends; this callback does not close the writer.
    """

    def __init__(self, validation_data, writer):
        # Call the base initializer first: it may reset `validation_data`, so the
        # iterator has to be assigned afterwards.
        super().__init__()
        self.validation_data = validation_data
        self.writer = writer
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        # Extract a single batch from the generator
        val_x, val_y_true = next(self.validation_data)
        # verbose=0 keeps the per-epoch predict progress bar out of the training log.
        val_y_pred = (self.model.predict(val_x, verbose=0) > 0.5).astype("int32").flatten()
        val_y_true = val_y_true.flatten()

        # zero_division=0 on all three metrics: a small batch can easily contain no
        # predicted (or no true) positives, which would otherwise emit warnings.
        precision = precision_score(val_y_true, val_y_pred, zero_division=0)
        recall = recall_score(val_y_true, val_y_pred, zero_division=0)
        f1 = f1_score(val_y_true, val_y_pred, zero_division=0)

        # Keras epochs are 0-based; store them 1-based.
        self.logs.append({'epoch': epoch + 1, 'precision': precision, 'recall': recall, 'f1_score': f1})

    def on_train_end(self, logs=None):
        df = pd.DataFrame(self.logs)
        df.to_excel(self.writer, sheet_name='Precision-Recall-F1', index=False)

# Epoch Time Logger
class EpochTimeLogger(Callback):
    """Measure how long each epoch takes (in seconds) and export the timings to Excel.

    Args:
        writer: Open ``pd.ExcelWriter``. The 'Epoch Times' sheet is written to it
            when training ends; this callback does not close the writer.
    """

    def __init__(self, writer):
        super().__init__()  # let the Keras base class set up its own attributes
        self.writer = writer
        self.logs = []
        self.start_time = None

    def on_epoch_begin(self, epoch, logs=None):
        # perf_counter() is monotonic, so the measured duration cannot be distorted
        # by system clock adjustments the way time.time() can.
        self.start_time = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        duration = time.perf_counter() - self.start_time
        # Keras epochs are 0-based; store them 1-based.
        self.logs.append({'epoch': epoch + 1, 'duration': duration})

    def on_train_end(self, logs=None):
        df = pd.DataFrame(self.logs)
        df.to_excel(self.writer, sheet_name='Epoch Times', index=False)

# Batch Metrics Logger
class BatchMetricsLogger(Callback):
    """Append per-batch loss and accuracy for training and validation to a CSV file.

    Each row has six columns: ``epoch,batch,loss,accuracy,val_loss,val_accuracy``.
    Training rows leave the two validation columns empty; validation rows leave the
    two training columns empty.

    Args:
        filename: Path of the CSV file. It is created (or truncated) and the header
            row is written as soon as the callback is constructed.
    """

    def __init__(self, filename):
        super().__init__()  # let the Keras base class set up its own attributes
        self.filename = filename
        # 1-based index of the epoch currently running; stays 0 until the first
        # epoch begins (e.g. if the callback is used outside of fit()).
        self.current_epoch = 0
        with open(self.filename, 'w') as f:
            f.write('epoch,batch,loss,accuracy,val_loss,val_accuracy\n')

    def on_epoch_begin(self, epoch, logs=None):
        # self.params['epochs'] is the TOTAL number of epochs, so the current epoch
        # has to be tracked here instead.
        self.current_epoch = epoch + 1

    def on_train_batch_end(self, batch, logs=None):
        logs = logs or {}
        with open(self.filename, 'a') as f:
            # Two trailing commas = two empty validation columns (six fields total).
            f.write(f'{self.current_epoch},{batch},{logs.get("loss", 0):.4f},{logs.get("accuracy", 0):.4f},,\n')

    def on_test_batch_end(self, batch, logs=None):
        logs = logs or {}
        with open(self.filename, 'a') as f:
            # Per-batch test logs use the plain metric names ('loss', 'accuracy');
            # the 'val_' prefix only appears in the epoch-level logs.
            f.write(
                f'{self.current_epoch},{batch},,,{logs.get("loss", 0):.4f},{logs.get("accuracy", 0):.4f}\n')

# Custom Callbacks
class _ExcelWriterCloser(Callback):
    """Close (and thereby save) a shared ``pd.ExcelWriter`` when training ends."""

    def __init__(self, writer):
        super().__init__()
        self.writer = writer

    def on_train_end(self, logs=None):
        self.writer.close()


def create_callbacks(model_name, val_generator):
    """Build the full callback list used to train one model.

    Args:
        model_name (str): Prefix for every artefact written during training:
            ``{model_name}_model.h5``, ``{model_name}_training_log.csv``,
            ``{model_name}_training_logs.xlsx`` and ``{model_name}_batch_logs.csv``.
        val_generator: Iterator yielding ``(X, y)`` validation batches, handed to
            ``MetricsLogger``.

    Returns:
        list: Keras callbacks, ready to pass to ``model.fit(callbacks=...)``.
    """
    # One workbook is shared by the three Excel-writing loggers below.
    log_writer = pd.ExcelWriter(f'{model_name}_training_logs.xlsx', engine='openpyxl')
    return [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ModelCheckpoint(f'{model_name}_model.h5', save_best_only=True, monitor='val_loss', verbose=1),
        CSVLogger(f'{model_name}_training_log.csv'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1),
        LearningRateLogger(writer=log_writer),
        MetricsLogger(validation_data=val_generator, writer=log_writer),
        EpochTimeLogger(writer=log_writer),
        BatchMetricsLogger(filename=f'{model_name}_batch_logs.csv'),
        # Must stay last: the writer is local to this function, so nothing else can
        # close it. Callbacks run in list order, so by the time this one fires the
        # loggers above have already written their sheets.
        _ExcelWriterCloser(writer=log_writer),
    ]

## 4. Augmentation and Sequence Generation
Enhancing model generalization through data augmentation and implementing a sequence generator to efficiently feed sequential image frames into CNN-LSTM models.

### 4.1 Defining Custom Sequence Generator for Training and Validation

In [4]:
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


def _natural_sort_key(path):
    """Sort key that orders numbered files numerically ('frame_2' before 'frame_10')."""
    tokens = re.split(r"([0-9]+)", os.path.basename(path).lower())
    # re.split with a capturing group alternates text / digits / text / ..., so the
    # odd positions are always the digit runs.
    return [int(token) if index % 2 else token for index, token in enumerate(tokens)]


def _collect_frame_groups(class_dir, sequence_length):
    """Return (groups, image_count) for one class directory.

    Each group is the naturally-sorted list of image paths found in one folder that
    holds at least ``sequence_length`` images. If no single folder is large enough
    but the class as a whole is, all frames are pooled into one group.
    """
    groups, all_frames = [], []
    for subdir, dirnames, files in os.walk(class_dir):
        dirnames.sort()  # make the traversal order deterministic
        frames = sorted(
            (os.path.join(subdir, name) for name in files if name.lower().endswith(_IMAGE_EXTENSIONS)),
            key=_natural_sort_key,
        )
        all_frames.extend(frames)
        if len(frames) >= sequence_length:
            groups.append(frames)

    if not groups and len(all_frames) >= sequence_length:
        print(f"Warning: No single folder under {class_dir} holds {sequence_length} images; "
              "sequences will be cut from the pooled, sorted frame list instead.")
        groups.append(all_frames)

    return groups, len(all_frames)


def _load_sequence(frame_paths, target_size, datagen):
    """Load, preprocess and optionally augment one sequence of frames.

    Returns an array of shape (len(frame_paths), height, width, 3), or None if any
    frame cannot be read or processed, so that a batch never contains a sequence of
    the wrong length.
    """
    frames = []
    transform = None  # drawn once so every frame of the sequence gets the same augmentation
    for img_path in frame_paths:
        img = cv2.imread(img_path)
        if img is None:
            print(f"Warning: Failed to load image: {img_path}. Skipping.")
            return None
        try:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # cv2.resize expects (width, height); target_size is (height, width).
            img = cv2.resize(img, (target_size[1], target_size[0]))
            img = img.astype(np.float32) / 255.0  # scale to [0, 1]
            if datagen is not None:
                if transform is None:
                    transform = datagen.get_random_transform(img.shape)
                img = datagen.apply_transform(img, transform)
        except Exception as e:
            print(f"Error processing image {img_path}: {e}. Skipping.")
            return None
        frames.append(img)
    return np.stack(frames)


def stratified_sequence_data_generator(
    data_dir, batch_size, sequence_length, target_size=(224, 224), augment=True, max_iterations=None, shuffle=True):
    """
    Stratified generator of frame sequences for training and validation.

    Every sequence is a window of ``sequence_length`` consecutive frames taken from
    one folder, in natural filename order, so the frames keep their temporal order.
    Each folder below a class directory is assumed to hold the frames of one video
    (TODO confirm against the face-cropping step); if all frames of a class sit in
    one flat folder, windows are cut from the sorted file list and may span more
    than one video.

    Each batch draws ``batch_size // 2`` sequences from every usable class, so
    batches are class-balanced without oversampling the file lists. A class whose
    directory is missing, or which has too few images, is skipped with a message.

    Frames are converted to RGB, resized to ``target_size`` and scaled to [0, 1].
    NOTE(review): the pretrained Keras backbones each define their own input
    preprocessing; confirm that [0, 1] scaling is what each extractor expects.

    Args:
        data_dir (str): Path to the dataset directory containing class subfolders ('real', 'fake').
        batch_size (int): Number of sequences per batch (split evenly across classes).
        sequence_length (int): Number of frames per sequence.
        target_size (tuple): Dimensions to resize each frame to (height, width).
        augment (bool): Whether to apply one random rotation/zoom/flip per sequence.
        max_iterations (int): Maximum number of batches to generate (None = endless).
        shuffle (bool): Whether to shuffle the order of sequences inside each batch.

    Yields:
        Tuple of (X, y): X is a float32 array of shape
        (n, sequence_length, height, width, 3); y is the array of n labels
        (0 = real, 1 = fake).

    Raises:
        ValueError: If no class has enough images to form a single sequence.
        RuntimeError: If no sequence at all could be loaded for a batch.
    """
    print(f"\n--- Initializing Stratified Sequence Data Generator ---")
    print(f"Data Directory: {data_dir}")
    print(f"Batch Size: {batch_size}, Sequence Length: {sequence_length}, Target Size: {target_size}, Augment: {augment}, Shuffle: {shuffle}\n")

    # Augmentation pipeline (only built when augmentation is requested)
    datagen = ImageDataGenerator(
        rotation_range=10,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode="nearest"
    ) if augment else None

    # Class directories and labels
    class_dirs = {"real": 0, "fake": 1}
    frame_groups = {}  # class name -> list of per-folder frame lists

    print("Collecting image paths and labels...")
    for class_name in class_dirs:
        class_dir = os.path.join(data_dir, class_name)

        if not os.path.exists(class_dir):
            print(f"Error: Class directory {class_dir} not found. Skipping.")
            continue

        print(f"Scanning directory: {class_dir}")
        groups, image_count = _collect_frame_groups(class_dir, sequence_length)
        print(f"Collected {image_count} images for class '{class_name}'.")

        if image_count == 0:
            print(f"Warning: No images found for class '{class_name}'. This class will be skipped.")
        elif not groups:
            print(f"Error: Not enough images in class '{class_name}' to form sequences.")
        else:
            frame_groups[class_name] = groups

    print("\nImage collection completed for all classes.")

    if not frame_groups:
        raise ValueError(
            f"No class under {data_dir} has at least {sequence_length} images; cannot build sequences.")

    # Ensure class balance per batch
    class_batch_size = batch_size // len(class_dirs)
    assert class_batch_size > 0, "Batch size must be at least the number of classes."

    # Bound the resampling done when frames fail to load, so a folder full of
    # unreadable files cannot stall the generator forever.
    max_attempts = class_batch_size * 10

    iterations = 0
    while max_iterations is None or iterations < max_iterations:
        batch_sequences, batch_labels = [], []

        for class_name, groups in frame_groups.items():
            label = class_dirs[class_name]
            collected = attempts = 0
            while collected < class_batch_size and attempts < max_attempts:
                attempts += 1
                # Pick a folder, then a random window of consecutive frames inside it.
                frames = groups[np.random.randint(len(groups))]
                start_idx = np.random.randint(0, len(frames) - sequence_length + 1)
                sequence = _load_sequence(
                    frames[start_idx:start_idx + sequence_length], target_size, datagen)
                if sequence is None:
                    continue  # unreadable frame: draw another window instead
                batch_sequences.append(sequence)
                batch_labels.append(label)
                collected += 1

            if collected < class_batch_size:
                print(f"Warning: Only {collected} of {class_batch_size} sequences could be loaded for class '{class_name}'.")

        if not batch_sequences:
            raise RuntimeError(f"Could not load any sequence from {data_dir}.")

        X = np.stack(batch_sequences).astype(np.float32, copy=False)
        y = np.array(batch_labels)

        # Final shuffle to mix sequences from different classes
        if shuffle:
            order = np.random.permutation(len(y))
            X, y = X[order], y[order]

        yield X, y
        iterations += 1

### 4.2 Initializing Data Generators for Training and Validation

In [5]:
# Hyperparameters shared by the training and validation generators
TARGET_SIZE = (224, 224)  # Resize dimensions
SEQUENCE_LENGTH = 10  # Recommended frames per sequence is 10 (Reduce value according to need)
BATCH_SIZE = 8  # Recommended batch size is 8 (Reduce value according to need)

# Both splits live under <current working directory>/Cropped_Faces
print("--- Setting Up Training and Validation Dataset Paths ---")
base_dir = os.getcwd()
train_dir, val_dir = (
    os.path.join(base_dir, "Cropped_Faces", split) for split in ("train", "val")
)
print(f"Training Directory: {train_dir}")
print(f"Validation Directory: {val_dir}")

# Training batches: stratified sampling with augmentation switched on
print("\n--- Creating Stratified Training Generator ---")
train_generator = stratified_sequence_data_generator(
    train_dir,
    BATCH_SIZE,
    SEQUENCE_LENGTH,
    target_size=TARGET_SIZE,
    augment=True,
)
print("Training generator created successfully.")

# Validation batches: same stratified sampling, augmentation switched off
print("\n--- Creating Stratified Validation Generator ---")
val_generator = stratified_sequence_data_generator(
    val_dir,
    BATCH_SIZE,
    SEQUENCE_LENGTH,
    target_size=TARGET_SIZE,
    augment=False,
)
print("Validation generator created successfully.")

--- Setting Up Training and Validation Dataset Paths ---
Training Directory: C:\Users\atul\Cropped_Faces\train
Validation Directory: C:\Users\atul\Cropped_Faces\val

--- Creating Stratified Training Generator ---
Training generator created successfully.

--- Creating Stratified Validation Generator ---
Validation generator created successfully.


## 5. Model Definition
Defining the CNN-LSTM architectures, including separate models for ResNet50, EfficientNetV2B0 and Xception, along with a combined model that fuses features from all three networks, while incorporating an LSTM layer to capture temporal dependencies in the extracted features.

In [6]:
# Creating CNN-LSTM Model
def create_cnn_lstm_model(feature_extractor, sequence_length):
    """Wrap a pre-trained CNN in a sequence classifier (CNN per frame, then LSTM).

    The extractor is applied to every frame, its feature maps are average-pooled to
    one vector per frame, and an LSTM followed by a small dense head turns the
    sequence of vectors into a single sigmoid output.

    Args:
        feature_extractor: Keras model without its classification head. It is frozen
            in place (``trainable`` is set to False on the object passed in).
        sequence_length (int): Number of frames per input sequence.

    Returns:
        An uncompiled ``Sequential`` model taking inputs of shape
        (batch, sequence_length, *frame_shape).
    """
    print(f"Creating CNN-LSTM model for {feature_extractor.name}...")
    feature_extractor.trainable = False  # Freeze the pre-trained feature extractor

    # Take the per-frame shape from the extractor itself (dropping its batch axis)
    # rather than from a notebook-level constant, so the two can never disagree.
    frame_shape = tuple(feature_extractor.input_shape[1:])

    model = Sequential([
        TimeDistributed(feature_extractor, input_shape=(sequence_length, *frame_shape)),
        TimeDistributed(GlobalAveragePooling2D()),
        LSTM(128, return_sequences=False),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')  # Binary classification
    ])
    print(f"{feature_extractor.name}-based CNN-LSTM model created successfully.")
    return model

### 5.1 Loading ResNet50 Pre-trained Feature Extractor and Model Compilation

In [7]:
# ResNet50 backbone: ImageNet weights, classification head removed
resnet_extractor = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=TARGET_SIZE + (3,),
)
print("ResNet50 feature extractor loaded successfully.\n")

# Wrap the backbone in the shared CNN-LSTM architecture
resnet_model = create_cnn_lstm_model(resnet_extractor, SEQUENCE_LENGTH)

# Binary classification setup: Adam optimizer with binary cross-entropy loss
print("\n--- Compiling ResNet50 Model ---")
resnet_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)
print("ResNet50 model compiled successfully.\n")

# Show the layer-by-layer summary
print("--- ResNet50 Model Summary ---")
resnet_model.summary()

ResNet50 feature extractor loaded successfully.

Creating CNN-LSTM model for resnet50...
resnet50-based CNN-LSTM model created successfully.

--- Compiling ResNet50 Model ---
ResNet50 model compiled successfully.

--- ResNet50 Model Summary ---
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed (TimeDistr  (None, 10, 7, 7, 2048)   23587712  
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 10, 2048)         0         
 tributed)                                                       
                                                                 
 lstm (LSTM)                 (None, 128)               1114624   
                                                                 
 dense (Dense)               (None, 128)               16512     
         

### 5.2 Callbacks for ResNet

In [11]:
# Everything that observes or steers the ResNet training run is created here
print("--- Setting Up Training Callbacks for ResNet ---")

# Built-in Keras callbacks: early stopping, best-model checkpoint, per-epoch CSV log
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint_resnet = ModelCheckpoint(
    'resnet_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
csv_logger_resnet = CSVLogger('resnet_training_log.csv')

# Halve the learning rate after 3 epochs without val_loss improvement, down to 1e-6
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Workbook shared by the Excel-writing loggers; it is closed in the training cell
log_writer_resnet = pd.ExcelWriter('resnet_training_logs.xlsx', engine='openpyxl')

# Custom loggers defined earlier in this notebook
lr_logger_resnet = LearningRateLogger(writer=log_writer_resnet)
metrics_logger_resnet = MetricsLogger(writer=log_writer_resnet, validation_data=val_generator)
epoch_time_logger_resnet = EpochTimeLogger(writer=log_writer_resnet)
batch_metrics_logger_resnet = BatchMetricsLogger(filename='resnet_batch_logs.csv')

print("EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau and ExcelWriter callbacks initialized for ResNet50 model.")

--- Setting Up Training Callbacks for ResNet ---
EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau and ExcelWriter callbacks initialized for ResNet50 model.


### 5.3 Training ResNet-Based Model

In [13]:
# Training schedule: epochs, training steps per epoch, validation steps per epoch
EPOCHS, STEPS_PER_EPOCH, VALIDATION_STEPS = 20, 300, 150

# Fit the ResNet-based model on the generator streams
print("--- Training ResNet Model ---")
history_resnet = resnet_model.fit(
    x=train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=VALIDATION_STEPS,
    # The list order is kept as-is: it is the order in which the callbacks run.
    callbacks=[
        early_stopping,
        checkpoint_resnet,
        csv_logger_resnet,
        lr_logger_resnet,
        reduce_lr,
        metrics_logger_resnet,
        epoch_time_logger_resnet,
        batch_metrics_logger_resnet,
    ],
)

# Closing the writer is what saves the sheets written by the custom loggers
print("--- Saving Custom Logs for ResNet ---")
log_writer_resnet.close()
print("Custom logs for ResNet saved successfully.\n")

--- Training ResNet Model ---

--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\train
Batch Size: 8, Sequence Length: 10, Target Size: (224, 224), Augment: True, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\train\real
Collected 139606 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\train\fake
Collected 123036 images for class 'fake'.

Image collection and shuffling completed for all classes.

Generating a batch of 8 sequences...
Epoch 1/20
--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\val
Batch Size: 8, Sequence Length: 10, Target Size: (224, 224), Augment: False, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\val\real
Collected 34902 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\val\fake
Collected 30760 images for class 'fak

### 5.4 Loading Pre-trained Feature Extractors: EfficientNetV2B0

In [8]:
# EfficientNetV2B0 backbone: ImageNet weights, classification head removed
efficientnet_extractor = EfficientNetV2B0(
    include_top=False,
    weights='imagenet',
    input_shape=TARGET_SIZE + (3,),
)
print("EfficientNetV2B0 feature extractor loaded successfully.\n")

# Wrap the backbone in the shared CNN-LSTM architecture
efficientnet_model = create_cnn_lstm_model(efficientnet_extractor, SEQUENCE_LENGTH)

# Binary classification setup: Adam optimizer with binary cross-entropy loss
print("\n--- Compiling EfficientNetV2B0 Model ---")
efficientnet_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)
print("EfficientNetV2B0 model compiled successfully.\n")

# Show the layer-by-layer summary
print("--- EfficientNetV2B0 Model Summary ---")
efficientnet_model.summary()

EfficientNetV2B0 feature extractor loaded successfully.

Creating CNN-LSTM model for efficientnetv2-b0...
efficientnetv2-b0-based CNN-LSTM model created successfully.

--- Compiling EfficientNetV2B0 Model ---
EfficientNetV2B0 model compiled successfully.

--- EfficientNetV2B0 Model Summary ---
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_2 (TimeDis  (None, 10, 7, 7, 1280)   5919312   
 tributed)                                                       
                                                                 
 time_distributed_3 (TimeDis  (None, 10, 1280)         0         
 tributed)                                                       
                                                                 
 lstm_1 (LSTM)               (None, 128)               721408    
                                                                 
 dense_2 (Dense)       

### 5.5 Callbacks for EfficientNet

In [35]:
# Everything that observes or steers the EfficientNet training run is created here
print("--- Setting Up Training Callbacks for EfficientNet ---")

# Built-in Keras callbacks: early stopping, best-model checkpoint, per-epoch CSV log
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint_efficientnet = ModelCheckpoint(
    'efficientnet_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
csv_logger_efficientnet = CSVLogger('efficientnet_training_log.csv')

# Halve the learning rate after 3 epochs without val_loss improvement, down to 1e-6
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Workbook shared by the Excel-writing loggers; it is closed in the training cell
log_writer_efficientnet = pd.ExcelWriter('efficientnet_training_logs.xlsx', engine='openpyxl')

# Custom loggers defined earlier in this notebook
lr_logger_efficientnet = LearningRateLogger(writer=log_writer_efficientnet)
metrics_logger_efficientnet = MetricsLogger(writer=log_writer_efficientnet, validation_data=val_generator)
epoch_time_logger_efficientnet = EpochTimeLogger(writer=log_writer_efficientnet)
batch_metrics_logger_efficientnet = BatchMetricsLogger(filename='efficientnet_batch_logs.csv')

print("EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau, and ExcelWriter callbacks initialized for EfficientNetV2B0 model.")

--- Setting Up Training Callbacks for EfficientNet ---
EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau, and ExcelWriter callbacks initialized for EfficientNetV2B0 model.


### 5.6 Training EfficientNet-Based Model

In [36]:
# Training schedule: epochs, training steps per epoch, validation steps per epoch
EPOCHS, STEPS_PER_EPOCH, VALIDATION_STEPS = 20, 300, 150

# Fit the EfficientNet-based model on the generator streams
print("--- Training EfficientNet Model ---")
history_efficientnet = efficientnet_model.fit(
    x=train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=VALIDATION_STEPS,
    # The list order is kept as-is: it is the order in which the callbacks run.
    callbacks=[
        early_stopping,
        checkpoint_efficientnet,
        csv_logger_efficientnet,
        lr_logger_efficientnet,
        reduce_lr,
        metrics_logger_efficientnet,
        epoch_time_logger_efficientnet,
        batch_metrics_logger_efficientnet,
    ],
)

# Closing the writer is what saves the sheets written by the custom loggers
print("--- Saving Custom Logs for EfficientNet ---")
log_writer_efficientnet.close()
print("Custom logs for EfficientNet saved successfully.\n")

--- Training EfficientNet Model ---

--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\train
Batch Size: 8, Sequence Length: 10, Target Size: (224, 224), Augment: True, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\train\real
Collected 139606 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\train\fake
Collected 123036 images for class 'fake'.

Image collection and shuffling completed for all classes.
Epoch 1/20
--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\val
Batch Size: 8, Sequence Length: 10, Target Size: (224, 224), Augment: False, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\val\real
Collected 34902 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\val\fake
Collected 30760 images for class 'fake'.

Image collection and shuffl

### 5.7 Loading Pre-trained Feature Extractors: Xception

In [9]:
# Xception backbone: ImageNet weights, classification head removed
xception_extractor = Xception(
    include_top=False,
    weights='imagenet',
    input_shape=TARGET_SIZE + (3,),
)
print("Xception feature extractor loaded successfully.\n")

# Wrap the backbone in the shared CNN-LSTM architecture
xception_model = create_cnn_lstm_model(xception_extractor, SEQUENCE_LENGTH)

# Binary classification setup: Adam optimizer with binary cross-entropy loss
print("\n--- Compiling Xception Model ---")
xception_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)
print("Xception model compiled successfully.\n")

# Show the layer-by-layer summary
print("--- Xception Model Summary ---")
xception_model.summary()

Xception feature extractor loaded successfully.

Creating CNN-LSTM model for xception...
xception-based CNN-LSTM model created successfully.

--- Compiling Xception Model ---
Xception model compiled successfully.

--- Xception Model Summary ---
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_4 (TimeDis  (None, 10, 7, 7, 2048)   20861480  
 tributed)                                                       
                                                                 
 time_distributed_5 (TimeDis  (None, 10, 2048)         0         
 tributed)                                                       
                                                                 
 lstm_2 (LSTM)               (None, 128)               1114624   
                                                                 
 dense_4 (Dense)             (None, 128)               16512     
       

### 5.8 Callbacks for Xception

In [44]:
# Everything that observes or steers the Xception training run is created here
print("--- Setting Up Training Callbacks for Xception ---")

# Built-in Keras callbacks: early stopping, best-model checkpoint, per-epoch CSV log
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint_xception = ModelCheckpoint(
    'xception_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
csv_logger_xception = CSVLogger('xception_training_log.csv')

# Halve the learning rate after 3 epochs without val_loss improvement, down to 1e-6
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Workbook shared by the Excel-writing loggers; it is closed in the training cell
log_writer_xception = pd.ExcelWriter('xception_training_logs.xlsx', engine='openpyxl')

# Custom loggers defined earlier in this notebook
lr_logger_xception = LearningRateLogger(writer=log_writer_xception)
metrics_logger_xception = MetricsLogger(writer=log_writer_xception, validation_data=val_generator)
epoch_time_logger_xception = EpochTimeLogger(writer=log_writer_xception)
batch_metrics_logger_xception = BatchMetricsLogger(filename='xception_batch_logs.csv')

print("EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau, and ExcelWriter callbacks initialized for Xception model.")

--- Setting Up Training Callbacks for Xception ---
EarlyStopping, ModelCheckpoint, CSVLogger, ReduceLROnPlateau, and ExcelWriter callbacks initialized for Xception model.


### 5.9 Training Xception-Based Model

In [46]:
# Training schedule: epochs, training steps per epoch, validation steps per epoch
EPOCHS, STEPS_PER_EPOCH, VALIDATION_STEPS = 20, 300, 150

# Fit the Xception-based model on the generator streams
print("--- Training Xception Model ---")
history_xception = xception_model.fit(
    x=train_generator,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=VALIDATION_STEPS,
    # The list order is kept as-is: it is the order in which the callbacks run.
    callbacks=[
        early_stopping,
        checkpoint_xception,
        csv_logger_xception,
        lr_logger_xception,
        reduce_lr,
        metrics_logger_xception,
        epoch_time_logger_xception,
        batch_metrics_logger_xception,
    ],
)

# Closing the writer is what saves the sheets written by the custom loggers
print("--- Saving Custom Logs for Xception ---")
log_writer_xception.close()
print("Custom logs for Xception saved successfully.\n")

--- Training Xception Model ---

--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\train
Batch Size: 5, Sequence Length: 5, Target Size: (224, 224), Augment: True, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\train\real
Collected 139606 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\train\fake
Collected 123036 images for class 'fake'.

Image collection and shuffling completed for all classes.
Epoch 1/20
--- Initializing Stratified Sequence Data Generator ---
Data Directory: C:\Users\atul\Cropped_Faces\val
Batch Size: 5, Sequence Length: 5, Target Size: (224, 224), Augment: False, Shuffle: True

Collecting image paths and labels...
Scanning directory: C:\Users\atul\Cropped_Faces\val\real
Collected 34902 images for class 'real'.
Scanning directory: C:\Users\atul\Cropped_Faces\val\fake
Collected 30760 images for class 'fake'.

Image collection and shuffling co

In [1]:
import tensorflow as tf
# Reset the global state Keras keeps across model builds. This cell was run on a
# fresh kernel (execution count 1), i.e. after the training cells above.
tf.keras.backend.clear_session()
print("Session Cleared.")

Session Cleared.


In [2]:
from tensorflow.keras import backend as K
import gc
# Same Keras reset as the previous cell, this time through the `backend` alias K.
K.clear_session()
# Force a garbage-collection pass; being the last expression, the count returned by
# gc.collect() is what the cell displays.
gc.collect()

105