In [None]:
import os
import numpy as np
import cv2
from PIL import Image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications.xception import preprocess_input

# Import MTCNN for face detection
from mtcnn import MTCNN


In [None]:
# --- Clip geometry shared by preprocessing and the model definition ---
TIME_STEPS = 30        # how many frames are sampled from each video
HEIGHT = WIDTH = 299   # side length (pixels) of every square face crop

# --- File locations (adjust to your own Drive layout) ---
model_path = '/content/drive/MyDrive/Dataset DDM/FINAL models/COMBINED_best_Phase1.keras'  # Update with your model path
# Example of a video path override, left disabled:
# video_path = '/content/drive/MyDrive/Dataset DDM/Saved models_trial 1/demo 2.mp4'  # Update with your video path

In [None]:

def _crop_face(frame_rgb, box):
    """Return the region of ``frame_rgb`` described by ``box``, or ``None``.

    Args:
        frame_rgb: Image array indexed as ``[row, column, channel]``.
        box: ``(x, y, width, height)`` sequence, as stored under the ``'box'``
            key of a detection.

    Returns:
        A view of ``frame_rgb`` restricted to the box after clipping it to the
        frame, or ``None`` when the clipped rectangle is empty.
    """
    frame_h, frame_w = frame_rgb.shape[:2]
    x, y, width, height = box
    # Compute the far edges from the *unclamped* origin, then clip. Clamping
    # the origin first would shift/enlarge a box that starts off-frame.
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(frame_w, x + width), min(frame_h, y + height)
    if x2 <= x1 or y2 <= y1:
        return None
    return frame_rgb[y1:y2, x1:x2]


def extract_faces_from_video(video_path, num_frames=TIME_STEPS, detector=None):
    """
    Samples frames from a video, crops the first detected face in each sampled
    frame, and returns them as a single-clip batch.

    Frame indices are spread evenly over the frame count reported by OpenCV
    (``np.linspace``). Each sampled frame is converted to RGB, the first
    detection's box is cropped, resized to ``WIDTH`` x ``HEIGHT`` and passed
    through ``preprocess_input``. A sampled frame with no usable face
    contributes an all-zero image. If fewer than ``num_frames`` images were
    collected (e.g. the video is shorter than ``num_frames``, so sample
    indices repeat), the last collected image is repeated to fill the clip.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned clip.
        detector: Optional face detector exposing ``detect_faces(rgb_image)``.
            When ``None`` a new ``MTCNN()`` is created; pass one in to avoid
            rebuilding the detector for every video.

    Returns:
        Array of shape ``(1, num_frames, HEIGHT, WIDTH, 3)``.

    Warns:
        RuntimeWarning: if the video cannot be opened. The function then still
        returns an all-zero clip, as before, but no longer does so silently.
    """
    import warnings  # local import: only used for the diagnostic below

    if detector is None:
        detector = MTCNN()

    def blank_frame():
        # NOTE(review): zeros are used as the "no face" placeholder. If
        # preprocess_input rescales pixels to [-1, 1], zero is mid-gray rather
        # than black; kept as-is because the training pipeline is not visible
        # here -- confirm it used the same placeholder.
        return np.zeros((HEIGHT, WIDTH, 3), dtype=np.float32)

    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            warnings.warn(
                f"Could not open video: {video_path!r}; returning blank frames.",
                RuntimeWarning,
            )

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, frame_count - 1, num_frames, dtype=int)
        # Set membership instead of scanning the array on every decoded frame.
        wanted = set(frame_indices.tolist())
        # Nothing past this index is ever sampled, so decoding can stop there.
        last_wanted = max(wanted, default=-1)

        idx = 0
        while len(frames) < num_frames and idx <= last_wanted:
            success, frame = cap.read()
            if not success:
                break
            if idx in wanted:
                # OpenCV decodes to BGR; the detector and the model expect RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                detections = detector.detect_faces(frame_rgb)
                # If faces are detected, take the first one
                face = _crop_face(frame_rgb, detections[0]['box']) if detections else None
                if face is not None:
                    # PIL's resize takes (width, height)
                    face_image = Image.fromarray(face).resize((WIDTH, HEIGHT))
                    frames.append(preprocess_input(np.array(face_image)))
                else:
                    # No detection, or the box lies entirely outside the frame
                    frames.append(blank_frame())
            idx += 1
    finally:
        # Release the capture handle even if detection/resizing raised.
        cap.release()

    # If fewer frames were collected, pad with the last frame
    if len(frames) < num_frames:
        last_frame = frames[-1] if frames else blank_frame()
        frames += [last_frame] * (num_frames - len(frames))

    # Stack to (num_frames, HEIGHT, WIDTH, 3) and add the batch dimension
    video_array = np.array(frames)
    return np.expand_dims(video_array, axis=0)


In [None]:
def build_model(lstm_hidden_size=256, num_classes=2, dropout_rate=0.5):
    """Assemble the frame-wise Xception + LSTM clip classifier.

    Every frame of a ``(TIME_STEPS, HEIGHT, WIDTH, 3)`` clip is encoded by the
    same Xception backbone (ImageNet weights, no classification head, global
    average pooling). The resulting feature sequence is summarised by one LSTM
    layer, regularised with dropout and mapped to class probabilities by a
    softmax layer.

    Args:
        lstm_hidden_size: Number of units in the LSTM layer.
        num_classes: Width of the softmax output layer.
        dropout_rate: Dropout rate applied to the LSTM output.

    Returns:
        An uncompiled ``keras.Model`` mapping a batch of clips to
        ``num_classes`` probabilities per clip.
    """
    # One clip = TIME_STEPS RGB frames of HEIGHT x WIDTH pixels
    clip_input = layers.Input(shape=(TIME_STEPS, HEIGHT, WIDTH, 3))

    # Per-frame encoder. Its `trainable` flag is left at the default, which is
    # fine for inference; freeze it explicitly if you intend to retrain.
    frame_encoder = keras.applications.Xception(
        weights='imagenet',
        include_top=False,
        pooling='avg',
    )

    # Run the encoder on each time step: (batch, TIME_STEPS, 2048)
    frame_features = layers.TimeDistributed(frame_encoder)(clip_input)

    # Collapse the sequence into one vector, then classify
    clip_embedding = layers.LSTM(lstm_hidden_size)(frame_features)
    clip_embedding = layers.Dropout(dropout_rate)(clip_embedding)
    class_probs = layers.Dense(num_classes, activation='softmax')(clip_embedding)

    return keras.Model(clip_input, class_probs)

# Rebuild the architecture in code (default hyper-parameters of build_model)
model = build_model()
# Load the weights stored at `model_path` into the freshly built model.
# This must run after build_model(), since the model has to exist first.
model.load_weights(model_path)

In [None]:
# Clip to classify
video_path = '/content/drive/MyDrive/Dataset DDM/FF++/manipulated_sequences/FaceShifter/raw/videos/724_725.mp4'

# Output index -> label (0 for real, 1 for fake)
class_names = ['Real', 'Fake']

# Sample frames, crop faces and build a one-clip batch for the network
video_array = extract_faces_from_video(video_path, num_frames=TIME_STEPS)

# Forward pass; the batch holds a single clip, so only row 0 is of interest
predictions = model.predict(video_array)
probabilities = predictions[0]
predicted_class = np.argmax(probabilities)

# Report the winning label together with both class probabilities
print(f"Predicted Class: {class_names[predicted_class]}")
print(f"Class Probabilities: Real: {probabilities[0]:.4f}, Fake: {probabilities[1]:.4f}")