In [8]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/kaggle/input/deepfake-detection-v1/tensorflow2/default/1/deepfake_detection_model_3.h5
/kaggle/input/deepfake-detection-challenge/sample_submission.csv
/kaggle/input/deepfake-detection-challenge/test_videos/qooxnxqqjb.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/hierggamuo.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/ocgdbrgmtq.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/icbsahlivv.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/prhmixykhr.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/ihglzxzroo.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/yarpxfqejd.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/orixbcfvdz.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/qxyrtwozyw.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/hqzwudvhih.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/ghnpsltzyn.mp4
/kaggle/input/deepfake-detection-challenge/test_videos/ziipxxchai.mp4
/kaggle

# Training Data Preparation

In [None]:
!pip install keras-nightly

In [1]:
import os
import cv2
import numpy as np
import pickle
from collections import defaultdict
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Constants
IMG_SIZE = (299, 299)  # Target image size for InceptionV3
MAX_FRAMES = 10        # Maximum number of frames per sequence
REAL_DIR = 'real'      # Directory containing images of real samples
FAKE_DIR = 'fake'      # Directory containing images of fake samples

def load_images_from_directory(directory, label):
    """
    Load images from the specified directory, group them by videoname, preprocess, and pad sequences.
    
    Parameters:
    - directory (str): Path to the directory containing images.
    - label (int): Label for the samples (0 for real, 1 for fake).
    
    Returns:
    - data (np.array): Array of processed sequences of images.
    - labels (np.array): Array of corresponding labels.
    """
    data = []
    labels = []
    video_frames = defaultdict(list)  # Dictionary to hold frames grouped by videoname

    # Iterate over all images in the directory
    for filename in os.listdir(directory):
        if filename.endswith('.png'):
            # Extract the videoname (before the first underscore)
            video_name = filename.split('_')[0]
            filepath = os.path.join(directory, filename)
            img = cv2.imread(filepath)

            if img is not None:
                # Resize and preprocess the image
                img = cv2.resize(img, IMG_SIZE)
                img = img_to_array(img)
                img = preprocess_input(img)  # Preprocess using InceptionV3 preprocessing
                video_frames[video_name].append(img)
    
    # Process each set of images grouped by videoname
    for frames in video_frames.values():
        # Pad with zeros if frames are less than MAX_FRAMES
        while len(frames) < MAX_FRAMES:
            frames.append(np.zeros((299, 299, 3)))  # Zero-padding for missing frames

        # Limit to MAX_FRAMES if more frames are present
        frames = frames[:MAX_FRAMES]

        data.append(frames)
        labels.append(label)
    
    return np.array(data), np.array(labels)


In [3]:
# Load real and fake images
REAL_DIR = "/kaggle/input/deepfake-detection-challenge-dataset-face-images/real"
FAKE_DIR = "/kaggle/input/deepfake-detection-challenge-dataset-face-images/fake"
# Load real and fake images
x_real, y_real = load_images_from_directory(REAL_DIR, label=0)  # Label 0 for real
x_fake, y_fake = load_images_from_directory(FAKE_DIR, label=1)  # Label 1 for fake


In [4]:

# Combine real and fake data
x_data = np.concatenate([x_real, x_fake], axis=0)
y_data = np.concatenate([y_real, y_fake], axis=0)

# Split the data into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x_data, y_data, test_size=0.2, random_state=42, stratify=y_data)



In [5]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, TimeDistributed, Dropout, GlobalAveragePooling2D, Input

# Define the custom feature extractor as a Keras Model
def create_feature_extractor():
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
    feature_extractor = tf.keras.Model(inputs=base_model.input, outputs=GlobalAveragePooling2D()(base_model.output))
    return feature_extractor

# Build the sequential model with TimeDistributed
def create_model():
    feature_extractor = create_feature_extractor()
    
    model = Sequential()
    model.add(Input(shape=(None, 299, 299, 3)))
    model.add(TimeDistributed(feature_extractor))
    model.add(LSTM(128, return_sequences=True)) # LSTM layer
    model.add(GRU(128)) # GRU layer
    model.add(Dropout(0.5)) # Dropout layer for regularization
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Output layer: binary classification
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create and summarize the model
model = create_model()
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
model.fit(x_train, y_train, epochs=20, batch_size=10, validation_data=(x_val, y_val))

Epoch 1/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m307s[0m 4s/step - accuracy: 0.5902 - loss: 0.7595 - val_accuracy: 0.5161 - val_loss: 0.6891
Epoch 2/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - accuracy: 0.6176 - loss: 0.6742 - val_accuracy: 0.5484 - val_loss: 0.7227
Epoch 3/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - accuracy: 0.4998 - loss: 0.6986 - val_accuracy: 0.6452 - val_loss: 0.6762
Epoch 4/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - accuracy: 0.5414 - loss: 0.7167 - val_accuracy: 0.4516 - val_loss: 0.7068
Epoch 5/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - accuracy: 0.6123 - loss: 0.6843 - val_accuracy: 0.6129 - val_loss: 0.6575
Epoch 6/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - accuracy: 0.6036 - loss: 0.6693 - val_accuracy: 0.5806 - val_loss: 0.6620
Epoch 7/20
[1m13/13[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7a7eec2653f0>

In [7]:
# Save the model in the .h5 format
model.save('/kaggle/working/deepfake_detection_model.h5')

In [12]:
import tensorflow as tf

# Load the model
model_path = '/kaggle/working/deepfake_detection_model.h5'
model = tf.keras.models.load_model(model_path)


ValueError: Layer count mismatch when loading weights from file. Model expected 1 layers, found 5 saved layers.

In [8]:
# Save model architecture
model_json = model.to_json()
with open('/kaggle/working/model_architecture.json', 'w') as json_file:
    json_file.write(model_json)

# Save model weights
model.save_weights('/kaggle/working/model.weights.h5')

In [9]:
import tensorflow as tf
from tensorflow.keras.models import model_from_json

# Load model architecture
with open('/kaggle/working/model_architecture.json', 'r') as json_file:
    model_json = json_file.read()

# Recreate the model from the architecture
model = model_from_json(model_json, custom_objects={'TimeDistributed': tf.keras.layers.TimeDistributed})

# Load model weights
model.load_weights('/kaggle/working/model.weights.h5')
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])


  saveable.load_own_variables(weights_store.get(inner_path))


In [35]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Load the trained model
#model = load_model('/kaggle/working/deepfake_detection_model_2.h5')

# Constants
IMG_SIZE = (299, 299)  # Image size expected by the model
MAX_FRAMES = 10        # Max frames to consider per video

def extract_frames_from_video(video_path, max_frames=MAX_FRAMES):
    """
    Extract and preprocess frames from a given video for prediction.
    
    Parameters:
    - video_path (str): Path to the input video file.
    - max_frames (int): Maximum number of frames to process from the video.

    Returns:
    - processed_frames (np.array): Array of processed frames ready for model prediction.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    processed_frames = []

    while cap.isOpened() and frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Resize and preprocess the frame
        resized_frame = cv2.resize(frame, IMG_SIZE)
        frame_array = img_to_array(resized_frame)
        processed_frame = preprocess_input(frame_array)
        processed_frames.append(processed_frame)
        frame_count += 1

    cap.release()

    # Pad with zero frames if less than max_frames are present
    while len(processed_frames) < max_frames:
        processed_frames.append(np.zeros((299, 299, 3)))
    
    return np.array([processed_frames])

def predict_video(model, video_path):
    """
    Predict whether the video is REAL or FAKE based on extracted frames.

    Parameters:
    - model: The trained deepfake detection model.
    - video_path (str): Path to the input video file.

    Returns:
    - prediction (str): 'REAL' or 'FAKE' based on model prediction.
    """
    # Extract frames from the video
    frames = extract_frames_from_video(video_path)
    
    # Make predictions on frames
    predictions = model.predict(frames)
    print(predictions)
    # Aggregate predictions; if the average is above 0.5, classify as FAKE
    avg_prediction = np.mean(predictions)
    print(avg_prediction)
    if avg_prediction > 0.5:
        return 'FAKE'
    else:
        return 'REAL'

In [36]:
# Example usage
video_path = '/kaggle/input/deepfake-detection-challenge/test_videos/bcbqxhziqz.mp4'  # Replace with your video file path
result = predict_video(model, video_path)
print(f'The video is predicted to be: {result}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[[0.6812479]]
0.6812479
The video is predicted to be: FAKE
