In [30]:
import os

print("Video Dataset Files:", os.listdir("/kaggle/input/deep-fake-detection-dfd-entire-original-dataset")[:10])
print("Image Dataset Files:", os.listdir("/kaggle/input/deepfake-and-real-images")[:10])

Video Dataset Files: ['DFD_original sequences', 'DFD_manipulated_sequences']
Image Dataset Files: ['Dataset']


In [31]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
print("Complete")

Complete


In [32]:
train_dir="/kaggle/input/deepfake-and-real-images/Dataset/Train"
test_dir="/kaggle/input/deepfake-and-real-images/Dataset/Test"
val_dir="/kaggle/input/deepfake-and-real-images/Dataset/Validation"

#Image Preprocessing
datagen=ImageDataGenerator(rescale=1./255,validation_split=0.2)

train_generator=datagen.flow_from_directory(
    train_dir,
    target_size=(128,128),
    batch_size=32,
    class_mode="binary",
    subset="training"
)

val_generator=datagen.flow_from_directory(
    val_dir,
    target_size=(128,128),
    batch_size=32,
    class_mode="binary",
    subset="validation"
)

test_generator=datagen.flow_from_directory(
    test_dir,
    target_size=(128,128),
    batch_size=32,
    class_mode="binary"
)

Found 112002 images belonging to 2 classes.
Found 7885 images belonging to 2 classes.
Found 10905 images belonging to 2 classes.


In [33]:
model=Sequential([
    Input(shape=(128,128,3)),
    Conv2D(16,(3,3),activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(32,(3,3),activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(64,activation='relu'),
    Dropout(0.5),
    Dense(1,activation='sigmoid')
])

#Compile the model
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

#Model summary
model.summary()

In [34]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=3  
)

Epoch 1/3
[1m3501/3501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m771s[0m 220ms/step - accuracy: 0.7603 - loss: 0.4815 - val_accuracy: 0.8642 - val_loss: 0.3004
Epoch 2/3
[1m3501/3501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m697s[0m 199ms/step - accuracy: 0.8848 - loss: 0.2705 - val_accuracy: 0.8968 - val_loss: 0.2419
Epoch 3/3
[1m3501/3501[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m688s[0m 197ms/step - accuracy: 0.9154 - loss: 0.2077 - val_accuracy: 0.9027 - val_loss: 0.2292


In [35]:
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc:.4f}")

[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 287ms/step - accuracy: 0.8425 - loss: 0.3498
Test Accuracy: 0.8419


In [44]:
model.save("/kaggle/working/deepfake_model.h5")

In [46]:
import IPython.display as display

output_path='/kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences'
video_files = os.listdir(output_path)
print("Available video files:", video_files[:100]) 
video_path = os.path.join(output_path, video_files[4])
display.Video(video_path)

Available video files: ['26__walking_down_street_outside_angry.mp4', '08__talking_against_wall.mp4', '14__walking_down_indoor_hall_disgust.mp4', '08__walking_down_street_outside_angry.mp4', '05__outside_talking_still_laughing.mp4', '14__exit_phone_room.mp4', '06__walk_down_hall_angry.mp4', '12__outside_talking_still_laughing.mp4', '02__talking_against_wall.mp4', '01__talking_against_wall.mp4', '24__outside_talking_still_laughing.mp4', '19__exit_phone_room.mp4', '03__hugging_happy.mp4', '12__walking_down_indoor_hall_disgust.mp4', '05__walk_down_hall_angry.mp4', '16__kitchen_pan.mp4', '18__walking_down_street_outside_angry.mp4', '16__walking_down_indoor_hall_disgust.mp4', '12__podium_speech_happy.mp4', '08__outside_talking_still_laughing.mp4', '03__secret_conversation.mp4', '27__kitchen_pan.mp4', '16__exit_phone_room.mp4', '11__talking_against_wall.mp4', '09__walk_down_hall_angry.mp4', '13__outside_talking_pan_laughing.mp4', '14__outside_talking_still_laughing.mp4', '07__talking_against_

In [47]:
import cv2

def extract_frames(video_path, output_folder, num_frames=100):
    cap = cv2.VideoCapture(video_path)
    count = 0

    while cap.isOpened() and count < num_frames:
        ret, frame = cap.read()
        if not ret:
            break
        frame_path = os.path.join(output_folder, f"frame_{count}.jpg")
        cv2.imwrite(frame_path, frame)
        count += 1

    cap.release()
    print(f"Extracted {count} frames from {video_path}")


frames_output_path = "/kaggle/working/extracted_frames"
os.makedirs(frames_output_path, exist_ok=True)

extract_frames(video_path, frames_output_path)

Extracted 100 frames from /kaggle/input/deep-fake-detection-dfd-entire-original-dataset/DFD_original sequences/05__outside_talking_still_laughing.mp4


In [53]:
model = tf.keras.models.load_model("/kaggle/working/deepfake_model.h5")

test_frame = os.path.join(frames_output_path, "frame_0.jpg")
img = image.load_img(test_frame, target_size=(128,128))
img_array = image.img_to_array(img) / 255.0
img_array = np.expand_dims(img_array, axis=0)


prediction = model.predict(img_array)[0][0]
print("Prediction:", "Fake" if prediction > 0.5 else "Real")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
Prediction: Fake
