# Library Installation

In [None]:
!pip install tensorflow opencv-python flask-ngrok

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
from google.colab import files

# Bind the result to a name: files.upload() returns the uploaded files' contents,
# and leaving the call as the cell's last expression would echo the Kaggle API
# credentials into the saved notebook output.
uploaded = files.upload()
if "kaggle.json" not in uploaded:
    raise FileNotFoundError("Upload kaggle.json (your Kaggle API token) before continuing.")


TypeError: 'NoneType' object is not subscriptable

In [None]:
# Create the Kaggle config directory in the home folder (no error if it already exists).
!mkdir -p ~/.kaggle
# Copy the uploaded API token into that directory (presumably where the Kaggle CLI looks for it).
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

# Download and Extract Dataset

In [None]:
# Downloads Dataset from Kaggle
!kaggle competitions download -c deepfake-detection-challenge

# Extracts dataset
import zipfile
import os

# Creates a directory for extracted data
os.makedirs("dfdc_data", exist_ok=True)

# Unzips the dataset
with zipfile.ZipFile("deepfake-detection-challenge.zip", 'r') as zip_ref:
    zip_ref.extractall("dfdc_data")

print("Dataset extracted successfully!")
print("Files in extracted directory:", os.listdir("dfdc_data"))


# Frame Extraction

In [None]:
import cv2
import os

def extract_frames(video_path, output_dir, frame_rate=30):
    """
    Save every ``frame_rate``-th frame of a video as a JPEG image.

    Frames are counted from 0 in the order they are read; frame ``i`` is
    written to ``<output_dir>/frame_<i>.jpg`` when ``i % frame_rate == 0``.
    If the video cannot be opened, nothing is written.

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        output_dir: Directory that receives the JPEG files (created if missing).
        frame_rate: Sampling interval in frames; must be greater than zero.

    Raises:
        ValueError: If ``frame_rate`` is not greater than zero.
    """
    # Validate before touching the filesystem: a zero interval would otherwise
    # surface as a ZeroDivisionError in the middle of the read loop.
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be greater than zero, got {frame_rate!r}")

    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    try:
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break
            if count % frame_rate == 0:  # Keep only every frame_rate-th frame
                frame_filename = os.path.join(output_dir, f"frame_{count}.jpg")
                cv2.imwrite(frame_filename, frame)
            count += 1
    finally:
        # Release the capture handle even if reading or writing raised.
        cap.release()


# Load Metadata

In [None]:
import pandas as pd

# Loads metadata from metadata.json
import json
# Location of the per-video annotations inside the extracted sample set
metadata_path = "dfdc_data/train_sample_videos/metadata.json"
with open(metadata_path, "r") as metadata_file:
    metadata = json.load(metadata_file)

# Map each video name to a binary target: 1 when its label is "FAKE", 0 otherwise
labels = {}
for video_name, video_info in metadata.items():
    labels[video_name] = 1 if video_info["label"] == "FAKE" else 0

# Every target is 0 or 1, so the sum is the number of fake videos
fake_count = sum(labels.values())
real_count = len(labels) - fake_count
print(f"Number of real videos: {real_count}")
print(f"Number of fake videos: {fake_count}")


# Assign Labels to Frames

In [None]:
frame_labels = []
frame_output_dir = "frames/train"
os.makedirs(frame_output_dir, exist_ok=True)

# Extract frames video by video and pair every saved frame with its video's label
for video, label in labels.items():
    video_path = f"dfdc_data/train_sample_videos/{video}"
    # splitext drops only the final extension, so file names that contain
    # additional dots still map to distinct frame directories.
    video_frames_dir = os.path.join(frame_output_dir, os.path.splitext(video)[0])

    # Writes the sampled frames of this video into its own directory
    extract_frames(video_path, video_frames_dir, frame_rate=30)

    if os.path.exists(video_frames_dir):
        # os.listdir returns entries in arbitrary order; sorting keeps the dataset
        # order (and therefore any seeded split built from it) reproducible.
        for frame_file in sorted(os.listdir(video_frames_dir)):
            frame_labels.append((os.path.join(video_frames_dir, frame_file), label))

print(f"Number of frame-label pairs: {len(frame_labels)}")


# Preprocess Frames

In [None]:
import numpy as np

def preprocess_image(image_path, image_size=(128, 128)):
    """
    Load an image from disk and turn it into a single-item model input batch.

    The image is read with ``cv2.imread``, resized with ``cv2.resize``,
    divided by 255.0 and given a leading batch axis.

    Args:
        image_path: Path of the image file to read.
        image_size: Target size passed to ``cv2.resize`` as ``dsize``, which
            OpenCV interprets as (width, height).

    Returns:
        Array with one extra leading axis of length 1 in front of the resized
        image, i.e. (1, height, width, channels).

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, image_size) / 255.0
    # Add the batch axis without reshaping: the resized array is laid out as
    # (height, width, channels), so reshape(1, *image_size, 3) would scramble
    # pixels whenever image_size is not square.
    return np.expand_dims(image, axis=0)

def load_frame_data(frame_labels, image_size=(128, 128)):
    """
    Read every labelled frame from disk and stack frames and labels into arrays.

    Each frame is resized to ``image_size`` and divided by 255.0. Frames that
    ``cv2.imread`` cannot read are dropped together with their label.

    Args:
        frame_labels: Iterable of ``(frame_path, label)`` pairs.
        image_size: Target size passed to ``cv2.resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays in matching order.
    """
    loaded_frames = []
    kept_labels = []
    for frame_path, label in frame_labels:
        frame = cv2.imread(frame_path)
        if frame is not None:
            loaded_frames.append(cv2.resize(frame, image_size) / 255.0)
            kept_labels.append(label)
    return np.array(loaded_frames), np.array(kept_labels)

# Load every extracted frame into memory: X receives the image array and y the
# matching label array returned by load_frame_data.
X, y = load_frame_data(frame_labels)
print(f"Loaded {len(X)} images and {len(y)} labels")


# Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# One-hot encode into a new name so this cell can be re-run safely: overwriting `y`
# would feed already-encoded labels back into to_categorical on a second run.
# num_classes=2 keeps two output columns even if only one class is present.
y_onehot = to_categorical(y, num_classes=2)

# Split the data into training and validation sets.
# NOTE(review): the split is made per frame, so frames from the same video can end up
# in both sets and inflate validation accuracy; consider splitting per video instead.
X_train, X_val, y_train, y_val = train_test_split(X, y_onehot, test_size=0.2, random_state=42)
print(f"Training data shape: {X_train.shape}, Validation data shape: {X_val.shape}")


# CNN Model Definition


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

# Build the CNN layer by layer: three Conv -> BatchNorm -> MaxPool stages
# (32, 64 and 128 filters) followed by a dense classification head.
model = Sequential()

# First stage also declares the expected input: 128x128 images with 3 channels
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Remaining stages differ only in their filter count
for n_filters in (64, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head: flatten, one hidden layer with dropout, two-way softmax
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

# Configure optimizer, loss and the reported metric, then print the layer overview
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


# Train the Model

In [None]:
# Fit on the training split; the validation split is scored after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

# Score the trained model on the validation split and report accuracy in percent
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 2s/step - accuracy: 0.6351 - loss: 0.4990 - val_accuracy: 0.7212 - val_loss: 0.4263
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 2s/step - accuracy: 0.7341 - loss: 0.4125 - val_accuracy: 0.7750 - val_loss: 0.5385
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 2s/step - accuracy: 0.7555 - loss: 0.3896 - val_accuracy: 0.8250 - val_loss: 0.4187
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 2s/step - accuracy: 0.8306 - loss: 0.3830 - val_accuracy: 0.8363 - val_loss: 0.4110
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 2s/step - accuracy: 0.8297 - loss: 0.3681 - val_accuracy: 0.8225 - val_loss: 0.4113
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 2s/step - accuracy: 0.8236 - loss: 0.3745 - val_accuracy: 0.8275 - val_loss: 0.4323
Epoch 7/10
[1m100/100

In [None]:
# Standalone evaluation on the validation split; this repeats the evaluate/print
# step at the end of the training cell, so it can be run without retraining.
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 394ms/step - accuracy: 0.8282 - loss: 0.4930
Validation Accuracy: 83.13%


# Saving the Model

In [None]:
# Save the model in the .keras format for use in the backend.
# The path lives in one variable so the confirmation message always names the
# file that was actually written.
model_path = "deepfake_detector_test.keras"
model.save(model_path)
print(f"Model saved as {model_path}")


Model saved as deepfake_detector.keras


# Video Classification for Use in the Backend

In [None]:
def classify_video(video_path, model, frame_rate=30):
    """
    Classify a video as "REAL" or "FAKE" by majority vote over sampled frames.

    Frames are written to a private temporary directory with ``extract_frames``,
    preprocessed with ``preprocess_image``, and scored by the model in a single
    batch. The class index predicted for the most frames wins; on a tie the
    lowest class index wins. Index 0 maps to "REAL", anything else to "FAKE".

    Args:
        video_path: Path of the video file to classify.
        model: Object whose ``predict`` method accepts a batch of preprocessed
            frames and returns one row of class scores per frame.
        frame_rate: Sampling interval forwarded to ``extract_frames``.

    Returns:
        The string "REAL" or "FAKE".

    Raises:
        ValueError: If no frames could be extracted from the video.
    """
    import tempfile  # local import keeps this cell self-contained

    # A unique directory per call avoids picking up stale frames from an earlier
    # (possibly failed) run and collisions between concurrent calls; the context
    # manager removes it even when an exception is raised.
    with tempfile.TemporaryDirectory(prefix="video_frames_") as temp_dir:
        extract_frames(video_path, temp_dir, frame_rate)

        frame_files = sorted(os.listdir(temp_dir))
        if not frame_files:
            raise ValueError(f"No frames could be extracted from video: {video_path}")

        # Each preprocess_image result already carries a batch axis of length 1,
        # so concatenating yields one batch and needs only one predict call.
        batch = np.concatenate(
            [preprocess_image(os.path.join(temp_dir, frame_file)) for frame_file in frame_files]
        )
        votes = np.argmax(model.predict(batch), axis=1)

    # bincount tallies votes per class index; argmax returns the first maximum,
    # i.e. the lowest class index when counts are tied.
    result = int(np.bincount(votes).argmax())
    return "REAL" if result == 0 else "FAKE"
