# Install Packages

In [None]:
!conda install -c conda-forge opencv -y
!pip install facenet-pytorch

# Download Dataset

# DeepfakeTIMIT: Deepfakes
Any publication (eg. conference paper, journal article, technical report, book chapter, etc) resulting from the usage of DeepfakeTIMIT must cite the following paper:

    P. Korshunov and S. Marcel,
    DeepFakes: a New Threat to Face Recognition? Assessment and Detection.
    arXiv https://arxiv.org/abs/1812.08685 and Idiap Research Report (http://publications.idiap.ch/index.php/publications/show/3988)

In [46]:
!wget --no-check-certificate -O DeepfakeTIMIT.tar.gz "https://zenodo.org/records/4068245/files/DeepfakeTIMIT.tar.gz?download=1"
!tar -xf DeepfakeTIMIT.tar.gz
!rm DeepfakeTIMIT.tar.gz
!wget --no-check-certificate "https://my.pcloud.com/publink/show?code=XZLGvd7ZI9LjgIy7iOLzXBG5RNJzGFQzhTRy"

--2025-02-05 23:27:26--  https://my.pcloud.com/publink/show?code=XZLGvd7ZI9LjgIy7iOLzXBG5RNJzGFQzhTRy
Resolving my.pcloud.com (my.pcloud.com)... 45.131.244.10, 45.131.247.13, 45.131.247.15, ...
Connecting to my.pcloud.com (my.pcloud.com)|45.131.244.10|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 302 Found
Location: https://u.pcloud.link/publink/show?code=XZLGvd7ZI9LjgIy7iOLzXBG5RNJzGFQzhTRy [following]
--2025-02-05 23:27:26--  https://u.pcloud.link/publink/show?code=XZLGvd7ZI9LjgIy7iOLzXBG5RNJzGFQzhTRy
Resolving u.pcloud.link (u.pcloud.link)... 74.120.8.115, 74.120.9.94, 74.120.8.110, ...
Connecting to u.pcloud.link (u.pcloud.link)|74.120.8.115|:443... connected.
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 404 Not Found
2025-02-05 23:27:27 ERROR 404: Not Found.



# VidTIMIT Audio-Video Dataset: True Images
The VidTIMIT dataset is Copyright © 2001 Conrad Sanderson.

Distribution and research usage of this dataset is permitted under the following conditions:

    1. This notice is left intact and not modified in any way.
    2. The dataset is provided as is. There is no warranty as to the fitness for any particular purpose.
    3. The author of the dataset is not responsible for any direct or indirect losses resulting from the use of the dataset.
    4. Any publication (eg. conference paper, journal article, technical report, book chapter, etc) resulting from the usage of VidTIMIT must cite the following paper:
        C. Sanderson and B.C. Lovell
        Multi-Region Probabilistic Histograms for Robust and Scalable Identity Inference.
        Lecture Notes in Computer Science (LNCS), Vol. 5558, pp. 199-208, 2009.

In [7]:
# The "Import Packages" cell sits further down the notebook, so import what this
# cell needs here; otherwise a fresh Restart & Run All stops with a NameError.
import os
import zipfile

import requests

# Archive names (without the .zip extension) to download
file_names = [
    "fadg0", "faks0", "fcft0", "fcmh0", "fdac1", "fdrd1", "fedw0", "felc0", "fjas0", "fjem0", "fjre0", "fjwb0", "fkms0", "fram1",
    "mccs0", "mcem0", "mdab0", "mdbb0", "mdld0", "mgwt0", "mjar0", "mjsw0", "mmdb1", "mmdm2", "mpdf0", "mpgl0", "mrcz0", "mrgg0", "mrjo0",
    "msjs1", "mstk0", "mwbt0"
]

# URL template; {} is replaced by an entry of file_names
base_url = "https://zenodo.org/record/158963/files/{}.zip"

# Folder that receives the downloaded archives and their extracted contents
download_folder = "VidTIMIT"
os.makedirs(download_folder, exist_ok=True)

# Function to download files
def download_file(file_name, timeout=30):
    """Download one archive into ``download_folder``.

    The URL is built from ``base_url`` and the result is saved as
    ``<download_folder>/<file_name>.zip``.

    Args:
        file_name: Archive name without the ``.zip`` extension.
        timeout: Seconds to wait on the server before giving up.

    A non-200 response or a request error is reported with ``print`` and the
    function returns without creating the ``.zip`` file.
    """
    url = base_url.format(file_name)
    local_filename = os.path.join(download_folder, f"{file_name}.zip")
    # Stream into a side file first so an interrupted transfer never leaves a
    # truncated ".zip" behind for the extraction step to pick up.
    partial_filename = local_filename + ".part"

    print(f"Downloading {url}...")
    try:
        # The context manager returns the connection to the pool when done.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download {url}, Status Code: {response.status_code}")
                return
            with open(partial_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        os.replace(partial_filename, local_filename)
    except requests.RequestException as exc:
        # Keep going with the remaining files instead of aborting the loop.
        print(f"Failed to download {url}: {exc}")
        return
    finally:
        # Only present when the transfer did not complete.
        if os.path.exists(partial_filename):
            os.remove(partial_filename)
    print(f"Downloaded: {local_filename}")

# Download each file one at a time, in the order given by file_names
for file_name in file_names:
    download_file(file_name)

# Function to extract and delete zip files
def extract_and_delete_zip(zip_path, extract_to=None):
    """Extract a zip archive, then delete the archive file.

    Args:
        zip_path: Path of the ``.zip`` file to unpack.
        extract_to: Directory to extract into. When omitted, the notebook-level
            ``download_folder`` is used, so existing calls behave as before.

    An invalid archive is reported with ``print`` and left on disk.
    """
    target_dir = download_folder if extract_to is None else extract_to
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(target_dir)
    except zipfile.BadZipFile:
        print(f"Error: {zip_path} is not a valid zip file.")
        return
    print(f"Extracted: {zip_path}")
    # Only reached after a successful extraction.
    os.remove(zip_path)
    print(f"Deleted: {zip_path}")

# Unpack (and then remove) every .zip currently sitting in the download folder
for file in os.listdir(download_folder):
    if not file.endswith(".zip"):
        continue
    zip_path = os.path.join(download_folder, file)
    extract_and_delete_zip(zip_path)

print("All zip files extracted and deleted.")

Downloading https://zenodo.org/record/158963/files/mjar0.zip...
Downloaded: VidTIMIT/mjar0.zip


# Import Packages

In [1]:
import os
import requests
import zipfile

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
import cv2
from facenet_pytorch import MTCNN, InceptionResnetV1

In [2]:
# Prefer CUDA when PyTorch can see a GPU; otherwise fall back to the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

Using device: cpu


In [None]:
# Preprocessing pipeline: array -> PIL image -> 224x224 -> tensor
# NOTE(review): extract_faces_from_video feeds the result to face_encoder;
# confirm that 224x224, un-normalised input is what that encoder expects.
_preprocess_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocess_steps)

In [None]:
def extract_faces_from_video(video_path, mtcnn, face_encoder, device=DEVICE, frame_interval=5):
    """Detect faces in sampled frames of a video and return their mean embedding.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        mtcnn: Face detector; ``mtcnn.detect(frame_rgb)`` must return the boxes
            (or None) as the first element of its result.
        face_encoder: Callable mapping a preprocessed face tensor to an embedding.
        device: Device the face tensor is moved to before encoding.
        frame_interval: Only every ``frame_interval``-th frame is processed.

    Returns:
        ``np.mean`` over all collected embeddings along axis 0, or None when no
        face could be embedded (including when the video cannot be opened).
    """
    cap = cv2.VideoCapture(video_path)
    face_embeddings = []
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if frame_count % frame_interval != 0:
                continue

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            boxes, _ = mtcnn.detect(frame_rgb)
            if boxes is None:
                continue

            frame_h, frame_w = frame_rgb.shape[:2]
            for box in boxes:
                x1, y1, x2, y2 = map(int, box)
                # Guard against boxes that extend past the frame: a negative
                # start index would make the slice wrap around or come back
                # empty, and an empty crop cannot be resized.
                x1, y1 = max(x1, 0), max(y1, 0)
                x2, y2 = min(x2, frame_w), min(y2, frame_h)
                if x2 <= x1 or y2 <= y1:
                    continue

                face = frame_rgb[y1:y2, x1:x2]
                face_tensor = transform(face).unsqueeze(0).to(device)
                with torch.no_grad():
                    embedding = face_encoder(face_tensor)
                face_embeddings.append(embedding.cpu().numpy())
    finally:
        # Release the capture even if detection or encoding raises.
        cap.release()
    return np.mean(face_embeddings, axis=0) if face_embeddings else None

def detect_deepfake(real_video, unknown_video, model, threshold=0.7):
    """Compare two videos and label the unknown one as real or deepfake.

    Both videos are reduced to a mean face embedding with
    ``extract_faces_from_video``, using the notebook-level ``mtcnn`` and
    ``face_encoder`` objects (looked up when the function is called).

    Args:
        real_video: Path of the reference video.
        unknown_video: Path of the video to check.
        model: Callable taking the two embedding tensors and returning a
            single-element tensor holding a similarity score.
        threshold: Scores strictly above this value are reported as "REAL".

    Returns:
        "REAL", "DEEPFAKE", or an error string when no face could be
        extracted from one or both videos.

    NOTE(review): SiameseNetworkTemporal.forward in this notebook unpacks five
    input dimensions, while the tensors built here are averaged encoder
    embeddings with one extra leading axis - confirm that ``model`` accepts
    this input.
    """
    real_embedding = extract_faces_from_video(real_video, mtcnn, face_encoder)
    unknown_embedding = extract_faces_from_video(unknown_video, mtcnn, face_encoder)

    if real_embedding is None or unknown_embedding is None:
        return "Error: Could not extract faces from one or both videos"

    real_tensor = torch.tensor(real_embedding).float().unsqueeze(0).to(DEVICE)
    unknown_tensor = torch.tensor(unknown_embedding).float().unsqueeze(0).to(DEVICE)

    # Score in eval mode so layers such as BatchNorm/Dropout use their
    # inference behaviour for this batch of one, then restore the caller's mode.
    was_training = getattr(model, "training", False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            similarity = model(real_tensor, unknown_tensor).item()
    finally:
        if was_training:
            model.train()

    return "REAL" if similarity > threshold else "DEEPFAKE"

In [None]:
# Example Usage: compare a reference clip against a clip of unknown origin.
# NOTE: `model`, `mtcnn` and `face_encoder` are created in the cell that follows
# this one in the notebook; that cell has to be run first.
real_video_path = "real.mp4"
unknown_video_path = "unknown.mp4"

result = detect_deepfake(
    real_video=real_video_path,
    unknown_video=unknown_video_path,
    model=model,
)
print(f"Result: {result}")


In [3]:
# Load Face Detector (MTCNN) and Feature Extractor (FaceNet).
# Both are placed on the notebook-wide DEVICE so they stay on the same device
# as the tensors created by the helper functions.
mtcnn = MTCNN(keep_all=True, device=DEVICE)
face_encoder = InceptionResnetV1(pretrained='vggface2').eval().to(DEVICE)

# Define Siamese Network with Temporal Modeling of Facial Movements
class SiameseNetworkTemporal(nn.Module):
    """Siamese network that scores two frame sequences by cosine similarity.

    Every frame goes through a shared CNN backbone, the per-frame features go
    through a shared bidirectional LSTM, the LSTM outputs are averaged over
    time and projected by a small MLP, and the two projections are compared
    with cosine similarity.

    Args:
        hidden_dim: LSTM hidden size per direction.
        num_layers: Number of stacked LSTM layers.
        backbone: Optional module mapping frames of shape (N, C, H, W) to
            features of shape (N, feature_dim). When omitted, a torchvision
            ResNet-50 with default weights is used, with its classification
            layer replaced by an identity.
        feature_dim: Size of the per-frame feature vector produced by the
            backbone (2048 for the default ResNet-50).
    """

    def __init__(self, hidden_dim=16, num_layers=2, backbone=None, feature_dim=2048):
        super().__init__()
        if backbone is None:
            # `weights` is passed by keyword; the positional form is deprecated.
            backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
            backbone.fc = nn.Identity()  # Remove final classification layer
        self.resnet = backbone

        # Bidirectional, so each time step yields hidden_dim * 2 features,
        # which is the input width the projection head below expects.
        self.lstm = nn.LSTM(
            input_size=feature_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )

        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 2, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 32)
        )

    def _encode(self, seq):
        """Map one (batch, seq_len, c, h, w) sequence to a (batch, 32) vector."""
        batch_size, seq_len, c, h, w = seq.shape
        # reshape (unlike view) also accepts non-contiguous inputs
        frames = seq.reshape(batch_size * seq_len, c, h, w)
        features = self.resnet(frames).reshape(batch_size, seq_len, -1)
        hidden_states, _ = self.lstm(features)
        # Average the per-step LSTM outputs for temporal aggregation
        return self.fc(torch.mean(hidden_states, dim=1))

    def forward(self, x1_seq, x2_seq):
        """Return the cosine similarity, shape (batch,), of the two sequences.

        Each input is unpacked as (batch_size, seq_len, c, h, w) using its own
        shape, so the two sequences may differ in length.
        """
        f1 = self._encode(x1_seq)
        f2 = self._encode(x2_seq)
        return torch.cosine_similarity(f1, f2)

# Initialize Model with Temporal Movement Analysis (on the notebook-wide DEVICE)
model = SiameseNetworkTemporal().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# NOTE(review): TripletMarginLoss takes (anchor, positive, negative) tensors,
# while model.forward returns a cosine similarity - confirm how this loss is
# meant to be applied.
criterion = nn.TripletMarginLoss(margin=1.0, p=2)



In [None]:
# Define dummy input tensors to simulate a batch of video frames (batch_size=2, seq_len=5, channels=3, height=224, width=224)
dummy_x1 = torch.randn(2, 5, 3, 224, 224).to(DEVICE)
dummy_x2 = torch.randn(2, 5, 3, 224, 224).to(DEVICE)

# Smoke-test the forward pass; no gradients are needed for a shape check
try:
    with torch.no_grad():
        output = model(dummy_x1, dummy_x2)
    print("Model Output:", output)
    print("Output Shape:", output.shape)
except Exception as e:
    print("Error:", e)


In [8]:
# Check the dummy batch layout; forward() unpacks it as (batch_size, seq_len, c, h, w)
dummy_x1.shape

torch.Size([2, 5, 3, 224, 224])

In [12]:
# Reproduce the first half of forward() by hand to inspect what reaches the LSTM
batch_size, seq_len, c, h, w = dummy_x1.shape
# Merge the batch and sequence axes so the backbone sees one frame per row
x1_seq = dummy_x1.view(batch_size * seq_len, c, h, w)
frame_features = model.resnet(x1_seq)
# Split the merged axis back into (batch, sequence)
x1 = frame_features.view(batch_size, seq_len, -1)

In [15]:
# Run the model's LSTM on the per-frame features in x1 and display what it returns
model.lstm(x1)

(tensor([[[-0.0267,  0.1654,  0.0804, -0.0120, -0.1208,  0.0290, -0.1028,
            0.0491, -0.0563,  0.0492,  0.0735, -0.0279,  0.0368, -0.0032,
            0.0289,  0.0395, -0.0313,  0.1550, -0.1979,  0.0280, -0.1185,
            0.0214,  0.1448,  0.0384, -0.1222, -0.2314, -0.0457, -0.0318,
            0.1365,  0.0237, -0.0080,  0.0316],
          [-0.0493,  0.2520,  0.1263,  0.0124, -0.1799,  0.0362, -0.1554,
            0.0059, -0.0961,  0.0586,  0.0947, -0.0388,  0.1078,  0.0303,
            0.0412,  0.0412, -0.0227,  0.1311, -0.1563,  0.0477, -0.1851,
            0.0142,  0.1342,  0.0705, -0.1141, -0.1966, -0.0291, -0.0572,
            0.1406,  0.0244,  0.0275,  0.0513],
          [-0.0152,  0.2395,  0.0760,  0.0304, -0.1726,  0.0456, -0.1681,
           -0.0222, -0.1296,  0.0527,  0.1342, -0.0563,  0.0582, -0.0643,
            0.0433,  0.0600, -0.0213,  0.1027, -0.1112,  0.0169, -0.2132,
           -0.0124,  0.1067,  0.0448, -0.1113, -0.1838, -0.0246, -0.0724,
            0.11

In [14]:
# Shape of the per-frame features after restoring the (batch, sequence) axes
x1.shape

torch.Size([2, 5, 2048])