In [2]:
pip --version


pip 23.2.1 from c:\Users\Harshita\AppData\Local\Programs\Python\Python311\Lib\site-packages\pip (python 3.11)Note: you may need to restart the kernel to use updated packages.




In [3]:
pip install torch torchvision opencv-python numpy scikit-learn

Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/11/c5/2370d96b31eb1841c3a0883a492c15278a6718ccad61bb6a649c80d1d9eb/torch-2.6.0-cp311-cp311-win_amd64.whl.metadata
  Downloading torch-2.6.0-cp311-cp311-win_amd64.whl.metadata (28 kB)
Collecting torchvision
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/88/53/4ad334b9b1d8dd99836869fec139cb74a27781298360b91b9506c53f1d10/torchvision-0.21.0-cp311-cp311-win_amd64.whl.metadata
  Downloading torchvision-0.21.0-cp311-cp311-win_amd64.whl.metadata (6.3 kB)
Collecting opencv-python
  Obtaining dependency information for opencv-python from https://files.pythonhosted.org/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting numpy
  Obtaining dependency information for numpy from 


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define a custom dataset
class VideoDataset(Dataset):
    def __init__(self, real_dir, fake_dir, transform=None, num_frames=10):
        self.real_dir = real_dir
        self.fake_dir = fake_dir
        self.transform = transform
        self.num_frames = num_frames

        # Get list of video paths and labels
        self.real_videos = [os.path.join(real_dir, fname) for fname in os.listdir(real_dir)]
        self.fake_videos = [os.path.join(fake_dir, fname) for fname in os.listdir(fake_dir)]
        self.video_paths = self.real_videos + self.fake_videos
        self.labels = [0] * len(self.real_videos) + [1] * len(self.fake_videos)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        frames = self.extract_frames(video_path)
        label = self.labels[idx]
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        frames = torch.stack(frames)
        return frames, label

    def extract_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, total_frames - 1, self.num_frames, dtype=int)

        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
        cap.release()
        return frames

# Define a simple CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Reshape input to combine batch and frames dimensions
        batch_size, num_frames, C, H, W = x.size()
        x = x.view(-1, C, H, W)  # New shape: (batch_size * num_frames, C, H, W)
        
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten the features
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.sigmoid(self.fc2(x))
        
        # Reshape back to (batch_size, num_frames) and average over frames
        x = x.view(batch_size, num_frames)
        x = x.mean(dim=1).unsqueeze(1)  # Output shape: (batch_size, 1)
        return x

# Define transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the dataset
real_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\real'  # Path to real videos
fake_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\fake'  # Path to fake videos
dataset = VideoDataset(real_dir, fake_dir, transform=transform)

# Split the dataset into training and validation sets
train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Initialize the model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.float().unsqueeze(1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted.squeeze() == labels).sum().item()
    print(f"Validation Accuracy: {100 * correct / total}%")

# Save the model
torch.save(model.state_dict(), 'fake_video_detection_model.pth')

Epoch 1, Loss: 48.143014173209664
Validation Accuracy: 45.0%
Epoch 2, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 3, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 4, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 5, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 6, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 7, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 8, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 9, Loss: 48.75
Validation Accuracy: 45.0%
Epoch 10, Loss: 48.75
Validation Accuracy: 45.0%


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define a custom dataset
class VideoDataset(Dataset):
    def __init__(self, real_dir, fake_dir, transform=None, num_frames=10):
        self.real_dir = real_dir
        self.fake_dir = fake_dir
        self.transform = transform
        self.num_frames = num_frames

        # Get list of video paths and labels
        self.real_videos = [os.path.join(real_dir, fname) for fname in os.listdir(real_dir)]
        self.fake_videos = [os.path.join(fake_dir, fname) for fname in os.listdir(fake_dir)]
        self.video_paths = self.real_videos + self.fake_videos
        self.labels = [0] * len(self.real_videos) + [1] * len(self.fake_videos)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        frames = self.extract_frames(video_path)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        frames = torch.stack(frames)
        return frames, label

    def extract_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, max(1, total_frames) - 1, self.num_frames, dtype=int)
        
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                frame = np.zeros((224, 224, 3), dtype=np.uint8)  # Padding with black frames
            frames.append(frame)
        cap.release()
        return frames

# Define CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, 1)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        batch_size, num_frames, C, H, W = x.size()
        x = x.view(-1, C, H, W)
        x = self.pool(nn.functional.relu(self.bn1(self.conv1(x))))
        x = self.pool(nn.functional.relu(self.bn2(self.conv2(x))))
        x = x.view(x.size(0), -1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        x = x.view(batch_size, num_frames)
        x = x.mean(dim=1).unsqueeze(1)
        return x

# Define transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
real_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\real'
fake_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\fake'
dataset = VideoDataset(real_dir, fake_dir, transform=transform)
train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Initialize model, loss function, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            outputs = model(inputs)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Validation Accuracy: {100 * correct / total:.2f}%")

# Save model
torch.save(model.state_dict(), 'fake_video_detection_model.pth')


Epoch 1, Loss: 11.349386589229107
Validation Accuracy: 43.75%
Epoch 2, Loss: 1.5697441533207894
Validation Accuracy: 60.00%
Epoch 3, Loss: 0.6941670551896095
Validation Accuracy: 55.00%
Epoch 4, Loss: 0.6823463067412376
Validation Accuracy: 55.00%
Epoch 5, Loss: 0.7022960960865021
Validation Accuracy: 55.00%
Epoch 6, Loss: 0.6944482028484344
Validation Accuracy: 55.00%
Epoch 7, Loss: 0.6943198427557945
Validation Accuracy: 55.00%
Epoch 8, Loss: 0.6942238181829452
Validation Accuracy: 55.00%
Epoch 9, Loss: 0.6940780460834504
Validation Accuracy: 55.00%
Epoch 10, Loss: 0.693999619781971
Validation Accuracy: 55.00%


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Define a custom dataset
class VideoDataset(Dataset):
    def __init__(self, real_dir, fake_dir, transform=None, num_frames=16):
        self.real_dir = real_dir
        self.fake_dir = fake_dir
        self.transform = transform
        self.num_frames = num_frames

        # Get list of video paths and labels
        self.real_videos = [os.path.join(real_dir, fname) for fname in os.listdir(real_dir)]
        self.fake_videos = [os.path.join(fake_dir, fname) for fname in os.listdir(fake_dir)]
        self.video_paths = self.real_videos + self.fake_videos
        self.labels = [0] * len(self.real_videos) + [1] * len(self.fake_videos)

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        video_path = self.video_paths[idx]
        frames = self.extract_frames(video_path)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        if self.transform:
            frames = [self.transform(frame) for frame in frames]
        frames = torch.stack(frames)
        return frames, label

    def extract_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_indices = np.linspace(0, max(1, total_frames) - 1, self.num_frames, dtype=int)
        
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            else:
                frame = np.zeros((224, 224, 3), dtype=np.uint8)  # Padding with black frames
            frames.append(frame)
        cap.release()
        return frames

# Use ResNet-18 as feature extractor
class ResNetModel(nn.Module):
    def __init__(self):
        super(ResNetModel, self).__init__()
        self.resnet = models.resnet18(pretrained=True)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 1)
    
    def forward(self, x):
        batch_size, num_frames, C, H, W = x.size()
        x = x.view(-1, C, H, W)  # Merge batch and frames
        x = self.resnet(x)
        x = x.view(batch_size, num_frames)
        x = x.mean(dim=1).unsqueeze(1)
        return x

# Define transformations with augmentation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
real_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\real'
fake_dir = r'C:\Users\Harshita\OneDrive\Desktop\wow\FF++\fake'
dataset = VideoDataset(real_dir, fake_dir, transform=transform)
train_dataset, val_dataset = train_test_split(dataset, test_size=0.2, random_state=42, stratify=dataset.labels)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Initialize model, loss function, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNetModel().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device).float().unsqueeze(1)
            outputs = model(inputs)
            predicted = (torch.sigmoid(outputs) > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Validation Accuracy: {100 * correct / total:.2f}%")

# Save model
torch.save(model.state_dict(), 'fake_video_detection_model.pth')


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Harshita/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100.0%


Epoch 1, Loss: 0.7584500521421432
Validation Accuracy: 55.00%
Epoch 2, Loss: 0.6303750351071358
Validation Accuracy: 70.00%
Epoch 3, Loss: 0.5379576478153467
Validation Accuracy: 62.50%
Epoch 4, Loss: 0.4600734688341618
Validation Accuracy: 67.50%
Epoch 5, Loss: 0.4017051238566637
Validation Accuracy: 71.25%
Epoch 6, Loss: 0.33631095234304664
Validation Accuracy: 67.50%
Epoch 7, Loss: 0.3343473393470049
Validation Accuracy: 66.25%
Epoch 8, Loss: 0.2713272036984563
Validation Accuracy: 61.25%
Epoch 9, Loss: 0.28030629493296144
Validation Accuracy: 63.75%
Epoch 10, Loss: 0.23544629868119954
Validation Accuracy: 70.00%


In [7]:
import torch

# Assuming you have a model instance
torch.save(model.state_dict(), "model_weights.pth")

# To load the model later
model.load_state_dict(torch.load("model_weights.pth"))
model.eval()  # Set to evaluation mode


ResNetModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_

In [7]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np
from torchvision import models
import gradio as gr

class VideoResNet(nn.Module):
    def __init__(self):
        super(VideoResNet, self).__init__()
        self.resnet = models.resnet18(weights=None)  # Consider using pretrained weights
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 1)
        
    def forward(self, x):
        batch_size, num_frames, C, H, W = x.size()
        x = x.view(-1, C, H, W)
        x = self.resnet(x)
        x = x.view(batch_size, num_frames)
        return x.mean(dim=1).unsqueeze(1)

def load_model(model_path='fake_video_detection_model.pth'):
    model = VideoResNet()
    state_dict = torch.load(model_path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict, strict=False)  # Removed key replacement
    model.eval()
    return model

def process_video(video_path, num_frames=16):
    cap = cv2.VideoCapture(video_path)
    frames = []
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Added normalization
    ])
    
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    selected_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
    
    for i in selected_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = transform(frame)
        frames.append(frame)
    
    cap.release()
    
    if len(frames) == 0:
        raise ValueError("Error processing video: No frames were extracted.")
    
    return torch.stack(frames).unsqueeze(0)

def predict_video(video_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = load_model().to(device)
    model.eval()
    
    frames = process_video(video_path)
    frames = frames.to(device)

    with torch.no_grad():
        output = model(frames)
        probability = torch.sigmoid(output).mean().item()  # Apply sigmoid
    
    confidence = round(probability * 100, 2)
    
    if probability > 0.5:
        return f"Fake Video ({confidence}% confidence)"
    else:
        return f"Real Video ({100 - confidence}% confidence)"

demo = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(label="Upload Video"),
    outputs="text",
    title="Deepfake Video Detector",
    description="Upload a video to check if it's real or fake."
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


