In [3]:
import json
import os
import warnings

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18

# Import read_video eagerly so a broken torchvision install is reported up front.
# NOTE(review): read_video is not referenced by any code visible in this notebook
# (frames are decoded with OpenCV); the name is kept in case other cells use it.
try:
    from torchvision.io import read_video
except ImportError as exc:
    # An ImportError on this line means torchvision.io itself could not be
    # imported, so the message points there instead of blaming PyAV alone.
    raise ImportError(
        "Could not import torchvision.io.read_video; check the torchvision installation. "
        "Decoding with read_video additionally requires PyAV: pip install av"
    ) from exc

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device used:", device)

# Dataset Preparation
class VideoDataset(Dataset):
    """Labelled video dataset that yields the first ``num_frames`` frames of each clip.

    The metadata file is a JSON object keyed by video file name; each value must
    contain a ``'label'`` entry. ``'FAKE'`` maps to 1.0, anything else to 0.0.

    Args:
        folder_path: Directory that contains the video files.
        metadata_path: Path to the JSON metadata file.
        transform: Optional callable applied to each frame as a PIL image. When
            omitted, frames are converted to float CHW tensors scaled to [0, 1].
        num_frames: Number of frames returned per video.
    """

    # Shape of the padding frames used when a video yields no frame at all.
    DEFAULT_FRAME_SHAPE = (3, 224, 224)

    def __init__(self, folder_path, metadata_path, transform=None, num_frames=5):
        self.folder_path = folder_path
        self.transform = transform
        self.num_frames = num_frames
        with open(metadata_path, 'r') as f:
            self.metadata = json.load(f)
        self.videos = list(self.metadata.keys())

    def __len__(self):
        """Return the number of videos listed in the metadata."""
        return len(self.videos)

    def __getitem__(self, idx):
        """Return ``(frames, label)`` for the video at position ``idx``.

        ``frames`` is a stacked tensor with ``num_frames`` entries along dim 0;
        ``label`` is a float32 scalar tensor (1.0 for FAKE, 0.0 otherwise).
        """
        video_name = self.videos[idx]
        video_path = os.path.join(self.folder_path, video_name)
        label = 1 if self.metadata[video_name]['label'] == 'FAKE' else 0

        frames = self._read_frames(video_path)
        if not frames:
            # Keep the best-effort behaviour (all-black sample) but make it visible:
            # a silently black clip with a real label would pollute training.
            warnings.warn(f"No frames could be decoded from {video_path}; using black frames.")

        return self._pad_and_stack(frames), torch.tensor(label, dtype=torch.float32)

    def _read_frames(self, video_path):
        """Decode up to ``num_frames`` leading frames and preprocess each one."""
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            # Read only as many frames as needed (no extra decode after the last one).
            while len(frames) < self.num_frames:
                success, frame = cap.read()
                if not success:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if self.transform:
                    frame = self.transform(Image.fromarray(frame))  # transform expects a PIL image
                else:
                    # Fallback so the default transform=None still yields stackable tensors.
                    frame = torch.from_numpy(frame).permute(2, 0, 1).float() / 255.0
                frames.append(frame)
        finally:
            # Always free the decoder, even if colour conversion or the transform raises.
            cap.release()
        return frames

    def _pad_and_stack(self, frames):
        """Pad ``frames`` with black frames up to ``num_frames`` and stack them."""
        if frames:
            # Match the decoded frames so padding works for any transform output size.
            pad = torch.zeros_like(frames[0])
        else:
            pad = torch.zeros(self.DEFAULT_FRAME_SHAPE)
        padded = frames + [pad] * (self.num_frames - len(frames))
        return torch.stack(padded[:self.num_frames])

# Transform for preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Loading datasets
# The data directory can be overridden with the DEEPFAKE_TRAIN_DIR environment
# variable; the default is the original local path. A raw string needs single
# backslashes (doubling them inside r"..." yields literal double backslashes).
TRAIN_DIR = os.environ.get(
    "DEEPFAKE_TRAIN_DIR",
    r"C:\Users\yakup\OneDrive\Desktop\Special Dataset\train_sample_videos",
)
VAL_FRACTION = 0.2  # share of the labelled videos held out for evaluation
SPLIT_SEED = 42     # fixed seed so the hold-out split is reproducible

metadata_path = os.path.join(TRAIN_DIR, "metadata.json")
train_dataset = VideoDataset(TRAIN_DIR, metadata_path, transform=transform, num_frames=5)

# The evaluation cell iterates over `validation_loader`, which no other visible
# cell defines, so a held-out subset is carved out of the labelled data here.
num_val = int(len(train_dataset) * VAL_FRACTION)
train_subset, validation_subset = torch.utils.data.random_split(
    train_dataset,
    [len(train_dataset) - num_val, num_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_subset, batch_size=4, shuffle=True, num_workers=0, pin_memory=False)
validation_loader = DataLoader(validation_subset, batch_size=4, shuffle=False, num_workers=0, pin_memory=False)

# Model Definition (ResNet-18 as base)
class FakeVideoDetector(nn.Module):
    """Frame-wise ResNet-18 classifier whose per-frame logits are averaged per video.

    The backbone is created with the ``'IMAGENET1K_V1'`` weights and its final
    fully connected layer is replaced by a single-output linear layer.
    """

    def __init__(self):
        super().__init__()
        self.resnet = resnet18(weights='IMAGENET1K_V1')
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 1)  # Binary classification

    def forward(self, x):
        """Return one logit per video.

        Args:
            x: Tensor of shape (B, F, C, H, W) - batch, frames, channels, height, width.

        Returns:
            Tensor of shape (B, 1) holding the mean of the per-frame logits.

        Raises:
            ValueError: If ``x`` is not 5-dimensional.
        """
        if x.dim() != 5:
            raise ValueError(
                f"Expected input of shape (B, F, C, H, W), got {tuple(x.shape)}"
            )
        batch_size, num_frames = x.shape[:2]
        # flatten() copies when needed, so non-contiguous inputs (e.g. permuted
        # tensors) work; view() would raise on them.
        frame_logits = self.resnet(x.flatten(0, 1))  # (B * F, 1): every frame scored independently
        return frame_logits.reshape(batch_size, num_frames, -1).mean(dim=1)  # Average over frames

# Initialize model, loss, optimizer
NUM_EPOCHS = 10
LEARNING_RATE = 1e-4

model = FakeVideoDetector().to(device)
criterion = nn.BCEWithLogitsLoss()  # expects raw logits; applies the sigmoid internally
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training Loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    for videos, labels in train_loader:
        videos, labels = videos.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(videos)
        # squeeze(1) drops only the logit dimension: (B, 1) -> (B,). A bare
        # squeeze() would also drop the batch dimension for a final batch of
        # size 1, and the loss would then reject the mismatching label shape.
        loss = criterion(outputs.squeeze(1), labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    print(f"Epoch {epoch+1}, Loss: {total_loss/max(len(train_loader), 1):.4f}")

Device used: cuda


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\core\src\alloc.cpp:73: error: (-4:Insufficient memory) Failed to allocate 6220800 bytes in function 'cv::OutOfMemoryError'


In [None]:
# Persist the trained parameters (state_dict only) to disk
torch.save(
    obj=model.state_dict(),
    f="fake_video_detector_more_epochs.pth",
)
print("Model saved successfully.")

In [None]:
# Put the in-memory model into evaluation mode. The evaluation and inference
# cells below call model.eval() again after loading weights from disk, so this
# cell matters only when the freshly trained model is used directly.
model.eval()  # Set the model to evaluation mode

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Evaluate the model and gather predictions and true labels
# Load the checkpoint written by the save cell above (same file name), and map
# it onto the active device so a GPU-trained checkpoint also loads on CPU.
model.load_state_dict(torch.load("fake_video_detector_more_epochs.pth", map_location=device))
model.eval()

all_preds = []
all_labels = []

# Perform evaluation
# NOTE: `validation_loader` must be provided by an earlier cell.
with torch.no_grad():
    for videos, labels in validation_loader:
        outputs = model(videos.to(device))
        # reshape(-1) keeps a 1-D vector even for a batch of one; a bare
        # squeeze() would give a 0-d tensor that cannot be extended from.
        preds = (torch.sigmoid(outputs.reshape(-1)) > 0.5).int()
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.int().tolist())

# Compute and display classification report
# Passing labels explicitly keeps both classes in the report/matrix even when
# the evaluated data happens to contain only one of them.
report = classification_report(all_labels, all_preds, labels=[0, 1], target_names=["REAL", "FAKE"])
print("Classification Report:\n")
print(report)

# Compute and plot confusion matrix
cm = confusion_matrix(all_labels, all_preds, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["REAL", "FAKE"])

# Draw on an explicit axes: disp.plot() without `ax` opens its own figure and
# would leave a separately created plt.figure(...) empty.
fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(cmap=plt.cm.Blues, ax=ax)
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
import cv2
import os
import torch
from torchvision.transforms import transforms
from PIL import Image

# Define the test folder path. Set the DEEPFAKE_TEST_DIR environment variable to
# point at another location; the default is the original local path.
test_folder = os.environ.get(
    "DEEPFAKE_TEST_DIR",
    r"C:/Users/yakup/OneDrive/Desktop/Special Dataset/test_videos",
)

# Function to preprocess video frames
def preprocess_video(video_path, transform, num_frames=5):
    """Decode the leading frames of a video into a single-video batch tensor.

    Args:
        video_path: Path of the video file to decode.
        transform: Callable applied to every frame as a PIL image; must return a tensor.
        num_frames: Number of frames to return.

    Returns:
        Tensor with a leading batch dimension of 1 followed by ``num_frames``
        frames. Videos with fewer decodable frames are padded with black frames.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Read only as many frames as needed (no extra decode after the last one).
        while len(frames) < num_frames:
            success, frame = cap.read()
            if not success:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(transform(Image.fromarray(frame)))  # Convert to PIL Image first
    finally:
        # Always free the decoder, even if colour conversion or the transform raises.
        cap.release()

    if frames:
        # Match the decoded frames so padding works for any transform output size.
        pad = torch.zeros_like(frames[0])
    else:
        # Keep the best-effort all-black result, but make the failure visible:
        # the prediction for such an input is meaningless.
        warnings.warn(f"No frames could be decoded from {video_path}; using black frames.")
        pad = torch.zeros((3, 224, 224))

    # Pad with black frames if not enough frames
    frames.extend([pad] * (num_frames - len(frames)))

    return torch.stack(frames[:num_frames]).unsqueeze(0)  # Add batch dimension

# Transform for preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load the trained model
# Use the file name written by the save cell of this notebook, and map the
# checkpoint onto the active device so a GPU-trained checkpoint loads on CPU too.
model.load_state_dict(torch.load("fake_video_detector_more_epochs.pth", map_location=device))
model.eval()

# Run inference on every supported video file found in the test folder
print("Predictions for test videos:\n")
for video_name in os.listdir(test_folder):
    video_path = os.path.join(test_folder, video_name)
    is_video = video_path.lower().endswith(('.mp4', '.avi', '.mov'))  # Check video file extensions
    if is_video:
        try:
            # Decode and preprocess the clip, then move it to the model's device
            video_tensor = preprocess_video(video_path, transform).to(device)

            # Forward pass without gradient tracking; threshold the probability at 0.5
            with torch.no_grad():
                output = model(video_tensor)
                probability = torch.sigmoid(output.squeeze())
                pred = (probability > 0.5).int().item()

            # Translate the class index into its label
            label = ("REAL", "FAKE")[pred == 1]
            print(f"{video_name}: {label}")

        except Exception as e:
            # Best effort: report the failure and continue with the next file
            print(f"Error processing {video_name}: {e}")
