<a href="https://colab.research.google.com/github/ZeynepHeray/DeepFake_Detection/blob/main/deepfake_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# First version of the application.
# Mount Google Drive at /content/drive so the dataset files that the later
# cells open under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install face_recognition

Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566170 sha256=3488bb4ee81f2f31d5a9aae8b78a1768d5f132e12975500a6a217567dceb6a91
  Stored in directory: /root/.cache/pip/wheels/7a/eb/cf/e9eced74122b679557f597bb7c8e4c739cfcac526db1fd523d
Successfully built face-recognition-models
Installing collected packages: face-recognition-models, face_recognition
Successfully installed face-recognition-m

In [None]:
import json
import cv2
import os
import torch
import face_recognition
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Load the training metadata JSON; it is iterated later as
# {video file name: info dict containing a 'label' entry}.
with open('/content/drive/MyDrive/ trainData/metadata.json') as metadata_file:
    labels = json.load(metadata_file)

In [None]:
def extract_frames_and_labels(video_path, label, frame_interval=30):
    """Collect face crops from a video, sampling every `frame_interval`-th frame.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        label: Value recorded once for every face crop taken from this video.
        frame_interval: Sample frames 0, N, 2N, ...; must be >= 1.

    Returns:
        A ``(frames, labels)`` pair of equal-length lists: RGB ``PIL.Image``
        face crops and the matching copies of ``label``. Both lists are empty
        when the video cannot be read or no face is detected.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    video_capture = cv2.VideoCapture(video_path)
    frames = []
    frame_labels = []
    frame_count = 0

    try:
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            if not ret:
                break

            # Only every `frame_interval`-th frame is searched for faces.
            if frame_count % frame_interval == 0:
                # OpenCV decodes frames as BGR, while face_recognition expects
                # RGB input, so convert once before detection and cropping.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                for top, right, bottom, left in face_recognition.face_locations(rgb_frame):
                    face_crop = rgb_frame[top:bottom, left:right]
                    if face_crop.size == 0:
                        # A degenerate box would yield an image that cannot be resized.
                        continue
                    frames.append(Image.fromarray(face_crop))
                    frame_labels.append(label)
            frame_count += 1
    finally:
        # Release the decoder even if detection or conversion raises.
        video_capture.release()

    return frames, frame_labels

In [None]:
# Gather the face crops and labels used for training and validation.
# Label encoding: 'REAL' -> 0, anything else -> 1.
all_frames, all_labels = [], []
for video_file, info in labels.items():
    label = 1 if info['label'] != 'REAL' else 0
    video_path = os.path.join('/content/drive/MyDrive/ trainData/dfdc_train_part_32', video_file)
    frames, frame_labels = extract_frames_and_labels(video_path, label)
    all_frames += frames
    all_labels += frame_labels

# Preprocessing applied to every crop: resize to 128x128, then convert to a tensor.
transform = transforms.Compose([transforms.Resize((128, 128)), transforms.ToTensor()])

# Dataset definition
class DeepfakeFramesDataset(Dataset):
    """Index-aligned pairing of frames and labels with an optional transform.

    Args:
        frames: Indexable collection of samples.
        labels: Indexable collection of labels; ``labels[i]`` belongs to ``frames[i]``.
        transform: Optional callable applied to a frame when it is fetched.
    """

    def __init__(self, frames, labels, transform=None):
        self.frames, self.labels = frames, labels
        self.transform = transform

    def __len__(self):
        # The dataset size is defined by the number of frames.
        return len(self.frames)

    def __getitem__(self, idx):
        sample, target = self.frames[idx], self.labels[idx]
        # A falsy transform means "return the frame untouched".
        if not self.transform:
            return sample, target
        return self.transform(sample), target

# Build the dataset, split it 80/20 into train/validation and wrap both in loaders.
# The split uses a seeded generator so that re-running the notebook reproduces
# the same partition.
# NOTE(review): the split is over individual face crops, so crops taken from the
# same video can end up in both subsets; validation loss may be optimistic.
SPLIT_SEED = 42
dataset = DeepfakeFramesDataset(all_frames, all_labels, transform=transform)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
# Baseline CNN definition
class CNN(nn.Module):
    """Two conv+pool stages followed by two fully connected layers (2 logits).

    The first linear layer is sized for a 64x30x30 feature map, which is what
    two unpadded 3x3 convolutions with 2x2 pooling produce from 128x128 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.fc1 = nn.Linear(in_features=64 * 30 * 30, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        # Each stage: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))
        # Flatten to rows of fc1's expected width (64*30*30).
        x = x.view(-1, self.fc1.in_features)
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


model = CNN()

In [None]:
# Loss function and optimizer for the baseline model:
# cross-entropy over the two output logits, Adam with learning rate 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Train the baseline model and report the mean train/validation loss per epoch.
# Batch variables are named `batch_*` so the loops do not rebind the
# notebook-level `labels` metadata dict that the data-collection cell iterates;
# with the old names that cell could not be re-run after training.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

    # Validation pass: no gradient tracking, model in eval mode.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)
            val_loss += loss.item()
    print(f'Validation Loss: {val_loss/len(val_loader)}')

Epoch 1, Loss: 0.5506237546602885
Validation Loss: 0.5122129457692305
Epoch 2, Loss: 0.5223267258948119
Validation Loss: 0.5161361924062172
Epoch 3, Loss: 0.5158992468049286
Validation Loss: 0.5134463955958685
Epoch 4, Loss: 0.4622532058329809
Validation Loss: 0.43148843695720035
Epoch 5, Loss: 0.39898122374027495
Validation Loss: 0.3620518756409486
Epoch 6, Loss: 0.330874224168756
Validation Loss: 0.3835407417888443
Epoch 7, Loss: 0.29505241760816525
Validation Loss: 0.333775845511506
Epoch 8, Loss: 0.24663479241823394
Validation Loss: 0.3084087944589555
Epoch 9, Loss: 0.1966148923648886
Validation Loss: 0.3226826707832515
Epoch 10, Loss: 0.16003059434196937
Validation Loss: 0.32218177942559123


In [None]:
# Split the test videos into face crops and collect their labels
def extract_test_frames_and_labels(json_path, video_dir, frame_interval=30):
    """Extract face crops and labels for every video listed in a metadata JSON.

    Args:
        json_path: Path of a JSON file mapping video file names to info dicts
            that carry a 'label' entry.
        video_dir: Directory the video file names are joined onto.
        frame_interval: Forwarded to ``extract_frames_and_labels``.

    Returns:
        ``(frames, labels)`` accumulated over all listed videos; a label is 0
        when the metadata label is 'REAL' and 1 otherwise.
    """
    with open(json_path) as metadata_file:
        metadata = json.load(metadata_file)

    collected_frames, collected_labels = [], []
    for name, entry in metadata.items():
        target = 1 if entry['label'] != 'REAL' else 0
        crops, crop_labels = extract_frames_and_labels(
            os.path.join(video_dir, name), target, frame_interval
        )
        collected_frames += crops
        collected_labels += crop_labels
    return collected_frames, collected_labels
# Extract the face crops and labels of the test set
test_frames, test_labels = extract_test_frames_and_labels(
    json_path='/content/drive/MyDrive/testData/metadata.json',
    video_dir='/content/drive/MyDrive/testData/data',
)

# Wrap the test crops in a dataset and a fixed-order (unshuffled) loader,
# reusing the preprocessing applied to the training crops.
test_dataset = DeepfakeFramesDataset(test_frames, test_labels, transform=transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Evaluate a model on the test data
def classify_test_data(test_loader, model):
    """Run `model` over `test_loader` and gather predictions and targets.

    Args:
        test_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module called on each image batch; switched to eval mode first.

    Returns:
        ``(predictions, labels)``: flat lists with one entry per sample, the
        predicted class being the index of the largest output along dim 1.
    """
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch
            scores = model(images)
            predicted = scores.max(dim=1).indices
            predictions += list(predicted.cpu().numpy())
            targets += list(labels.cpu().numpy())
    return predictions, targets

# Score the baseline model on the test loader
predicted_labels, true_labels = classify_test_data(test_loader=test_loader, model=model)

# Performance summary: overall accuracy plus the per-class report
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)
report = classification_report(y_true=true_labels, y_pred=predicted_labels)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n {report}')


Accuracy: 0.6371514694800301
Classification Report:
               precision    recall  f1-score   support

           0       0.14      0.21      0.17       912
           1       0.82      0.73      0.77      4396

    accuracy                           0.64      5308
   macro avg       0.48      0.47      0.47      5308
weighted avg       0.70      0.64      0.66      5308



In [None]:
def _first_face_crop(video_path, frame_interval=30):
    """Return the first face found on a sampled frame as an RGB PIL image, or None.

    Frames 0, N, 2N, ... (N = `frame_interval`) are searched in order; scanning
    stops at the first frame with a detection and only its first face is used.
    """
    video_capture = cv2.VideoCapture(video_path)
    frame_count = 0
    try:
        while video_capture.isOpened():
            ret, frame = video_capture.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                # OpenCV yields BGR frames; face_recognition expects RGB input.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame)
                if face_locations:
                    top, right, bottom, left = face_locations[0]
                    return Image.fromarray(rgb_frame[top:bottom, left:right])
            frame_count += 1
    finally:
        # Always release the decoder, including on the early return above.
        video_capture.release()
    return None


# Spot check: take one face crop from each of the first 10 test videos in which
# a face is found and compare the baseline model's prediction with the label.
sample_frames = []
sample_labels = []
frame_interval = 30

# Loaded under its own name so the `test_labels` list built for the test loader
# is not replaced by this metadata dict.
with open('/content/drive/MyDrive/testData/metadata.json') as f:
    sample_metadata = json.load(f)

for video_file, info in sample_metadata.items():
    if len(sample_frames) >= 10:
        break
    label = 0 if info['label'] == 'REAL' else 1
    video_path = os.path.join('/content/drive/MyDrive/testData/data', video_file)
    face_image = _first_face_crop(video_path, frame_interval)
    if face_image is not None:
        sample_frames.append(face_image)
        sample_labels.append(label)

# Add a batch dimension so each crop can be fed to the model on its own.
sample_transformed_frames = [transform(frame).unsqueeze(0) for frame in sample_frames]
model.eval()
with torch.no_grad():
    for i, frame in enumerate(sample_transformed_frames):
        output = model(frame)
        _, predicted = torch.max(output, 1)
        print(f'Video {i+1}:')
        print(f'Gerçek Etiket: {"REAL" if sample_labels[i] == 0 else "FAKE"}')
        print(f'Tahmin Edilen Etiket: {"REAL" if predicted.item() == 0 else "FAKE"}\n')


Video 1:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: FAKE

Video 2:
Gerçek Etiket: REAL
Tahmin Edilen Etiket: REAL

Video 3:
Gerçek Etiket: REAL
Tahmin Edilen Etiket: FAKE

Video 4:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: REAL

Video 5:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: REAL

Video 6:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: FAKE

Video 7:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: FAKE

Video 8:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: REAL

Video 9:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: FAKE

Video 10:
Gerçek Etiket: FAKE
Tahmin Edilen Etiket: FAKE



In [None]:
class DeeperCNN(nn.Module):
    """Four conv+pool stages followed by three fully connected layers (2 logits).

    Args:
        input_size: Side length of the square input images. Defaults to 128,
            the size produced by the notebook's resize transform.

    The width of the first linear layer is derived from `input_size`: each
    stage applies an unpadded 3x3 convolution (side - 2) and a 2x2 max pool
    (floor division by 2). For 128x128 input this gives 126 -> 63 -> 61 -> 30
    -> 28 -> 14 -> 12 -> 6, i.e. 256*6*6 features (the previous hard-coded
    256*15*15 did not match and broke the forward pass).
    """

    def __init__(self, input_size=128):
        super(DeeperCNN, self).__init__()
        side = input_size
        for _ in range(4):
            side = (side - 2) // 2
        if side < 1:
            raise ValueError(f"input_size {input_size} is too small for four conv+pool stages")
        self.flat_features = 256 * side * side

        self.conv1 = nn.Conv2d(3, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 1)
        self.conv4 = nn.Conv2d(128, 256, 3, 1)
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 2)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = self.pool(self.relu(self.conv4(x)))
        # Flatten everything except the batch dimension, so a size mismatch
        # surfaces as an error in fc1 instead of silently changing the batch size.
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model2 = DeeperCNN()

In [None]:
# Loss and optimizer for the deeper network. The optimizer must be built from
# `model2`'s parameters; previously it (and both forward passes below) used
# `model`, so this cell kept training the baseline CNN and never touched model2.
# `model2` has to accept the 128x128 crops produced by `transform`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model2.parameters(), lr=0.001)

# Train the deeper model and report the mean train/validation loss per epoch.
# Batch variables are named `batch_*` so the notebook-level `labels` metadata
# dict is not overwritten by the loops.
num_epochs = 10
for epoch in range(num_epochs):
    model2.train()
    running_loss = 0.0
    for batch_images, batch_labels in train_loader:
        optimizer.zero_grad()
        outputs = model2(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

    # Validation pass: no gradient tracking, model in eval mode.
    model2.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_images, batch_labels in val_loader:
            outputs = model2(batch_images)
            loss = criterion(outputs, batch_labels)
            val_loss += loss.item()
    print(f'Validation Loss: {val_loss/len(val_loader)}')


Epoch 1, Loss: 0.11825048334147564
Validation Loss: 0.34932170777271193
Epoch 2, Loss: 0.09792607967438738
Validation Loss: 0.3714068247160564
Epoch 3, Loss: 0.09705354839504238
Validation Loss: 0.46602787244288874
Epoch 4, Loss: 0.08054328157453153
Validation Loss: 0.4120370297071834
Epoch 5, Loss: 0.07186811430276269
Validation Loss: 0.4599178630160168
Epoch 6, Loss: 0.06514994791713814
Validation Loss: 0.42928964966752875
Epoch 7, Loss: 0.05923913361593371
Validation Loss: 0.47269613776976865
Epoch 8, Loss: 0.051136774105197264
Validation Loss: 0.4768809984670952
Epoch 9, Loss: 0.046644350541995776
Validation Loss: 0.5004799435070405
Epoch 10, Loss: 0.0463238449283092
Validation Loss: 0.6808922912556833


In [None]:
def classify_test_data(test_loader, model):
    """Run `model` over `test_loader` and gather predictions and targets.

    Args:
        test_loader: Iterable yielding ``(images, labels)`` batches.
        model: Module to evaluate; it is switched to eval mode first.

    Returns:
        ``(all_predictions, all_labels)``: flat lists with one entry per
        sample, the predicted class being the argmax of the outputs along dim 1.
    """
    # Put the model that was passed in into eval mode (not a notebook global),
    # so the function evaluates exactly the network it was given.
    model.eval()
    all_predictions = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return all_predictions, all_labels

# Evaluate the deeper network (`model2`) on the test loader; passing `model`
# here would re-score the baseline CNN instead.
predicted_labels, true_labels = classify_test_data(test_loader, model2)

# Performance summary: overall accuracy plus the per-class report
accuracy = accuracy_score(true_labels, predicted_labels)
report = classification_report(true_labels, predicted_labels)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n {report}')

Accuracy: 0.6554257724189902
Classification Report:
               precision    recall  f1-score   support

           0       0.15      0.22      0.18       912
           1       0.82      0.75      0.78      4396

    accuracy                           0.66      5308
   macro avg       0.49      0.48      0.48      5308
weighted avg       0.71      0.66      0.68      5308

