In [None]:
import sys
import sklearn
import tensorflow as tf

import cv2
import pandas as pd
import numpy as np

import plotly.graph_objs as go
from plotly.offline import iplot
from matplotlib import pyplot as plt

In [None]:
# Banner so the notebook's purpose is visible in the rendered output.
print("Deepfake Detection")


In [None]:
# tf.test.is_gpu_available() is deprecated; list the physical GPU devices
# instead and reduce the result to the same True/False answer.
bool(tf.config.list_physical_devices('GPU'))


In [None]:
# Display the installed TensorFlow version for reproducibility.
tf.__version__


In [None]:
import matplotlib.pyplot as plt

# Global matplotlib font sizes used by every figure in this notebook.
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'legend.fontsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

In [None]:
import os

def get_data(csv_path='/kaggle/input/deepfake-faces/metadata.csv'):
    """Load the dataset metadata table.

    Args:
        csv_path: Location of the metadata CSV. Defaults to the Kaggle
            ``deepfake-faces`` path so existing ``get_data()`` calls keep
            working; pass another path to run outside Kaggle.

    Returns:
        A ``pandas.DataFrame`` with the contents of the CSV file.
    """
    return pd.read_csv(csv_path)

In [None]:
# Read the metadata table once and preview its first rows.
meta = get_data()
meta.head()

In [None]:
# (rows, columns) of the full metadata table.
meta.shape


In [None]:
# (number of FAKE rows, number of REAL rows) in the full metadata table.
tuple(int((meta['label'] == tag).sum()) for tag in ('FAKE', 'REAL'))


In [None]:
# Draw the same number of rows from each class so the working set is balanced.
sample_size = 8000

real_df = meta.loc[meta["label"].eq("REAL")].sample(n=sample_size, random_state=42)
fake_df = meta.loc[meta["label"].eq("FAKE")].sample(n=sample_size, random_state=42)

# REAL rows first, then FAKE rows; the original index values are kept.
sample_meta = pd.concat([real_df, fake_df])

In [None]:
from sklearn.model_selection import train_test_split

# Two stratified splits: hold out 20% for testing, then 30% of the remainder
# for validation. The same seed is used for both.
split_seed = 42

train_val_set, Test_set = train_test_split(
    sample_meta,
    test_size=0.2,
    random_state=split_seed,
    stratify=sample_meta['label'],
)
Train_set, Val_set = train_test_split(
    train_val_set,
    test_size=0.3,
    random_state=split_seed,
    stratify=train_val_set['label'],
)

In [None]:
# Shapes of the train, validation and test frames, in that order.
tuple(split.shape for split in (Train_set, Val_set, Test_set))


In [None]:
# Grouped bar chart of the REAL/FAKE counts in each split.
split_names = ['Train Set', 'Validation Set', 'Test Set']
split_labels = [np.array(frame['label']) for frame in (Train_set, Val_set, Test_set)]

# y[0] holds the REAL counts and y[1] the FAKE counts, one entry per split.
y = {
    0: [np.sum(labels == 'REAL') for labels in split_labels],
    1: [np.sum(labels == 'FAKE') for labels in split_labels],
}

trace0, trace1 = (
    go.Bar(
        x=list(split_names),
        y=y[key],
        name=class_name,
        marker=dict(color=bar_color),
        opacity=0.7,
    )
    for key, class_name, bar_color in ((0, 'REAL', '#33cc33'), (1, 'FAKE', '#ff3300'))
)

data = [trace0, trace1]
layout = go.Layout(
    title='Count of classes in each set',
    xaxis=dict(title='Set'),
    yaxis=dict(title='Count'),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Show a 5x5 grid of training images (rows 25..49 of Train_set) with their labels.
plt.figure(figsize=(15,15))
for cur,i in enumerate(Train_set.index[25:50]):
    plt.subplot(5,5,cur+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

    image_path = '/kaggle/input/deepfake-faces/faces_224/'+Train_set.loc[i,'videoname'][:-4]+'.jpg'
    # cv2.imread returns pixels in BGR order, whereas plt.imshow interprets the
    # last axis as RGB; convert first so the colours are displayed correctly.
    image_bgr = cv2.imread(image_path)
    plt.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))

    if(Train_set.loc[i,'label']=='FAKE'):
        plt.xlabel('FAKE Image')
    else:
        plt.xlabel('REAL Image')

plt.show()

In [None]:
def retreive_dataset(set_name, image_dir='/kaggle/input/deepfake-faces/faces_224/'):
    """Load the face images and binary labels for one data split.

    Args:
        set_name: Table with a ``videoname`` column and a ``label`` column
            (indexed as ``set_name['videoname']`` / ``set_name['label']``).
            The image file name is the video name with its last four
            characters replaced by ``.jpg``.
        image_dir: Directory containing the ``.jpg`` face crops. Defaults to
            the Kaggle dataset path used throughout this notebook.

    Returns:
        ``(images, labels)`` as NumPy arrays. Images are in RGB channel
        order; labels are 1 for ``'FAKE'`` and 0 for anything else.

    Raises:
        FileNotFoundError: If an image cannot be read. ``cv2.imread`` returns
            ``None`` instead of raising, which previously ended up as a
            ``None`` entry in the image list.
    """
    images, labels = [], []
    for video_name, image_class in zip(set_name['videoname'], set_name['label']):
        image_path = os.path.join(image_dir, video_name[:-4] + '.jpg')
        image_bgr = cv2.imread(image_path)
        if image_bgr is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV decodes to BGR; convert to RGB so the arrays match what
        # matplotlib displays and what ImageNet-pretrained backbones expect.
        images.append(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
        labels.append(1 if image_class == 'FAKE' else 0)

    return np.array(images), np.array(labels)

In [None]:
# Decode every split into in-memory (images, labels) arrays.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = [
    retreive_dataset(split) for split in (Train_set, Val_set, Test_set)
]

In [None]:
from functools import partial
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

tf.random.set_seed(42)

# Conv2D preset shared by every convolution below; individual layers may
# still override any of these keyword arguments (e.g. kernel_size).
DefaultConv2D = partial(
    Conv2D,
    kernel_size=3,
    padding="same",
    activation="relu",
    kernel_initializer="he_normal",
)

# Convolutional feature extractor operating on 224x224 3-channel images.
feature_layers = [
    DefaultConv2D(filters=64, kernel_size=7, input_shape=[224, 224, 3]),
    MaxPooling2D(),
    BatchNormalization(),

    DefaultConv2D(filters=128),
    DefaultConv2D(filters=128),
    MaxPooling2D(),
    BatchNormalization(),
]

# Dense classifier head ending in a single sigmoid unit.
head_layers = [
    Flatten(),
    Dense(units=128, activation="relu", kernel_initializer="he_normal"),
    Dropout(0.5),
    BatchNormalization(),

    Dense(units=64, activation="relu", kernel_initializer="he_normal"),
    Dropout(0.5),
    BatchNormalization(),

    Dense(units=1, activation="sigmoid"),
]

model = Sequential(feature_layers + head_layers)

# Binary cross-entropy matches the single sigmoid output unit.
optimizer = Adam(learning_rate=0.001)
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)


In [None]:

# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
# Train the baseline CNN on the in-memory arrays, validating after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=64,
)

In [None]:
# Evaluate the trained baseline on the held-out test arrays; `score` keeps
# whatever model.evaluate returns for the compiled loss and metrics.
score = model.evaluate(X_test, y_test)


In [None]:
# Wrap each (images, labels) pair in a tf.data.Dataset of individual examples.
train_set_raw, valid_set_raw, test_set_raw = (
    tf.data.Dataset.from_tensor_slices(pair)
    for pair in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

In [None]:
tf.keras.backend.clear_session()  # extra code – resets layer name counter

batch_size = 32
preprocess = tf.keras.applications.xception.preprocess_input


def _to_xception_input(X, y):
    """Cast the image to float32 and apply Xception preprocessing; the label passes through."""
    return preprocess(tf.cast(X, tf.float32)), y


# Only the training stream is shuffled and prefetched.
train_set = (
    train_set_raw
    .map(_to_xception_input)
    .shuffle(1000, seed=42)
    .batch(batch_size)
    .prefetch(1)
)
valid_set = valid_set_raw.map(_to_xception_input).batch(batch_size)
test_set = test_set_raw.map(_to_xception_input).batch(batch_size)

In [None]:
# Preview the first nine images of one validation batch with their classes.
plt.figure(figsize=(12, 12))
for X_batch, y_batch in valid_set.take(1):
    for index in range(9):
        plt.subplot(3, 3, index + 1)
        # Preprocessed pixels are shifted by +1 and halved to fit imshow()'s 0–1 range.
        rescaled = (X_batch[index] + 1) / 2
        plt.imshow(rescaled)
        classt = 'FAKE' if y_batch[index] == 1 else 'REAL'
        plt.title(f"Class: {classt}")
        plt.axis("off")

plt.show()

In [None]:
# Random flip / rotation / contrast pipeline, seeded for repeatability.
# NOTE(review): in the cells visible here this pipeline is only used for the
# preview plot; it is not attached to the model or the training dataset.
augmentation_layers = [
    tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
    tf.keras.layers.RandomRotation(factor=0.05, seed=42),
    tf.keras.layers.RandomContrast(factor=0.2, seed=42),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

In [None]:
# Preview the first nine images of one validation batch after augmentation.
plt.figure(figsize=(12, 12))
for X_batch, y_batch in valid_set.take(1):
    X_batch_augmented = data_augmentation(X_batch, training=True)
    for index in range(9):
        plt.subplot(3, 3, index + 1)
        # Rescale to the 0-1 range for imshow() and clip, because augmentation
        # (e.g. RandomContrast) can push some values out of bounds.
        rescaled = (X_batch_augmented[index] + 1) / 2
        plt.imshow(np.clip(rescaled, 0, 1))
        classt = 'FAKE' if y_batch[index] == 1 else 'REAL'
        plt.title(f"Class: {classt}")
        plt.axis("off")

plt.show()

In [None]:
tf.random.set_seed(42)  # extra code – ensures reproducibility

# ImageNet-pretrained Xception without its classification top, followed by
# global average pooling and a single sigmoid unit for the binary decision.
keras_layers = tf.keras.layers
base_model = tf.keras.applications.xception.Xception(
    include_top=False,
    weights="imagenet",
)
avg = keras_layers.GlobalAveragePooling2D()(base_model.output)
output = keras_layers.Dense(1, activation="sigmoid")(avg)
model = tf.keras.Model(inputs=base_model.input, outputs=output)

In [None]:
# Freeze every layer of the pretrained base so that, in the next training run,
# only the layers added on top of it are updated.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Compile after changing the trainable flags, then train for a few epochs.
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    train_set,
    validation_data=valid_set,
    epochs=3,
)

In [None]:
# Evaluate the transfer-learning model on the batched, preprocessed test set.
model.evaluate(test_set)


In [None]:
# Fine-tuning stage: unfreeze base layers from index 56 onward and retrain.
# NOTE(review): the learning rate is the same 0.001 used for the frozen-base
# stage; fine-tuning pretrained weights is usually done with a much smaller
# value — confirm this is intentional.
learning_rate = 0.001

for layer in base_model.layers[56:]:
    layer.trainable = True

# Recompile so the updated trainable flags are picked up.
optimizer = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    train_set,
    validation_data=valid_set,
    epochs=20,
)

In [None]:
pip install torch torchvision
pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp38-cp38-linux_x86_64.whl


In [None]:
import torch
import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.distributed.parallel_loader as pl
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import os
from sklearn.model_selection import train_test_split

# Define the Deepfake detection model for images
class ImageDeepfakeModel(nn.Module):
    """ResNet-18 backbone whose final fully connected layer is replaced.

    Args:
        num_classes: Number of output units of the replacement layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        backbone = models.resnet18(pretrained=True)
        # Swap the stock classification layer for one sized to our classes.
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Linear(feature_dim, num_classes)
        self.resnet = backbone

    def forward(self, x):
        """Return the backbone's output for the batch ``x``."""
        logits = self.resnet(x)
        return logits

# Custom dataset class for image-based Deepfake detection
class ImageDataset(Dataset):
    """Image dataset that derives the label from each file name.

    Files in ``data_dir`` with a ``.jpg``, ``.png`` or ``.jpeg`` extension
    (matched case-insensitively) are included. A file is labelled fake (1)
    when its lower-cased name contains ``'fake'`` and real (0) otherwise.

    Args:
        data_dir: Directory that directly contains the image files.
        transform: Optional callable applied to the loaded PIL image.
    """

    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.classes = {'real': 0, 'fake': 1}  # Assign labels manually

        # os.listdir returns entries in arbitrary order; sort them so that
        # index -> file is stable and seeded splits are reproducible.
        self.file_list = sorted(
            name for name in os.listdir(data_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Number of image files found in ``data_dir``."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``."""
        filename = self.file_list[idx]
        filepath = os.path.join(self.data_dir, filename)

        # Read image using PIL and force three channels.
        image = Image.open(filepath).convert('RGB')

        # Assign a label based on the filename
        label = self.classes['fake'] if 'fake' in filename.lower() else self.classes['real']

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Set your data directory
data_directory = "/path/to/your/images"  # Adjust the path accordingly

# Define transformations for the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Create the dataset, then split *indices* rather than the dataset itself.
# Passing the Dataset straight to train_test_split makes scikit-learn index
# every element, which decodes all images into in-memory lists. Splitting the
# index range with the same seed and wrapping the result in Subset keeps
# loading lazy.
dataset = ImageDataset(data_dir=data_directory, transform=transform)
train_indices, val_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

# Training loop run by each spawned process
def train_model(rank, model, train_loader, optimizer, criterion, num_epochs=5):
    """Train ``model`` on the XLA device for ``num_epochs`` epochs.

    Args:
        rank: Process index supplied by the spawner; not used in the body.
        model: Module to train; it is moved to the XLA device first.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer for the model. If it no longer references the
            model's parameters after the device move, it is rebuilt (see
            below).
        criterion: Callable computing the loss from ``(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
    """
    device = xm.xla_device()
    model = model.to(device)

    # Moving a module to another device can replace its Parameter objects. An
    # optimizer built beforehand would then keep updating the stale tensors
    # and the model would never learn. If that happened, rebuild the same
    # optimizer type with its default hyper-parameters over the live
    # parameters (per-group overrides are not carried over).
    live_param_ids = {id(param) for param in model.parameters()}
    tracked_params = [param for group in optimizer.param_groups for param in group['params']]
    if any(id(param) not in live_param_ids for param in tracked_params):
        optimizer = type(optimizer)(model.parameters(), **optimizer.defaults)

    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            # barrier=True issues the XLA step barrier here; it is needed
            # because the batches do not come from an XLA parallel loader
            # that would issue it on every iteration.
            xm.optimizer_step(optimizer, barrier=True)

        print(f"Epoch {epoch + 1}/{num_epochs} completed")

# Create a DataLoader with TPU support
def get_loader(dataset, batch_size=32, shuffle=True):
    """Build a DataLoader whose sampler shards ``dataset`` across XLA replicas.

    The replica count and this process's rank are read from ``xm`` at call
    time, so the function has to be called from the process that will
    consume the loader.

    Args:
        dataset: Map-style dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Whether the distributed sampler shuffles its indices.

    Returns:
        A ``torch.utils.data.DataLoader`` using one worker process and
        dropping the last incomplete batch.
    """
    # The previous unused `device = xm.xla_device()` call was removed: its
    # result was never read and it touched the device as a side effect.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=shuffle
    )
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=1,
        drop_last=True,
    )
    return train_loader

# Create the DataLoader for training
# NOTE(review): get_loader reads xm.xrt_world_size() / xm.get_ordinal() here, in
# the parent process before xmp.spawn runs — confirm the sampler shards the data
# per replica as intended rather than giving every process the same indices.
train_loader = get_loader(train_dataset, batch_size=32, shuffle=True)

# Initialize the model, loss function, and optimizer
# NOTE(review): the optimizer is constructed before train_model moves the model
# to the XLA device — verify it still references the parameters being trained.
model = ImageDeepfakeModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model using TPU: train_model is launched in 8 forked processes and
# receives the process index followed by the arguments in `args`.
xmp.spawn(train_model, args=(model, train_loader, optimizer, criterion), nprocs=8, start_method='fork')


In [None]:
import tensorflow as tf
import cv2
import numpy as np

# Load the saved Keras model from the attached Kaggle dataset; this rebinds
# the notebook-level name `model` used by the prediction code below.
model = tf.keras.models.load_model('/kaggle/input/deepfakemodel/DeepfakeV3.keras')

# Function for image preprocessing
def preprocess_image(image_path):
    """Load one image from disk and shape it as a single-item model batch.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Array of shape ``(1, 224, 224, channels)`` with values divided by 255.

    Raises:
        FileNotFoundError: If the image cannot be read. ``cv2.imread``
            signals failure by returning ``None`` rather than raising, which
            would otherwise surface as an obscure error inside ``cv2.resize``.
    """
    # Load the image using OpenCV
    # NOTE(review): OpenCV decodes to BGR channel order, and pixels are scaled
    # to 0-1 below — confirm both match how the loaded model was trained.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Resize the image to the input size expected by the model
    input_size = (224, 224)  # Adjust based on your model's input size
    image = cv2.resize(image, input_size)

    # Normalize pixel values to be between 0 and 1
    image = image / 255.0

    # Add a leading batch axis so the array is a batch of one image
    image = np.expand_dims(image, axis=0)

    return image

# Example: Test image path
test_image_path = '/kaggle/input/test1234/FakeBirla.jpg'

# Preprocess the image and run it through the loaded model
processed_test_image = preprocess_image(test_image_path)
predictions = model.predict(processed_test_image)

# The model's single output is read as the probability of the "fake" class;
# the image is called fake only above a 0.7 cut-off.
fake_probability = predictions[0, 0]
if fake_probability > 0.7:
    predicted_label = "fake"
else:
    predicted_label = "real"

# Print results
print(f"Predicted Label: {predicted_label}, Predicted Probability (Fake): {fake_probability}, Predicted Probability (Real): {1 - fake_probability}")