In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python Docker image.
import warnings  # Import the 'warnings' module for handling warnings
warnings.filterwarnings("ignore") 

import io
import os
from pathlib import Path
import numpy as np 
import pandas as pd 
import tensorflow as tf

#additional (for visualization)
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image, ImageChops, ImageEnhance

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from tensorflow.keras import Input
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping,  LearningRateScheduler
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.regularizers import l1, l2
from keras.preprocessing.image import ImageDataGenerator,load_img, img_to_array, array_to_img

In [None]:
# Root folder of the training split; the 'Real' and 'Fake' sub-folders are read from here.
dataset_path = Path('/kaggle/input/deepfake-and-real-images/Dataset/Train/')
# Writable folder that receives the model checkpoints; created on first run.
output_directory = '/kaggle/working/output'
os.makedirs(output_directory, exist_ok=True)

In [None]:
# Upper bound on how many files are sampled from each class folder
num_images_per_class = 5000

file_names, labels = [], []

# Walk the two class folders; sorting makes the selected subset deterministic
for label in ('Real', 'Fake'):
    class_files = sorted((dataset_path / label).glob('*.*'))[:num_images_per_class]
    file_names.extend(str(image_file) for image_file in class_files)
    labels.extend([label] * len(class_files))

# One row per image: file path plus its class name
df = pd.DataFrame({"images": file_names, "labels": labels})
print(df.shape)

In [None]:
# Preview the first rows (image path and class name) of the file-level frame.
df.head()

In [None]:
# Sanity check: list the distinct class names that were collected.
df['labels'].unique()

In [None]:
# Hold out 20% of the rows, then halve that hold-out: 80% train / 10% validation / 10% test.
# The fixed random_state keeps the split reproducible between runs.
df_train, df_temp = train_test_split(df, test_size=0.2, random_state=42)
df_val, df_test= train_test_split(df_temp, test_size=0.5, random_state=42)

In [None]:
# Every generator only multiplies pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Create generators
# With class_mode='binary' the class names are encoded as 0/1; the exact mapping is
# exposed as `train_generator.class_indices`.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df_train,
    x_col='images',
    y_col='labels',
    class_mode='binary'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    x_col='images',
    y_col='labels',
    class_mode='binary',
    # Predictions on this generator are compared against `test_generator.classes`,
    # which is stored in dataframe order. Shuffling (the default) would misalign
    # predictions and ground truth, so it is disabled for the test set.
    shuffle=False
)
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=df_val,
    x_col='images',
    y_col='labels',
    class_mode='binary',
)

In [None]:
unique_labels = df_train['labels'].unique()

# Report the encoding the generator really uses. Enumerating `unique_labels` would number
# the classes by order of first appearance in the shuffled frame, which need not match
# the integer the generator assigns to each class.
for label, class_index in sorted(train_generator.class_indices.items(), key=lambda item: item[1]):
    print(f"Class {class_index}: Label {label}")

In [None]:
def plot_history(model_history):
    """Draw training and validation accuracy (in percent) against the epoch index.

    Args:
        model_history: object whose ``history`` mapping holds the per-epoch
            ``'accuracy'`` and ``'val_accuracy'`` sequences (as returned by ``fit``).
    """
    _, axes = plt.subplots(figsize=(15, 5))

    # Convert both fraction-valued curves to percentages before anything is drawn.
    curves = {
        'Training Accuracy': [value * 100 for value in model_history.history['accuracy']],
        'Validation Accuracy': [value * 100 for value in model_history.history['val_accuracy']],
    }
    for curve_label, curve_values in curves.items():
        axes.plot(curve_values, label=curve_label)

    axes.set_xlabel('Epochs')
    axes.set_ylabel('Accuracy (%)')
    axes.set_title('Model Accuracy')
    axes.grid(True)
    axes.legend()

In [None]:
def plot_images(generator, num_images, title):
    """Show the first image of ``num_images`` consecutive batches drawn from ``generator``.

    Args:
        generator: iterator yielding ``(images, labels)`` batches; one batch is
            consumed per displayed image.
        num_images: how many images to display.
        title: prefix used in each subplot title.

    The grid is five columns wide. It has two rows for up to ten images (the original
    layout) and grows by one row per five further images, so larger requests no longer
    fail with an out-of-range subplot index.
    """
    columns = 5
    rows = max(2, -(-num_images // columns))  # ceiling division, never fewer than 2 rows
    plt.figure(figsize=(10, 2.5 * rows))
    for i in range(num_images):
        batch = next(generator)
        image = batch[0][0]  # Extract the first image from the batch
        label = batch[1][0]  # Extract the corresponding label
        plt.subplot(rows, columns, i + 1)
        plt.imshow(image)
        plt.title(f'{title} {i}, Label: {label}', pad=10)
        plt.axis('off')
    plt.subplots_adjust(hspace=0.8)
    plt.show()

In [None]:
def model_eval(model, train_generator, test_generator):
    """Evaluate ``model`` on the training and test data and print loss and accuracy.

    Both evaluations run before anything is printed, so the two summaries appear
    together after any progress output of ``model.evaluate``.
    """
    summaries = (
        ("Training Evaluation:", model.evaluate(train_generator)),
        ("\nTest Evaluation:", model.evaluate(test_generator)),
    )
    for heading, scores in summaries:
        print(heading)
        print("Loss:", scores[0])      # first entry: loss
        print("Accuracy:", scores[1])  # second entry: the compiled metric

In [None]:
def class_report_matrix(model, test_generator, class_names, threshold=0.48):
    """Print a classification report and plot the confusion matrix for a binary model.

    Args:
        model: trained model exposing ``predict_on_batch``.
        test_generator: indexable batch generator (``len(gen)`` batches, ``gen[i]``
            returning ``(images, labels)``), e.g. the result of ``flow_from_dataframe``.
        class_names: tick labels for the heatmap, ordered by encoded label (0 first).
        threshold: probability above which a sample is assigned to class 1.

    Labels and predictions are gathered from the same batches. Reading
    ``test_generator.classes`` and predicting over the whole generator is only correct
    when the generator does not shuffle; pairing them per batch is correct either way.
    """
    true_batches = []
    prob_batches = []
    for batch_index in range(len(test_generator)):
        batch = test_generator[batch_index]
        batch_images, batch_labels = batch[0], batch[1]
        true_batches.append(np.asarray(batch_labels).ravel())
        prob_batches.append(np.asarray(model.predict_on_batch(batch_images)).ravel())

    y_true = np.concatenate(true_batches).astype(int)
    y_pred = (np.concatenate(prob_batches) > threshold).astype(int)

    print(classification_report(y_true, y_pred))
    conf_matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 8))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

In [None]:
# Plot training images
plot_images(train_generator, num_images=3, title='Train')

# Plot validation images (these batches come from validation_generator, not the test set,
# so they are titled accordingly)
plot_images(validation_generator, num_images=3, title='Validation')
unique_labels = df_train['labels'].unique()

# Label encoding shown in the subplot titles:
# 0= Fake
# 1= Real

In [None]:
# Side length in pixels of the square RGB model input: input_shape=(img_shape, img_shape, 3).
img_shape = 256

In [None]:
def _detector1_conv(filters, **layer_options):
    """Build one 3x3 ReLU convolution with He-uniform init and L2(1e-4) kernel penalty."""
    return layers.Conv2D(
        filters=filters,
        kernel_size=(3, 3),
        activation='relu',
        kernel_initializer='he_uniform',
        kernel_regularizer=l2(0.0001),
        **layer_options,
    )


# Baseline CNN on raw RGB images: three conv -> batch-norm -> max-pool stages followed by
# a dense head with a single sigmoid unit (binary real/fake output).
detector1 = models.Sequential([
    # Stage 1: only this convolution uses the default (unpadded) border handling
    _detector1_conv(32, input_shape=(img_shape, img_shape, 3)),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(2, 2)),
    # Stage 2
    _detector1_conv(64, padding="same"),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(3, 3)),
    # Stage 3
    _detector1_conv(128, padding="same"),
    layers.BatchNormalization(),
    layers.MaxPool2D(pool_size=(2, 2)),
    # Classification head
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# detector1 = tf.keras.models.Model(inputs=(input_img), outputs=output_img)
# Binary cross-entropy pairs with the single sigmoid output unit; the 'accuracy' metric
# produces the 'accuracy' / 'val_accuracy' history entries that plot_history reads.
detector1.compile(optimizer=Adam(learning_rate=0.00005), loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Print the layer stack with output shapes and parameter counts.
detector1.summary()

In [None]:
# Checkpoint for the baseline CNN: the file is overwritten only when the validation loss
# improves (monitor="val_loss" with mode="min"), so it ends up holding the best epoch.
checkpoint1 = ModelCheckpoint(os.path.join(output_directory, "cnn_model_weights.h5"), 
                              save_best_only=True, 
                              monitor="val_loss", 
                              mode="min")

In [None]:
# Stop training once the validation loss has not improved for 5 consecutive epochs and
# roll the model back to the best weights seen. This callback object is passed to both
# the detector1 and the detector2 fit calls.
early_stopping = EarlyStopping(monitor='val_loss', 
                               patience=5,
                               mode='min',
                               restore_best_weights=True)

In [None]:
# Train the baseline CNN. 100 epochs is only an upper bound: early_stopping ends the run
# when the validation loss stalls, and checkpoint1 keeps the best epoch on disk.
history1 =detector1.fit(
    train_generator,
    epochs=100,
    validation_data=validation_generator,
    callbacks=[checkpoint1, early_stopping]
)

In [None]:
# Training vs. validation accuracy per epoch for the baseline CNN.
plot_history(history1)

In [None]:
# Reload the file written by checkpoint1 (the epoch with the lowest validation loss).
detector1.load_weights(os.path.join(output_directory, "cnn_model_weights.h5"))

In [None]:
# Loss and accuracy of the reloaded baseline CNN on the training and test generators.
model_eval(detector1,train_generator,test_generator)

In [None]:
# Tick labels must be ordered by encoded label (index 0 first) because the confusion
# matrix lists its rows and columns in ascending label order. The names are taken from the
# generator's own mapping (Fake -> 0, Real -> 1) instead of a hand-written list, which
# previously had the two classes swapped.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
class_report_matrix(detector1,test_generator,class_names)

## Model with error-level analysis

In [None]:
def error_level_analysis(img_path, quality=90):
    """Compute an error-level-analysis (ELA) image for one picture.

    The picture is re-compressed as JPEG at ``quality`` and subtracted from the
    original; the absolute per-pixel difference is then brightened so that its largest
    channel value becomes 255.

    Args:
        img_path: path of the image file to analyse.
        quality: JPEG quality used for the re-compression step (default 90).

    Returns:
        ``(original_rgb, ela_image)`` as two PIL images of the same size.
    """
    # Copy the pixels into an RGB image and release the file handle right away.
    with Image.open(img_path) as source_img:
        og_img = source_img.convert('RGB')

    # Re-compress in memory: no temporary file is left in the working directory and
    # concurrent calls cannot overwrite each other's intermediate JPEG.
    jpeg_buffer = io.BytesIO()
    og_img.save(jpeg_buffer, format='JPEG', quality=quality)
    jpeg_buffer.seek(0)
    with Image.open(jpeg_buffer) as recompressed_img:
        ela_img = ImageChops.difference(og_img, recompressed_img)

    # getextrema() yields one (min, max) pair per channel; scale by the overall maximum.
    max_diff = max(channel_max for _, channel_max in ela_img.getextrema())
    if max_diff == 0:
        max_diff = 1  # identical images: avoid dividing by zero
    scale = 255.0 / max_diff

    ela_img = ImageEnhance.Brightness(ela_img).enhance(scale)

    return og_img, ela_img

In [None]:
def process_images(data_path, label, num_images=5000):
    """Run error-level analysis on up to ``num_images`` JPEG files of one folder.

    Args:
        data_path: directory containing the images.
        label: value stored in the returned label list for every processed image.
        num_images: maximum number of ``.jpg`` files to process.

    Returns:
        ``(act_images, images, labels)``: the original images, their ELA images and one
        ``label`` entry per image, all in the same order.

    File names are filtered to ``.jpg`` first, sorted, and only then truncated. Slicing the
    raw ``os.listdir`` result would depend on the arbitrary directory order and could yield
    fewer than ``num_images`` images when other files are present.
    """
    jpg_names = sorted(name for name in os.listdir(data_path) if name.endswith('.jpg'))

    act_images = []
    images = []
    labels = []

    for filename in jpg_names[:num_images]:
        img_path = os.path.join(data_path, filename)
        act_img, ela_img = error_level_analysis(img_path)
        act_images.append(act_img)
        images.append(ela_img)
        labels.append(label)

    return act_images, images, labels

# Build the ELA data set from the training folders. Label encoding: 1 = real, 0 = fake.
# Each call returns the original images, their ELA images and the matching labels.
act_real_images, real_images, real_labels = process_images('/kaggle/input/deepfake-and-real-images/Dataset/Train/Real', label=1)
act_fake_images, fake_images, fake_labels = process_images('/kaggle/input/deepfake-and-real-images/Dataset/Train/Fake', label=0)

In [None]:
# ELA images with their labels, one frame per class
real_df = pd.DataFrame({'images': real_images, 'labels': real_labels})
fake_df = pd.DataFrame({'images': fake_images, 'labels': fake_labels})
# Matching original (non-ELA) images, in the same row order
ogreal_df = pd.DataFrame({'ogimages': act_real_images})
ogfake_df = pd.DataFrame({'ogimages': act_fake_images})

# Stack real on top of fake; both frames get the same fresh 0..N-1 index
df_ela = pd.concat([real_df, fake_df], ignore_index=True)
df_og = pd.concat([ogreal_df, ogfake_df], ignore_index=True)

# Join column-wise so each row holds an original image next to its ELA image and label.
# Stacking the two frames row-wise would give twice as many rows, each half-filled with NaN.
# NOTE(review): this rebinds `df`, which earlier cells use for the file-path frame.
df = pd.concat([df_og, df_ela], axis=1)

In [None]:
# df_ela.head()

In [None]:
# df.head()

In [None]:
# 80% train, then the remaining 20% is halved into test and validation (10% each).
# Only the ELA frame is split; the fixed random_state keeps the split reproducible.
train_df, test_val_df = train_test_split(df_ela, test_size=0.2, random_state=42)
test_df, val_df = train_test_split(test_val_df, test_size=0.5, random_state=42)

In [None]:
# Convert the PIL ELA images of each split to NumPy pixel arrays
train_images = [np.array(img) for img in train_df['images']]
test_images = [np.array(img) for img in test_df['images']]
val_images = [np.array(img) for img in val_df['images']]

# Stack each split into one array and scale by 1/255. The arrays are created as float32:
# dividing an integer array by 255.0 would produce float64, doubling the memory needed
# for the image tensors.
X_train = np.array(train_images, dtype=np.float32) / 255.0
X_test = np.array(test_images, dtype=np.float32) / 255.0
X_val = np.array(val_images, dtype=np.float32) / 255.0

# Integer targets (1 = real, 0 = fake), aligned row by row with the image arrays
y_train = train_df['labels'].values
y_test = test_df['labels'].values
y_val = val_df['labels'].values

In [None]:
def show_images(X, y, num_images=3):
    """Display the first ``num_images`` entries of ``X`` side by side, titled with ``y``.

    Args:
        X: indexable collection of images accepted by ``plt.imshow``.
        y: indexable collection of labels, aligned with ``X``.
        num_images: number of leading samples to draw in a single row.
    """
    plt.figure(figsize=(10, 5))

    # Subplot positions are 1-based while the samples are indexed from 0.
    for position, sample_index in enumerate(range(num_images), start=1):
        plt.subplot(1, num_images, position)
        plt.imshow(X[sample_index])
        plt.title(f'Label: {y[sample_index]}')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# The arrays shown below hold ELA images (not the original photos), scaled by 1/255.

# Show images in the training set
show_images(X_train, y_train)

# Show images in the testing set
show_images(X_test, y_test)

# Show images in the validation set
show_images(X_val, y_val)


In [None]:
def _detector2_conv(filters, kernel_size, **layer_options):
    """Build one ReLU convolution with He-uniform init and L2(1e-5) kernel penalty."""
    return layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        activation='relu',
        kernel_initializer='he_uniform',
        kernel_regularizer=l2(0.00001),
        **layer_options,
    )


# CNN for the ELA images: three stages of two convolutions -> max-pool -> batch-norm with
# shrinking kernels (3x3, 2x2, 1x1) and growing filter counts (32, 64, 128), followed by a
# dense head with a single sigmoid unit.
detector2 = models.Sequential([
    # Stage 1: the very first convolution uses the default (unpadded) border handling
    _detector2_conv(32, (3, 3), input_shape=(img_shape, img_shape, 3)),
    _detector2_conv(32, (3, 3), padding="same"),
    layers.MaxPool2D(),
    layers.BatchNormalization(),
    # Stage 2
    _detector2_conv(64, (2, 2), padding="same"),
    _detector2_conv(64, (2, 2), padding="same"),
    layers.MaxPool2D(),
    layers.BatchNormalization(),
    # Stage 3
    _detector2_conv(128, (1, 1), padding="same"),
    _detector2_conv(128, (1, 1), padding="same"),
    layers.MaxPool2D(),
    layers.BatchNormalization(),
    # Classification head
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Staircase exponential decay: the rate is multiplied by 0.96 every 100000 optimiser steps.
initial_learning_rate = 0.001
lr_schedule = ExponentialDecay(
    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
)

# NOTE(review): rms_opt is only referenced by a commented-out compile call in this
# notebook; detector2 is compiled with Adam instead.
rms_opt = RMSprop(learning_rate=lr_schedule, rho=0.9, epsilon=1e-08)

In [None]:
# Compile the ELA model with Adam at a fixed learning rate of 1e-6.
# NOTE(review): this rate is 50x smaller than the one used for detector1 (5e-5) - confirm
# it is intentional.
detector2.compile(optimizer=Adam(learning_rate=0.000001), loss='binary_crossentropy', metrics=['accuracy'])
# detector2.compile(optimizer=rms_opt, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer stack with output shapes and parameter counts.
detector2.summary()

In [None]:
# Checkpoint for the ELA model: keep the epoch with the highest validation accuracy.
# Accuracy is a higher-is-better metric, so the comparison mode must be "max"; with "min"
# the callback would keep the epoch with the lowest validation accuracy.
checkpoint2 = ModelCheckpoint(os.path.join(output_directory, "cnn_ela_model_weights.h5"),
                              save_best_only=True,
                              monitor="val_accuracy",
                              mode="max")

In [None]:
# NOTE(review): this definition rebinds the name `lr_schedule`, which an earlier cell
# assigns to an ExponentialDecay schedule object.
def lr_schedule(epoch):
    """Step learning-rate schedule keyed on the epoch index.

    Returns 0.001 before epoch 50, a tenth of that for epochs 50-74 and a hundredth from
    epoch 75 onwards.
    """
    initial_lr = 0.001
    if epoch >= 75:
        return initial_lr * 0.01
    if epoch >= 50:
        return initial_lr * 0.1
    return initial_lr

# Callback that applies the step schedule at the start of every epoch.
# NOTE(review): it is not part of the callbacks list passed to detector2.fit in this notebook.
lr_scheduler = LearningRateScheduler(lr_schedule)

In [None]:
# Train the ELA model on the in-memory arrays. The early_stopping callback is the same
# object used for detector1 (it monitors val_loss); checkpoint2 writes the ELA checkpoint.
history2 = detector2.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[checkpoint2, early_stopping])

In [None]:
# Training vs. validation accuracy per epoch for the ELA model.
plot_history(history2)

In [None]:
# Reload the weights saved by the checkpoint2 callback during training.
detector2.load_weights(os.path.join(output_directory, "cnn_ela_model_weights.h5"))

In [None]:
# Score the ELA model on the training and test arrays; both evaluations run before the
# summaries are printed.
train_evaluation = detector2.evaluate(X_train, y_train)
test_evaluation = detector2.evaluate(X_test, y_test)

for _heading, _scores in (
    ("Training Evaluation:", train_evaluation),
    ("\nTest Evaluation:", test_evaluation),
):
    print(_heading)
    print("Loss:", _scores[0])      # first entry: loss
    print("Accuracy:", _scores[1])  # second entry: the compiled accuracy metric

In [None]:
# Tick labels ordered by encoded label: 0 = Fake, 1 = Real
class_names = ['Fake','Real']
y_true = y_test

# Threshold the sigmoid outputs at 0.5 and drop the trailing singleton axis
y_pred = np.squeeze(np.where(detector2.predict(X_test) > 0.5, 1, 0))

print(classification_report(y_true, y_pred))
conf_matrix = confusion_matrix(y_true, y_pred)

# Annotated heatmap: rows are the true classes, columns the predicted ones
plt.figure(figsize=(8, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Directories of the dataset's predefined Train / Test / Validation splits.
# NOTE(review): these are not referenced by any cell visible in this part of the notebook.
train_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Train'
test_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Test'
validation_dir = '/kaggle/input/deepfake-and-real-images/Dataset/Validation'

# Ensemble Learning Model with ELA images

# Transfer Learning model with VGG19 using ELA images