In [None]:
# Convert images to Error Level Analysis (ELA) images

import io

import numpy as np
from PIL import Image, ImageChops, ImageEnhance

def convert_to_ela_image(path, quality):
    """Return an Error Level Analysis (ELA) image for the file at `path`.

    The image is re-encoded as JPEG at the given quality, the absolute
    per-channel difference to the RGB original is taken, and the brightness
    of that difference is scaled so its largest channel value maps to 255.

    Parameters
    ----------
    path : str or path-like
        Location of the image to analyse.
    quality : int
        JPEG quality used for the recompression step.

    Returns
    -------
    PIL.Image.Image
        The brightness-scaled difference image. If recompression changes
        nothing, the all-black difference image is returned as is.
    """
    # Close the source file handle as soon as the pixels are converted.
    with Image.open(path) as source:
        image = source.convert('RGB')

    # Recompress in memory: no temp file is left in the working directory and
    # parallel calls cannot overwrite each other's intermediate JPEG.
    buffer = io.BytesIO()
    image.save(buffer, 'JPEG', quality=quality)
    buffer.seek(0)
    with Image.open(buffer) as recompressed:
        ela_image = ImageChops.difference(image, recompressed)

    # getextrema() yields one (min, max) pair per channel.
    max_diff = max(channel_max for _, channel_max in ela_image.getextrema())
    if max_diff == 0:
        # Identical images: avoid dividing by zero and leave the image black.
        max_diff = 1
    scale = 255.0 / max_diff

    return ImageEnhance.Brightness(ela_image).enhance(scale)

In [None]:
# Building a CNN Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Baseline classifier: three conv/pool stages with 32, 64 and 128 filters,
# followed by a dense head ending in a single sigmoid unit.
cnn = Sequential()

# The first stage also declares the input shape.
cnn.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
cnn.add(MaxPooling2D(2, 2))

for n_filters in (64, 128):
    cnn.add(Conv2D(n_filters, (3, 3), activation='relu'))
    cnn.add(MaxPooling2D(2, 2))

cnn.add(Flatten())
cnn.add(Dense(512, activation='relu'))
cnn.add(Dense(1, activation='sigmoid'))

cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Building a CNN Model using ResNet50

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam

# Loading pre-trained ResNet50 model (ImageNet weights, without the classification head)
# NOTE(review): these weights were presumably trained with
# resnet50.preprocess_input rather than 1/255 rescaling — confirm the
# preprocessing used by the training pipeline is intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the convolutional base so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adding custom layers: pooled features -> 1024-unit dense -> sigmoid output
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

rn50 = Model(inputs=base_model.input, outputs=predictions)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by recent Keras releases.
rn50.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Building a CNN Model using VGG16

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.optimizers import Adam

# Loading pre-trained VGG16 model (ImageNet weights, without the classification head)
# NOTE(review): these weights were presumably trained with
# vgg16.preprocess_input rather than 1/255 rescaling — confirm the
# preprocessing used by the training pipeline is intended.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the convolutional base so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Adding custom layers: flattened features -> 4096-unit dense -> sigmoid output
x = base_model.output
x = Flatten()(x)
x = Dense(4096, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

vgg16 = Model(inputs=base_model.input, outputs=predictions)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by recent Keras releases.
vgg16.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Training and Testing Pipeline

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import cv2

# Load images, convert them to ELA images, and prepare labels
def load_data(directory, target_size):
    """Load the "real" and "fake" image folders as ELA arrays with labels.

    Every regular file under `directory/real` and `directory/fake` is passed
    through `convert_to_ela_image` (JPEG quality 90), resized to
    `target_size`, and converted to a NumPy array.

    Parameters
    ----------
    directory : str
        Folder containing the `real` and `fake` sub-folders.
    target_size : tuple of int
        Size passed to `Image.resize` for every ELA image.

    Returns
    -------
    tuple of numpy.ndarray
        `(images, labels)` where a label is 1 for "fake" and 0 for "real".
    """
    images = []
    labels = []
    for label in ["real", "fake"]:
        class_dir = os.path.join(directory, label)
        class_id = 1 if label == "fake" else 0
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split done on it) reproducible.
        for file_name in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, file_name)
            if not os.path.isfile(file_path):
                # Skip sub-directories and other non-file entries.
                continue
            ela_image = convert_to_ela_image(file_path, quality=90)
            images.append(np.array(ela_image.resize(target_size)))
            labels.append(class_id)
    return np.array(images), np.array(labels)

# Load the ELA-converted training images with their labels (0 = real, 1 = fake)
images, labels = load_data('./140K-RealFakeImages/real_vs_fake/real-vs-fake/train/', target_size=(150, 150))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Data augmentation
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40, width_shift_range=0.2, 
                                   height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, 
                                   horizontal_flip=True, fill_mode='nearest')
train_generator = train_datagen.flow(X_train, y_train, batch_size=20)

# train_datagen rescales every training batch by 1/255, so the held-out images
# must be scaled the same way; otherwise validation metrics are computed on
# inputs in a different value range than the models are trained on.
X_test_scaled = X_test.astype('float32') * (1. / 255)

# Train the CNN model
hist_cnn = cnn.fit(train_generator, steps_per_epoch=100, epochs=15, validation_data=(X_test_scaled, y_test), validation_steps=50)

# Train the ResNet model
hist_rn50 = rn50.fit(train_generator, steps_per_epoch=100, epochs=15, validation_data=(X_test_scaled, y_test), validation_steps=50)

# Train the VGG16 model
hist_vgg16 = vgg16.fit(train_generator, steps_per_epoch=100, epochs=15, validation_data=(X_test_scaled, y_test), validation_steps=50)

In [None]:
# Confusion Matrices

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrix(y_true, y_pred, title):
    """Show the confusion matrix of `y_pred` against `y_true` as a heatmap.

    The matrix is drawn on a 6x5 inch figure with integer cell annotations
    and the 'Blues' colour map; `title` is used as the plot title.
    """
    counts = confusion_matrix(y_true, y_pred)

    _, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')

    plt.show()

# Predictions from models
# The models are fit on batches rescaled by 1/255 (train_datagen), so the
# held-out images are scaled the same way before inference.
X_test_scaled = X_test.astype('float32') * (1. / 255)

y_pred_cnn = cnn.predict(X_test_scaled)
y_pred_resnet = rn50.predict(X_test_scaled)
y_pred_vgg = vgg16.predict(X_test_scaled)

# Convert predictions to binary (0 or 1) by thresholding the sigmoid output at 0.5
y_pred_cnn_binary = (y_pred_cnn > 0.5).astype(int)
y_pred_resnet_binary = (y_pred_resnet > 0.5).astype(int)
y_pred_vgg_binary = (y_pred_vgg > 0.5).astype(int)

# Plot confusion matrices
plot_confusion_matrix(y_test, y_pred_cnn_binary, 'Basic CNN Model')
plot_confusion_matrix(y_test, y_pred_resnet_binary, 'ResNet50 Model')
plot_confusion_matrix(y_test, y_pred_vgg_binary, 'VGG16 Model')

In [None]:
# Experimenting with Hyper-Parameter Tuning for CNN

import keras_tuner as kt
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential

def build_model(hp):
    """Build and compile the baseline CNN for one hyper-parameter trial.

    Tuned hyper-parameters:
      * 'units' — width of the dense layer, 32 to 512 in steps of 32.
      * 'learning_rate' — Adam learning rate, one of 1e-2, 1e-3 or 1e-4.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Hyper-parameter container supplied by the tuner.

    Returns
    -------
    A compiled Sequential model with binary cross-entropy loss and an
    accuracy metric.
    """
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(hp.Int('units', min_value=32, max_value=512, step=32), activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    # Use the `Adam` class imported earlier in the notebook: the name `tf` is
    # never imported, so `tf.keras.optimizers.Adam` would raise a NameError.
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    return model

# Random search over build_model's hyper-parameters, ranked by validation accuracy
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=3,
    directory='my_dir',
    project_name='cnn_tuning'
)

# train_generator rescales its batches by 1/255, so the validation images must
# be scaled the same way for 'val_accuracy' to be meaningful.
# NOTE(review): the search validates on the same split used for the confusion
# matrices, so that split is no longer an untouched test set — consider a
# separate validation split.
X_test_scaled = X_test.astype('float32') * (1. / 255)

tuner.search(train_generator, 
             steps_per_epoch=100, 
             epochs=10, 
             validation_data=(X_test_scaled, y_test), 
             validation_steps=50)

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Learning Rate: {best_hps.get('learning_rate')}")
print(f"Best Number of Units: {best_hps.get('units')}")