In [None]:
import os
import random
import shutil
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import ParameterGrid

In [None]:
# Dataset locations. BASE_DIR is the Kaggle input dataset; the two sampled
# directories are working copies created next to the notebook.
BASE_DIR = "/kaggle/input/deepfake-and-real-images/Dataset"
SAMPLED_DIR = "./sampled_data"
GRID_SAMPLED_DIR = "./sampled_data2"

# Seed Python's global `random` generator once, before any image sampling,
# so that re-running the notebook draws the same random numbers.
# NOTE(review): this does not seed TensorFlow or the Keras data generators;
# weight initialisation and batch shuffling remain unseeded.
SEED = 42
random.seed(SEED)

for _working_dir in (SAMPLED_DIR, GRID_SAMPLED_DIR):
    os.makedirs(_working_dir, exist_ok=True)

In [None]:
# Function to sample num_samples images per class
def sample_images(source_dir, dest_dir, num_samples, seed=None):
    """Copy a random subset of the images in ``source_dir`` into ``dest_dir``.

    Args:
        source_dir: Directory whose ``.jpg`` / ``.jpeg`` / ``.png`` files are
            the candidates (extension matched case-insensitively).
        dest_dir: Directory that receives the copies; created if missing.
        num_samples: Maximum number of images to copy. When fewer images are
            available, all of them are copied.
        seed: Optional seed for a private random generator. When ``None``
            the module-level ``random`` state is used.

    Returns:
        The list of file names that were copied.
    """
    os.makedirs(dest_dir, exist_ok=True)
    # Lower-case the name before matching so files such as 'IMG.JPG' are not
    # silently skipped. Sort because os.listdir() returns entries in
    # arbitrary order; without a stable order a seed cannot reproduce the
    # same selection.
    images = sorted(
        name for name in os.listdir(source_dir)
        if name.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    rng = random if seed is None else random.Random(seed)
    selected_images = rng.sample(images, min(len(images), num_samples))
    for img in selected_images:
        shutil.copy(os.path.join(source_dir, img), os.path.join(dest_dir, img))
    return selected_images
        
# For every split and class, copy up to 4000 images into the main working
# set and up to 800 images into the smaller set used by the grid search.
split_label_pairs = [
    (dataset, label)
    for dataset in ["Train", "Validation", "Test"]
    for label in ["Real", "Fake"]
]

for dataset, label in split_label_pairs:
    source = os.path.join(BASE_DIR, dataset, label)
    dest = os.path.join(SAMPLED_DIR, dataset, label)
    dest2 = os.path.join(GRID_SAMPLED_DIR, dataset, label)

    # Main working set first, then the independent grid-search draw.
    sample_images(source, dest, 4000)
    print("Sample Images Done")
    sample_images(source, dest2, 800)
    print("Sample Grid Images Done")

In [None]:
# Show the first file listed in each training class folder, side by side.
train_fake_dir = os.path.join(SAMPLED_DIR, "Train", "Fake")
train_real_dir = os.path.join(SAMPLED_DIR, "Train", "Real")

plt.figure(figsize=(6, 6))
panels = [(train_fake_dir, 'Sample Fake'), (train_real_dir, 'Sample Real')]
for panel_index, (class_dir, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    first_file = os.listdir(class_dir)[0]
    plt.imshow(tf.keras.utils.load_img(os.path.join(class_dir, first_file)))
    plt.title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# Early Stopping
# Shared callback passed to every `fit` call in this notebook. It watches the
# validation loss (lower is better), treats changes smaller than 0.001 as no
# improvement, waits 6 epochs before stopping, and restores the best weights.
# A hand-written callback (`custom_callback`) that stopped once both accuracy
# and val_accuracy exceeded 0.90 is disabled in favour of this one.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.001,
    patience=6,
    restore_best_weights=True,
)

In [None]:
# Grid Model Definition

def create_model(optimiser,lr,batch_s):
    """Build the grid-search data generators and a freshly compiled CNN.

    Args:
        optimiser: Optimiser name; must be exactly "adam" or "rmsprop".
        lr: Learning rate handed to the optimiser.
        batch_s: Batch size used by the train and validation generators.

    Returns:
        A tuple ``(model, train_generator, val_generator)`` where the
        generators read 128x128 images from the ``Train`` and ``Validation``
        folders under ``GRID_SAMPLED_DIR``.

    Raises:
        ValueError: If ``optimiser`` is not a supported name.
    """
    # Validate up front: an unknown name used to fall through to RMSprop,
    # which would record results under the wrong label in a grid search.
    optimiser_classes = {
        "adam": tf.optimizers.Adam,
        "rmsprop": tf.optimizers.RMSprop,
    }
    if optimiser not in optimiser_classes:
        raise ValueError(
            f"Unsupported optimiser {optimiser!r}; expected one of {sorted(optimiser_classes)}"
        )

    def _flow(split):
        # One generator per split; pixel values are rescaled to [0, 1].
        return ImageDataGenerator(rescale=1./255).flow_from_directory(
            os.path.join(GRID_SAMPLED_DIR, split),
            target_size=(128, 128),
            batch_size=batch_s,
            class_mode='categorical',
        )

    # Only the generators that are returned are built; a test generator was
    # never used here and only cost an extra directory scan per grid run.
    grid_train_generator = _flow("Train")
    grid_val_generator = _flow("Validation")

    grid_model = tf.keras.models.Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.20),

        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        BatchNormalization(),
        Dropout(0.20),

        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        BatchNormalization(),
        Dropout(0.20),

        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        BatchNormalization(),
        Dropout(0.20),

        Flatten(),
        Dropout(0.20),
        Dense(256, activation='relu'),
        Dense(2, activation='softmax')
    ])

    # The generators emit one-hot labels (class_mode='categorical') and the
    # head is a 2-way softmax, so categorical cross-entropy is the matching
    # loss. For two classes it yields the same value as the binary
    # cross-entropy used previously.
    grid_model.compile(
        loss="categorical_crossentropy",
        optimizer=optimiser_classes[optimiser](learning_rate=lr),
        metrics=['accuracy'],
    )
    return grid_model,grid_train_generator,grid_val_generator

In [None]:
# Hyper-parameter search space; ParameterGrid iterates over every combination
# of the listed values.
param_grid = dict(
    optimizer=["adam", "rmsprop"],
    batch_size=[32, 64],
    learning_rate=[0.001, 0.0001],
)
grid = ParameterGrid(param_grid)

In [None]:
def train_grid_model(params):
    """Train one grid-search candidate.

    Args:
        params: Mapping with the keys 'optimizer', 'learning_rate' and
            'batch_size' for this run.

    Returns:
        A tuple ``(model, history)`` with the trained model and the object
        returned by ``fit``.
    """
    candidate = create_model(
        params['optimizer'],
        params['learning_rate'],
        params['batch_size'],
    )
    grid_model, train_generator, val_generator = candidate

    # Up to 20 epochs, cut short by the shared early-stopping callback.
    fit_options = dict(
        epochs=20,
        validation_data=val_generator,
        verbose=1,
        callbacks=[early_stopping],
    )
    history = grid_model.fit(train_generator, **fit_options)

    return grid_model, history

In [None]:
# Run every parameter combination and remember the one whose best epoch
# reached the highest validation accuracy.
best_model, best_accuracy, best_params = None, 0, None
val_accuracy_history = {}   # run index -> (params, per-epoch val_accuracy list)
a = 0                       # running index of the current grid run

for params in grid:
    print(f"Training with params: {params}")
    curr_model, history = train_grid_model(params)

    epoch_val_accuracies = history.history['val_accuracy']
    val_accuracy = max(epoch_val_accuracies)
    print(f"Validation Accuracy: {val_accuracy}\n")

    val_accuracy_history[a] = (params, epoch_val_accuracies)
    a += 1

    # Strictly greater: on a tie the earlier combination is kept.
    if val_accuracy > best_accuracy:
        best_model, best_accuracy, best_params = curr_model, val_accuracy, params

In [None]:
# One validation-accuracy curve per grid run, labelled with its parameters.
plt.figure(figsize=(10,10))
for params, acc_history in val_accuracy_history.values():
    plt.plot(acc_history, label=params)
plt.title("Grid Search Params vs Accuracies")
plt.legend()
plt.show()

In [None]:
# Report the winning combination. Compare against None explicitly rather than
# relying on the truthiness of a model object.
if best_model is not None:
    print(f"Best Hyperparameters are : {best_params}")

In [None]:
# Image Data Generators
# Generators for the final run: same preprocessing for all three splits,
# using the batch size selected by the grid search.
best_batch_size = best_params['batch_size']
train_data, validation_data, test_data = (
    os.path.join(SAMPLED_DIR, split) for split in ("Train", "Validation", "Test")
)

# Pixel values are rescaled to [0, 1]; no augmentation is applied.
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

flow_options = dict(
    target_size=(128, 128),
    batch_size=best_batch_size,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_directory(train_data, **flow_options)
val_generator = val_datagen.flow_from_directory(validation_data, **flow_options)
test_generator = test_datagen.flow_from_directory(test_data, **flow_options)

In [None]:
# Model Definition
# Four Conv -> Pool -> BatchNorm -> Dropout stages (the first stage normalises
# before pooling), followed by a dense head with a 2-way softmax output.
model = tf.keras.models.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.20),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    BatchNormalization(),
    Dropout(0.20),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    BatchNormalization(),
    Dropout(0.20),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    BatchNormalization(),
    Dropout(0.20),

    Flatten(),
    Dropout(0.20),
    Dense(256, activation='relu'),
    Dense(2, activation='softmax')
])

# Pick the optimiser chosen by the grid search. An unexpected name raises
# instead of leaving the model uncompiled, which would only surface later as
# a confusing error inside `fit`.
optimizer_classes = {
    'adam': tf.optimizers.Adam,
    'rmsprop': tf.optimizers.RMSprop,
}
optimizer_name = best_params["optimizer"]
if optimizer_name not in optimizer_classes:
    raise ValueError(
        f"Unsupported optimizer {optimizer_name!r}; expected one of {sorted(optimizer_classes)}"
    )

# Labels are one-hot (class_mode='categorical') and the head is a 2-way
# softmax, so categorical cross-entropy is the matching loss. For two classes
# it yields the same value as the binary cross-entropy used previously.
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer_classes[optimizer_name](learning_rate=best_params["learning_rate"]),
    metrics=['accuracy'],
)
    

In [None]:
# Training the Model
# Up to 20 epochs on the full sampled training set, validated every epoch and
# cut short by the shared early-stopping callback.
final_fit_options = dict(
    epochs=20,
    verbose=1,
    callbacks=[early_stopping],
    validation_data=val_generator,
)
history = model.fit(train_generator, **final_fit_options)

# Saving Model
model.save("model.h5")

In [None]:
# Score the trained model on the training and validation generators;
# each evaluate call yields a (loss, accuracy) pair.
train_metrics = model.evaluate(train_generator)
train_loss, train_accuracy = train_metrics
val_metrics = model.evaluate(val_generator)
val_loss, val_accuracy = val_metrics
print(f"Best Model - Training Loss: {train_loss}, Training Accuracy: {train_accuracy}")
print(f"Best Model - Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}")

In [None]:
# Plotting Results
# figsize is (width, height) in inches: two panels placed side by side need a
# wide figure, so use 10x5 rather than a tall, narrow 5x10 canvas.
plt.figure(figsize=(10, 5))
panels = [
    ("accuracy", "Accuracy", "Model Accuracy"),
    ("loss", "Loss", "Model Loss"),
]
for panel_index, (metric, y_label, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'])

# Keep the y-axis label of the right panel from overlapping the left panel.
plt.tight_layout()
plt.show()

In [None]:
# Testing Model on test dataset
# The test generator is not used for training or early stopping above.
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"Best Model - Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")