In [None]:
# Final Project - Digital Forensics
# By: Ryan Joseph
# Date: 12/09/2023
# Description: This program trains a model to detect if an image is a deepfake or not.

In [207]:
# Necessary Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3, DenseNet121
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers

In [208]:
# Root folders of the training and testing images, relative to the notebook's
# working directory (they are passed to flow_from_directory further down).
train_img_path, test_img_path = 'data/train', 'data/test'

In [209]:
# Target image width/height (pixels) and number of images per batch.
w, h = 224, 224
batch_size = 256

# Both generators only multiply pixel values by 1/255; no augmentation is applied.
train_data_gen = ImageDataGenerator(rescale=1.0 / 255)
test_data_gen = ImageDataGenerator(rescale=1.0 / 255)

# Directory iterators: one sub-directory per class, labels returned as 0/1
# (class_mode='binary').
# Keras expects target_size as (height, width), so pass (h, w) rather than
# (w, h); the two are identical while the images are square, but the explicit
# order keeps the pipeline correct if w and h ever differ.
train_Dataset = train_data_gen.flow_from_directory(
    train_img_path,
    target_size=(h, w),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,   # reshuffle the training images between epochs
)

test_Dataset = test_data_gen.flow_from_directory(
    test_img_path,
    target_size=(h, w),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,  # keep a fixed order so predictions can be matched to files/labels
)

Found 100000 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


In [210]:
# Show the class-name -> integer-label mapping used by the test iterator.
# The iterators use class_mode='binary' and the model ends in a single sigmoid
# unit, so the network output is the predicted probability of the class
# mapped to 1.
test_Dataset.class_indices

{'FAKE': 0, 'REAL': 1}

In [211]:
# Show the per-image shape produced by the test iterator; it has to match the
# input_shape the base network is built with.
test_Dataset.image_shape

(224, 224, 3)

In [212]:
# Load DenseNet121 with ImageNet weights and without its classification head,
# so it acts as a convolutional feature extractor. The input shape is
# (height, width, channels) and is taken from the image size configured for
# the data generators instead of being hard-coded a second time.
base_model = DenseNet121(weights="imagenet", include_top=False, input_shape=(h, w, 3))

# Freeze the whole DenseNet121 backbone: only the new head below is trained.
# This must happen before compile() for the setting to take effect.
base_model.trainable = False

# Stack a small binary-classification head on top of the frozen backbone.
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())        # feature maps -> one vector per image
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))                    # regularise the dense head
model.add(layers.Dense(1, activation='sigmoid'))  # probability of the class labelled 1

# Compile the model
model.compile(optimizer=optimizers.Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()



Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet121 (Functional)    (None, 7, 7, 1024)        7037504   
                                                                 
 global_average_pooling2d_2  (None, 1024)              0         
 2 (GlobalAveragePooling2D)                                      
                                                                 
 dense_49 (Dense)            (None, 256)               262400    
                                                                 
 dropout_27 (Dropout)        (None, 256)               0         
                                                                 
 dense_50 (Dense)            (None, 1)                 257       
                                                                 
Total params: 7300161 (27.85 MB)
Trainable params: 262657 (1.00 MB)
Non-trainable params: 7037504 (26.85 MB)
__________

In [213]:
# Resume from the best previously saved weights so training continues rather
# than starting from the freshly initialised head.
# NOTE(review): this requires 'weights/resnet.h5' to already exist, so the cell
# fails on a clean checkout — confirm where the checkpoint comes from.
# NOTE(review): the file name says "resnet" although the model built in this
# notebook uses a DenseNet121 backbone — presumably a leftover name; verify the
# checkpoint matches this architecture.
model.load_weights('weights/resnet.h5')

In [214]:
# number of epochs
EPOCHS = 2

# Train the model.
# Step counts come from the generators themselves (len() = batches per full
# pass) instead of hard-coded dataset sizes, so they stay correct if the data
# or batch_size change and the final partial batch is not dropped.
# NOTE(review): the test set doubles as the validation set here, so the
# validation metrics are not an independent estimate if they are used to pick
# a checkpoint or tune hyper-parameters.
history = model.fit(
    train_Dataset,
    steps_per_epoch=len(train_Dataset),
    validation_data=test_Dataset,
    validation_steps=len(test_Dataset),
    epochs=EPOCHS,
)

Epoch 1/2
 15/390 [>.............................] - ETA: 32:26 - loss: 0.3305 - accuracy: 0.8565

KeyboardInterrupt: 

In [None]:
# Save the trained model to disk.
# NOTE(review): the file name says "resnet" although the backbone built in this
# notebook is DenseNet121; the "_15" suffix presumably encodes an epoch count —
# confirm. The path is left unchanged so existing checkpoints keep their names.
model.save('weights/resnet_15.h5')

In [None]:
# Plot the training and validation accuracy and loss recorded for each epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Use the number of epochs actually recorded rather than the EPOCHS constant:
# the two differ if EPOCHS was edited after training or training stopped early,
# and a length mismatch makes plt.plot raise.
epochs_range = range(len(acc))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

fig.tight_layout()  # keep the two titles and axis labels from overlapping
plt.show()

In [None]:
# Evaluate the model on the complete test set.
# The test iterator is created with shuffle=False, so it walks the files class
# by class. Limiting evaluation to a handful of batches (previously steps=10)
# would therefore score only the first images of the first class and give a
# misleading accuracy; len(test_Dataset) covers every batch exactly once.
score = model.evaluate(test_Dataset,
                       steps=len(test_Dataset),
                       verbose=1)
# score follows the order [loss, *metrics]; the model is compiled with
# metrics=['accuracy'], so index 1 is the accuracy.
print("Test loss:", score[0])
print("Test accuracy:", score[1])