Set colab to access the images in Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Image sample folder setting

In [None]:
import shutil


# Define the source and destination folders
source_folder = '/content/drive/My Drive/RealData_resized'
destination_folder = '/content/drive/My Drive/Sample/Real'

# Ensure the destination folder exists
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

# Get the list of image files in the source folder
image_files = [f for f in os.listdir(source_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]

# Specify the number of images you want to copy
number_of_images_to_copy = 600

# Copy the specified number of images to the destination folder
for i, image_file in enumerate(image_files):
    if i < number_of_images_to_copy:
        source_path = os.path.join(source_folder, image_file)
        destination_path = os.path.join(destination_folder, image_file)
        shutil.copy(source_path, destination_path)
    else:
        break

print(f'Copied {number_of_images_to_copy} images to {destination_folder}')


NameError: name 'os' is not defined

In [None]:
num_files = len([f for f in os.listdir(destination_folder) if os.path.isfile(os.path.join(destination_folder, f))])

print(f'There are {num_files} files in the folder.')

## Model

Import the required libraries

In [None]:
!pip install tensorflow

In [3]:
import os   # To navigate into the drive folders and access the images
import matplotlib.pyplot as plt   # To make plots
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator  # To process images in batches and apply transformations
from keras.applications import VGG19   # Import the VGG19 model
from keras.models import Sequential    # Allow us to add another layer to the pretrained model that makes the classification
from keras.layers import Flatten, Dense, Dropout   # Flatten the output layer to make it able for input in the Dense layer
from keras.optimizers import Adam    # Import the Adam optimizer


Defining paths to our images

In [4]:
# Define the base directory where your 'sample' folder is located
base_dir = '/content/drive/My Drive/Sample'

# Define the paths to the training, validation, and testing directories
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

Data normalization and ImageDataGenerator instances

In [5]:
# Rescale the pixel values from [0, 255] to [0, 1] for normalization

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen  = ImageDataGenerator(rescale=1./255)
test_datagen  = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224,224),   # This is the input size required for the VGG19 model
    batch_size=20,  # It will be iterating from batches of 20 images
    class_mode= 'binary'  # Label the images in a binary way (Fake or Real)
)

Validation_generator = validation_datagen .flow_from_directory(
    validation_dir,
    target_size=(224,224),   # This is the input size required for the VGG19 model
    batch_size=20,  # It will be iterating from batches of 20 images
    class_mode= 'binary'  # Label the images in a binary way (Fake or Real)
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224,224),   # This is the input size required for the VGG19 model
    batch_size=20,  # It will be iterating from batches of 20 images
    class_mode= 'binary',  # Label the images in a binary way (Fake or Real)
    shuffle=False)  # No need to shuffle the test data

Found 800 images belonging to 2 classes.
Found 200 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [6]:
# Load the VGG19 model pre-trained on ImageNet, excluding its top (fully connected) layers
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers of the base model to prevent them from being updated during training
for layer in base_model.layers:
    layer.trainable = False

# Create a new Sequential model and add the VGG19 base model without dropout
model = Sequential([
    base_model,
    Flatten(),  # Flatten the output of the base model to a 1D vector
    Dense(256, activation='relu'),  # Add a fully connected layer with 256 units and ReLU activation
    Dropout(0.4),  # Add dropout for regularization (reduce overfitting)
    Dense(1, activation='sigmoid')  # Add the output layer with sigmoid activation for binary classification
])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
# Model compilation with Adam activation and binary_crossentropy loss function, good for binary classifications
# Metrics also included

model.compile(optimizer=Adam(learning_rate= 1e-4),
               loss='binary_crossentropy',
               metrics=['accuracy', 'Precision', 'Recall'])

First model training

In [8]:
# Model training
num_epochs = 10

history = model.fit(
    train_generator,
    steps_per_epoch= 40,   # Batch size is 20, 20 times 40 is 800, the size of the training data
    epochs=num_epochs,
    validation_data=Validation_generator,
    validation_steps = 5
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Evaluate the model's performance on the test set
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(test_generator)
print('Test accuracy:', test_accuracy)

Test accuracy: 0.8349999785423279


Based on the results, we se that 8 epochs is the optimal number of epochs to get the best results, we are making a second model increasing the dropout and setting just 8 epochs. Remember that it is working with sample data

In [14]:
# Freeze the layers of the base model to prevent them from being updated during training
for layer in base_model.layers:
    layer.trainable = False


model = Sequential([
    base_model,
    Flatten(),  # Flatten the output of the base model to a 1D vector
    Dense(256, activation='relu'),  # Add a fully connected layer with 256 units and ReLU activation
    Dropout(0.5),  # Add dropout for regularization (reduce overfitting)
    Dense(1, activation='sigmoid')  # Add the output layer with sigmoid activation for binary classification
])

# Model compilation with Adam activation and binary_crossentropy loss function, good for binary classifications
# Metrics also included

model.compile(optimizer=Adam(learning_rate= 1e-4),
               loss='binary_crossentropy',
               metrics=['accuracy', 'Precision', 'Recall'])

# Model training
num_epochs = 8

history = model.fit(
    train_generator,
    steps_per_epoch= 40,   # Batch size is 20, 20 times 40 is 800, the size of the training data
    epochs=num_epochs,
    validation_data=Validation_generator,
    validation_steps = 5
)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [15]:
# Evaluate the model's performance on the test set
test_loss, test_accuracy, test_precision, test_recall = model.evaluate(test_generator)
print('Test accuracy:', test_accuracy)

Test accuracy: 0.8299999833106995
