In [103]:
# libraries needed for the project - if you don't have any of them, install them using pip install <library name>

import pandas as pd
import numpy as np
import os
import csv
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import os
from PIL import Image
import matplotlib.pyplot as plt # for plotting the images


# libraries for the supervised deep learning classification model
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Model



In [105]:
# Define the image size to resize all images to
img_size = (64, 64)

# Directory that holds the labelled training images (defined once, reused below)
dataset_dir = "../question_01/dataset"

# Get the list of image file names.
# os.listdir returns entries in arbitrary order, so sort them: together with the
# fixed random_state below this makes the train/test split reproducible.
# Hidden entries (e.g. ".DS_Store") and sub-directories are skipped because
# load_img cannot open them.
filenames = sorted(
    name
    for name in os.listdir(dataset_dir)
    if not name.startswith(".") and os.path.isfile(os.path.join(dataset_dir, name))
)

# Initialize lists to store the images and labels
images = []
labels = []

# Load the images and create labels
for filename in filenames:
    # Load the image and resize it
    img = load_img(os.path.join(dataset_dir, filename), target_size=img_size)
    # Convert the image to a numpy array and normalize the pixel values to [0, 1]
    images.append(img_to_array(img) / 255.0)
    # Label from the filename: 0 when it contains "real", 1 otherwise
    labels.append(0 if "real" in filename else 1)

# Convert the lists to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Split the data into a training set (75%) and a test set (25%)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.25, random_state=42)


In [106]:
# Report how many samples ended up in each split
print(f"Number of training examples: {len(X_train)}")
print(f"Number of test examples: {len(X_test)}")


Number of training examples: 112
Number of test examples: 38


In [113]:
def create_model(input_shape=(64, 64, 3)):
    """Build and compile a small CNN for binary image classification.

    The network is two Conv2D + MaxPooling2D stages (32 then 64 filters,
    3x3 kernels, 2x2 pooling), followed by a 128-unit dense layer and a
    single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric, and its summary is
    printed before it is returned.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (64, 64, 3), the shape previously hard-coded here.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential([
        # First convolutional stage: 32 filters, 3x3 kernel, relu activation
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        # Second convolutional stage: 64 filters, 3x3 kernel, relu activation
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Flatten the feature maps so they can feed the dense layers
        Flatten(),
        Dense(128, activation='relu'),
        # Single sigmoid unit: one probability per image
        Dense(1, activation='sigmoid'),
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Print a summary of the model
    model.summary()

    return model


In [119]:
# Build and compile the CNN; create_model() also prints the layer summary
model = create_model()

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_30 (Conv2D)          (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d_30 (MaxPoolin  (None, 31, 31, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_31 (Conv2D)          (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_31 (MaxPoolin  (None, 14, 14, 64)       0         
 g2D)                                                            
                                                                 
 flatten_15 (Flatten)        (None, 12544)             0         
                                                                 
 dense_30 (Dense)            (None, 128)             

In [115]:
'''
THIS HAS BEEN REMOVED IN V2 - DATA AUGMENTATION CAUSED SOME SIGNIFICANT ISSUES - NEED TO REVISIT WHEN TIME PERMITS.
HOWEVER, I WOULD HAVE PERFORMED DATA AUGMENTATION IN THE FOLLOWING WAY TO PREVENT OVERFITTING OF THE MODEL. I TRIED IT 
USING TWO DIFFERENT METHODS BUT IT DID NOT WORK AS EXPECTED. I SUSPECTED THE FIRST 'DATAGEN' WAS ACTUALLY CAUSING OVERFITTING, 
SO I TRIED TO USE FEWER PARAMETERS IN THE SECOND 'DATAGEN', BUT IT DID NOT WORK EITHER. 

THE NEXT THING I WOULD HAVE TRIED IS ALTERING THE MODEL TO PREVENT OVERFITTING. I WOULD ALSO HAVE TRIED ADDING DROPOUT LAYERS.

'''
# data augmentation parameters - used to create more training data and prevent overfitting of the model
# datagen = ImageDataGenerator(
#     rescale=1./255,
#     rotation_range=20,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     fill_mode='nearest'
# )

# datagen = ImageDataGenerator(
#     rescale=1. / 255,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True)

# # fit the data augmentation generator to the training data
# datagen.fit(X_train)

# # fit the model on the augmented training data
# history = model.fit(datagen.flow(X_train, y_train, batch_size=16), epochs=10, validation_data=(X_test, y_test))



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [120]:
# Train for 10 epochs; the held-out split is passed as validation data so
# loss/accuracy on it are reported after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [121]:
# Score the trained model on the held-out test split and report its accuracy
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test accuracy: {accuracy}")


Test accuracy: 0.5263158082962036


In [122]:

# Define the path to the testing data directory.
# Built with os.path.join so the separator matches the host OS (on Windows this
# yields the same "..\\rd_test_dataset\\rd_test_dataset" string as before).
test_data_dir = os.path.join("..", "rd_test_dataset", "rd_test_dataset")

# Set the image size
img_size = (64, 64)

# Initialize lists to store the preprocessed images and predictions
test_images = []
predictions = []

# Load and preprocess the testing images
for filename in os.listdir(test_data_dir):
    # Skip hidden entries such as ".DS_Store", which are not images
    if filename.startswith("."):
        continue
    # Only PNG or JPEG images are fed to the model
    if not (filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg")):
        continue
    # Load the image, resize it, convert it to a numpy array and normalize to [0, 1]
    img = img_to_array(load_img(os.path.join(test_data_dir, filename), target_size=img_size)) / 255.0
    # append the preprocessed image to the list
    test_images.append(img)

# Fail early with a clear message instead of an obscure error from model.predict
if not test_images:
    raise ValueError(f"No .png/.jpg/.jpeg images found in {test_data_dir!r}")

# convert the list of images to a numpy array
test_images = np.array(test_images)

# Make predictions using the model
predictions = model.predict(test_images)





In [123]:
# Names of the images that were actually fed to the model.
# The filter must mirror the one used when the test images were loaded (skip
# hidden files, keep only .png/.jpg/.jpeg); filtering on hidden files alone
# would shift every name after any other non-image file in the directory.
# The directory is listed once here rather than once per prediction.
image_names = [
    filename
    for filename in os.listdir(test_data_dir)
    if not filename.startswith(".") and filename.endswith((".png", ".jpg", ".jpeg"))
]

# Guard against silently pairing predictions with the wrong files
if len(image_names) != len(predictions):
    raise ValueError(
        f"Found {len(image_names)} image files but {len(predictions)} predictions; "
        "the test directory no longer matches the images that were predicted on"
    )

# initialize a list to store the image names, prediction values, and labels
results = []

# convert predicted values to 'real' or 'fake' classification and store the results
for image_name, prediction in zip(image_names, predictions):
    # each prediction row holds a single sigmoid output
    prediction_value = prediction[0]
    # training labels were 0 for "real" and 1 otherwise, so > 0.5 means fake
    label = 'fake' if prediction_value > 0.5 else 'real'
    results.append([image_name, prediction_value, label])

# Save the predictions to a CSV file
output_file = "predictions.csv"
with open(output_file, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Image Name", "Prediction Value", "Classification"])
    writer.writerows(results)

print(f"Predictions saved to {output_file}")


Predictions saved to predictions.csv
