In [None]:
import io
import itertools
import numpy as np
import sklearn.metrics
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf
import datetime
from tensorboard.plugins.hparams import api as hp
import matplotlib.pyplot as plt
import random
import os
from pathlib import Path
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Dataset directories. The defaults are the original absolute paths; set the
# REAL_IMAGE_DIR / FAKE_IMAGE_DIR environment variables to point the notebook
# at a dataset stored elsewhere without editing this cell.
real_dir = os.environ.get('REAL_IMAGE_DIR', 'E:\\schoolprojects\\capstone\\dataset\\real')
fake_dir = os.environ.get('FAKE_IMAGE_DIR', 'E:\\schoolprojects\\capstone\\dataset\\fakeV2\\fake-v2')
# One TensorBoard run directory per execution, named by its start time.
# os.path.join uses the platform separator, so the run always lands inside the
# "logs/fit" directory that the TensorBoard cell opens (hard-coded backslashes
# would create a single oddly named directory on non-Windows systems).
log_dir = os.path.join("logs", "fit", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

In [None]:
# Constants
IMAGE_SIZE = (128, 128)  # target size handed to PIL's resize() in prepare_image
BATCH_SIZE = 32  # batch_size passed to datagen.flow() when fitting
NUM_EPOCHS = 25  # upper bound on epochs; the early-stopping callback may end training sooner
BUFFER_SIZE = 70_000  # NOTE(review): not referenced in the visible cells - confirm it is still needed

In [None]:
# Display real image (os.path.join instead of a hand-written '\\' separator)
Image.open(os.path.join(real_dir, '0.jpg'))

In [None]:
# Display fake image (os.path.join instead of a hand-written '\\' separator)
Image.open(os.path.join(fake_dir, '10.jpg'))

In [None]:
# Adjusts quality of image and converts to jpg
def adjust_image_quality(path, quality):
    """Re-encode the image at `path` as an RGB JPEG at the given quality.

    The JPEG round trip happens in an in-memory buffer rather than a shared
    temporary file on disk, so nothing is left behind in the working directory
    and an image returned by an earlier call cannot be affected by a later one.

    Args:
        path: Location of the source image, as accepted by ``Image.open``.
        quality: JPEG quality setting forwarded to ``Image.save``.

    Returns:
        The re-encoded image, already decoded into memory.
    """
    # The context manager closes the source file once the RGB copy exists.
    with Image.open(path) as source:
        rgb_image = source.convert('RGB')

    buffer = io.BytesIO()
    rgb_image.save(buffer, 'JPEG', quality=quality)
    buffer.seek(0)

    recompressed = Image.open(buffer)
    # Image.open is lazy; decode now so the pixels are read before returning.
    recompressed.load()
    return recompressed

In [None]:
# Testing adjust_image_quality on one real image at quality 80
image1 = adjust_image_quality(os.path.join(real_dir, '0.jpg'), 80)
image1

In [None]:
# Resizes to image size variable and flattens to array
def prepare_image(path, quality=80):
    """Load one image as a flat array of pixel values scaled to [0, 1].

    The image is re-encoded as JPEG by ``adjust_image_quality``, resized to
    ``IMAGE_SIZE`` and flattened to one dimension.

    Args:
        path: Location of the source image.
        quality: JPEG quality used for the re-encoding step. Defaults to 80,
            the value that was previously hard-coded.

    Returns:
        A 1-D numpy array of the pixel values divided by 255.0.
    """
    resized = adjust_image_quality(path, quality).resize(IMAGE_SIZE)
    return np.array(resized).flatten() / 255.0

In [None]:
# Accumulators filled by get_images: images go in image_array and the matching
# label (0 for fake, 1 for real) goes in label_array at the same index
image_array, label_array = [], []

In [None]:
# Iterates through directory for jpgs and pngs and adds to image and label arrays,
# real_or_fake should be a 0 for fake and 1 for real
# Source from https://www.kaggle.com/code/maikonikkobanaag/dl-project-efficientnetb4
# DL Project | EfficientNetB4
def get_images(path, real_or_fake, max_images=10000):
    """Walk `path` and append the jpg/png files found to the global lists.

    Each matching file is run through ``prepare_image`` and appended to
    ``image_array``; ``real_or_fake`` is appended to ``label_array`` at the
    same position.

    The cap is counted per call. Comparing against the total length of
    ``label_array`` would only ever stop the first call, leaving the second
    class uncapped.

    Args:
        path: Root directory to search, including its sub-directories.
        real_or_fake: Label stored for every image found (0 fake, 1 real).
        max_images: Maximum number of images to add in this call, or None for
            no limit. Defaults to 10000.
    """
    if max_images is not None and max_images <= 0:
        return

    added = 0
    for dirname, dirnames, filenames in os.walk(path):
        # Sorting pins the traversal order, so the same files are selected on
        # every run when the cap cuts the walk short.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.endswith(('jpg', 'png')):
                continue
            full_path = os.path.join(dirname, filename)
            image_array.append(prepare_image(full_path))
            label_array.append(real_or_fake)
            added += 1
            if len(label_array) % 1000 == 0:
                print(f'Processing {len(label_array)} images')
            if added == max_images:
                # return, not break: a break would only leave the current
                # directory and the walk would continue into the next one.
                return



In [None]:
# Fill the image and label lists, starting with the real images (label 1)
get_images(path=real_dir, real_or_fake=1)

In [None]:
# Then add the fake images (label 0)
get_images(path=fake_dir, real_or_fake=0)

In [None]:
# Cache the prepared images on disk so later sessions can skip get_images
np.save(file="image_array_file", arr=image_array)

In [None]:
# Cache the labels alongside the images
np.save(file="label_array_file", arr=label_array)

In [None]:
# Read the cached image array back from disk
image_array = np.load(file="image_array_file.npy")

In [None]:
# Read the cached label array back from disk
label_array = np.load(file="label_array_file.npy")

In [None]:
# Confirm length of arrays
# A cell only displays its final expression, so show both lengths as one tuple
# rather than letting the first one be discarded silently.
assert len(image_array) == len(label_array), "image and label counts differ"
len(image_array), len(label_array)

In [None]:
# Format arrays
image_array = np.array(image_array)
# Restore the (height, width, channels) layout of each flattened image. The
# dimensions come from IMAGE_SIZE, which PIL interprets as (width, height), so
# changing the constant keeps this reshape in step with prepare_image.
image_array = image_array.reshape(-1, IMAGE_SIZE[1], IMAGE_SIZE[0], 3)
# One-hot encode the 0/1 labels into two columns
label_array = to_categorical(label_array, 2)

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split the same from run to run
image_train, image_valid, label_train, label_valid = train_test_split(
    image_array,
    label_array,
    test_size=0.2,
    random_state=5,
)

In [None]:
# Discard image_array
# Dropping the reference lets the full image buffer be reclaimed now that the
# train/validation arrays exist. Reshaping does not do that: it returns a view
# that keeps the whole buffer alive. Rebinding to None (rather than `del`)
# keeps this cell safe to re-run.
image_array = None

In [None]:
# Sanity check: images and labels should have matching counts within each split
print(f"{len(image_train)} {len(label_train)}")
print(f"{len(image_valid)} {len(label_valid)}")

In [None]:
# Small CNN: two convolution stages, global average pooling and a two-way
# softmax head (one output per one-hot label column)
model = tf.keras.Sequential([
    # Input shape follows IMAGE_SIZE (PIL order: width, height) instead of a
    # second hard-coded 128, and is declared with an explicit Input object
    # rather than `input_shape` on the first layer.
    tf.keras.Input(shape=(IMAGE_SIZE[1], IMAGE_SIZE[0], 3)),
    tf.keras.layers.Conv2D(50, 5, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(50, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(2, activation='softmax'),
])

In [None]:
# Print the layer table, limited to 75 characters per line
model.summary(line_length=75)

In [None]:
# The model ends in a 2-unit softmax and the labels are one-hot encoded, so
# categorical cross-entropy is the matching loss. For two complementary
# outputs it gives the same value as binary cross-entropy, without suggesting
# a single sigmoid output.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)

In [None]:
# Adam optimizer, the loss defined above, and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

## Callbacks + Fitting

In [None]:
# TensorBoard logging into this run's log_dir, with histograms every epoch
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# Early stopping: end training once the validation loss stops improving
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0,
    patience=2,  # epochs without improvement tolerated before stopping
    verbose=0,
    restore_best_weights=True,  # keep the weights from the best epoch
)

In [None]:
# Augmentation applied on the fly to the training images only
datagen = ImageDataGenerator(
    rotation_range=15,       # random rotation range, in degrees
    width_shift_range=0.3,   # random horizontal shift
    height_shift_range=0.3,  # random vertical shift
    shear_range=0.3,         # random shear
    zoom_range=0.1,          # random zoom
    horizontal_flip=True,    # random left/right mirroring
    fill_mode='nearest',     # how pixels exposed by a transform are filled
)

In [None]:
# Train on augmented batches; validate on the untouched hold-out set
train_batches = datagen.flow(image_train, label_train, batch_size=BATCH_SIZE)
hist = model.fit(
    train_batches,
    epochs=NUM_EPOCHS,
    callbacks=[tensorboard_callback, early_stopping],
    validation_data=(image_valid, label_valid),
    verbose=2,
)

In [None]:
# Persist the trained model in the .keras format
model.save(filepath='model_24-3-19.keras')

## Tensorboard

In [None]:
# (Re)load the TensorBoard notebook extension, then open TensorBoard on the
# "logs/fit" directory, the parent folder that log_dir places each run under.
%reload_ext tensorboard
%tensorboard --logdir "logs/fit"

## Training Curves + Confusion Matrix

In [None]:
# Source from https://www.kaggle.com/code/maikonikkobanaag/dl-project-efficientnetb4
# DL Project | EfficientNetB4
# Plotting: loss on the top panel, accuracy on the bottom panel, one point per
# epoch, training in blue and validation in red
fig, ax = plt.subplots(2, 1)

ax[0].plot(hist.history['loss'], color='b', label="Training loss")
ax[0].plot(hist.history['val_loss'], color='r', label="Validation loss")
ax[0].set_ylabel('Loss')
legend = ax[0].legend(loc='best', shadow=True)

ax[1].plot(hist.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(hist.history['val_accuracy'], color='r', label="Validation accuracy")
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Accuracy')
legend = ax[1].legend(loc='best', shadow=True)

# Labelled axes let the figure be read on its own; tight_layout keeps the
# labels of the two stacked panels from overlapping.
fig.tight_layout()

In [None]:
# Source from https://www.kaggle.com/code/maikonikkobanaag/dl-project-efficientnetb4
# DL Project | EfficientNetB4
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          matrix_cmap=None):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: Square confusion matrix (rows = true labels, columns = predicted).
        classes: Tick labels, one per class, in matrix order.
        normalize: When True, each row is divided by its sum before plotting,
            so both the colours and the printed numbers are row fractions.
            Rows that sum to zero are shown as zeros.
        title: Title placed above the plot.
        cmap: Colormap for the matrix.
        matrix_cmap: Optional override for `cmap`, kept for existing callers;
            when given it takes precedence.
    """
    cm = np.asarray(cm)

    if normalize:
        # Normalize before drawing so the image and the cell text agree.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    colormap = cmap if matrix_cmap is None else matrix_cmap
    plt.imshow(cm, interpolation='nearest', cmap=colormap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Whole numbers for raw counts, two decimals for fractions.
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than the midpoint get white text so it stays readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    # Set the axis labels first so tight_layout can make room for them.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
# Predict on the validation set and turn the softmax / one-hot rows back into
# class indices (0 = fake, 1 = real)
label_pred = model.predict(image_valid)
label_pred_classes = np.argmax(label_pred, axis=1)
label_true = np.argmax(label_valid, axis=1)
# labels=[0, 1] pins the matrix to 2x2 in that order, matching the tick names
confusion_mtx = confusion_matrix(label_true, label_pred_classes, labels=[0, 1])
plot_confusion_matrix(confusion_mtx, classes=['fake', 'real'], matrix_cmap=plt.cm.Reds)
plt.show()