# Fake Image Detection with Uniform LBP and CNN

 **Real vs Fake Images Detection with LBP(Local Binary Pattern) and CNN**
For more information, check out this repo: [https://github.com/shhotu010/FakeImageDetector](https://github.com/shhotu010/FakeImageDetector)

# Import Libraries

In [None]:
import numpy as np
from sklearn.model_selection import KFold
from tensorflow import keras

In [None]:
# Number of folds for k-fold cross-validation (passed to KFold as n_splits in
# the training cell, which also re-assigns k to the same value).
k = 5

In [None]:
# import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(2)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping

In [None]:
from skimage.feature import local_binary_pattern
from skimage import io
import cv2
from IPython.display import display

In [None]:
from PIL import Image, ImageChops, ImageEnhance
import os
import itertools

## Initial Preparation

### Setup the Path to directories
The dataset paths are chosen according to the environment: Google Colab or the local machine.

In [None]:
# Default dataset locations on the local machine.
authentic_images = r"C:\Users\theCode\Desktop\img_datasets\CASIA2\Au"
tampered_images  = r"C:\Users\theCode\Desktop\img_datasets\CASIA2\Tp"

# Detect Google Colab by probing for its helper package. Only ImportError is
# caught, so any other failure surfaces instead of being silently swallowed.
try:
    from google.colab import drive
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# On Colab, mount Google Drive and point both paths at the copy stored there.
if IN_COLAB:
    drive.mount('/content/drive')
    authentic_images = "/content/drive/MyDrive/CASIA2_Dataset/Authentic_Images"
    tampered_images  = "/content/drive/MyDrive/CASIA2_Dataset/Tampered_Images"
    print("This is GoogleColab")
else:
    print("This is local machine")

# Open a real Image and convert to LBP image

## Open a real image

In [None]:
# Path of one authentic sample; the trailing expression makes the notebook
# display the opened image.
real_image_path = f"{authentic_images}/Au_ani_00001.jpg"
Image.open(real_image_path)

## Convert Real Image to LBP image

In [None]:
# Read the authentic sample as grayscale, compute its LBP map with 8 sampling
# points at radius 1 (default method) and display the result.
real_image = io.imread(real_image_path, as_gray=True)
img_lbp = local_binary_pattern(real_image, P=8, R=1)
io.imshow(img_lbp, cmap='gray')
io.show()

# Open a fake Image and Convert to LBP image 

## Open a fake image

In [None]:
# Path of one tampered sample; the trailing expression makes the notebook
# display the opened image.
fake_image_path = f"{tampered_images}/Tp_D_NRN_S_N_ani10171_ani00001_12458.jpg"
Image.open(fake_image_path)

## Convert Fake Image to LBP Image

In [None]:
# io.imsave("test.png", img_lbp)

In [None]:
# Read the tampered sample with the same grayscale loader that prepare_image
# and the authentic preview use, then display its LBP map (8 points, radius 1).
# io.imread raises on an unreadable path, whereas cv2.imread returns None and
# only fails later inside local_binary_pattern.
fake_image = io.imread(fake_image_path, as_gray=True)
img_lbp = local_binary_pattern(fake_image, 8, 1)
io.imshow(img_lbp, cmap='gray')
io.show()

# Dataset Preparation

## Set the important parameters
**Note:** set these back to their original values after testing.

In [None]:
# Run configuration shared by the data-loading and training cells.
batch_size = 32           # mini-batch size handed to model.fit
epochs = 40               # upper bound on epochs per fold (early stopping may end sooner)
number_of_images = 4000   # cap on the files taken from each dataset directory

## Data preparation starts here 

In [None]:
# Shape each LBP map is brought to by prepare_image; it equals the
# input_shape (128, 128, 3) declared on the CNN's first layer.
image_size = (128, 128, 3)

In [None]:
def prepare_image(image_path):
    """Load an image and turn it into a flat, scaled LBP feature vector.

    The image is read as grayscale, converted to a Local Binary Pattern map
    (8 sampling points, radius 1), resized spatially to the height and width
    given by the module-level ``image_size`` and copied into every channel so
    that the result can be reshaped to ``image_size``.

    Args:
        image_path: Path of the image file to load.

    Returns:
        1-D float array with ``np.prod(image_size)`` elements, divided by 255.
    """
    height, width, channels = image_size
    gray = io.imread(image_path, as_gray=True)
    # 'default' yields the plain 8-bit LBP code (0..255), which is what the
    # /255.0 scaling below expects; 'circular' is not among the method names
    # documented by scikit-image.
    # NOTE(review): the notebook title mentions uniform LBP. If that is the
    # intent, use method='uniform' and adjust the scaling accordingly.
    lbp = local_binary_pattern(gray, 8, 1, method='default')
    # np.resize does not scale an image: it flattens the array and truncates or
    # repeats the raw values. Resize spatially instead; nearest-neighbour
    # interpolation keeps every output value a genuine LBP code.
    lbp = cv2.resize(lbp, (width, height), interpolation=cv2.INTER_NEAREST)
    # Replicate the single LBP plane across the channel axis.
    lbp = np.repeat(lbp[:, :, np.newaxis], channels, axis=2)
    return lbp.flatten() / 255.0

In [None]:
# Sanity-check cell: run prepare_image on the authentic sample and report the
# shape and number of dimensions of the returned array.
prepared_image = prepare_image(real_image_path)
print("Shape of prepared image:", prepared_image.shape)
print("Dimensions of prepared image:", prepared_image.ndim)

## Extract LBP features from Authentic Images
Out of all authentic images, we take a random sample of a fixed number of images for training and testing the model.

In [None]:
X = [] # flattened LBP feature vectors returned by prepare_image
Y = [] # 1 for real, 0 for fake

In [None]:
import random

# Collect LBP features for a random sample of authentic images (label 1).
# NOTE: the only seed set in this notebook is NumPy's, which does not affect
# the `random` module, so the sample differs between runs.
for dirname, _, filenames in os.walk(authentic_images):
    # Filter by extension *before* sampling so that non-image files do not use
    # up part of the number_of_images quota.
    image_files = [name for name in filenames
                   if name.lower().endswith(('jpg', 'bmp'))]
    random.shuffle(image_files)
    for filename in image_files[:number_of_images]:
        full_path = os.path.join(dirname, filename)
        X.append(prepare_image(full_path))
        Y.append(1)
        # Progress marker every 50 processed images.
        if len(Y) % 50 == 0:
            print(f'{len(Y)}', end="...   ")
print("\nTotal images processed: ", len(Y))

## Extract LBP features from Tampered Images
Out of all tampered images, we take a random sample of a fixed number of images for training and testing the model.

In [None]:
# Collect LBP features for a random sample of tampered images (label 0).
# len(Y) also counts entries already appended to Y, so the progress markers and
# the final total are cumulative.
for dirname, _, filenames in os.walk(tampered_images):
    # Filter by extension *before* sampling so that non-image files do not use
    # up part of the number_of_images quota.
    image_files = [name for name in filenames
                   if name.lower().endswith(('jpg', 'tif'))]
    random.shuffle(image_files)
    for filename in image_files[:number_of_images]:
        full_path = os.path.join(dirname, filename)
        X.append(prepare_image(full_path))
        Y.append(0)
        # Progress marker every 50 processed images.
        if len(Y) % 50 == 0:
            print(f'{len(Y)}', end="...   ")
print("\nTotal images processed: ", len(Y))

In [None]:
# One-hot encode the labels over 2 classes, and stack the flat feature vectors
# into a batch of 128 x 128 x 3 arrays for the CNN.
y = to_categorical(Y, 2)
x = np.array(X).reshape(-1, 128, 128, 3)

In [None]:
# Display the container type of the stacked feature array.
type(x)

In [None]:
# Display the shapes of the feature batch and of the one-hot label matrix.
x.shape, y.shape

# CNN Model (using k-fold cross validation)

## Build and Train the Model

In [None]:
import numpy as np
from sklearn.model_selection import KFold
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense

# The cross-validation cell below reads the arrays x (inputs) and y (one-hot labels).

# Number of folds (k); passed to KFold as n_splits.
k = 5

# Define your CNN model architecture
def build_model(input_shape=(128, 128, 3)):
    """Build the (uncompiled) CNN that classifies an LBP image as fake or real.

    Layer stack: two 5x5 'valid' convolutions with 32 filters and ReLU, 2x2 max
    pooling, dropout 0.25, flatten, a 256-unit ReLU dense layer, dropout 0.5 and
    a 2-unit softmax output.

    Args:
        input_shape: Shape of one input sample. Defaults to (128, 128, 3).

    Returns:
        A new ``Sequential`` model with freshly initialised weights; the caller
        is responsible for compiling it.
    """
    return Sequential([
        # Only the first layer declares the input shape; later layers infer
        # theirs from the preceding layer.
        Conv2D(filters=32,
               kernel_size=(5, 5),
               padding='valid',
               activation='relu',
               input_shape=input_shape),
        Conv2D(filters=32,
               kernel_size=(5, 5),
               padding='valid',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ])

In [None]:
# Instantiate the network and give it an initial training configuration:
# Adam optimizer, categorical cross-entropy loss, accuracy metric.
model = build_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
def plot_confusion_matrix(cm,
                          classes=('fake image', 'real image'),
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Plot a confusion matrix as an annotated heat map.

    Args:
        cm: 2-D array of counts. Rows are labelled as true classes and columns
            as predicted classes.
        classes: Tick labels, one per class, in class-index order.
        normalize: If True, every row is divided by its sum before plotting so
            that each cell shows a fraction of its row.
        title: Title of the figure.
        cmap: Matplotlib colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        # Normalise *before* drawing so that the colours, the text threshold
        # and the cell annotations all describe the same values. Rows that sum
        # to zero stay at zero instead of producing NaN.
        row_sums = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm, row_sums,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_sums != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Two decimals for fractional matrices, plain integers for counts.
    fmt = '.2f' if np.issubdtype(cm.dtype, np.floating) else 'd'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    plt.show()

In [None]:
def plot_graph_suresh(hist):
    """Plot training and validation loss (top) and accuracy (bottom) per epoch.

    Args:
        hist: Object whose ``history`` mapping provides the series 'loss',
            'val_loss', 'accuracy' and 'val_accuracy' (this notebook passes the
            value returned by ``model.fit``).
    """
    _, (loss_ax, acc_ax) = plt.subplots(2, 1)

    # Lines are drawn through the axes they belong to, so no explicit `axes=`
    # keyword is needed on the plot calls.
    loss_ax.plot(hist.history['loss'], color='b', label="Training loss", marker='o')
    loss_ax.plot(hist.history['val_loss'], color='r', label="validation loss", marker='o')
    loss_ax.legend(loc='best', shadow=True)

    acc_ax.plot(hist.history['accuracy'], color='b', label="Training accuracy", marker='o')
    acc_ax.plot(hist.history['val_accuracy'], color='r', label="Validation accuracy", marker='o')
    acc_ax.legend(loc='best', shadow=True)

    loss_ax.grid(True)
    acc_ax.grid(True)
    plt.show()

In [None]:
def plot_confusion_matrix_suresh(model, X_val, y_val):
    """Predict on a validation set and plot the resulting confusion matrix.

    Args:
        model: Object with a ``predict`` method returning one score per class
            for every sample.
        X_val: Validation inputs passed to ``model.predict``.
        y_val: One-hot encoded true labels, one row per sample.
    """
    # Per-class scores for every validation sample.
    Y_pred = model.predict(X_val)
    # Collapse scores and one-hot rows to class indices.
    Y_pred_classes = np.argmax(Y_pred, axis=1)
    Y_true = np.argmax(y_val, axis=1)
    # Pin both class ids so the matrix is always 2x2 and lines up with the two
    # tick labels, even if a class is absent from this validation split.
    confusion_mtx = confusion_matrix(Y_true, Y_pred_classes, labels=[0, 1])
    plot_confusion_matrix(confusion_mtx)

In [None]:
def print_fold_number(fold_number):
    """Print a three-line '#' banner announcing the given fold."""
    rule = "##############################################################################################"
    headline = f"#############################  This is fold {fold_number} iteration ######################################"
    for line in (rule, headline, rule):
        print(line)
    
def print_fold_plot_number(fold_number):
    """Print a three-line '*' banner introducing the plots of the given fold."""
    rule = "**********************************************************************************************"
    headline = f"*************************Plots for the {fold_number} fold ************************************"
    print("\n".join((rule, headline, rule)))

In [None]:
# Per-fold validation metrics, averaged once all folds have run.
accuracy_scores = []
loss_scores = []

# k-fold cross-validation over the prepared arrays x / y.
kf = KFold(n_splits=k, shuffle=True, random_state=42)
for fold_number, (train_index, val_index) in enumerate(kf.split(x)):
    print_fold_number(fold_number)
    # Split the data into this fold's training and validation sets.
    X_train, X_val = x[train_index], x[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Start every fold from a freshly initialised network. Reusing one model
    # instance would carry weights over from earlier folds, whose training data
    # includes this fold's validation samples, and so inflate the scores.
    model = build_model()
    optimizer = RMSprop(learning_rate=0.0005, rho=0.9, epsilon=1e-08)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
    # The "accuracy" metric is logged for validation as 'val_accuracy' (the key
    # plot_graph_suresh reads), so that is the name early stopping must monitor.
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=2, verbose=0, mode='auto')
    hist = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
                     validation_data=(X_val, y_val), callbacks=[early_stopping])

    # Loss/accuracy curves and confusion matrix for this fold.
    print_fold_plot_number(fold_number)
    plot_graph_suresh(hist)
    plot_confusion_matrix_suresh(model, X_val, y_val)

    # Record this fold's validation metrics.
    loss, accuracy = model.evaluate(X_val, y_val)
    accuracy_scores.append(accuracy)
    loss_scores.append(loss)

# model, hist, X_val and y_val stay bound to the last fold; later cells reuse them.

# Calculate the average performance metrics
avg_accuracy = np.mean(accuracy_scores)
avg_loss = np.mean(loss_scores)

# Print the average performance metrics
print('Average Accuracy:', avg_accuracy)
print('Average Loss:', avg_loss)

In [None]:
# Show the raw per-fold accuracies and losses collected during cross-validation.
print(accuracy_scores, loss_scores)

## Save/load the Model
Load the model if already saved otherwise save the model

In [None]:
from keras.models import load_model

# Single definition of the model file location, used for loading and saving.
model_path = r"C:\Users\theCode\Desktop\LBP_\03 - LBP_CNN\models\suresh3.h5"

# Reuse a previously saved model when the file exists; otherwise persist the
# model held in `model`. An explicit existence check replaces the former
# catch-all handler, so a genuine load failure (e.g. an unreadable file) is
# reported instead of silently overwriting the saved model.
loaded_model = None
if os.path.isfile(model_path):
    loaded_model = load_model(model_path)
    print('Loaded the saved model.')
else:
    model_dir = os.path.dirname(model_path)
    # dirname is empty when the path has no separator recognised by this OS.
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    model.save(model_path)
    print('Saved the model.')

# Graphs: Training Testing Loss and Accuracy Graph

In [None]:
# Re-plot the curves for `hist`, i.e. the history of the most recent model.fit call.
plot_graph_suresh(hist)

# Confusion matrix

In [None]:
# Confusion matrix of `model` on X_val / y_val as left by the cross-validation loop.
plot_confusion_matrix_suresh(model, X_val, y_val)

# Prediction

In [None]:
# Prefer the model loaded from disk, when there is one, for the predictions below.
if loaded_model is not None:
    model = loaded_model

In [None]:
# Display names indexed by predicted class id: 0 = fake, 1 = real
# (the same encoding used for the labels in Y).
class_names = ['fake', 'real']

## Check authentic image prediction

In [None]:
# Classify one authentic sample: build its features, shape them as a batch of
# one 128 x 128 x 3 input, and report the winning class with its score.
real_image_path = f"{authentic_images}/Au_ani_00002.jpg"
image = prepare_image(real_image_path).reshape(-1, 128, 128, 3)
y_pred = model.predict(image)
y_pred_class = np.argmax(y_pred, axis=1)[0]
print(f'Class: {class_names[y_pred_class]} Confidence: {np.amax(y_pred) * 100:0.2f}')

## Check tampered image prediction

In [None]:
# Classify one tampered sample: build its features, shape them as a batch of
# one 128 x 128 x 3 input, and report the winning class with its score.
fake_image_path = f"{tampered_images}/Tp_D_NRN_S_N_ani10171_ani00001_12458.jpg"
image = prepare_image(fake_image_path).reshape(-1, 128, 128, 3)
y_pred = model.predict(image)
y_pred_class = np.argmax(y_pred, axis=1)[0]
print(f'Class: {class_names[y_pred_class]} Confidence: {np.amax(y_pred) * 100:0.2f}')

## Check tampered images predictions

In [None]:
# Check accuracy for tampered_images: count how many are predicted as class 0.
correct_t = 0
total_t = 0

for dirname, _, filenames in os.walk(tampered_images):
    # Keep image files only, then apply the per-directory cap, so non-image
    # files do not use up part of the quota.
    image_files = [name for name in filenames
                   if name.lower().endswith(('jpg', 'tif'))]
    for filename in image_files[:number_of_images]:
        # Join with the directory currently being walked (not the root), so
        # files inside sub-directories resolve to a path that exists.
        fake_image_path = os.path.join(dirname, filename)
        image = prepare_image(fake_image_path)
        image = image.reshape(-1, 128, 128, 3)
        y_pred = model.predict(image)
        y_pred_class = np.argmax(y_pred, axis=1)[0]
        total_t += 1
        # Class and score are printed for correct detections only.
        if y_pred_class == 0:
            correct_t += 1
            print(f'Class: {class_names[y_pred_class]} Confidence: {np.amax(y_pred) * 100:0.2f}')
        if total_t % 50 == 0:
            print(f'************************ Predicted {total_t} tampered images ***************************')

In [None]:
print("Accuracy of detecting tampered images: ")
# Guard the ratio: total_t stays 0 when no matching file was found.
if total_t:
    print(f'Total: {total_t}, Correct: {correct_t}, Acc: {correct_t / total_t * 100.0}')
else:
    print('No tampered images were evaluated.')

## Check authentic images predictions 

In [None]:
# Check accuracy for authentic_images: count how many are predicted as class 1.
correct_r = 0
total_r = 0

for dirname, _, filenames in os.walk(authentic_images):
    # Keep image files only, then apply the per-directory cap, so non-image
    # files do not use up part of the quota.
    image_files = [name for name in filenames
                   if name.lower().endswith(('jpg', 'bmp'))]
    for filename in image_files[:number_of_images]:
        # Join with the directory currently being walked (not the root), so
        # files inside sub-directories resolve to a path that exists.
        real_image_path = os.path.join(dirname, filename)
        image = prepare_image(real_image_path)
        image = image.reshape(-1, 128, 128, 3)
        y_pred = model.predict(image)
        y_pred_class = np.argmax(y_pred, axis=1)[0]
        total_r += 1
        # Class and score are printed for correct detections only.
        if y_pred_class == 1:
            correct_r += 1
            print(f'Class: {class_names[y_pred_class]} Confidence: {np.amax(y_pred) * 100:0.2f}')
        if total_r % 50 == 0:
            print(f'*********************** Predicted {total_r} authentic images ***************************')

In [None]:
print("Accuracy of detecting authentic images: ")
# Guard the ratio: total_r stays 0 when no matching file was found.
if total_r:
    print(f'Total Images: {total_r}, \nCorrect Detection: {correct_r}, \nAcc: {correct_r / total_r * 100.0}')
else:
    print('No authentic images were evaluated.')

In [None]:
# Pool the tampered and authentic tallies into overall counts.
total = total_t + total_r
correct = correct_t + correct_r

In [None]:
print("Total accuracy of tampering detection: ")
# Guard the ratio: total is 0 when neither evaluation loop found an image.
if total:
    print(f'Total: {total}, Correct: {correct}, Acc: {correct / total * 100.0}')
else:
    print('No images were evaluated.')