<a href="https://colab.research.google.com/github/hnarges91/Fake-Face-Detection/blob/main/deepfake_detection_ipyn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#setup
#reset variavle value
from IPython import get_ipython
get_ipython().magic('reset -sf')
#intellisence
%config IPCcompleter.greedy = True
import numpy as np
np.random.seed(400)  # for reproducibility

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True remounts even if
# a mount is already present.
drive.mount('/content/drive', force_remount=True)
# Make the Drive root the working directory for the rest of the notebook.
%cd /content/drive/My Drive/

Mounted at /content/drive
/content/drive/My Drive


In [None]:
# #download the dataset
# !unzip '/content/drive/My Drive/datasets/archive.zip'

In [None]:
import os  # File-system access for listing the dataset folders

# Sorted listing of every entry in the 'real' class folder, and how many there are
directory_path = "/content/drive/My Drive/datasets/data_140k/real/"
Images = sorted(os.listdir(directory_path))
print(len(Images))

# Same for the 'fake' class folder
directory_path = "/content/drive/My Drive/datasets/data_140k/fake/"
Images2 = sorted(os.listdir(directory_path))
print(len(Images2))


2000
2000


# Pretrained model selection

In [None]:
from tensorflow.keras.layers import Input, Flatten, Dense  # Import layers from Keras
from tensorflow.keras.models import Model  # Import Model class from Keras
from tensorflow.keras import applications  # Import applications module from Keras

import tensorflow as tf  # Import TensorFlow library

# Import VGG16 model and preprocessing functions from Keras
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input

# Import VGG19 model and preprocessing functions from Keras
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input

# Import ResNet50 model and preprocessing functions from Keras
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

def choose_model(model_name):
    """
    Build a truncated, ImageNet-pretrained backbone for feature extraction.

    The backbone is loaded without its classification head on a fixed
    256x256x3 input and cut at an intermediate layer; that layer's output
    becomes the output of the returned model.

    Parameters:
        model_name (str): One of 'vgg16', 'vgg19', 'resnet50', or the same
            names prefixed with 'finetune'. Matching is case-sensitive.

    Returns:
        tensorflow.keras.Model or None: The truncated feature-extraction
        model, or None when `model_name` is not supported.
    """
    print("Selecting model...")  # Print status message

    # Resolve the name before touching Keras so that an unsupported name
    # never allocates an input tensor or downloads weights.
    # Each branch picks (backbone class name, index of the layer to cut at).
    if model_name in ['vgg16', 'finetunevgg16']:
        backbone_name, cut_layer = 'VGG16', -5
    elif model_name in ['vgg19', 'finetunevgg19']:
        backbone_name, cut_layer = 'VGG19', -6
    elif model_name in ['resnet50', 'finetuneresnet50']:
        backbone_name, cut_layer = 'ResNet50', 81
    else:
        print(f"Error: Unsupported model '{model_name}'.")
        # A single None (not a tuple) so that callers' `is None` checks
        # work and the failure type matches the success type (one object).
        return None

    # Define input tensor shape
    input_tensor = Input(shape=(256, 256, 3))

    # Load the backbone without the top layers, using the input tensor defined
    backbone_cls = getattr(applications, backbone_name)
    model = backbone_cls(weights='imagenet', include_top=False, input_tensor=input_tensor)

    # Create the feature-extraction model, ending at the chosen layer
    custom_model = Model(model.inputs, model.layers[cut_layer].output)

    print("Model selected.")  # Print status message

    return custom_model  # Return the custom feature extraction model


# Reading images

In [None]:
import cv2  # Import OpenCV for image processing
import numpy as np  # Import NumPy for numerical operations

def Image_reader(path, class_name):
    """
    Read and resize every image of one class from the dataset directory.

    Files are visited in sorted filename order so that the returned list is
    the same on every run (os.listdir alone gives an arbitrary order, which
    would defeat the notebook's fixed random seed).

    Parameters:
        path (str): Path to the dataset directory.
        class_name (str): Name of the sub-directory holding the class images.

    Returns:
        list: Images resized to 256x256, exactly as decoded by cv2.imread
        (OpenCV decodes colour images in BGR channel order). Empty when the
        class directory is missing or contains no readable image.
    """
    print("Reading images...")  # Print status message

    # Specify the directory path for the given class
    directory_path = os.path.join(path, class_name)

    # isdir (not exists): a regular file with this name must be rejected
    # here, otherwise os.listdir below raises NotADirectoryError.
    if not os.path.isdir(directory_path):
        print(f"Error: Directory '{directory_path}' does not exist.")
        return []  # Return an empty list

    images = []
    image_suffixes = ('.png', '.jpg', '.jpeg')

    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)

        # Skip sub-directories and anything that is not an image file
        if not (os.path.isfile(file_path) and filename.lower().endswith(image_suffixes)):
            continue

        image = cv2.imread(file_path)

        # cv2.imread signals an unreadable/corrupt file by returning None
        if image is None:
            print(f"Warning: Could not read image '{file_path}'; skipping.")
            continue

        # Resize to the fixed 256x256 size and keep it
        images.append(cv2.resize(image, (256, 256)))

    # Print the number of images read
    print(f"Read {len(images)} images.")

    return images  # Return the list of images


# Feature extraction

In [None]:
import numpy as np  # Import NumPy for numerical operations
import os  # Import the os module to interact with the operating system's file system

def feature_extractor_image(path, model_name, class_name, feature_kind):
    """
    Extract one feature vector per image of a class with a pretrained backbone.

    Each image is passed through the truncated backbone returned by
    choose_model, and the resulting feature map is reduced by
    calculate_correlation.

    Parameters:
        path (str): Path to the dataset directory.
        model_name (str): Name of the backbone, as accepted by choose_model.
        class_name (str): Name of the class sub-directory to read.
        feature_kind (str): Feature type; only "correlation" is implemented.

    Returns:
        numpy.ndarray or None: Array with one row of features per image, or
        None when the backbone could not be created.

    Raises:
        ValueError: If `feature_kind` is not supported or no image could be
            read for the class.
    """
    # Reject unknown feature kinds up front. Previously this case fell through
    # to the final conversion with `frame_features` never assigned.
    if feature_kind != "correlation":
        raise ValueError(f"Unsupported feature kind '{feature_kind}'.")

    # Select the appropriate model for feature extraction
    custom_model2 = choose_model(model_name)
    # Treat both None and a tuple such as (None, None) as "no model".
    if custom_model2 is None or isinstance(custom_model2, tuple):
        print("Error: Failed to load model.")
        return None

    # Load images from the specified dataset and class
    frames_list = Image_reader(path, class_name)
    if len(frames_list) == 0:
        raise ValueError(f"No images could be read for class '{class_name}' in '{path}'.")
    print("Feature extracting...")

    # Convert the list of images to a NumPy array
    img_data = np.asarray(frames_list)
    del frames_list  # Release memory by deleting the original list

    # NOTE(review): Image_reader decodes with cv2.imread (BGR channel order),
    # while Keras preprocess_input is documented for RGB input — confirm
    # whether a channel swap is wanted before preprocessing.
    frame_features = []
    for img in img_data:
        # Reset Keras global state between images (kept from the original
        # as a memory mitigation)
        keras.backend.clear_session()
        # Add the batch dimension expected by the model
        img = np.expand_dims(img, axis=0)
        # Preprocess the image for the model
        img = preprocess_input(img)
        # Predict features using the custom model
        features = custom_model2.predict(img)
        # Remove the batch dimension from the features
        features = np.reshape(features, np.shape(features)[1:])
        # Log the shape of the extracted features
        print(np.shape(features))
        # Compute the correlation-based features
        frame_features.append(calculate_correlation(features))
        # Log the running shape without converting the whole list to an
        # array on every iteration (that conversion made the loop quadratic).
        print("frame_features", (len(frame_features),) + np.shape(frame_features[-1]))
        # Free up memory by deleting intermediate variables
        del features, img

    # Convert the list of frame features to a NumPy array
    frame_features = np.array(frame_features)
    # Log the final shape of the extracted features
    print(np.shape(frame_features))
    return frame_features  # Return the extracted features


# correlation

In [None]:
import numpy as np  # Import NumPy for numerical operations

def calculate_correlation(feature_map):
    """
    Compute the channel-by-channel Gram matrix of a feature map and return
    its lower triangle (diagonal included) as a flat vector.

    Entry (i, j) of the Gram matrix is the dot product between the flattened
    spatial maps of channels i and j; the values are neither mean-centered
    nor normalized.

    Parameters:
    - feature_map (numpy.ndarray): Feature map with shape (m, n, c), where c is the number of channels.

    Returns:
    - numpy.ndarray: 1-D array of length c * (c + 1) / 2 holding the lower
      triangular entries in row-major order.

    Raises:
    - ValueError: If `feature_map` is not 3-dimensional.
    """
    feature_map = np.asarray(feature_map)
    if feature_map.ndim != 3:
        raise ValueError(
            f"feature_map must have shape (m, n, c); got shape {feature_map.shape}"
        )

    # Get the shape of the feature map
    m, n, c = feature_map.shape

    # Row j holds channel j flattened in row-major order. This is the same
    # matrix the per-channel loop used to build, obtained with one reshape.
    # No Keras state is involved, so no session reset is needed here.
    flattened_matrices = np.ascontiguousarray(feature_map.reshape(m * n, c).T)

    # (c, m*n) @ (m*n, c) -> (c, c) matrix of channel dot products
    correlation_matrix = np.dot(flattened_matrices, flattened_matrices.T)
    print("Correlation matrix shape:", np.shape(correlation_matrix))

    # The matrix is symmetric, so the lower triangle carries all the
    # information; keep only that part.
    lower_indices = np.tril_indices(c)
    lower_triangular = correlation_matrix[lower_indices]

    return lower_triangular  # Return the lower triangular part of the correlation matrix


# Dataset's Address

In [None]:
def dataset_address(dataset_name):
    """
    Return the root directory of a known dataset.

    Parameters:
        dataset_name (str): Dataset identifier; only '140k' is known.

    Returns:
        str: Path of the dataset root directory.

    Raises:
        ValueError: If `dataset_name` is not a known dataset. Failing here
            gives a clear message instead of a None path that only breaks
            later inside os.path.join.
    """
    if dataset_name == '140k':
        return "/content/drive/My Drive/datasets/data_140k"
    raise ValueError(f"Unsupported dataset '{dataset_name}'. Supported datasets: '140k'.")

In [None]:
def featurs_address():
    """Return the directory in which extracted feature files (.npy) are cached."""
    features_dir = '/content/drive/My Drive/datasets/features/ten/'
    return features_dir

# Loading Data

In [None]:
import os  # Import the os module to interact with the operating system's file system
import numpy as np  # Import NumPy for numerical operations

def _load_or_extract_features(feature_address, dataset_name, model_name, class_name, feature_kind):
    """Return the features of one class: load the cached .npy file if present, otherwise extract and cache them."""
    print(f"Loading {class_name} features for '{dataset_name}'...")
    feature_filename = f"{dataset_name}_{class_name}_{model_name}_{feature_kind}.npy"
    feature_path = os.path.join(feature_address, feature_filename)  # Full path to the feature file

    if os.path.isfile(feature_path):
        # If the feature file exists, load the features
        print(f"File '{feature_filename}' exists. Loading...")
        # NOTE(security): allow_pickle=True can execute arbitrary code when
        # loading an untrusted file; only load cache files this notebook wrote.
        features = np.load(feature_path, allow_pickle=True)
    else:
        # If the feature file does not exist, extract features
        print(f"File '{feature_filename}' does not exist. Extracting features...")
        data_path = dataset_address(dataset_name)  # Get the data path for the dataset
        features = feature_extractor_image(data_path, model_name, class_name, feature_kind)
        if features is None:
            # Never cache a failed extraction: a saved None would be picked
            # up as a "valid" cache file on the next run.
            print(f"Feature extraction for '{class_name}' failed; nothing was saved.")
            return None
        # Make sure the cache directory exists before writing into it
        os.makedirs(feature_address, exist_ok=True)
        np.save(feature_path, features)

    print(f"{class_name.capitalize()} features loaded. Shape: {np.shape(features)}")
    return features


def feature_loader(dataset_name, model_name, feature_kind):
    """
    Load pre-computed features from files if available, otherwise extract features using the specified model.

    Cache files live in the directory returned by featurs_address() and are
    named "<dataset>_<class>_<model>_<feature_kind>.npy".

    Parameters:
        dataset_name (str): Name of the dataset.
        model_name (str): Name of the model used for feature extraction.
        feature_kind (str): Type of features to load or extract.

    Returns:
        tuple: (real_feature, fake_feature). An element is None when its
        extraction failed; (None, None) is returned when any exception
        occurs.
    """
    try:
        # Directory holding the cached feature files
        feature_address = featurs_address()
    except Exception as e:
        # Log an error message if determining the feature address fails
        print(f"An error occurred while determining the feature address: {e}")
        return None, None

    try:
        # Real class first, then fake class; both go through the same path
        real_feature = _load_or_extract_features(
            feature_address, dataset_name, model_name, 'real', feature_kind)
        fake_feature = _load_or_extract_features(
            feature_address, dataset_name, model_name, 'fake', feature_kind)

        return real_feature, fake_feature  # Return the loaded real and fake features

    except Exception as e:
        # Log an error message if any exception occurs
        print(f"An error occurred during feature loading: {e}")
        return None, None


# Labeling

In [None]:
import numpy as np  # Import NumPy for numerical operations
import tensorflow.keras.backend as K  # Import Keras backend for session management

def labeling(real_test, fake_test):
    """
    Attach class labels to real and fake features and shuffle them together.

    Real samples are labeled 0 and fake samples 1. Features and labels are
    concatenated and reordered with the same random permutation, drawn from
    NumPy's global random state (so np.random.seed controls it).

    Parameters:
        real_test (numpy.ndarray): Array of features for real images, one row per image.
        fake_test (numpy.ndarray): Array of features for fake images, one row per image.

    Returns:
        tuple: (X, y) where X holds the shuffled features and y the matching
        labels with shape (n_samples, 1); (None, None) if an error occurs.
    """
    try:
        # This helper only manipulates NumPy arrays, so it performs no Keras
        # session resets.
        print("********** Labeling **********")

        # Label real images with 0
        real_label_test = np.zeros((real_test.shape[0], 1))
        print("Real labels:", real_label_test.shape)

        # Label fake images with 1
        fake_label_test = np.ones((fake_test.shape[0], 1))
        print("Fake labels:", fake_label_test.shape)

        # Stack real rows first, then fake rows, for features and labels alike
        features_combined = np.concatenate((real_test, fake_test), axis=0)
        labels_combined = np.concatenate((real_label_test, fake_label_test))

        # One shared permutation keeps every feature row paired with its label
        indices = np.arange(features_combined.shape[0])
        np.random.shuffle(indices)
        X_test = features_combined[indices]
        y_test = labels_combined[indices]

        # Drop the unshuffled copies to reduce peak memory
        del features_combined, labels_combined

        # Print the shapes of the combined features and labels
        print("Combined features shape:", X_test.shape)
        print("Labels shape:", y_test.shape)

        return X_test, y_test  # Return the combined and shuffled features and labels

    except Exception as e:
        # Print the error message if an exception occurs
        print(f"An error occurred: {e}")
        return None, None

# Classification

In [None]:
import tensorflow.keras as keras  # Import Keras from TensorFlow
from tensorflow.keras.models import Sequential  # Import Sequential model type from Keras
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization  # Import layers from Keras
from tensorflow.keras.utils import to_categorical  # Import utility for one-hot encoding

def neural_network_classification(X_train, y_train, input_shape, epochs=200,
                                  learning_rate=6e-5, patience=5, validation_split=0.2):
    """
    Train a fully connected neural network classifier for two classes.

    Parameters:
        X_train (numpy.ndarray): Input features for training.
        y_train (numpy.ndarray): Integer class labels (0 or 1) for training.
        input_shape (tuple): Shape of one input sample.
        epochs (int): Maximum number of training epochs.
        learning_rate (float): Learning rate of the Adam optimizer.
        patience (int): Epochs without validation-accuracy improvement
            before early stopping ends the training.
        validation_split (float): Fraction of the training data held out
            for validation.

    Returns:
        tuple: (history, model) with the training history and the trained
        model; (None, None) if an error occurs.
    """
    try:
        # Clear any previous Keras session to free up memory
        keras.backend.clear_session()

        # One-hot encode the labels. num_classes is fixed to 2 so the label
        # width always matches the 2-unit output layer, even if a class
        # happens to be absent from y_train.
        y_train = to_categorical(y_train, num_classes=2)

        # Define the neural network architecture. The input shape is
        # declared with an explicit Input object rather than the
        # `input_shape=` argument on the first Dense layer.
        model = Sequential([
            keras.Input(shape=input_shape),
            Dense(1024, activation='relu'),  # First dense layer with ReLU activation
            BatchNormalization(),  # Batch normalization layer
            Dense(1024, activation='relu'),  # Second dense layer with ReLU activation
            BatchNormalization(),  # Batch normalization layer
            Dense(1024, activation='relu'),  # Third dense layer with ReLU activation
            Dense(1024, activation='relu'),  # Fourth dense layer with ReLU activation
            Dropout(0.2),  # Dropout layer with 20% dropout rate
            Dense(1024, activation='relu'),  # Fifth dense layer with ReLU activation
            Dropout(0.2),  # Dropout layer with 20% dropout rate
            Dense(1024, activation='relu'),  # Sixth dense layer with ReLU activation
            Dropout(0.3),  # Dropout layer with 30% dropout rate
            Dense(2, activation='softmax')  # Two-way softmax output (real vs. fake)
        ])

        # Print the model summary to visualize the architecture
        model.summary()

        # Define callback for early stopping to prevent overfitting
        callback = keras.callbacks.EarlyStopping(
            monitor="val_accuracy",  # Monitor validation accuracy
            patience=patience,  # Epochs with no improvement before stopping
            mode="max",  # Higher accuracy is better
            restore_best_weights=True  # Roll back to the best epoch's weights
        )

        # Categorical cross-entropy is the loss that matches a softmax output
        # with one-hot labels. For a 2-unit softmax it yields the same value
        # as the previously used binary cross-entropy, so reported losses
        # stay comparable.
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # Train the model with the training data
        history = model.fit(X_train, y_train, epochs=epochs, verbose=0,
                            callbacks=[callback], validation_split=validation_split,
                            shuffle=True)

        return history, model  # Return the training history and the trained model

    except Exception as e:
        # Print the error message if an exception occurs
        print(f"An error occurred: {e}")
        return None, None


# Generating results

In [None]:
import numpy as np  # Import NumPy for numerical operations
import matplotlib.pyplot as plt  # Import Matplotlib for plotting
from sklearn.metrics import classification_report, confusion_matrix  # Import metrics for model evaluation

def result(x, y, history, model):
    """
    Evaluate a trained model and report its performance.

    Prints the test loss and accuracy, the classification report and the
    confusion matrix, then plots the training/validation accuracy and loss
    curves stored in `history`.

    Parameters:
        x (numpy.ndarray): Input features to evaluate on.
        y (numpy.ndarray): One-hot encoded target labels.
        history (keras.History): Training history of the model.
        model (keras.Model): Trained model.

    Returns:
        tuple or None: (accuracy, loss) measured on (x, y); None if an
        exception occurs (the error message is printed).
    """
    rule = "-----------------------------" * 3

    # (train key, validation key, title, y-axis label, legend position) per panel
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
        ('loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
    )

    try:
        print("+++ Generating result... +++")

        # Class probabilities for every sample
        pred = model.predict(x)

        # score[0] is the loss, score[1] the accuracy
        score = model.evaluate(x, y, verbose=0)
        print("------------- Neural Network Classification ----------------")
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        # Collapse probabilities and one-hot labels to class indices
        prediction = np.argmax(pred, axis=1)
        real = np.argmax(y, axis=1)

        # Classification report
        print(rule)
        print("--------------------- Classification Report -----------------------")
        print(rule)
        print(classification_report(real, prediction))

        # Confusion matrix
        print(rule)
        print("--------------------- Confusion Matrix ----------------")
        print(rule)
        conf_mat = confusion_matrix(real, prediction)
        print(conf_mat)
        print(rule)

        print("--------- Plot Accuracy and Loss ------", "\n")

        # Accuracy on the left, loss on the right
        plt.figure(figsize=(12, 4))
        for position, (train_key, val_key, title, ylabel, legend_loc) in enumerate(panels, start=1):
            plt.subplot(1, 2, position)
            plt.plot(history.history[train_key], label='Train')
            plt.plot(history.history[val_key], label='Validation')
            plt.title(title)
            plt.ylabel(ylabel)
            plt.xlabel('Epoch')
            plt.legend(loc=legend_loc)

        plt.show()  # Display the plots

        return score[1], score[0]  # Return the test accuracy and loss

    except Exception as e:
        # Report the failure; the caller receives None
        print(f"An error occurred: {e}")
        return None

# Deepfake Detection

In [None]:
import numpy as np
import time
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
def Face_Fake_Detection(dataset_name, feature_kind, model_name, n_runs=15):
    """
    Run the full deepfake-detection experiment several times.

    Each run loads (or extracts) the features, labels and shuffles them,
    splits them 80/20 into train and test sets, trains the classifier and
    evaluates it on the test set.

    Parameters:
        dataset_name (str): Name of the dataset.
        feature_kind (str): Type of features to be used.
        model_name (str): Name of the backbone used for feature extraction.
        n_runs (int): Number of independent train/evaluate repetitions.

    Returns:
        tuple: (accuracies, losses), two lists with one entry per run that
        completed successfully.
    """
    # Lists to store loss and accuracy for each iteration
    losses = []
    accuracies = []

    for _ in range(n_runs):
        try:
            # Clear previous Keras session to free up memory
            keras.backend.clear_session()

            # Features are reloaded on every run and released right after
            # labeling, presumably to keep peak memory low.
            real_feature, fake_feature = feature_loader(dataset_name, model_name, feature_kind)
            if real_feature is None or fake_feature is None:
                print("There is something wrong during loading features....")
                break

            # Label the features as real (0) and fake (1)
            print("Labeling features...")
            data, label = labeling(real_feature, fake_feature)
            del real_feature, fake_feature
            if data is None or label is None:
                # labeling reports its own error; skip instead of failing
                # later with an unrelated message
                print("Labeling failed; skipping this run.")
                continue

            # Split the labeled data into training and testing sets
            print("Splitting data into train and test sets...")
            X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=42, shuffle=True)
            del data, label

            # Define the input shape for the neural network
            input_shape = X_train.shape[1:]

            # Measure elapsed (wall-clock) training time; CPU time would
            # not reflect time spent waiting on an accelerator.
            start_time = time.perf_counter()
            history, model = neural_network_classification(X_train, y_train, input_shape)
            print("Training time:", time.perf_counter() - start_time)
            if history is None or model is None:
                print("Training failed; skipping this run.")
                continue

            # One-hot encode the test labels; the width is fixed to the two
            # classes so it always matches the model output
            y_test = to_categorical(y_test, num_classes=2)
            outcome = result(X_test, y_test, history, model)
            if outcome is None:
                # result signals failure with None; unpacking it would
                # raise a misleading TypeError
                print("Evaluation failed; skipping this run.")
                continue
            accuracy, loss = outcome
            if accuracy is None or loss is None:
                print("Evaluation failed; skipping this run.")
                continue

            # Clean up memory
            del X_train, y_train, X_test, y_test

            # Append the accuracy and loss to the respective lists
            accuracies.append(accuracy)
            losses.append(loss)

            # Clear the session to free up memory
            keras.backend.clear_session()

        except Exception as e:
            print(f"An error occurred: {e}")
    return accuracies, losses

# ***Variables:***

In [None]:
# Variables: experiment configuration
# Dataset identifier; '140k' is the only name dataset_address() maps to a path
dataset_name = '140k'
feature_kind="correlation" # feature type; "correlation" is the only kind feature_extractor_image handles

In [None]:
# List of valid options for the model_name
# Backbones the user may pick from
valid_options_model_name = ["vgg16", "vgg19", "resnet50"]

# Keep asking until a supported name is entered (input is trimmed and lower-cased)
while True:
    model_name = input("Enter one of the following options for the model_name: vgg16, vgg19, resnet50:").strip().lower()
    if model_name not in valid_options_model_name:
        print("Invalid input. Please enter one of the specified options.")
        continue
    print("Valid input for the model_name detected:", model_name)
    break  # A valid name was entered



Enter one of the following options for the model_name: vgg16, vgg19, resnet50:vgg16
Valid input for the model_name detected: vgg16


In [None]:
# Run the repeated experiment, then summarise accuracy and loss across runs
accuracy, loss = Face_Fake_Detection(dataset_name, feature_kind, model_name)
separator = "-------------------" * 5

# Mean over all completed runs
print(separator)
print("+++ mean ... +++")
acc_mean = np.mean(accuracy)
loss_mean = np.mean(loss)
print(acc_mean)
print(loss_mean)

# Median over all completed runs
print(separator)
print("+++ median ... +++")
median_acc = np.median(accuracy)
median_loss = np.median(loss)
print(median_acc)
print(median_loss)