In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response on each read of the download loop.
CHUNK_SIZE = 40960
# Comma-separated `<dataset directory>:<percent-encoded download URL>` entries; the
# download loop splits on ',' and ':' and percent-decodes the URL part.
# NOTE(review): the embedded URL carries X-Goog-Date=20240422T173704Z and
# X-Goog-Expires=259200, so it has presumably expired -- regenerate it before re-running.
DATA_SOURCE_MAPPING = 'real-and-fake-face-detection:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F105271%2F250645%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240422%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240422T173704Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dba8e7c856d0b0bd66dbd12eaf114ab0f8c6a2e021d19619c76427a82dc91e13d55ec5162dfbebb50dffb86d80ca5c3d4e8a8687c73e99fc82ae47aec0e92836fb008f4af9287612493c361ee8551279bc8561807fccbe07aaece488dd4d2cb73e285a2ae402c78f8e4e0759fade657019332f02e27099a66895d412e89f3816c4b133c3930afb2b4c05d5c8395c028d67f3b5a2666a8e49c95c291d2291227b0534553f31c4ba03396329258b3eb9353b27f4caeaabc50e87911db6c8c9af9e2756384d528222f3766af0a4588667e49b980cffdfcc136badd29617a408b07c14d2dce982cbbc367148ffc50be55f5a03ebab5a50d5c3d3b78564d94b9c96f2d'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every `<directory>:<encoded url>` entry of DATA_SOURCE_MAPPING into a
# temporary file and unpack it below KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only; the URL stays percent-encoded until unquote().
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length can be missing (e.g. chunked responses); in that case
            # show a plain byte counter instead of crashing on int(None).
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            size_text = content_length if content_length is not None else 'unknown'
            print(f'Downloading {directory}, {size_text} bytes compressed')
            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    # Clamp so a wrong Content-Length cannot overflow the 50-char bar.
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # tarfile reopens the temp file by name, so buffered bytes must reach
            # the disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # replace the imported module with a TarFile instance.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is a subclass of OSError, so it has to be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading real-and-fake-face-detection, 452107760 bytes compressed
Downloaded and uncompressed: real-and-fake-face-detection
Data source import complete.


# Importing Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2

# from keras.models import Model, Sequential
# from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
# from keras.layers import BatchNormalization
import os
import seaborn as sns
import tensorflow as tf
from keras.applications import ResNet50
# , ResNet50, InceptionV3

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
import pickle
from keras import layers, models, optimizers
from sklearn.preprocessing import StandardScaler
# from sklearn.pipeline import make_pipeline


# INITIALIZING IMAGE SIZE

In [None]:
# Target edge length in pixels: images are resized to (SIZE, SIZE) and the
# pretrained backbones are instantiated with input_shape=(SIZE, SIZE, 3).
SIZE = 224  #Resize images

# Capture training data and labels into respective lists

In [None]:
# Read every class folder of the dataset into memory as SIZE x SIZE RGB arrays,
# together with one string label per image.
images = []
labels = []

# sorted() makes the load order (and therefore the later seeded train/test split)
# independent of the order in which the filesystem lists entries.
for directory_path in sorted(glob.glob("/kaggle/input/real-and-fake-face-detection/real_and_fake_face/*")):
    # The folder name is the class label. Splitting on "\\" only works for
    # Windows-style paths; on POSIX paths it yields the whole path as the label.
    label = os.path.basename(os.path.normpath(directory_path))
    print(label)
    for img_path in sorted(glob.glob(os.path.join(directory_path, "*.jpg"))):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread reports an unreadable or corrupt file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue
        img = cv2.resize(img, (SIZE, SIZE))
        # OpenCV decodes to BGR channel order; swap to RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        images.append(img)
        labels.append(label)

# Fail here with a clear message rather than in a later cell if the download
# step did not produce any data.
if not images:
    raise FileNotFoundError("No .jpg images found under /kaggle/input/real-and-fake-face-detection/real_and_fake_face")

#Convert lists to arrays
images = np.array(images)
labels = np.array(labels)


# Capture test/validation data and labels into respective lists

In [None]:
# test_images = []
# test_labels = []
# for directory_path in glob.glob("/kaggle/input/deepfake-and-real-images/Dataset/Validation/*"):
#     fruit_label = directory_path.split("\\")[-1]
#     for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
#         img = cv2.imread(img_path, cv2.IMREAD_COLOR)
#         img = cv2.resize(img, (SIZE, SIZE))
#         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
#         test_images.append(img)
#         test_labels.append(fruit_label)

# #Convert lists to arrays
# test_images = np.array(test_images)
# test_labels = np.array(test_labels)

# Encode labels from text to integers.

In [None]:
from sklearn import preprocessing

# Map the string class labels to integer ids; the fitted encoder is kept so
# that predicted ids can be turned back into label strings later.
le = preprocessing.LabelEncoder()
labels_encoded = le.fit(labels).transform(labels)

# Persist the fitted encoder to disk, creating the target folder on first use.
colab_dir = "/content/labelled_encoder"
model_filename = "le.pkl"
if not os.path.exists(colab_dir):
    os.makedirs(colab_dir)

encoder_path = os.path.join(colab_dir, model_filename)
with open(encoder_path, 'wb') as file:
    pickle.dump(le, file)

# Normalize pixel values to between 0 and 1

In [None]:
# x_train, x_test = x_train / 255.0, x_test / 255.0
# Scale pixel values from [0, 255] to [0, 1].
# The dtype guard keeps the cell idempotent: re-running it must not divide
# already-normalised data by 255 a second time. float32 halves the memory
# of the float64 array that a plain `images / 255.0` would allocate.
if images.dtype == np.uint8:
    images = images.astype(np.float32) / 255.0

In [None]:
# Sanity check: (n_images, SIZE, SIZE, 3); the recorded run shows (2041, 224, 224, 3).
images.shape

(2041, 224, 224, 3)

# One hot encode y values for neural network.

In [None]:
# from keras.utils import to_categorical
# y_train_one_hot = to_categorical(y_train)
# y_test_one_hot = to_categorical(y_test)

# Load model without classifier/fully connected layers

In [None]:
# ImageNet-pretrained ResNet50 without its classification head (include_top=False);
# later cells call .predict() on it to obtain convolutional features.
transfer_learning_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(SIZE, SIZE, 3),
)
# Alternative backbones (their classes must be imported before uncommenting):
# transfer_learning_model = VGG16(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))
# transfer_learning_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))
# transfer_learning_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))

# ELM

In [None]:
# VGG16 is used below but is not part of the notebook's import cell, so bring
# it into scope here to avoid a NameError on a fresh kernel.
from keras.applications import VGG16
from sklearn.random_projection import GaussianRandomProjection

# Load pre-trained VGG16 without top layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))

# Freeze all layers except the last four. The backbone is only used for
# inference (.predict) in this cell, so the trainable flags do not affect
# the features extracted below.
for layer in base_model.layers[:-4]:
    layer.trainable = False

# Define feature extraction model: backbone followed by global average pooling,
# which yields one feature vector per image.
feature_extractor = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
])

# Load and preprocess data
# train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
#     rescale=1./255,
#     rotation_range=20,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True
# )

# test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# train_generator = train_datagen.flow_from_directory(
#     'path_to_train_directory',
#     target_size=(224, 224),
#     batch_size=32,
#     class_mode='binary'
# )

# test_generator = test_datagen.flow_from_directory(
#     'path_to_test_directory',
#     target_size=(224, 224),
#     batch_size=32,
#     class_mode='binary'
# )

# Extract features using the VGG16 backbone
train_features = feature_extractor.predict(images)
# test_features = feature_extractor.predict(test_generator)

# Apply Random Projection; the fixed seed makes the random projection matrix
# (and therefore the reported metrics) reproducible across runs.
projection = GaussianRandomProjection(n_components=512, random_state=0)
train_features_proj = projection.fit_transform(train_features)
# test_features_proj = projection.transform(test_features)

# scaler = StandardScaler()
# train_features_scaled = scaler.fit_transform(train_features_proj)
# test_features_scaled = scaler.transform(test_features_proj)

# Define and train the ELM model
# NOTE(review): both Dense layers are trained with Adam, i.e. this is a small
# MLP rather than a classic extreme learning machine -- confirm the intent.
elm_model = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(512,)),  # matches n_components above
    layers.Dense(2, activation='softmax')  # Binary classification output layer
])

elm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

elm_model.fit(train_features_proj, labels_encoded, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on the same data it was fitted on (including the 20%
# validation slice), and report the measured values rather than constants.
train_loss, train_accuracy = elm_model.evaluate(train_features_proj, labels_encoded)
print(f'Train Loss: {train_loss:.4f}')
print(f'Train Accuracy: {train_accuracy:.4f}')


# Make the loaded layers non-trainable. This is important because we want to work with the pre-trained weights.

In [None]:
# for layer in transfer_learning_model.layers:
# 	layer.trainable = False

# transfer_learning.summary()  #Trainable parameters will be 0

# Now, let us use features from convolutional network

In [None]:
# Run the headless backbone over all images.
# NOTE: this rebinds `feature_extractor` (a Sequential model in the ELM cell)
# to the array returned by predict().
feature_extractor = transfer_learning_model.predict(images)
# feature_extractor_test=transfer_learning_model.predict(x_test)

# Flatten each image's feature maps into a single row vector.
features = feature_extractor.reshape(len(feature_extractor), -1)
# features_test = feature_extractor_test.reshape(feature_extractor_test.shape[0], -1)

# X is the input matrix for the dimensionality-reduction and classifier cells.
X = features
# X_for_test = features_test

In [None]:
# Flattened feature matrix: (n_images, n_features); the recorded run shows (2041, 100352).
X.shape

(2041, 100352)

# Dimensionality Reduction Using PCA

In [None]:
# Define the number of components (features) after reduction.
# PCA cannot produce more components than min(n_samples, n_features), so the
# requested 2000 is clamped to the data actually available.
n_components = min(2000, *features.shape)

# Apply PCA for dimensionality reduction.
# Fit on `features` instead of `X`: reading and overwriting the same name meant
# that re-running this cell applied PCA to its own output.
# NOTE(review): the PCA is fitted on all samples before the train/test split
# further down, so held-out samples influence the components -- consider
# fitting on the training portion only.
pca = PCA(n_components=n_components)
X = pca.fit_transform(features)

# from sklearn.decomposition import KernelPCA
# kpca = KernelPCA(n_components = 1, kernel = 'rbf')
# X = kpca.fit_transform(X)
# # X_test = kpca.transform(X_test)

# from sklearn.manifold import TSNE
# tsne = TSNE(n_components=3000, random_state=42)
# X = tsne.fit_transform(X)

# from sklearn.decomposition import NMF
# nmf = NMF(n_components=10000, random_state=0)
# X = nmf.fit_transform(X)

In [None]:
# Pickle the fitted PCA so the same projection can be re-applied to new images.
colab_dir = "/content/PCA_dimensionality_reducer"
model_filename = "PCAresnet50.pkl"
# model_filename = "PCAvgg16.pkl"
# model_filename = "kPCAinceptionv3.pkl"
# model_filename = "PCAdensenet121.pkl"

# Create the output folder on first use.
if not os.path.exists(colab_dir):
    os.makedirs(colab_dir)

pca_path = os.path.join(colab_dir, model_filename)
with open(pca_path, 'wb') as file:
    pickle.dump(pca, file)

In [None]:
# Shape after PCA: (n_images, n_components); the recorded run shows (2041, 2000).
X.shape

(2041, 2000)

# Split the PCA-reduced features and encoded labels into train (70%) and test (30%) sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed seed keeps the shuffled
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels_encoded,
    test_size=0.30,
    shuffle=True,
    random_state=104,
)

# Training on SVM

In [None]:
# Base estimator; the kernel given here is overridden by each grid entry.
classifier = SVC(kernel = 'rbf', random_state = 0)

# Two sub-grids: a linear SVM tuned over C only, and an RBF SVM tuned over
# C and gamma.
parameters = [
    dict(C=[0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], kernel=['linear']),
    dict(C=[0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], kernel=['rbf'], gamma=[0.01, 0.1, 1, 10, 100]),
]

# 5-fold cross-validated search scored by accuracy, using all available cores.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the winning score and settings, and rebind `classifier` to the best
# estimator found by the search.
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
classifier = grid_search.best_estimator_

  pid = os.fork()


In [None]:
# Show the hyper-parameter combination selected by the grid search (best_params_).
print(best_parameters)

{'C': 0.001, 'kernel': 'linear'}


In [None]:
# Saving the model
# NOTE(review): the active filename says "vgg16" although the features in this
# notebook come from `transfer_learning_model` (ResNet50); the loading cell uses
# the same name, so the file round-trips -- confirm the intended name.
colab_dir = "/content/models"
# model_filename = "svm_model_resnet50.pkl"
model_filename = "svm_model_vgg16.pkl"
# model_filename = "svm_model_inceptionv3.pkl"
# model_filename = "svm_model_densenet121.pkl"

# Create the output folder on first use.
if not os.path.exists(colab_dir):
    os.makedirs(colab_dir)

svm_path = os.path.join(colab_dir, model_filename)
with open(svm_path, 'wb') as file:
    pickle.dump(classifier, file)


## Predicting the Test set results

# Loading Label Encoder


In [None]:
# Location of the pickled LabelEncoder written earlier in this notebook.
colab_dir = "/content/labelled_encoder"
model_filename = "le.pkl"

# Only unpickle files this notebook produced itself: pickle.load can execute
# arbitrary code from an untrusted file.
encoder_path = os.path.join(colab_dir, model_filename)
with open(encoder_path, 'rb') as file:
    le_loaded = pickle.load(file)

# Loading PCA


In [None]:
# Define the directory path
colab_dir = "/content/PCA_dimensionality_reducer"

# Define the filename for the saved model.
# It must match the name used when the PCA was pickled ("PCAresnet50.pkl");
# loading "PCAvgg16.pkl" either fails with FileNotFoundError on a fresh run or
# picks up a stale PCA that was fitted on a different backbone's features.
model_filename = "PCAresnet50.pkl"
# model_filename = "PCAvgg16.pkl"
# model_filename = "kPCAinceptionv3.pkl"
# model_filename = "PCAdensenet121.pkl"

# Load the model from the specified directory.
# Only unpickle files this notebook produced itself: pickle.load can execute
# arbitrary code from an untrusted file.
with open(os.path.join(colab_dir, model_filename), 'rb') as file:
    pca_loaded = pickle.load(file)

# Loading Different SVM Models

In [None]:
# Location of the pickled SVM written by the "Saving the model" cell.
colab_dir = "/content/models"
model_filename = "svm_model_vgg16.pkl"

# Only unpickle files this notebook produced itself: pickle.load can execute
# arbitrary code from an untrusted file.
svm_path = os.path.join(colab_dir, model_filename)
with open(svm_path, 'rb') as file:
    svm_loaded = pickle.load(file)

# Loading Transfer Learning Model

In [None]:
# transfer_learning_models = [ResNet50(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3)), VGG16(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3)), InceptionV3(weights='imagenet', include_top=False, input_shape=(SIZE, SIZE, 3))]

In [None]:
# Predict encoded class ids for the held-out rows; X_test comes from the split
# of the PCA-reduced matrix, so no further transformation is applied here.
y_pred = svm_loaded.predict(X_test)

# Making the Confusion Matrix

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted class ids on the held-out set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Last expression of the cell, so the notebook displays the accuracy value.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[181 107]
 [ 96 229]]


0.6688417618270799

# Prediction with New Image

In [None]:
# #Extracting the Face from the input image
import cv2
uploaded_image_path="/content/test.jpg"

# Read the input image
img = cv2.imread(uploaded_image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    raise FileNotFoundError(f"Could not read image: {uploaded_image_path}")

# # Convert into grayscale
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# # Load the cascade
# face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')

# # Detect faces
# faces = face_cascade.detectMultiScale(gray, 1.1, 4)

# # Draw rectangle around the faces and crop the faces
# for (x, y, w, h) in faces:
# 	cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
# 	faces = img[y:y + h, x:x + w]
# 	cv2.imshow("face",faces)
# 	cv2.imwrite('face.jpg', faces)

# # Display the output
# cv2.imshow('img', img)
# cv2.waitKey()

#Predicting the real and fake image with cropped image
img = cv2.resize(img, (SIZE, SIZE))
# OpenCV decodes to BGR channel order; swap to RGB as done for the training images.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Build a batch of one and apply the same [0, 1] scaling the training images
# received; feeding raw 0-255 pixels would not match the data the PCA and SVM
# were fitted on.
test_image = np.array([img], dtype=np.float32) / 255.0

# Backbone features -> flattened row -> PCA projection (already 2-D, so no
# further reshape is needed) -> SVM prediction -> original label string.
test_image_feature=transfer_learning_model.predict(test_image).reshape(1,-1)
test_image_feature=pca_loaded.transform(test_image_feature)
test_prediction=le_loaded.inverse_transform(svm_loaded.predict(test_image_feature))

# Prints the last four characters of the predicted label
# (assumes class labels end in "real" / "fake" -- TODO confirm).
print(test_prediction[0][-4:])

fake
