In [20]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Input and Preprocessing of Images

In [21]:
# Read the training metadata; the `path` and `label` columns are used below.
df = pd.read_csv("/kaggle/input/140k-real-and-fake-faces/train.csv")

# Root of the image tree, the training split, and its two class folders.
dataset_path = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake'
image_dir = os.path.join(dataset_path, 'train')
real_dir, fake_dir = (os.path.join(image_dir, folder) for folder in ('real', 'fake'))

# Rebuild each image location from its file name: rows whose CSV path mentions
# 'real' go to the real folder, every other row goes to the fake folder.
image_paths = df['path'].apply(
    lambda rel: os.path.join(fake_dir if 'real' not in rel else real_dir, rel.split('/')[-1])
)
labels = list(df['label'])

In [22]:
# Keep a random 10% of the training rows so loading and training stay tractable.
# A seeded generator makes the subsample reproducible across kernel restarts.
# NOTE: this cell overwrites `image_paths` and `labels`; re-run the cell that
# builds them before re-running this one, otherwise the data is subsampled again.
sample_size = int(0.1 * len(image_paths))
rng = np.random.default_rng(42)
random_indices = rng.choice(len(image_paths), size=sample_size, replace=False)

# `random_indices` holds positions, so select positionally instead of relying on
# the Series index happening to be 0..n-1.
image_paths = image_paths.iloc[random_indices]
labels = np.array(labels)[random_indices]

In [23]:
# Load every sampled image as a 224x224 RGB array.
images = []
for img_path in image_paths:
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(img_path) as img:
        # Force three channels so every array is (224, 224, 3): a grayscale or
        # RGBA file would otherwise break np.stack and the model's input shape.
        img = img.convert('RGB').resize((224, 224))
        images.append(np.array(img))

images = np.stack(images)  # Single array of shape (n_images, 224, 224, 3)

# NOTE(review): pixels are passed on as raw 0-255 values. ImageNet-pretrained
# ResNet50 weights are normally paired with the matching `preprocess_input`;
# if it is added, apply it identically to the training and evaluation images.

# Split the data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [24]:
# One-hot encode the class labels of both splits (two classes).
num_classes = 2
y_train, y_test = (to_categorical(split, num_classes) for split in (y_train, y_test))

# ResNet50 Model

In [25]:
# ResNet50 backbone with ImageNet weights and without its classification top,
# configured for 224x224 three-channel inputs.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [26]:
# Classification head stacked on the ResNet50 feature maps:
# global average pooling -> 256-unit ReLU layer -> softmax over the classes.
pooling_layer = GlobalAveragePooling2D()
hidden_layer = Dense(256, activation='relu')
output_layer = Dense(num_classes, activation='softmax')

x = hidden_layer(pooling_layer(base_model.output))
predictions = output_layer(x)

In [27]:
# Tie the backbone's input to the new head's output to form the full network.
model = Model(
    outputs=predictions,
    inputs=base_model.input,
)

In [28]:
# The head is a softmax over `num_classes` units trained on one-hot labels, so the
# matching loss is categorical cross-entropy. `binary_crossentropy` scores each
# output unit as an independent binary problem, which only coincides with the
# intended loss when there are exactly two classes.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [29]:
# Fit for 10 epochs in mini-batches of 32; the 20% split produced by
# train_test_split is passed as validation data and scored after each epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [30]:
# Score the trained model on the 20% split (the same data that served as
# validation data during fitting) and report loss and accuracy.
split_scores = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_accuracy = split_scores
for caption, score in zip(("Test Loss:", "Test Accuracy:"), (test_loss, test_accuracy)):
    print(caption, score)

63/63 - 174s - loss: 0.4053 - accuracy: 0.8685 - 174s/epoch - 3s/step
Test Loss: 0.40532851219177246
Test Accuracy: 0.8684999942779541


# Testing on new images (a random 1% sample of the test set)

In [46]:
# Read the test metadata; the `path` and `label` columns are used below.
test_dataset_path = "/kaggle/input/140k-real-and-fake-faces/test.csv"
test_df = pd.read_csv(test_dataset_path)

# Root of the image tree, the test split, and its two class folders.
# Note that `image_dir` and `labels` are reassigned here and now describe the test set.
test_data_path = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake'
image_dir = os.path.join(test_data_path, 'test')
test_real_dir, test_fake_dir = (os.path.join(image_dir, folder) for folder in ('real', 'fake'))

# Rebuild each image location from its file name: rows whose CSV path mentions
# 'real' go to the real folder, every other row goes to the fake folder.
test_image_paths = test_df['path'].apply(
    lambda rel: os.path.join(test_fake_dir if 'real' not in rel else test_real_dir, rel.split('/')[-1])
)
labels = list(test_df['label'])

In [47]:
# Keep a random 1% of the test rows for a quick evaluation.
# A seeded generator makes the subsample reproducible across kernel restarts.
# NOTE: this cell overwrites `test_image_paths` and `labels`; re-run the cell that
# builds them before re-running this one, otherwise the data is subsampled again.
sample_size = int(0.01 * len(test_image_paths))
rng = np.random.default_rng(42)
random_indices = rng.choice(len(test_image_paths), size=sample_size, replace=False)

# `random_indices` holds positions, so select positionally instead of relying on
# the Series index happening to be 0..n-1.
test_image_paths = test_image_paths.iloc[random_indices]
labels = np.array(labels)[random_indices]

In [49]:
# Load every sampled test image as a 224x224 RGB array.
test_images = []
for img_path in test_image_paths:
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(img_path) as img:
        # Force three channels so every array is (224, 224, 3): a grayscale or
        # RGBA file would otherwise break np.stack and the model's input shape.
        img = img.convert('RGB').resize((224, 224))
        test_images.append(np.array(img))

test_images = np.stack(test_images)  # Single array of shape (n_images, 224, 224, 3)

# NOTE: `y_test` previously held the labels of the train/test split; after this
# assignment it belongs to `x_test` (lower-case) and no longer matches `X_test`.
x_test = test_images
y_test = to_categorical(labels, num_classes)

In [50]:
# Score the trained model on the sampled test images and report loss and accuracy.
sample_scores = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_accuracy = sample_scores
for caption, score in zip(("Test Loss:", "Test Accuracy:"), (test_loss, test_accuracy)):
    print(caption, score)

7/7 - 18s - loss: 0.4811 - accuracy: 0.8500 - 18s/epoch - 3s/step
Test Loss: 0.4810963571071625
Test Accuracy: 0.8500000238418579
