In [1]:
import numpy as np 
import pandas as pd
import cv2
import os
from tqdm import tqdm
import seaborn as sns

In [None]:
import tensorflow as tf

# Root folder that contains one sub-folder per class ('Fake' and 'Real').
main_folder_path = 'dataset/Train'

# Loader settings.
batch_size = 32  # Adjust batch size as per your system's memory capacity
img_height, img_width = 224, 224  # Every image is resized to this size on load

# Load the dataset.
# A fixed `seed` makes the shuffled order deterministic. Without it the order of
# `images`/`labels` differs on every run, so a later split that uses a fixed
# `random_state` would still produce different partitions each time.
dataset = tf.keras.utils.image_dataset_from_directory(
    main_folder_path,
    labels='inferred',  # Automatically infer labels based on subfolder names
    label_mode='int',   # Use 'int' for integer labels (0 or 1)
    class_names=['Fake', 'Real'],  # Ensure 'Fake' maps to 0 and 'Real' maps to 1
    image_size=(img_height, img_width),
    batch_size=batch_size,
    shuffle=True,
    seed=42
)

# Materialise the dataset as numpy arrays, one batch at a time.
# NOTE(review): this keeps every decoded image in RAM at once (and
# np.concatenate briefly needs a second copy). For large datasets consider
# feeding the tf.data pipeline to the model directly instead.
images = []
labels = []

for img_batch, label_batch in dataset:
    images.append(img_batch.numpy())
    labels.append(label_batch.numpy())

# Stack the per-batch arrays along the sample axis.
images = np.concatenate(images, axis=0)
labels = np.concatenate(labels, axis=0)

print(f"Loaded {len(images)} images with labels.")

Found 140002 files belonging to 2 classes.


In [None]:
from sklearn.model_selection import train_test_split

# Two-step hold-out split: first carve off 10% for testing, then take 10% of
# what remains for validation (~81% train / ~9% validation / 10% test).
# `stratify` keeps the Fake/Real ratio the same in every partition, so the
# reported accuracies are not skewed by an unlucky class imbalance.
X_temp, X_test, y_temp, y_test = train_test_split(
    images, labels, test_size=0.1, random_state=42, stratify=labels
)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.1, random_state=42, stratify=y_temp
)

print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

190335 190335
(190335, 2)


In [None]:
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

def create_resnet_model(input_shape=(224, 224, 3)):
    """Build and compile a binary classifier on top of a frozen ResNet50.

    The network is: ResNet50 input preprocessing -> ResNet50 backbone
    (ImageNet weights, no top, frozen) -> global average pooling ->
    Dense(128, relu, L2) -> Dropout(0.3) -> Dense(1, sigmoid, L2).

    Args:
        input_shape: Shape of a single input image as (height, width, channels).
            Inputs are expected as raw RGB pixel values in the 0-255 range;
            the ResNet50-specific preprocessing is applied inside the model.

    Returns:
        A Keras model compiled with the Adam optimizer, binary cross-entropy
        loss and the accuracy metric.
    """
    # Local import keeps this definition self-contained.
    from tensorflow.keras.applications.resnet50 import preprocess_input

    # Pre-trained feature extractor; frozen so only the new head is trained.
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False

    model = models.Sequential([
        layers.Input(shape=input_shape),
        # The ImageNet weights were trained on inputs transformed by
        # preprocess_input; feeding raw 0-255 RGB pixels to the frozen backbone
        # would give it inputs unlike those it was trained on.
        layers.Lambda(preprocess_input, name='resnet50_preprocess'),
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        layers.Dropout(0.3),
        layers.Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.001))
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the compiled ResNet50-based classifier with the default
# 224x224x3 input shape of create_resnet_model.
model = create_resnet_model()

Unnamed: 0,image,label
0,dataset\Test\Fake\fake_0.jpg,Fake
1,dataset\Test\Fake\fake_1.jpg,Fake
2,dataset\Test\Fake\fake_10.jpg,Fake
3,dataset\Test\Fake\fake_100.jpg,Fake
4,dataset\Test\Fake\fake_1000.jpg,Fake


In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix
# Halt training once validation loss has stopped improving for 10 epochs and
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Fit on the training split, validating on the held-out validation split.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Score the trained model on the test and training splits.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
train_loss, train_accuracy = model.evaluate(X_train, y_train)
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Threshold the sigmoid outputs at 0.5 to obtain hard class predictions.
y_pred_proba_test = model.predict(X_test).ravel()
y_pred = (y_pred_proba_test >= 0.5).astype(int)
cm_test = confusion_matrix(y_test, y_pred)

# Confusion matrix for the test split.
cm_fig, cm_ax = plt.subplots()
sns.heatmap(cm_test, annot=True, fmt='g', cmap='Blues', ax=cm_ax)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('True')
cm_ax.set_title(f'Confusion Matrix (Test Accuracy: {test_accuracy:.4f})')
plt.show()

# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
for curve_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    loss_ax.plot(history.history[curve_key], label=curve_label)
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
plt.show()

array(['Fake', 'Real'], dtype=object)