In [1]:
import os
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import regularizers
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_recall_curve, auc, f1_score, roc_curve
from PIL import Image, ImageFile
import albumentations as A
from albumentations.pytorch import ToTensorV2
import seaborn as sns
import warnings

In [2]:
# Global runtime switches for the whole notebook.
# NOTE(review): all three settings trade safety/diagnostics for convenience;
# confirm they are still wanted before sharing results.
Image.MAX_IMAGE_PIXELS = None            # remove PIL's cap on the number of pixels per image
ImageFile.LOAD_TRUNCATED_IMAGES = True   # let PIL load files whose data ends early
warnings.simplefilter("ignore")          # silence every warning category

In [3]:
# Dataset layout: <root>\<split>\<class>, with one folder per class.
# NOTE(review): these are absolute paths on one Windows machine; anyone else
# running the notebook has to edit them.
_TRAIN_ROOT = r'C:\Desktop\Major\train_data'
_TEST_ROOT = r'C:\Desktop\Major\test_data'

train_fake_dir = _TRAIN_ROOT + r'\fake'
train_real_dir = _TRAIN_ROOT + r'\real'
test_fake_dir = _TEST_ROOT + r'\fake'
test_real_dir = _TEST_ROOT + r'\real'

In [4]:
# Training-time pipeline: resize, random geometric/photometric augmentation,
# normalization, then conversion to a tensor. The order of the steps is the
# order in which they are applied.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics, whereas Keras' own MobileNetV2 preprocessing scales inputs to
# [-1, 1] — confirm the difference is intentional.
_train_steps = [
    A.Resize(height=224, width=224),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.1,
        rotate_limit=15,
        p=0.5,
    ),
    A.RandomBrightnessContrast(p=0.5),
    A.CoarseDropout(max_holes=8, max_height=20, max_width=20, p=0.4),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

In [5]:
# Evaluation-time pipeline: no random augmentation, only the resize,
# normalization and tensor conversion that the training pipeline also ends with.
_eval_steps = [
    A.Resize(height=224, width=224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
]
test_transform = A.Compose(_eval_steps)

In [6]:
def preprocess_and_augment(image_path, transform):
    """Load one image from disk, run it through ``transform``, return an HWC array.

    Args:
        image_path: Path of the image file to open with PIL.
        transform: Callable invoked as ``transform(image=<H x W x 3 array>)``
            that returns a mapping whose ``'image'`` entry holds the result
            (the albumentations pipelines in this notebook follow that protocol).

    Returns:
        numpy.ndarray in height x width x channel order. When the pipeline
        returns a channel-first tensor (any object exposing ``permute``, which
        is what the ``ToTensorV2`` step used here yields) the axes are moved
        back to channel-last; a pipeline that already returns an array is
        passed through unchanged.
    """
    # The context manager guarantees the file handle is released as soon as
    # the pixel data has been copied into the NumPy array.
    with Image.open(image_path) as image:
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
        pixels = np.array(rgb_image)

    augmented = transform(image=pixels)['image']
    if hasattr(augmented, 'permute'):
        # Channel-first tensor -> channel-last NumPy array for Keras.
        return augmented.permute(1, 2, 0).numpy()
    return np.asarray(augmented)

In [7]:
def sequential_train_generator(fake_dir, real_dir, batch_size, transform, steps_per_epoch=100):
    """Yield class-balanced training batches forever.

    Every batch contains ``batch_size // 2`` fake images (label 0) followed by
    ``batch_size // 2`` real images (label 1). Both file lists are reshuffled
    after every ``steps_per_epoch`` batches.

    Args:
        fake_dir: Folder holding the fake-class images.
        real_dir: Folder holding the real-class images.
        batch_size: Requested batch size; an odd value is rounded down to the
            nearest even number because each class contributes half.
        transform: Pipeline forwarded to ``preprocess_and_augment``.
        steps_per_epoch: Number of batches between reshuffles. Defaults to 100,
            the value that was previously hard-coded.

    Yields:
        ``(images, labels)`` where ``images`` stacks the per-image arrays along
        a new first axis and ``labels`` is a float32 vector of 0s and 1s.

    Raises:
        ValueError: On the first ``next()`` call if either folder contains no
            ``.png``/``.jpg``/``.jpeg`` file or if ``batch_size`` is below 2.
    """
    valid_extensions = ('.png', '.jpg', '.jpeg')
    fake_files = [os.path.join(fake_dir, f) for f in os.listdir(fake_dir) if f.lower().endswith(valid_extensions)]
    real_files = [os.path.join(real_dir, f) for f in os.listdir(real_dir) if f.lower().endswith(valid_extensions)]
    if not fake_files or not real_files:
        raise ValueError(
            f"Need at least one image per class: found {len(fake_files)} in {fake_dir!r} "
            f"and {len(real_files)} in {real_dir!r}"
        )

    batch_half = batch_size // 2
    if batch_half < 1:
        raise ValueError(f"batch_size must be at least 2, got {batch_size}")

    def _take_cyclic(files, start, count):
        # Index modulo the list length so that a window running past the end
        # continues from the beginning. Slicing with modulo-reduced bounds
        # would instead collapse to an empty (or truncated) slice on wrap-around.
        total = len(files)
        return [files[(start + offset) % total] for offset in range(count)]

    while True:
        np.random.shuffle(fake_files)
        np.random.shuffle(real_files)
        for i in range(steps_per_epoch):
            batch_fake = _take_cyclic(fake_files, i * batch_half, batch_half)
            batch_real = _take_cyclic(real_files, i * batch_half, batch_half)
            batch_files = batch_fake + batch_real
            batch_labels = [0] * len(batch_fake) + [1] * len(batch_real)

            batch_images = np.stack([preprocess_and_augment(f, transform) for f in batch_files], axis=0)
            yield batch_images, np.array(batch_labels, dtype=np.float32)

In [8]:
def deterministic_data_generator(fake_dir, real_dir, batch_size, transform):
    """Yield every image of both folders exactly once, in a fixed order.

    Files are ordered by sorted path, all fake images (label 0) first and all
    real images (label 1) after them, then cut into consecutive chunks of
    ``batch_size``. The last chunk may be smaller. The generator ends after a
    single pass; create a new one to iterate again.

    Args:
        fake_dir: Folder holding the fake-class images.
        real_dir: Folder holding the real-class images.
        batch_size: Maximum number of images per yielded batch.
        transform: Pipeline forwarded to ``preprocess_and_augment``.

    Yields:
        ``(images, labels)`` with the images stacked along a new first axis
        and the labels as a float32 vector.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    def _sorted_image_paths(folder):
        # Keep only recognised image files; sorting makes the order repeatable.
        paths = [
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(image_suffixes)
        ]
        paths.sort()
        return paths

    fake_paths = _sorted_image_paths(fake_dir)
    real_paths = _sorted_image_paths(real_dir)
    samples = [(path, 0) for path in fake_paths] + [(path, 1) for path in real_paths]

    for start in range(0, len(samples), batch_size):
        chunk = samples[start:start + batch_size]
        images = np.stack(
            [preprocess_and_augment(path, transform) for path, _ in chunk],
            axis=0,
        )
        targets = np.array([label for _, label in chunk], dtype=np.float32)
        yield images, targets

In [9]:
# Training hyperparameters.
batch_size = 100
epochs = 30
steps_per_epoch_train = 100

# One validation pass must cover every test image once: count the image files
# in both test folders and round the number of batches up, so a final partial
# batch is still included.
_test_image_suffixes = ('.png', '.jpg', '.jpeg')
_test_image_count = sum(
    1
    for _folder in (test_fake_dir, test_real_dir)
    for _name in os.listdir(_folder)
    if _name.lower().endswith(_test_image_suffixes)
)
steps_test = math.ceil(_test_image_count / batch_size)

In [10]:
# Backbone: MobileNetV2 without its classification head, initialised from the
# 'imagenet' weights. Everything is first marked trainable, then the first 120
# entries of `base_model.layers` are frozen so that only the remaining layers
# are fine-tuned.
base_model = MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
base_model.trainable = True
for frozen_layer in base_model.layers[:120]:
    frozen_layer.trainable = False

In [11]:
# Classifier: backbone features -> global average pooling -> one regularised
# hidden layer -> a single sigmoid unit. Given the labels used by the data
# generators (0 = fake, 1 = real), the output is the score for "real".
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.02)))
model.add(BatchNormalization())
model.add(Dropout(0.7))
model.add(Dense(1, activation='sigmoid'))

In [12]:
# Cosine learning-rate decay starting at 1e-4. The decay horizon is 20 epochs'
# worth of optimizer steps.
# NOTE(review): training is configured for 30 epochs, so the last 10 run past
# the decay horizon — presumably at the floor given by `alpha`; confirm intended.
_decay_steps = steps_per_epoch_train * 20
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=1e-4,
    decay_steps=_decay_steps,
    alpha=0.1,
)

In [13]:
# Binary classification set-up: Adam driven by the cosine schedule, binary
# cross-entropy loss, and accuracy plus AUC as metrics. The AUC metric is named
# 'auc', so its validation counterpart is reported as 'val_auc'.
_adam = Adam(learning_rate=lr_schedule)
_tracked_metrics = ['accuracy', tf.keras.metrics.AUC(name='auc')]
model.compile(
    optimizer=_adam,
    loss='binary_crossentropy',
    metrics=_tracked_metrics,
)

In [14]:
# Callbacks (no EarlyStopping, no ReduceLROnPlateau).
# ModelCheckpoint keeps only the weights with the highest validation AUC.
# ReduceLROnPlateau is deliberately not used: the optimizer is compiled with a
# CosineDecay LearningRateSchedule, and ReduceLROnPlateau needs a settable
# scalar learning rate — with a schedule it fails once a plateau is detected.
# The cosine schedule already lowers the learning rate during training.
callbacks = [
    ModelCheckpoint('best_model.keras', monitor='val_auc', mode='max', save_best_only=True),
]

In [15]:
# Data sources for training and evaluation.
# `train_generator` never ends (balanced, shuffled, augmented batches);
# `test_generator` walks the test set once in a fixed order and then stops.
train_generator = sequential_train_generator(
    fake_dir=train_fake_dir,
    real_dir=train_real_dir,
    batch_size=batch_size,
    transform=train_transform,
)
test_generator = deterministic_data_generator(
    fake_dir=test_fake_dir,
    real_dir=test_real_dir,
    batch_size=batch_size,
    transform=test_transform,
)

In [None]:
def _repeating_validation_batches():
    """Yield the deterministic test batches forever, restarting after each full pass.

    `deterministic_data_generator` stops after one pass over the test set, but
    validation runs once per epoch, so a single-pass generator would be
    exhausted after the first epoch. Because one pass is exactly `steps_test`
    batches, every validation run still sees each test image once.
    """
    while True:
        produced_any = False
        for batch in deterministic_data_generator(test_fake_dir, test_real_dir, batch_size, test_transform):
            produced_any = True
            yield batch
        if not produced_any:
            # Empty test set: stop instead of spinning forever.
            return


# Train for `epochs` epochs of `steps_per_epoch_train` batches each and
# validate on the full test set after every epoch.
history = model.fit(
    train_generator,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch_train,
    validation_data=_repeating_validation_batches(),
    validation_steps=steps_test,
    callbacks=callbacks
)

In [None]:
# Persist the model in the state it has after the last training epoch.
# NOTE(review): this file uses the '.h5' extension while the checkpoint
# callback writes 'best_model.keras' — confirm the two formats are intentional.
model.save('final_model.h5')

In [None]:
# Collect the model's scores and the true labels for the whole test set.
# A fresh generator is created here because `deterministic_data_generator`
# can only be iterated once: a generator that has already been consumed
# (for example as validation data during training) would yield nothing and
# leave both lists empty.
all_preds = []
all_true = []
eval_generator = deterministic_data_generator(test_fake_dir, test_real_dir, batch_size, test_transform)
for batch_images, batch_labels in eval_generator:
    # The model outputs one sigmoid score per image; flatten (N, 1) -> (N,).
    preds = model.predict(batch_images).flatten()
    all_preds.extend(preds)
    all_true.extend(batch_labels)

In [None]:
# Turn the sigmoid scores into hard 0/1 predictions with a 0.5 cut-off
# (scores above 0.5 become class 1), then report the headline metrics.
# ROC AUC is computed from the raw scores, F1 from the thresholded labels.
y_pred = np.greater(np.array(all_preds), 0.5).astype('int32')
print(f"ROC AUC: {roc_auc_score(all_true, all_preds):.4f}")
print(f"F1 Score: {f1_score(all_true, y_pred):.4f}")

In [None]:
# Confusion matrix heatmap, saved to 'confusion_matrix.png'.
# Rows are the actual classes and columns the predicted ones; label 0 is shown
# as 'Fake' and label 1 as 'Real', matching the labels from the data generators.
_cm_tick_labels = ['Fake', 'Real']
conf_matrix = confusion_matrix(all_true, y_pred)
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=_cm_tick_labels,
    yticklabels=_cm_tick_labels,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.savefig('confusion_matrix.png')
plt.show()

In [None]:
# ROC curve of the raw scores, with the diagonal of a random classifier for
# reference; saved to 'roc_curve.png'.
fpr, tpr, _ = roc_curve(all_true, all_preds)
plt.plot(
    fpr,
    tpr,
    label=f'ROC curve (AUC = {roc_auc_score(all_true, all_preds):.4f})',
)
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
plt.title('ROC Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.savefig('roc_curve.png')
plt.show()

In [None]:
# Summary metrics on the test set.
# ROC AUC and PR AUC use the raw scores; F1 uses the thresholded predictions.
f1 = f1_score(all_true, y_pred)
roc_auc = roc_auc_score(all_true, all_preds)

# Area under the precision-recall curve, integrated over recall.
precision, recall, _ = precision_recall_curve(all_true, all_preds)
pr_auc = auc(recall, precision)

print(f"ROC AUC Score: {roc_auc:.4f}")
print(f"Precision-Recall AUC: {pr_auc:.4f}")
print(f"F1 Score: {f1:.4f}")

In [None]:
# Draw the confusion matrix as an annotated heatmap and write it to
# 'confusion_matrix.png' (an existing file of that name is overwritten).
# Rows: actual class, columns: predicted class.
conf_matrix = confusion_matrix(all_true, y_pred)
_heatmap_options = dict(
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=['Fake', 'Real'],
    yticklabels=['Fake', 'Real'],
)
sns.heatmap(conf_matrix, **_heatmap_options)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.savefig('confusion_matrix.png')
plt.show()

In [None]:
# Draw the ROC curve, labelled with the `roc_auc` value computed in the
# summary-metrics cell, and write it to 'roc_curve.png' (overwriting any
# existing file of that name).
fpr, tpr, _ = roc_curve(all_true, all_preds)
_roc_label = f'ROC curve (AUC = {roc_auc:.4f})'
plt.plot(fpr, tpr, label=_roc_label)
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.legend(loc="lower right")
plt.savefig('roc_curve.png')
plt.show()

In [None]:
# Accuracy per epoch for the training and validation data, saved to
# 'training_validation_accuracy.png'. `epochs_range` is also used by the
# loss-improvement plot further down.
epochs_range = range(len(history.history['accuracy']))

plt.figure(figsize=(8, 6))
for _metric_key, _curve_label in (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(epochs_range, history.history[_metric_key], label=_curve_label)
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('training_validation_accuracy.png')
plt.show()

In [None]:
# Loss improvement per epoch, i.e. (loss of the first epoch) - (loss of the
# current epoch), for training and validation; positive values mean the loss
# went down. Saved to 'loss_improvement_curve.png'.
# Relies on `epochs_range` from the accuracy-plot cell.
initial_train_loss = history.history['loss'][0]
initial_val_loss = history.history['val_loss'][0]
loss_improvement_train = np.subtract(initial_train_loss, np.array(history.history['loss']))
loss_improvement_val = np.subtract(initial_val_loss, np.array(history.history['val_loss']))

plt.figure(figsize=(8, 6))
for _improvement, _curve_label, _curve_color in (
    (loss_improvement_train, 'Training Loss Improvement', 'blue'),
    (loss_improvement_val, 'Validation Loss Improvement', 'orange'),
):
    plt.plot(epochs_range, _improvement, label=_curve_label, color=_curve_color)
plt.title('Training and Validation Loss Improvement')
plt.xlabel('Epochs')
plt.ylabel('Loss Improvement (Initial Loss - Current Loss)')
plt.legend()
plt.grid(True)
plt.savefig('loss_improvement_curve.png')
plt.show()