# AI Face Detection Training Notebook
## Real vs Fake Face Classification using CNN

This notebook walks through the complete training pipeline for a classifier that distinguishes real faces from AI-generated ones.

### Project Overview
- **Goal**: Build a binary classifier to detect real vs fake faces
- **Model**: Simple Convolutional Neural Network (CNN)
- **Dataset**: ~200 images (50% real, 50% fake)
- **Framework**: TensorFlow/Keras

## 1. Import Required Libraries

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import seaborn as sns

# Print the TensorFlow and Keras versions, then the GPUs TensorFlow can see.
tf_version = tf.__version__
print(f"TensorFlow version: {tf_version}")
keras_version = keras.__version__
print(f"Keras version: {keras_version}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU Available: {gpu_devices}")

## 2. Configuration Parameters

In [None]:
# Configuration
# Hyperparameters and filesystem locations shared by the cells below.
IMG_SIZE = (128, 128)       # passed to cv2.resize when images are loaded
BATCH_SIZE = 16
EPOCHS = 20
LEARNING_RATE = 0.001
DATASET_PATH = 'dataset'    # searched for 'real' and 'fake' sub-folders
MODEL_SAVE_PATH = 'models/face_detector_model.h5'

# Create the directory that will hold the saved model. It is derived from
# MODEL_SAVE_PATH so the two cannot drift apart if the path is edited; the
# "or '.'" covers a bare filename with no directory component.
os.makedirs(os.path.dirname(MODEL_SAVE_PATH) or '.', exist_ok=True)

print("Configuration:")
print(f"Image Size: {IMG_SIZE}")
print(f"Batch Size: {BATCH_SIZE}")
print(f"Epochs: {EPOCHS}")
print(f"Learning Rate: {LEARNING_RATE}")

## 3. Load and Explore Dataset

In [None]:
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _load_class_images(class_dir, img_size):
    """Read every supported image in class_dir as a resized RGB array."""
    loaded = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made from it) reproducible.
    for img_name in sorted(os.listdir(class_dir)):
        if not img_name.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        img_path = os.path.join(class_dir, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports an unreadable file by returning None.
            print(f"Warning: could not read {img_path}, skipping")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        loaded.append(cv2.resize(img, img_size))
    return loaded


def load_dataset(dataset_path, img_size):
    """
    Load images from dataset folder

    Looks for 'real' and 'fake' sub-folders of ``dataset_path`` and reads
    every .png/.jpg/.jpeg file in them. A missing sub-folder is reported and
    skipped rather than treated as an error.

    Args:
        dataset_path: directory containing the 'real' and 'fake' sub-folders.
        img_size: target size handed to cv2.resize, i.e. (width, height).

    Returns:
        X: float32 numpy array of RGB images scaled to [0, 1]; shaped
           (0, height, width, 3) when no image was loaded
        y: float32 numpy array of labels (1 for real, 0 for fake)
    """
    images = []
    labels = []

    # (sub-folder name, label) pairs; real faces are loaded first.
    for class_name, label in (('real', 1), ('fake', 0)):
        class_dir = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_dir):
            print(f"Warning: {class_dir} not found, skipping {class_name} faces")
            continue
        print(f"Loading {class_name} faces from {class_dir}...")
        class_images = _load_class_images(class_dir, img_size)
        images.extend(class_images)
        labels.extend([label] * len(class_images))
        print(f"Loaded {len(class_images)} {class_name} faces")

    # Convert to numpy arrays and normalize
    if images:
        X = np.array(images, dtype='float32') / 255.0
    else:
        # Keep the image axes so an empty result is still a 4-D array.
        width, height = img_size
        X = np.empty((0, height, width, 3), dtype='float32')
    y = np.array(labels, dtype='float32')

    return X, y

# Load the dataset
print("Loading dataset...")
X, y = load_dataset(DATASET_PATH, IMG_SIZE)

# Fail early with an actionable message: indexing X[0] on an empty dataset
# would otherwise raise an opaque IndexError a few lines further down.
if len(X) == 0:
    raise ValueError(
        f"No images found under '{DATASET_PATH}'. Expected 'real' and 'fake' "
        "sub-folders containing .png/.jpg/.jpeg files."
    )

print("\nDataset Information:")
print(f"Total images: {len(X)}")
print(f"Image shape: {X[0].shape}")
print(f"Real faces: {np.sum(y == 1)}")
print(f"Fake faces: {np.sum(y == 0)}")
# minlength=2 keeps both class slots ([fake, real]) even if one class is absent.
print(f"Class distribution: {np.bincount(y.astype(int), minlength=2)}")

## 4. Visualize Sample Images

In [None]:
# Display sample images
def plot_samples(X, y, num_samples=8):
    """Show randomly chosen images in a grid, each titled with its class.

    Args:
        X: indexable collection of images accepted by ``imshow``.
        y: labels aligned with ``X``; 1 is shown as REAL, anything else as FAKE.
        num_samples: number of images to draw. Indices are drawn
            independently, so the same image may appear more than once.
    """
    if num_samples <= 0 or len(X) == 0:
        print("No samples to display")
        return

    # Size the grid from num_samples (up to 4 per row) instead of assuming
    # exactly 8 images; the default of 8 still yields a 2x4 grid at 12x6.
    cols = min(4, num_samples)
    rows = (num_samples + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(3 * cols, 3 * rows), squeeze=False)
    axes = axes.ravel()

    for i in range(num_samples):
        idx = np.random.randint(0, len(X))
        is_real = y[idx] == 1
        axes[i].imshow(X[idx])
        axes[i].set_title(
            'REAL' if is_real else 'FAKE',
            color='green' if is_real else 'red',
            fontweight='bold',
        )
        axes[i].axis('off')

    # Blank out any grid cells left over in the last row.
    for unused_ax in axes[num_samples:]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Preview a random selection of the loaded images with their labels.
plot_samples(X, y)

## 5. Split Dataset into Train and Validation Sets

In [None]:
# Hold out 20% of the samples for validation. The split is stratified on the
# labels and uses a fixed seed (42).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Per-class counts for each split.
train_real = np.count_nonzero(y_train == 1)
train_fake = np.count_nonzero(y_train == 0)
val_real = np.count_nonzero(y_val == 1)
val_fake = np.count_nonzero(y_val == 0)

print("Dataset Split:")
print(f"Training set: {len(X_train)} images")
print(f"  - Real: {train_real}")
print(f"  - Fake: {train_fake}")
print(f"\nValidation set: {len(X_val)} images")
print(f"  - Real: {val_real}")
print(f"  - Fake: {val_fake}")

## 6. Build CNN Model Architecture

In [None]:
def create_cnn_model(input_shape=(128, 128, 3)):
    """
    Create a simple CNN model for binary classification

    The network is four Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout(0.25) blocks with 32, 64, 128 and 256 filters, followed by two
    dense layers (256 and 128 units, each followed by Dropout(0.5)) and a
    single sigmoid output unit.

    Args:
        input_shape: shape of one input image, (height, width, channels)
            under Keras' default channels-last layout.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Declare the input with an explicit Input layer instead of passing
    # input_shape to the first Conv2D, which newer Keras releases warn against.
    stack = [layers.Input(shape=input_shape)]

    # Convolutional blocks: identical apart from the filter count.
    for filters in (32, 64, 128, 256):
        stack += [
            layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(0.25),
        ]

    stack += [
        # Flatten and Dense Layers
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),

        # Output Layer (Binary Classification)
        layers.Dense(1, activation='sigmoid'),
    ]

    return models.Sequential(stack)

# Create the model
# cv2.resize takes IMG_SIZE as (width, height) but produces arrays shaped
# (height, width, channels), so the two entries are swapped here. For the
# square default this is a no-op; it matters once IMG_SIZE is not square.
model = create_cnn_model(input_shape=(IMG_SIZE[1], IMG_SIZE[0], 3))

# Compile the model
# evaluate()/history report the loss first, then these metrics in this order.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    metrics=['accuracy',
             keras.metrics.Precision(name='precision'),
             keras.metrics.Recall(name='recall')]
)

# Display model architecture
model.summary()

print(f"\nTotal Parameters: {model.count_params():,}")

## 7. Setup Training Callbacks

In [None]:
# Define callbacks
# Halt training after 5 epochs without a val_loss improvement and restore the
# best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without a val_loss improvement,
# never going below 1e-7.
lr_reducer = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)

# Write the model to MODEL_SAVE_PATH whenever val_accuracy reaches a new best.
checkpoint = keras.callbacks.ModelCheckpoint(
    MODEL_SAVE_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

callbacks = [early_stopping, lr_reducer, checkpoint]

print("Callbacks configured:")
print("- Early Stopping (patience=5)")
print("- Learning Rate Reduction (factor=0.5, patience=3)")
print("- Model Checkpoint (save best model)")

## 8. Train the Model

In [None]:
print("Starting training...\n")

# Fit on the training split and score the validation split after every epoch;
# the per-epoch metrics are kept in `history` for the plots further down.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_val, y_val),
)

print("\n✅ Training completed!")

## 9. Evaluate Model Performance

In [None]:
# Evaluate on validation set
print("Evaluating model on validation set...\n")
# evaluate() returns the loss followed by the compiled metrics in order:
# accuracy, precision, recall.
val_loss, val_acc, val_precision, val_recall = model.evaluate(X_val, y_val, verbose=0)

# F1 is the harmonic mean of precision and recall. When both are 0 (e.g. the
# model never predicts the positive class correctly) the formula divides by
# zero, so report 0 in that case instead of crashing the cell.
precision_recall_sum = val_precision + val_recall
if precision_recall_sum > 0:
    val_f1 = 2 * (val_precision * val_recall) / precision_recall_sum
else:
    val_f1 = 0.0

print("Validation Results:")
print(f"  Loss: {val_loss:.4f}")
print(f"  Accuracy: {val_acc*100:.2f}%")
print(f"  Precision: {val_precision*100:.2f}%")
print(f"  Recall: {val_recall*100:.2f}%")
print(f"  F1-Score: {val_f1*100:.2f}%")

## 10. Plot Training History

In [None]:
# Plot training history
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# One entry per panel:
# (axis, history key, train legend label, val legend label, title, y-axis label)
panels = (
    (axes[0, 0], 'accuracy', 'Train Accuracy', 'Val Accuracy', 'Model Accuracy', 'Accuracy'),
    (axes[0, 1], 'loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
    (axes[1, 0], 'precision', 'Train Precision', 'Val Precision', 'Model Precision', 'Precision'),
    (axes[1, 1], 'recall', 'Train Recall', 'Val Recall', 'Model Recall', 'Recall'),
)

for ax, key, train_label, val_label, title, ylabel in panels:
    # Training curve first, then the matching 'val_' curve from the same history.
    ax.plot(history.history[key], label=train_label, linewidth=2)
    ax.plot(history.history['val_' + key], label=val_label, linewidth=2)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
# Save before show() so the written file contains the rendered figure.
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
print("Training history saved as 'training_history.png'")
plt.show()

## 11. Confusion Matrix and Classification Report

In [None]:
# Make predictions
y_pred_prob = model.predict(X_val)
# Threshold the sigmoid output at 0.5: 1 = real, 0 = fake.
y_pred = (y_pred_prob > 0.5).astype(int).flatten()
y_true = y_val.astype(int)

# Pin the label set and its order so the matrix is always 2x2 and the
# 'Fake'/'Real' names line up with labels 0/1, even if one class happens to
# be absent from both the validation labels and the predictions.
class_ids = [0, 1]
class_names = ['Fake', 'Real']

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix', fontsize=14, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()

# Classification Report
print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

## 12. Test Predictions on Sample Images

In [None]:
# Display some predictions
def plot_predictions(model, X_val, y_val, num_samples=8):
    """Plot random validation images with their true and predicted classes.

    Each title shows the true class, the predicted class and the model's
    confidence in that prediction; it is green when they agree, red otherwise.
    The figure is also written to 'sample_predictions.png'.

    Args:
        model: object whose ``predict(batch, verbose=0)`` returns one
            probability per image (probability of REAL), shaped (n, 1).
        X_val: indexable collection of images.
        y_val: labels aligned with ``X_val`` (1 = real, 0 = fake).
        num_samples: maximum number of distinct images to show; capped at
            ``len(X_val)``.
    """
    # Sampling without replacement cannot return more items than exist, so
    # cap the request instead of letting np.random.choice raise.
    num_shown = min(num_samples, len(X_val))
    if num_shown <= 0:
        print("No validation images to display")
        return

    # Size the grid from the number of images (up to 4 per row); the default
    # of 8 still yields a 2x4 grid at 14x7.
    cols = min(4, num_shown)
    rows = (num_shown + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(3.5 * cols, 3.5 * rows), squeeze=False)
    axes = axes.ravel()

    indices = np.random.choice(len(X_val), num_shown, replace=False)

    # One batched forward pass instead of one model.predict call per image.
    batch = np.stack([X_val[idx] for idx in indices])
    pred_probs = model.predict(batch, verbose=0)[:, 0]

    for i, (idx, pred_prob) in enumerate(zip(indices, pred_probs)):
        img = X_val[idx]
        true_label = int(y_val[idx])
        pred_label = 1 if pred_prob > 0.5 else 0

        # Display image
        axes[i].imshow(img)

        # Create title
        true_text = 'REAL' if true_label == 1 else 'FAKE'
        pred_text = 'REAL' if pred_label == 1 else 'FAKE'
        # The model outputs P(real); confidence in a FAKE call is 1 - P(real).
        confidence = pred_prob if pred_label == 1 else (1 - pred_prob)

        title = f'True: {true_text}\nPred: {pred_text} ({confidence*100:.1f}%)'
        color = 'green' if true_label == pred_label else 'red'

        axes[i].set_title(title, color=color, fontsize=10, fontweight='bold')
        axes[i].axis('off')

    # Blank out any grid cells left over in the last row.
    for unused_ax in axes[num_shown:]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.savefig('sample_predictions.png', dpi=150, bbox_inches='tight')
    plt.show()

# Show predictions for a random selection of validation images.
plot_predictions(model, X_val, y_val)

## 13. Save the Final Model

In [None]:
# Write the trained model to disk and report its location and size.
# NOTE(review): this is the same path the ModelCheckpoint callback writes to,
# so the checkpointed "best val_accuracy" file is replaced by the model's
# current weights here — confirm that is intended.
model.save(MODEL_SAVE_PATH)
print(f"✅ Model saved to: {MODEL_SAVE_PATH}")

model_size_mb = os.path.getsize(MODEL_SAVE_PATH) / (1024*1024)
print(f"\nModel file size: {model_size_mb:.2f} MB")

# Closing banner with the follow-up steps.
banner = "=" * 60
print("\n" + banner)
print("Training Complete! Next Steps:")
print(banner)
print("1. The trained model has been saved")
print("2. Run the Streamlit app: streamlit run app.py")
print("3. Upload face images to test the model")
print(banner)

## Summary

This notebook demonstrated:
1. ✅ Loading and preprocessing face images
2. ✅ Building a CNN model architecture
3. ✅ Training the model with proper callbacks
4. ✅ Evaluating model performance
5. ✅ Visualizing results and predictions
6. ✅ Saving the trained model for deployment

The model is now ready to be used in the Streamlit application!