In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd

## core model

In [None]:
def CNN_model(input_features, input_shape=(512, 512, 3)):
    """Build the baseline convolutional classifier.

    Four blocks of (Conv2D, Conv2D, BatchNormalization, ReLU, MaxPooling2D)
    with 32, 64, 128 and 256 filters are followed by a dense head
    (1024 -> 512 -> dropout) and a softmax output layer.

    Args:
        input_features: Number of units in the final softmax layer.
        input_shape: Shape of one input sample, passed to the first
            convolution.

    Returns:
        An uncompiled ``keras.Sequential`` model.
    """
    L = keras.layers
    conv_args = dict(kernel_size=(3, 3), strides=(1, 1), padding='same')

    stack = []
    for width in (32, 64, 128, 256):
        # Only the very first layer of the network declares the input shape.
        shape_arg = {} if stack else {'input_shape': input_shape}
        stack.append(L.Conv2D(width, **conv_args, **shape_arg))
        stack.append(L.Conv2D(width, **conv_args))
        stack.append(L.BatchNormalization())
        stack.append(L.ReLU())
        stack.append(L.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    # Dense classification head.
    stack.append(L.Flatten())
    for units in (1024, 512):
        stack.append(L.Dense(units))
        stack.append(L.ReLU())
    stack.append(L.Dropout(0.5))
    stack.append(L.Dense(input_features, activation='softmax'))

    return keras.Sequential(stack)

In [2]:
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Data paths
# The dataset root defaults to the original local location but can be
# overridden with the PEOPLE_DATA_ROOT environment variable, so the notebook
# can run on another machine without editing this cell.
DATA_ROOT = os.environ.get(
    "PEOPLE_DATA_ROOT",
    r"d:\college docs\3rd_year_sem_5\deep_learning\PyTorch_CNN_Model\archive",
)
TRAIN_IMAGES_PATH = os.path.join(DATA_ROOT, "images", "train")
VAL_IMAGES_PATH = os.path.join(DATA_ROOT, "images", "val")
TRAIN_LABELS_PATH = os.path.join(DATA_ROOT, "labels", "train")
VAL_LABELS_PATH = os.path.join(DATA_ROOT, "labels", "val")

# Model parameters
IMG_SIZE = 224  # Using smaller size for faster training
BATCH_SIZE = 32
NUM_CLASSES = 2  # person vs no-person (binary classification)

print(f"Training images path: {TRAIN_IMAGES_PATH}")
print(f"Training labels path: {TRAIN_LABELS_PATH}")
print(f"Image size: {IMG_SIZE}x{IMG_SIZE}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Number of classes: {NUM_CLASSES}")

Training images path: d:\college docs\3rd_year_sem_5\deep_learning\PyTorch_CNN_Model\archive\images\train
Training labels path: d:\college docs\3rd_year_sem_5\deep_learning\PyTorch_CNN_Model\archive\labels\train
Image size: 224x224
Batch size: 32
Number of classes: 2


In [4]:
def load_yolo_data(images_path, labels_path, img_size=224):
    """
    Load and preprocess YOLO format data for binary classification (person vs no-person)

    Every ``.jpg`` file (case-insensitive) in ``images_path`` is read with
    OpenCV, resized to ``img_size`` x ``img_size``, converted from BGR to RGB
    and scaled to [0, 1]. The label is 1 when the matching ``.txt`` file in
    ``labels_path`` contains at least one row whose first field is class id 0,
    and 0 otherwise (including when the label file does not exist).

    Args:
        images_path: Directory containing the images.
        labels_path: Directory containing the YOLO ``.txt`` label files.
        img_size: Side length, in pixels, of the square output images.

    Returns:
        Tuple ``(images, labels)``: a float32 array of shape
        ``(n, img_size, img_size, 3)`` and an integer array of shape ``(n,)``.
        Images that OpenCV cannot read are skipped, so ``n`` may be smaller
        than the number of files found.
    """
    person_class_id = 0  # class id treated as "person" in the label files

    # Sorted so the sample order is reproducible; os.listdir order is arbitrary.
    image_files = sorted(
        name for name in os.listdir(images_path) if name.lower().endswith('.jpg')
    )

    print(f"Found {len(image_files)} images")

    # Preallocate the output buffer instead of collecting per-image arrays in
    # a list and copying them again with np.array at the end.
    images = np.empty((len(image_files), img_size, img_size, 3), dtype=np.float32)
    labels = []

    for i, image_file in enumerate(image_files):
        if i % 1000 == 0:
            print(f"Processing image {i+1}/{len(image_files)}")

        # Load image; cv2.imread returns None for unreadable files.
        image = cv2.imread(os.path.join(images_path, image_file))
        if image is None:
            continue

        # Resize, convert to RGB and normalize to [0, 1]
        image = cv2.resize(image, (img_size, img_size))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        images[len(labels)] = image.astype(np.float32) / 255.0

        # Derive the label file name from the stem so that only the real
        # extension is swapped (str.replace would touch every '.jpg').
        label_file = os.path.splitext(image_file)[0] + '.txt'
        label_path = os.path.join(labels_path, label_file)

        # Check if person is present (class 0 in YOLO format)
        has_person = 0
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # tolerate blank lines in label files
                    if int(fields[0]) == person_class_id:
                        has_person = 1
                        break

        labels.append(has_person)

    # Drop the unused tail left by skipped (unreadable) images.
    return images[:len(labels)], np.array(labels, dtype=int)

print("Data loading function defined successfully!")

Data loading function defined successfully!


In [5]:
# Load training data
print("Loading training data...")
X_train, y_train = load_yolo_data(TRAIN_IMAGES_PATH, TRAIN_LABELS_PATH, IMG_SIZE)

print(f"\nTraining data loaded:")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"Positive samples (with person): {np.sum(y_train)}")
print(f"Negative samples (no person): {len(y_train) - np.sum(y_train)}")

# Check if validation data exists
if os.path.exists(VAL_IMAGES_PATH) and os.path.exists(VAL_LABELS_PATH):
    print("\nLoading validation data...")
    X_val, y_val = load_yolo_data(VAL_IMAGES_PATH, VAL_LABELS_PATH, IMG_SIZE)
    print(f"X_val shape: {X_val.shape}")
    print(f"y_val shape: {y_val.shape}")
    print(f"Validation positive samples: {np.sum(y_val)}")
    print(f"Validation negative samples: {len(y_val) - np.sum(y_val)}")
else:
    # Split training data if no validation set exists
    print("\nNo validation set found. Splitting training data...")
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
    )
    print(f"After split:")
    print(f"X_train shape: {X_train.shape}")
    print(f"X_val shape: {X_val.shape}")
    print(f"Training positive samples: {np.sum(y_train)}")
    print(f"Validation positive samples: {np.sum(y_val)}")

# Sanity check: a person/no-person classifier needs examples of both classes.
# If a split contains a single class, accuracy/precision/recall are trivially
# satisfied by a constant prediction and the trained model is meaningless.
# Reported with print() because this notebook silences the warnings module.
for split_name, split_labels in (("training", y_train), ("validation", y_val)):
    classes_present = np.unique(split_labels)
    if len(classes_present) < 2:
        print(
            f"\nWARNING: the {split_name} split contains only class(es) "
            f"{classes_present.tolist()}; add samples of the missing class "
            f"before training a binary classifier."
        )

Loading training data...
Found 13754 images
Processing image 1/13754
Processing image 1001/13754
Processing image 2001/13754
Processing image 3001/13754
Processing image 4001/13754
Processing image 5001/13754
Processing image 6001/13754
Processing image 7001/13754
Processing image 8001/13754
Processing image 9001/13754
Processing image 10001/13754
Processing image 11001/13754
Processing image 12001/13754
Processing image 13001/13754

Training data loaded:
X_train shape: (13754, 224, 224, 3)
y_train shape: (13754,)
Positive samples (with person): 13754
Negative samples (no person): 0

Loading validation data...
Found 4000 images
Processing image 1/4000
Processing image 1001/4000
Processing image 2001/4000
Processing image 3001/4000
X_val shape: (4000, 224, 224, 3)
y_val shape: (4000,)
Validation positive samples: 4000
Validation negative samples: 0


In [None]:
# Visualize some sample data
# Never index past the end of the data if fewer than 12 images were loaded.
n_preview = min(12, len(X_train))
plt.figure(figsize=(15, 10))
for i in range(n_preview):
    plt.subplot(3, 4, i+1)
    plt.imshow(X_train[i])
    label = "Person" if y_train[i] == 1 else "No Person"
    plt.title(f'{label}')
    plt.axis('off')

plt.suptitle('Sample Training Images', fontsize=16)
plt.tight_layout()
plt.show()

# Check data distribution
# np.bincount with minlength=2 always yields a count for class 0 AND class 1,
# even when one of them is absent. np.unique would return a single count in
# that case, which mislabels the bar chart and makes counts[1] fail.
counts = np.bincount(np.asarray(y_train, dtype=int), minlength=2)
unique = np.arange(len(counts))
plt.figure(figsize=(8, 6))
plt.bar(['No Person', 'Person'], counts[:2])
plt.title('Training Data Distribution')
plt.ylabel('Number of Images')
plt.show()

total_samples = int(counts.sum())
positive_ratio = counts[1] / total_samples if total_samples else 0.0
print(f"Class distribution: {dict(zip(unique.tolist(), counts.tolist()))}")
print(f"Positive class ratio: {positive_ratio:.3f}")

In [None]:
# Update the CNN model for people detection (binary classification)
def CNN_model_updated(input_shape=(224, 224, 3), num_classes=2):
    """
    Updated CNN model optimized for people detection

    Four blocks of (Conv2D, Conv2D, BatchNormalization, ReLU, MaxPooling2D,
    Dropout(0.25)) with 32, 64, 128 and 256 filters feed a dense classifier
    (512 -> 256, each followed by ReLU and Dropout(0.5)).

    Args:
        input_shape: Shape of one input sample, passed to the first
            convolution.
        num_classes: Number of target classes. ``1`` or ``2`` produce a single
            sigmoid unit (binary classification, the original behavior);
            larger values produce a softmax layer with ``num_classes`` units.

    Returns:
        An uncompiled ``keras.Sequential`` model.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """
    if num_classes < 1:
        raise ValueError(f"num_classes must be >= 1, got {num_classes}")

    L = keras.layers
    conv_args = dict(kernel_size=(3, 3), strides=(1, 1), padding='same')

    # NOTE(review): the two convolutions in each block have no activation
    # between them (Conv2D is created without an `activation` argument), so
    # they compose linearly. Kept as-is to preserve the architecture.
    stack = []
    for width in (32, 64, 128, 256):
        # Only the very first layer of the network declares the input shape.
        shape_arg = {} if stack else {'input_shape': input_shape}
        stack.extend([
            L.Conv2D(width, **conv_args, **shape_arg),
            L.Conv2D(width, **conv_args),
            L.BatchNormalization(),
            L.ReLU(),
            L.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'),
            L.Dropout(0.25),
        ])

    # Classifier
    stack.append(L.Flatten())
    for units in (512, 256):
        stack.extend([L.Dense(units), L.ReLU(), L.Dropout(0.5)])

    # Output layer: previously `num_classes` was accepted but ignored.
    if num_classes <= 2:
        # Binary classification (person/no-person)
        stack.append(L.Dense(1, activation='sigmoid'))
    else:
        stack.append(L.Dense(num_classes, activation='softmax'))

    return keras.Sequential(stack)

# Create the model
# Pass the configured class count instead of a literal that contradicts the
# NUM_CLASSES constant (person vs no-person is a two-class problem).
model = CNN_model_updated(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=NUM_CLASSES)
model.summary()

In [None]:
# Compile the model
# Precision and recall are passed as metric objects with explicit names:
# the lowercase string aliases are not resolved by every Keras version, and
# fixed names keep the history keys ('precision', 'val_precision', 'recall',
# 'val_recall') stable even if this cell is re-run in the same session.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',  # For binary classification
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ]
)

print("Model compiled successfully!")
print(f"Total parameters: {model.count_params():,}")

In [None]:
# Random geometric augmentation applied to training batches only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation images are fed through unchanged
val_datagen = ImageDataGenerator()

# Batch iterators over the in-memory arrays
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCH_SIZE)
val_generator = val_datagen.flow(X_val, y_val, batch_size=BATCH_SIZE)

# Training callbacks, built one by one and then collected
checkpoint_cb = ModelCheckpoint(
    'best_people_detection_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
early_stopping_cb = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)
callbacks = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]

print("Data augmentation and callbacks prepared!")

In [None]:
# Fit the compiled model on the augmented training batches
EPOCHS = 50

print("Starting training...")
for summary_line in (
    f"Training samples: {len(X_train)}",
    f"Validation samples: {len(X_val)}",
    f"Epochs: {EPOCHS}",
    f"Batch size: {BATCH_SIZE}",
):
    print(summary_line)

# Keyword options for fit() gathered in one place
fit_options = dict(
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

print("Training completed!")

In [None]:
# Plot training history
def plot_training_history(history):
    """Draw a 2x2 grid of training vs. validation curves.

    Reads the per-epoch series ``accuracy``, ``loss``, ``precision`` and
    ``recall`` (and their ``val_`` counterparts) from ``history.history``
    and plots one panel per metric.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # One row per panel, in row-major grid order:
    # (train key, train legend, val key, val legend, panel title, y label)
    panels = (
        ('accuracy', 'Training Accuracy', 'val_accuracy', 'Validation Accuracy',
         'Model Accuracy', 'Accuracy'),
        ('loss', 'Training Loss', 'val_loss', 'Validation Loss',
         'Model Loss', 'Loss'),
        ('precision', 'Training Precision', 'val_precision', 'Validation Precision',
         'Model Precision', 'Precision'),
        ('recall', 'Training Recall', 'val_recall', 'Validation Recall',
         'Model Recall', 'Recall'),
    )

    for ax, panel in zip(axes.flat, panels):
        train_key, train_legend, val_key, val_legend, panel_title, y_label = panel
        ax.plot(history.history[train_key], label=train_legend)
        ax.plot(history.history[val_key], label=val_legend)
        ax.set_title(panel_title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# Render the curves for the run that just finished
plot_training_history(history)

In [None]:
# Evaluate the model
from sklearn.metrics import confusion_matrix, classification_report

# evaluate() returns the loss followed by the metrics in compile() order.
val_loss, val_accuracy, val_precision, val_recall = model.evaluate(X_val, y_val, verbose=0)

print("Final Model Performance:")
print(f"Validation Accuracy: {val_accuracy:.4f}")
print(f"Validation Precision: {val_precision:.4f}")
print(f"Validation Recall: {val_recall:.4f}")
print(f"Validation Loss: {val_loss:.4f}")

# Calculate F1-score (defined as 0 when precision and recall are both 0,
# instead of dividing by zero)
precision_recall_sum = val_precision + val_recall
f1_score = (
    2 * (val_precision * val_recall) / precision_recall_sum
    if precision_recall_sum > 0 else 0.0
)
print(f"Validation F1-Score: {f1_score:.4f}")

# Make predictions on validation set
y_pred_proba = model.predict(X_val)
y_pred = (y_pred_proba > 0.5).astype(int).flatten()

# Confusion matrix
# The label set is fixed explicitly so the matrix is always 2x2 and lines up
# with the two tick labels, even if only one class occurs in y_val / y_pred.
class_ids = [0, 1]
class_names = ['No Person', 'Person']
cm = confusion_matrix(y_val, y_pred, labels=class_ids)

plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)

# Add text annotations (white text on dark cells, black on light ones)
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()

print("\nDetailed Classification Report:")
# labels= keeps target_names aligned with the classes; zero_division=0 reports
# 0 for a class without predictions or samples instead of emitting a warning.
print(classification_report(
    y_val, y_pred,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,
))

In [None]:
# Visualize predictions
def visualize_predictions(X, y_true, y_pred_proba, num_samples=12):
    """Visualize model predictions

    Shows ``num_samples`` randomly chosen images with their true and
    predicted labels; titles are green for correct predictions and red for
    wrong ones. A probability above 0.5 is treated as "Person".

    Args:
        X: Indexable collection of images accepted by ``plt.imshow``.
        y_true: Ground-truth labels (1 = person) aligned with ``X``.
        y_pred_proba: Predicted person probability per sample, either one
            value per sample or a one-element row per sample.
        num_samples: Maximum number of images to show; capped at ``len(X)``.
    """
    # Sampling without replacement cannot draw more items than exist.
    num_samples = min(num_samples, len(X))
    if num_samples <= 0:
        print("No samples to visualize.")
        return

    indices = np.random.choice(len(X), num_samples, replace=False)

    # Size the grid from the sample count (4 columns, as many rows as needed)
    # so num_samples > 12 no longer overflows a fixed 3x4 layout.
    n_cols = 4
    n_rows = (num_samples + n_cols - 1) // n_cols

    plt.figure(figsize=(15, 4 * n_rows))
    for i, idx in enumerate(indices):
        plt.subplot(n_rows, n_cols, i+1)
        plt.imshow(X[idx])

        true_label = "Person" if y_true[idx] == 1 else "No Person"
        # np.ravel accepts both a scalar entry and a one-element row.
        pred_proba = float(np.ravel(y_pred_proba[idx])[0])
        pred_label = "Person" if pred_proba > 0.5 else "No Person"

        color = 'green' if true_label == pred_label else 'red'
        plt.title(f'True: {true_label}\nPred: {pred_label} ({pred_proba:.3f})',
                 color=color, fontsize=10)
        plt.axis('off')

    plt.suptitle('Model Predictions on Validation Set', fontsize=16)
    plt.tight_layout()
    plt.show()

# Render a random sample of validation predictions
visualize_predictions(X=X_val, y_true=y_val, y_pred_proba=y_pred_proba)

# Persist the full trained model
model.save('people_detection_cnn_model.h5')
print("\nModel saved as 'people_detection_cnn_model.h5'")

# Also dump the architecture alone as JSON (optional)
model_json = model.to_json()
with open("people_detection_model_architecture.json", "w") as architecture_file:
    architecture_file.write(model_json)

print(
    "Model architecture saved as 'people_detection_model_architecture.json'",
    "\nTraining completed successfully!",
    sep="\n",
)