1. Image classification using SVM with HOG features.

In [None]:
"""
Binary Image Classification using HOG Features and SVM

This program classifies images into two classes using:
1. HOG (Histogram of Oriented Gradients) feature extraction
2. SVM (Support Vector Machine) classification

The program loads images from specified directories, extracts HOG features,
trains an SVM model, and evaluates its performance with metrics like F1-score.
"""

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

# Image Loading and Preprocessing Functions
def load_images(folder_path, label, target_size=(64, 128)):
    """
    Read every image in a folder as a resized grayscale array and label it.

    Files are discovered with glob using the patterns *.jpg, *.jpeg, *.png and
    *.bmp. Images that cannot be read, or that raise while being converted or
    resized, are reported on stdout and left out of the result.

    Args:
        folder_path (str): Directory that holds the images
        label (int): Class label (0 or 1) stored once per loaded image
        target_size (tuple): Size handed to cv2.resize for every image

    Returns:
        tuple: (images, labels) - two parallel lists; both are empty when no
            file matches
    """
    patterns = ('*.jpg', '*.jpeg', '*.png', '*.bmp')
    image_paths = [
        match
        for pattern in patterns
        for match in glob.glob(os.path.join(folder_path, pattern))
    ]

    images, labels = [], []

    # Nothing to do when the folder holds no matching file
    if len(image_paths) == 0:
        print(f"Warning: No images found in {folder_path}")
        return images, labels

    print(f"Processing {len(image_paths)} images from {folder_path}...")

    for img_path in image_paths:
        try:
            img = cv2.imread(img_path)
            if img is not None:
                # Grayscale first, then resize; only store the pair on success
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                images.append(cv2.resize(gray, target_size))
                labels.append(label)
            else:
                print(f"Warning: Could not read image {img_path}")
        except Exception as e:
            print(f"Error processing {img_path}: {str(e)}")

    print(f"Processed {len(images)} images successfully")
    return images, labels

# HOG Feature Extraction Function
def extract_hog_features(images, orientations=9, pixels_per_cell=(8, 8), 
                        cells_per_block=(2, 2), block_norm='L2-Hys'):
    """
    Compute one HOG feature vector per image.

    Args:
        images (list): List of grayscale images
        orientations (int): Number of orientation bins for HOG
        pixels_per_cell (tuple): Size of a cell in pixels
        cells_per_block (tuple): Number of cells in each block
        block_norm (str): Block normalization method

    Returns:
        numpy.ndarray: Array built from the per-image HOG vectors
            (an empty 1-D array when `images` is empty)
    """
    print("Extracting HOG features...")

    # The same HOG settings apply to every image, so gather them once
    hog_settings = dict(
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm=block_norm,
        visualize=False,
    )
    feature_rows = [hog(image, **hog_settings) for image in images]

    print(f"Extracted HOG features from {len(feature_rows)} images")
    return np.array(feature_rows)

# Visualize HOG features for a sample image
def visualize_hog(image, orientations=9, pixels_per_cell=(8, 8), 
                 cells_per_block=(2, 2), block_norm='L2-Hys'):
    """
    Show an image next to its HOG rendering and return the HOG vector.

    Args:
        image (numpy.ndarray): Grayscale image
        orientations (int): Number of orientation bins for HOG
        pixels_per_cell (tuple): Size of a cell in pixels
        cells_per_block (tuple): Number of cells in each block
        block_norm (str): Block normalization method

    Returns:
        The HOG feature vector computed for `image`
    """
    features, hog_image = hog(
        image,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm=block_norm,
        visualize=True
    )

    # Left panel: input image, right panel: HOG rendering
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    panels = (
        (image, 'Original Image (Grayscale)'),
        (hog_image, 'HOG Visualization'),
    )
    for axis, (picture, caption) in zip(axes, panels):
        axis.imshow(picture, cmap='gray')
        axis.set_title(caption)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

    print(f"HOG feature vector shape: {features.shape}")
    return features

# SVM Training Function
def train_svm(X_train, y_train, kernel='rbf', C=1.0, gamma='scale', probability=True):
    """
    Fit an SVC on the training data and return it.

    Args:
        X_train (numpy.ndarray): Training features
        y_train (numpy.ndarray): Training labels
        kernel (str): Kernel type ('linear', 'rbf', 'poly', etc.)
        C (float): Regularization parameter
        gamma (str/float): Kernel coefficient for 'rbf', 'poly', 'sigmoid'
        probability (bool): Enable probability estimates

    Returns:
        SVC: Trained SVM model
    """
    print(f"Training SVM with kernel={kernel}, C={C}, gamma={gamma}...")

    # random_state is fixed at 42 regardless of the arguments
    svc_options = {
        'kernel': kernel,
        'C': C,
        'gamma': gamma,
        'probability': probability,
        'random_state': 42,
    }
    model = SVC(**svc_options)
    model.fit(X_train, y_train)

    print("SVM model trained successfully")
    return model

# Model Evaluation Function
def evaluate_model(model, X_test, y_test):
    """
    Evaluate a binary classifier and display its performance metrics.

    Prints accuracy, precision, recall, F1-score, the confusion matrix and the
    classification report, then shows the confusion matrix as a heatmap.

    Args:
        model: Trained classification model exposing predict()
        X_test (numpy.ndarray): Test features
        y_test (numpy.ndarray): Test labels (0 or 1)

    Returns:
        dict: Dictionary of evaluation metrics with the keys 'accuracy',
            'precision', 'recall', 'f1' and 'confusion_matrix'
    """
    class_ids = [0, 1]
    class_names = ['Class 0', 'Class 1']

    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Print evaluation results
    print("\nModel Evaluation Results:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")

    # Pin the label set so the matrix is always 2x2 and lines up with the two
    # tick labels used below, even when one class never occurs in y_test/y_pred
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    print("\nConfusion Matrix:")
    print(cm)

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Visualize confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.title('Confusion Matrix')
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm
    }

# Main function
def main():
    """
    Run the HOG + SVM binary image classification pipeline end to end.

    Steps: validate the four image directories, load and preprocess the
    images, show the HOG rendering of one training image, extract HOG
    features, standardize them, train a linear SVM and evaluate it on the
    test set. The function prints an error and returns early when a directory
    is missing, when either class has no usable training image, or when no
    test image could be loaded.
    """
    print("Binary Image Classification using HOG Features and SVM")
    print("=" * 60)

    # Directory paths
    print("\nLoading image directories:")
    train_path_class1 = "./resources/classification/cats"
    train_path_class2 = "./resources/classification/dogs"
    test_path_class1 = "./resources/classification/cat_test"
    test_path_class2 = "./resources/classification/dog_test"

    # Validate paths
    for path in [train_path_class1, train_path_class2, test_path_class1, test_path_class2]:
        if not os.path.exists(path):
            print(f"Error: Path {path} does not exist!")
            return

    # Set HOG parameters
    target_size = (64, 128)
    hog_params = {
        'orientations': 9,
        'pixels_per_cell': (8, 8),
        'cells_per_block': (2, 2),
        'block_norm': 'L2-Hys'
    }

    # Load and preprocess images
    print("\nStep 1: Loading and preprocessing images...")
    train_images_class1, train_labels_class1 = load_images(train_path_class1, 0, target_size)
    train_images_class2, train_labels_class2 = load_images(train_path_class2, 1, target_size)
    test_images_class1, test_labels_class1 = load_images(test_path_class1, 0, target_size)
    test_images_class2, test_labels_class2 = load_images(test_path_class2, 1, target_size)

    # An existing but empty (or unreadable) folder only yields a warning from
    # load_images; stop here with a clear message instead of failing later
    # inside the scaler or the SVM
    if not train_images_class1 or not train_images_class2:
        print("Error: Training images are required for both classes!")
        return
    if not test_images_class1 and not test_images_class2:
        print("Error: No test images could be loaded!")
        return

    # Combine datasets
    train_images = train_images_class1 + train_images_class2
    train_labels = train_labels_class1 + train_labels_class2
    test_images = test_images_class1 + test_images_class2
    test_labels = test_labels_class1 + test_labels_class2

    # Visualize HOG features for a sample image (optional)
    print("\nStep 2: Visualizing HOG features for a sample image...")
    visualize_hog(train_images[0], **hog_params)

    # Extract HOG features
    print("\nStep 3: Extracting HOG features...")
    X_train = extract_hog_features(train_images, **hog_params)
    X_test = extract_hog_features(test_images, **hog_params)
    y_train = np.array(train_labels)
    y_test = np.array(test_labels)

    # Normalize features using StandardScaler (fitted on training data only)
    print("\nStep 4: Normalizing features...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Print dataset shapes
    print(f"\nTraining data: {X_train_scaled.shape}, Labels: {y_train.shape}")
    print(f"Testing data: {X_test_scaled.shape}, Labels: {y_test.shape}")

    # Train SVM model
    print("\nStep 5: Training SVM model...")
    svm_model = train_svm(X_train_scaled, y_train, kernel='linear', C=1.0)

    # Evaluate the model
    print("\nStep 6: Evaluating the model...")
    metrics = evaluate_model(svm_model, X_test_scaled, y_test)

    print("\nBinary Image Classification process completed!")
    print(f"Final F1-Score: {metrics['f1']:.4f}")

if __name__ == "__main__":
    main()

2. Image classification using SVM with SIFT descriptors

In [None]:
"""
Binary Image Classification using SIFT Features and SVM

This program classifies images into two classes by:
1. Extracting SIFT descriptors from images
2. Normalizing the number of descriptors (padding/truncating)
3. Flattening descriptors into feature vectors
4. Training an SVM model for classification
5. Evaluating model performance using metrics like F1-score
"""

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score

# Number of SIFT keypoints to use per image (fixed size)
N_KEYPOINTS = 100

# Image Loading and Preprocessing Functions
def load_images(folder_path, label):
    """
    Collect the image files of a folder and pair each with the given label.

    Only paths are gathered here; the files themselves are not opened.

    Args:
        folder_path (str): Path to the folder containing images
        label (int): Class label (0 or 1)

    Returns:
        tuple: (image_paths, labels) - list of image paths and a list holding
            `label` once per path (both empty when nothing matches)
    """
    image_paths = []
    for pattern in ('*.jpg', '*.jpeg', '*.png', '*.bmp'):
        image_paths += glob.glob(os.path.join(folder_path, pattern))

    if image_paths:
        print(f"Found {len(image_paths)} images in {folder_path}")
        # One label per discovered file
        return image_paths, [label] * len(image_paths)

    print(f"Warning: No images found in {folder_path}")
    return image_paths, []

# SIFT Feature Extraction Functions
def extract_sift_features(image_path, n_keypoints=N_KEYPOINTS):
    """
    Compute SIFT descriptors for one image and force them to a fixed row count.

    Images with fewer than `n_keypoints` descriptors are zero-padded; images
    with more keep only the first `n_keypoints` rows. An all-zero array is
    returned when the image cannot be read, has no keypoints, or any step
    raises.

    Args:
        image_path (str): Path to the image file
        n_keypoints (int): Number of keypoints to extract/normalize to

    Returns:
        numpy.ndarray: Normalized SIFT descriptors (n_keypoints x 128)
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}")
            return np.zeros((n_keypoints, 128))

        # SIFT runs on the grayscale version of the image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        detector = cv2.SIFT_create()
        _, descriptors = detector.detectAndCompute(gray, None)

        if descriptors is None or len(descriptors) == 0:
            print(f"Warning: No SIFT keypoints found in {image_path}")
            return np.zeros((n_keypoints, 128))

        missing_rows = n_keypoints - len(descriptors)
        if missing_rows > 0:
            # Too few descriptors: append zero rows up to n_keypoints
            return np.vstack([descriptors, np.zeros((missing_rows, 128))])

        # Enough descriptors: keep the first n_keypoints rows
        return descriptors[:n_keypoints]

    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return np.zeros((n_keypoints, 128))

def process_image_dataset(image_paths, labels, n_keypoints=N_KEYPOINTS):
    """
    Turn a list of image paths into flattened fixed-size SIFT feature vectors.

    Args:
        image_paths (list): List of image file paths
        labels (list): List of labels corresponding to each image
        n_keypoints (int): Number of keypoints to extract/normalize to

    Returns:
        tuple: (features, labels) - SIFT features and corresponding labels,
            both as numpy arrays
    """
    total = len(image_paths)
    print(f"Extracting SIFT features from {total} images...")

    features = []
    processed_labels = []

    for count, (path, label) in enumerate(zip(image_paths, labels), start=1):
        # Each image contributes one 1-D vector of its stacked descriptors
        features.append(extract_sift_features(path, n_keypoints).flatten())
        processed_labels.append(label)

        # Report every tenth image and the final one
        if count % 10 == 0 or count == total:
            print(f"Processed {count}/{total} images")

    return np.array(features), np.array(processed_labels)

# Visualize SIFT keypoints for a sample image
def visualize_sift(image_path):
    """
    Display an image side by side with its detected SIFT keypoints.

    Any failure is reported on stdout; nothing is returned.

    Args:
        image_path (str): Path to the image file
    """
    try:
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read image {image_path}")
            return

        # Keypoints are detected on, and drawn over, the grayscale image
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        detector = cv2.SIFT_create()
        keypoints = detector.detect(gray, None)
        n_detected = len(keypoints)

        img_keypoints = cv2.drawKeypoints(
            gray, keypoints, None,
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

        plt.figure(figsize=(10, 5))
        panels = (
            (121, cv2.cvtColor(img, cv2.COLOR_BGR2RGB), 'Original Image'),
            (122, img_keypoints, f'SIFT Keypoints: {n_detected} detected'),
        )
        for position, picture, caption in panels:
            plt.subplot(position)
            plt.imshow(picture)
            plt.title(caption)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

        print(f"Total SIFT keypoints detected: {n_detected}")

    except Exception as e:
        print(f"Error visualizing SIFT keypoints: {str(e)}")

# SVM Training and Evaluation Functions
def train_svm(X_train, y_train, kernel='linear', C=1.0, gamma='scale'):
    """
    Fit an SVC with probability estimates enabled and return it.

    Args:
        X_train (numpy.ndarray): Training features
        y_train (numpy.ndarray): Training labels
        kernel (str): Kernel type ('linear', 'rbf', 'poly', etc.)
        C (float): Regularization parameter
        gamma (str/float): Kernel coefficient for 'rbf', 'poly', 'sigmoid'

    Returns:
        SVC: Trained SVM model
    """
    print(f"Training SVM with kernel={kernel}, C={C}, gamma={gamma}...")

    # probability and random_state are fixed; the rest comes from the caller
    svc_options = {
        'kernel': kernel,
        'C': C,
        'gamma': gamma,
        'probability': True,
        'random_state': 42,
    }
    model = SVC(**svc_options)
    model.fit(X_train, y_train)

    print("SVM model trained successfully")
    return model

def evaluate_model(model, X_test, y_test):
    """
    Evaluate a binary classifier and display its performance metrics.

    Prints accuracy, precision, recall, F1-score, the confusion matrix and the
    classification report, then shows the confusion matrix as a heatmap.

    Args:
        model: Trained classification model exposing predict()
        X_test (numpy.ndarray): Test features
        y_test (numpy.ndarray): Test labels (0 or 1)

    Returns:
        dict: Dictionary of evaluation metrics with the keys 'accuracy',
            'precision', 'recall', 'f1' and 'confusion_matrix'
    """
    class_ids = [0, 1]
    class_names = ['Class 0', 'Class 1']

    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Print evaluation results
    print("\nModel Evaluation Results:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")

    # Pin the label set so the matrix is always 2x2 and lines up with the two
    # tick labels used below, even when one class never occurs in y_test/y_pred
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    print("\nConfusion Matrix:")
    print(cm)

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Visualize confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.title('Confusion Matrix')
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm
    }

# Main function
def main():
    """
    Run the SIFT + SVM binary image classification pipeline end to end.

    Steps: validate the four image directories, collect image paths and
    labels, show the SIFT keypoints of one training image, extract fixed-size
    SIFT feature vectors, standardize them, train the SVM and evaluate it on
    the test set. The function prints an error and returns early when a
    directory is missing, when either class has no training image, or when no
    test image is found.
    """
    print("Binary Image Classification using SIFT Features and SVM")
    print("=" * 60)

    # Directory paths
    print("\nLoading image directories:")
    train_path_class1 = "./resources/classification/cats"
    train_path_class2 = "./resources/classification/dogs"
    test_path_class1 = "./resources/classification/cat_test"
    test_path_class2 = "./resources/classification/dog_test"

    # Validate paths
    for path in [train_path_class1, train_path_class2, test_path_class1, test_path_class2]:
        if not os.path.exists(path):
            print(f"Error: Path {path} does not exist!")
            return

    # Set SIFT and SVM parameters
    n_keypoints = N_KEYPOINTS
    svm_params = {
        'kernel': 'linear',
        'C': 1.0,
        'gamma': 'scale'
    }

    # Step 1: Load image paths and labels
    print("\nStep 1: Loading images and labels...")
    train_paths_class1, train_labels_class1 = load_images(train_path_class1, 0)
    train_paths_class2, train_labels_class2 = load_images(train_path_class2, 1)
    test_paths_class1, test_labels_class1 = load_images(test_path_class1, 0)
    test_paths_class2, test_labels_class2 = load_images(test_path_class2, 1)

    # An existing but empty folder only yields a warning from load_images;
    # stop here with a clear message instead of failing later inside the
    # scaler or the SVM
    if not train_paths_class1 or not train_paths_class2:
        print("Error: Training images are required for both classes!")
        return
    if not test_paths_class1 and not test_paths_class2:
        print("Error: No test images were found!")
        return

    # Combine datasets
    train_paths = train_paths_class1 + train_paths_class2
    train_labels = train_labels_class1 + train_labels_class2
    test_paths = test_paths_class1 + test_paths_class2
    test_labels = test_labels_class1 + test_labels_class2

    # Optional: Visualize SIFT keypoints for a sample image
    print("\nStep 2: Visualizing SIFT keypoints for a sample image...")
    visualize_sift(train_paths[0])

    # Step 3: Extract SIFT features
    print("\nStep 3: Extracting SIFT features...")
    X_train, y_train = process_image_dataset(train_paths, train_labels, n_keypoints)
    X_test, y_test = process_image_dataset(test_paths, test_labels, n_keypoints)

    print(f"\nFeature vector shape: {X_train.shape[1]} dimensions")

    # Step 4: Normalize features using StandardScaler (fitted on training data only)
    print("\nStep 4: Normalizing features...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Print dataset shapes
    print(f"\nTraining data: {X_train_scaled.shape}, Labels: {y_train.shape}")
    print(f"Testing data: {X_test_scaled.shape}, Labels: {y_test.shape}")

    # Step 5: Train SVM model
    print("\nStep 5: Training SVM model...")
    svm_model = train_svm(X_train_scaled, y_train, **svm_params)

    # Step 6: Evaluate the model
    print("\nStep 6: Evaluating the model...")
    metrics = evaluate_model(svm_model, X_test_scaled, y_test)

    print("\nBinary Image Classification process completed!")
    print(f"Final F1-Score: {metrics['f1']:.4f}")

if __name__ == "__main__":
    main()

3. Image classification using ANN with SIFT descriptors

In [None]:
"""
Binary Image Classification using SIFT, Bag of Visual Words, and ANN (MLPClassifier)

This program classifies images into two classes by:
1. Extracting SIFT descriptors from all images
2. Building a visual vocabulary using K-means clustering
3. Creating histogram representations (BoVW) for each image
4. Training an Artificial Neural Network for classification
5. Evaluating model performance using metrics like F1-score and confusion matrix
"""

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.cluster import KMeans
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# SIFT Feature Extraction Functions
def extract_sift_descriptors(image_path):
    """
    Compute the SIFT descriptors of one image file.

    The detector is created with nfeatures = 200.

    Args:
        image_path (str): Path to the image file

    Returns:
        The descriptors produced by detectAndCompute, or None when the image
        cannot be read or an error occurs
    """
    try:
        img = cv2.imread(image_path)
        if img is not None:
            # SIFT runs on the grayscale version of the image
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            detector = cv2.SIFT_create(nfeatures = 200)
            _, descriptors = detector.detectAndCompute(gray, None)
            return descriptors

        print(f"Warning: Could not read image {image_path}")
        return None
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return None

def load_images_and_extract_sift(folder_path, label):
    """
    Extract SIFT descriptors for every image in a folder and label the images.

    Images that yield no descriptors are left out of all three results.

    Args:
        folder_path (str): Path to the folder containing images
        label (int): Class label (0 or 1)

    Returns:
        tuple: (all_descriptors, image_descriptors, labels)
            - all_descriptors: every descriptor stacked into one array
              (an empty list when nothing was extracted)
            - image_descriptors: list of descriptors per image
            - labels: list of labels corresponding to each image
    """
    image_paths = [
        match
        for pattern in ('*.jpg', '*.jpeg', '*.png', '*.bmp')
        for match in glob.glob(os.path.join(folder_path, pattern))
    ]

    if len(image_paths) == 0:
        print(f"Warning: No images found in {folder_path}")
        return [], [], []

    print(f"Processing {len(image_paths)} images from {folder_path}...")

    # Keep only images that actually produced descriptors
    image_descriptors = []
    for img_path in image_paths:
        found = extract_sift_descriptors(img_path)
        if found is None or found.shape[0] <= 0:
            continue
        image_descriptors.append(found)

    labels = [label] * len(image_descriptors)

    # One stacked array for K-means clustering
    all_descriptors = np.vstack(image_descriptors) if image_descriptors else []

    print(f"Extracted {len(all_descriptors)} SIFT descriptors from {len(image_descriptors)} images")
    return all_descriptors, image_descriptors, labels

# Visual Vocabulary Building Functions
def plot_elbow_method(all_train_descriptors, max_k=20):
    """
    Plot the Elbow Method graph to help determine optimal number of clusters.

    K-means is fitted for K = 1..max_k and the within-cluster sum of squares
    (inertia) is printed and plotted against K. When more than 10000
    descriptors are supplied, a fixed-seed random sample of 10000 is used so
    that repeated runs produce the same curve.

    Args:
        all_train_descriptors (numpy.ndarray): All SIFT descriptors from training set
        max_k (int): Maximum number of clusters to try
    """
    sample_limit = 10000

    # Use a sample if there are too many descriptors
    if len(all_train_descriptors) > sample_limit:
        print("Sampling descriptors for elbow method...")
        # Seeded generator: matches the random_state=42 used for K-means and
        # keeps the plot reproducible without touching the global NumPy RNG
        rng = np.random.RandomState(42)
        indices = rng.choice(len(all_train_descriptors), sample_limit, replace=False)
        sample_descriptors = all_train_descriptors[indices]
    else:
        sample_descriptors = all_train_descriptors

    wcss = []  # Within-cluster sum of squares
    k_values = range(1, max_k + 1)

    print("Calculating WCSS for different K values...")
    for k in k_values:
        kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
        kmeans.fit(sample_descriptors)
        wcss.append(kmeans.inertia_)
        print(f"K={k}, WCSS={kmeans.inertia_:.2f}")

    # Plot the Elbow graph
    plt.figure(figsize=(10, 6))
    plt.plot(k_values, wcss, 'bo-')
    plt.xlabel('Number of Clusters (K)')
    plt.ylabel('WCSS')
    plt.title('Elbow Method For Optimal K')
    plt.grid(True)
    plt.show()

def build_visual_vocabulary(all_train_descriptors, num_clusters):
    """
    Build a visual vocabulary by clustering SIFT descriptors.

    When more than 100000 descriptors are supplied, K-means is fitted on a
    fixed-seed random sample of 100000 so that the vocabulary (and every
    histogram derived from it) is the same on each run.

    Args:
        all_train_descriptors (numpy.ndarray): All SIFT descriptors from training set
        num_clusters (int): Number of clusters (size of vocabulary)

    Returns:
        KMeans: Trained K-means model
    """
    print(f"Building visual vocabulary with K={num_clusters}...")

    sample_limit = 100000

    # Sample descriptors if there are too many
    if len(all_train_descriptors) > sample_limit:
        print("Sampling descriptors for K-means clustering...")
        # Seeded generator: matches the random_state=42 used for K-means and
        # avoids depending on the state of the global NumPy RNG
        rng = np.random.RandomState(42)
        indices = rng.choice(len(all_train_descriptors), sample_limit, replace=False)
        sample_descriptors = all_train_descriptors[indices]
    else:
        sample_descriptors = all_train_descriptors

    # Create and train K-means model
    kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)
    kmeans.fit(sample_descriptors)

    print("Visual vocabulary built successfully")
    return kmeans

# Bag of Visual Words Functions
def create_bovw_histograms(image_descriptors, kmeans_model):
    """
    Build the L2-normalized Bag of Visual Words histogram of one image.

    Args:
        image_descriptors (numpy.ndarray): SIFT descriptors for one image
        kmeans_model (KMeans): Trained K-means model

    Returns:
        numpy.ndarray: Histogram with one bin per cluster; all zeros when the
            image has no descriptors
    """
    vocabulary_size = kmeans_model.n_clusters
    histogram = np.zeros(vocabulary_size)

    has_descriptors = image_descriptors is not None and len(image_descriptors) != 0
    if not has_descriptors:
        return histogram

    # Count how many descriptors fall into each visual word; add.at
    # accumulates repeated indices one by one, like an explicit loop
    visual_words = kmeans_model.predict(image_descriptors)
    np.add.at(histogram, visual_words, 1)

    # L2 normalization, skipped for an all-zero histogram
    norm = np.linalg.norm(histogram)
    return histogram / norm if norm > 0 else histogram

def process_image_folder_to_bovw(folder_path, label, kmeans_model):
    """
    Process all images in a folder to create BoVW representations.

    Args:
        folder_path (str): Path to the folder containing images
        label (int): Class label (0 or 1)
        kmeans_model (KMeans): Trained K-means model

    Returns:
        tuple: (features, labels)
            - features: BoVW histograms for all images, one row per image;
              shape (0, n_clusters) when no image produced descriptors
            - labels: Labels for all images (empty integer array when no
              image produced descriptors)
    """
    # Extract SIFT descriptors
    _, image_descriptors_list, image_labels = load_images_and_extract_sift(folder_path, label)

    # Create BoVW histograms for each image
    bovw_features = [
        create_bovw_histograms(descriptors, kmeans_model)
        for descriptors in image_descriptors_list
    ]

    if bovw_features:
        bovw_features = np.array(bovw_features)
        image_labels = np.array(image_labels)
    else:
        # Keep the column count and an integer label dtype so that an empty
        # folder can still be stacked/concatenated with another folder's result
        bovw_features = np.zeros((0, kmeans_model.n_clusters))
        image_labels = np.array([], dtype=int)

    print(f"Created {len(bovw_features)} BoVW histograms from {folder_path}")
    return bovw_features, image_labels

# ANN Model Functions
def create_and_train_ann_model(X_train, y_train, mlp_params=None):
    """
    Build an MLPClassifier from the given parameters and fit it.

    Args:
        X_train (numpy.ndarray): Training features (BoVW histograms)
        y_train (numpy.ndarray): Training labels
        mlp_params (dict): Parameters for MLPClassifier; a built-in default
            configuration is used when None

    Returns:
        MLPClassifier: Trained ANN model
    """
    # Configuration applied when the caller does not supply one
    default_params = {
        'hidden_layer_sizes': (64, 32),
        'activation': 'relu',
        'solver': 'adam',
        'alpha': 0.0001,
        'max_iter': 300,
        'batch_size': 'auto',
        'learning_rate_init': 0.001,
        'early_stopping': True,
        'validation_fraction': 0.1,
        'random_state': 42
    }
    mlp_params = default_params if mlp_params is None else mlp_params

    print("Creating and training MLPClassifier...")
    print(f"Parameters: {mlp_params}")

    model = MLPClassifier(**mlp_params)
    model.fit(X_train, y_train)

    print("MLPClassifier trained successfully")
    return model

def evaluate_model(y_true, y_pred):
    """
    Evaluate binary predictions and display the performance metrics.

    Prints accuracy, precision, recall, F1-score, the confusion matrix and the
    classification report, then draws the confusion matrix with matplotlib.

    Args:
        y_true (numpy.ndarray): Ground truth labels (0 or 1)
        y_pred (numpy.ndarray): Predicted labels (0 or 1)

    Returns:
        dict: Dictionary containing evaluation metrics with the keys
            'accuracy', 'precision', 'recall', 'f1' and 'confusion_matrix'
    """
    class_ids = [0, 1]
    class_names = ['Class 0', 'Class 1']

    # Pin the label set so the matrix is always 2x2 and matches the two tick
    # labels drawn below, even when one class never occurs in y_true/y_pred
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Print evaluation results
    print("\nModel Evaluation Results:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")

    # Print confusion matrix
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    # Visualize confusion matrix
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Add text annotations; switch to white text on the darker cells
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm
    }

def _prompt_number(prompt, cast, default=None, minimum=None, inclusive=True):
    """
    Ask for a number until a valid one is entered.

    Args:
        prompt (str): Text shown to the user
        cast (callable): Converter applied to the reply (int or float)
        default: Value returned when the reply is empty; None makes the input mandatory
        minimum: Lower bound for the value, or None for no bound
        inclusive (bool): Whether the value may be equal to minimum

    Returns:
        The parsed number, or default when the reply is empty
    """
    while True:
        reply = input(prompt).strip()
        if not reply:
            if default is not None:
                return default
            print("A value is required, please try again.")
            continue
        try:
            value = cast(reply)
        except ValueError:
            print(f"Invalid value '{reply}', please try again.")
            continue
        if minimum is not None and (value < minimum or (value == minimum and not inclusive)):
            bound = "at least" if inclusive else "greater than"
            print(f"Value must be {bound} {minimum}, please try again.")
            continue
        return value

def _prompt_choice(prompt, choices, default):
    """
    Ask for one of several allowed strings (matched case-insensitively).

    Args:
        prompt (str): Text shown to the user
        choices (tuple): Allowed values, in their canonical spelling
        default (str): Value returned when the reply is empty

    Returns:
        str: The canonical spelling of the chosen value
    """
    canonical = {choice.lower(): choice for choice in choices}
    while True:
        reply = input(prompt).strip()
        if not reply:
            return default
        if reply.lower() in canonical:
            return canonical[reply.lower()]
        print(f"Invalid choice '{reply}'; expected one of {list(choices)}.")

def _prompt_hidden_layers(prompt, default):
    """
    Ask for a comma-separated list of positive layer sizes.

    Args:
        prompt (str): Text shown to the user
        default (tuple): Value returned when the reply is empty

    Returns:
        tuple: Number of neurons per hidden layer
    """
    while True:
        reply = input(prompt).strip()
        if not reply:
            return default
        try:
            # Empty fragments (e.g. a trailing comma) are ignored
            sizes = tuple(int(part) for part in reply.split(',') if part.strip())
        except ValueError:
            sizes = ()
        if sizes and all(size > 0 for size in sizes):
            return sizes
        print(f"Invalid layer sizes '{reply}'; enter positive integers such as '64,32'.")

def _combine_classes(X_first, y_first, X_second, y_second):
    """
    Merge the features and labels of two classes, skipping an empty class.

    Returns:
        tuple or None: (X, y) for the available data, or None when both
            classes are empty
    """
    has_first = len(X_first) > 0
    has_second = len(X_second) > 0
    if has_first and has_second:
        return np.vstack([X_first, X_second]), np.concatenate([y_first, y_second])
    if has_first:
        return X_first, y_first
    if has_second:
        return X_second, y_second
    return None

# Main function
def main():
    """
    Main function to orchestrate the binary image classification process.

    Extracts SIFT descriptors, builds the visual vocabulary, converts the
    training and test images to BoVW histograms, then trains and evaluates an
    MLPClassifier. All interactive inputs are validated and re-asked on error,
    so a typo does not discard the work already done.
    """
    print("Binary Image Classification using SIFT, BoVW, and ANN (MLPClassifier)")
    print("=" * 70)

    # The image folders are fixed; show them rather than asking for input
    train_path_class1 = "./resources/classification/cats"
    train_path_class2 = "./resources/classification/dogs"
    test_path_class1 = "./resources/classification/cat_test"
    test_path_class2 = "./resources/classification/dog_test"
    image_folders = [train_path_class1, train_path_class2, test_path_class1, test_path_class2]
    print("\nUsing the following image folders:")
    for path in image_folders:
        print(f"  {path}")

    # Validate paths (report every missing folder, not just the first one)
    missing = [path for path in image_folders if not os.path.exists(path)]
    if missing:
        for path in missing:
            print(f"Error: Path {path} does not exist!")
        return

    # Step 1: Extract SIFT descriptors from training images
    print("\nStep 1: Extracting SIFT descriptors from training images...")
    # Only the stacked descriptors are needed here; the other two results are unused
    class1_descriptors, _, _ = load_images_and_extract_sift(train_path_class1, 0)
    class2_descriptors, _, _ = load_images_and_extract_sift(train_path_class2, 1)

    # Combine descriptors from both classes
    if len(class1_descriptors) > 0 and len(class2_descriptors) > 0:
        all_train_descriptors = np.vstack([class1_descriptors, class2_descriptors])
    elif len(class1_descriptors) > 0:
        all_train_descriptors = class1_descriptors
    elif len(class2_descriptors) > 0:
        all_train_descriptors = class2_descriptors
    else:
        print("Error: No SIFT descriptors could be extracted from the training images!")
        return

    # Step 2: Determine optimal K using Elbow Method
    print("\nStep 2: Plotting Elbow Method to determine optimal K...")
    max_k = _prompt_number(
        "Enter maximum K to try for Elbow Method (recommend 20-50): ", int, minimum=1)
    plot_elbow_method(all_train_descriptors, max_k)

    # Get K from user
    num_clusters = _prompt_number(
        "\nBased on the Elbow Method plot, enter the optimal K value: ", int, minimum=1)

    # Step 3: Build visual vocabulary (K-means clustering)
    print("\nStep 3: Building visual vocabulary...")
    kmeans_model = build_visual_vocabulary(all_train_descriptors, num_clusters)

    # Step 4: Create BoVW histograms for training images
    print("\nStep 4: Creating BoVW histograms for training images...")
    X_train_class1, y_train_class1 = process_image_folder_to_bovw(train_path_class1, 0, kmeans_model)
    X_train_class2, y_train_class2 = process_image_folder_to_bovw(train_path_class2, 1, kmeans_model)

    # Combine training data
    train_data = _combine_classes(X_train_class1, y_train_class1, X_train_class2, y_train_class2)
    if train_data is None:
        print("Error: Could not create BoVW features for training images!")
        return
    X_train, y_train = train_data

    # Step 5: Create BoVW histograms for test images
    print("\nStep 5: Creating BoVW histograms for test images...")
    X_test_class1, y_test_class1 = process_image_folder_to_bovw(test_path_class1, 0, kmeans_model)
    X_test_class2, y_test_class2 = process_image_folder_to_bovw(test_path_class2, 1, kmeans_model)

    # Combine test data
    test_data = _combine_classes(X_test_class1, y_test_class1, X_test_class2, y_test_class2)
    if test_data is None:
        print("Error: Could not create BoVW features for test images!")
        return
    X_test, y_test = test_data

    # Print data shape information
    print(f"\nTraining data shape: {X_train.shape}, Labels shape: {y_train.shape}")
    print(f"Testing data shape: {X_test.shape}, Labels shape: {y_test.shape}")

    # Step 6: Get ANN parameters from user
    print("\nStep 6: Setting up ANN (MLPClassifier)...")
    print("\nANN Model Hyperparameter Suggestions:")
    print("- hidden_layer_sizes: tuple of neurons per layer, e.g. (64, 32)")
    print("- activation: 'relu' (default), 'tanh', 'logistic'")
    print("- solver: 'adam' (default), 'sgd', 'lbfgs'")
    print("- alpha: L2 penalty (regularization), default 0.0001")
    print("- learning_rate_init: Learning rate, default 0.001")
    print("- max_iter: Maximum iterations, default 300")
    print("- batch_size: 'auto', or integer (8, 16, 32, etc.)")
    print("- early_stopping: True/False")

    print("\nEnter ANN hyperparameters (press Enter to use defaults):")

    hidden_layers = _prompt_hidden_layers(
        "Hidden layer sizes (comma-separated, e.g., '64,32'): ", (64, 32))
    # 'identity' is not advertised in the prompt but is a valid MLPClassifier
    # activation, so it stays accepted as before
    activation = _prompt_choice(
        "Activation function ['relu', 'tanh', 'logistic']: ",
        ('identity', 'logistic', 'tanh', 'relu'), 'relu')
    solver = _prompt_choice(
        "Solver ['adam', 'sgd', 'lbfgs']: ", ('lbfgs', 'sgd', 'adam'), 'adam')
    alpha = _prompt_number("Alpha (L2 penalty) [0.0001]: ", float, default=0.0001, minimum=0.0)
    max_iter = _prompt_number("Maximum iterations [300]: ", int, default=300, minimum=1)

    # Anything other than an explicit 'false' keeps early stopping enabled
    early_stopping = input("Early stopping (True/False) [True]: ").strip().lower() != 'false'

    # Collect parameters
    mlp_params = {
        'hidden_layer_sizes': hidden_layers,
        'activation': activation,
        'solver': solver,
        'alpha': alpha,
        'max_iter': max_iter,
        'early_stopping': early_stopping,
        'random_state': 42,
        'validation_fraction': 0.1 if early_stopping else 0.0
    }

    # Step 7: Create and train ANN model
    print("\nStep 7: Creating and training ANN model...")
    ann_model = create_and_train_ann_model(X_train, y_train, mlp_params)

    # Step 8: Evaluate the model
    print("\nStep 8: Evaluating the model...")
    # Make predictions
    y_pred = ann_model.predict(X_test)

    # Evaluate the model
    metrics = evaluate_model(y_test, y_pred)

    print("\nBinary Image Classification process completed!")
    print(f"Final F1-Score: {metrics['f1']:.4f}")

if __name__ == "__main__":
    main()

3. Image classification using ANN with HOG features

In [None]:
"""
Binary Image Classification using HOG Features and ANN

This program classifies images into two classes using:
1. HOG (Histogram of Oriented Gradients) feature extraction
2. ANN (Artificial Neural Network) classification with MLPClassifier

The program loads images from specified directories, extracts HOG features,
trains an ANN model, and evaluates its performance with metrics like F1-score.
"""

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2
from skimage.feature import hog
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# HOG Feature Extraction Functions
def extract_hog_descriptor(image_path, target_size=(64, 128), orientations=9, 
                          pixels_per_cell=(8, 8), cells_per_block=(2, 2), 
                          block_norm='L2-Hys', visualize=False):
    """
    Compute the HOG descriptor of one image file.

    The image is read with OpenCV, converted to grayscale and resized to
    target_size before the descriptor is computed.

    Args:
        image_path (str): Path to the image file
        target_size (tuple): Size passed to cv2.resize before HOG extraction
        orientations (int): Number of orientation bins for HOG
        pixels_per_cell (tuple): Size of a cell in pixels
        cells_per_block (tuple): Number of cells in each block
        block_norm (str): Block normalization method
        visualize (bool): Whether to return the HOG visualization as well

    Returns:
        numpy.ndarray: HOG feature vector when visualize is False
        tuple: (feature vector, HOG visualization) when visualize is True
        None: when the image cannot be read or any processing step fails
    """
    # Settings shared by both the plain and the visualizing HOG call
    hog_options = {
        'orientations': orientations,
        'pixels_per_cell': pixels_per_cell,
        'cells_per_block': cells_per_block,
        'block_norm': block_norm,
    }

    try:
        source = cv2.imread(image_path)
        if source is None:
            print(f"Warning: Could not read image {image_path}")
            return None

        # Grayscale first, then bring the image to the requested size
        prepared = cv2.resize(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY), target_size)

        if not visualize:
            return hog(prepared, visualize=False, **hog_options)

        descriptor, rendering = hog(prepared, visualize=True, **hog_options)
        return descriptor, rendering
    except Exception as err:
        # Best effort: report the failure and signal it to the caller with None
        print(f"Error processing {image_path}: {str(err)}")
        return None

def load_images_and_extract_hog(folder_path, label, hog_params):
    """
    Build HOG feature vectors and labels for every image in a folder.

    Files matching *.jpg, *.jpeg, *.png and *.bmp directly inside folder_path
    are processed; images whose descriptor cannot be extracted are skipped.

    Args:
        folder_path (str): Path to the folder containing images
        label (int): Class label (0 or 1) assigned to every image
        hog_params (dict): Parameters for HOG feature extraction; missing
            keys fall back to the defaults used below

    Returns:
        tuple: (features, labels)
            - features: List of HOG feature vectors, one per usable image
            - labels: List with one copy of label per feature vector
    """
    # Collect candidate files, one glob pattern after the other
    patterns = ('*.jpg', '*.jpeg', '*.png', '*.bmp')
    image_paths = [
        match
        for pattern in patterns
        for match in glob.glob(os.path.join(folder_path, pattern))
    ]

    if len(image_paths) == 0:
        print(f"Warning: No images found in {folder_path}")
        return [], []

    print(f"Processing {len(image_paths)} images from {folder_path}...")

    # Resolve the extraction settings once; they are the same for every image
    extraction_kwargs = {
        'target_size': hog_params.get('target_size', (64, 128)),
        'orientations': hog_params.get('orientations', 9),
        'pixels_per_cell': hog_params.get('pixels_per_cell', (8, 8)),
        'cells_per_block': hog_params.get('cells_per_block', (2, 2)),
        'block_norm': hog_params.get('block_norm', 'L2-Hys'),
    }

    features = []
    for img_path in image_paths:
        descriptor = extract_hog_descriptor(img_path, **extraction_kwargs)
        if descriptor is None:
            # Unreadable or failed image: skip it
            continue
        features.append(descriptor)

    # Every kept image belongs to the same class
    labels = [label] * len(features)

    print(f"Extracted HOG features from {len(features)} images")
    return features, labels

def visualize_hog_features(image_path, hog_params):
    """
    Visualize HOG features for a sample image.

    Shows the resized grayscale image next to its HOG visualization and
    prints the shape of the feature vector. Any failure is reported with a
    printed message instead of an exception.

    Args:
        image_path (str): Path to the image file
        hog_params (dict): Parameters for HOG feature extraction; missing
            keys fall back to the defaults used below
    """
    try:
        target_size = hog_params.get('target_size', (64, 128))

        # Extract HOG features with visualization
        result = extract_hog_descriptor(
            image_path, 
            target_size=target_size,
            orientations=hog_params.get('orientations', 9),
            pixels_per_cell=hog_params.get('pixels_per_cell', (8, 8)),
            cells_per_block=hog_params.get('cells_per_block', (2, 2)),
            block_norm=hog_params.get('block_norm', 'L2-Hys'),
            visualize=True
        )

        # extract_hog_descriptor returns a bare None on failure; handle that
        # here so the user does not get a confusing tuple-unpacking error
        if result is None:
            print(f"Error visualizing HOG features: could not extract HOG features from {image_path}")
            return
        hog_features, hog_image = result

        # Read and resize original image for visualization
        img = cv2.imread(image_path)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, target_size)

        # Display original and HOG images side by side
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
        ax1.imshow(resized, cmap='gray')
        ax1.set_title('Original Image (Grayscale)')
        ax1.axis('off')

        ax2.imshow(hog_image, cmap='gray')
        ax2.set_title('HOG Visualization')
        ax2.axis('off')

        plt.tight_layout()
        plt.show()

        print(f"HOG Feature vector shape: {hog_features.shape}")

    except Exception as e:
        print(f"Error visualizing HOG features: {str(e)}")

# ANN Model Functions
def create_and_train_ann_model(X_train, y_train, mlp_params=None):
    """
    Build an MLPClassifier and fit it on the given training data.

    Args:
        X_train (numpy.ndarray): Training features (HOG features)
        y_train (numpy.ndarray): Training labels
        mlp_params (dict): Keyword arguments for MLPClassifier; when None,
            the built-in defaults listed below are used

    Returns:
        MLPClassifier: The fitted ANN model
    """
    if mlp_params is None:
        # Built-in configuration used when the caller supplies none
        mlp_params = dict(
            hidden_layer_sizes=(100,),
            activation='relu',
            solver='adam',
            alpha=0.0001,
            max_iter=300,
            batch_size='auto',
            learning_rate_init=0.001,
            early_stopping=True,
            validation_fraction=0.1,
            random_state=42,
        )

    print("Creating and training MLPClassifier...")
    print(f"Parameters: {mlp_params}")

    # Instantiate the network, then fit it on the training set
    classifier = MLPClassifier(**mlp_params)
    classifier.fit(X_train, y_train)

    print("MLPClassifier trained successfully")
    return classifier

def evaluate_model(y_true, y_pred):
    """
    Evaluate the model performance.

    Computes accuracy, precision, recall and F1-score, prints them together
    with the confusion matrix and the classification report, and shows the
    confusion matrix as an annotated heat map.

    Args:
        y_true (numpy.ndarray): Ground truth labels
        y_pred (numpy.ndarray): Predicted labels

    Returns:
        dict: Dictionary containing evaluation metrics under the keys
            'accuracy', 'precision', 'recall', 'f1' and 'confusion_matrix'
    """
    # Calculate confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Print evaluation results
    print("\nModel Evaluation Results:")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1-Score:  {f1:.4f}")

    # Print confusion matrix
    print("\nConfusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    # Derive the axis labels from the data instead of assuming two classes.
    # Without an explicit `labels` argument, confusion_matrix orders its
    # rows/columns by the sorted labels seen in y_true or y_pred, which is
    # what np.unique over both arrays reproduces.
    class_labels = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    if len(class_labels) != cm.shape[0]:
        # Defensive fallback: label by position if the sizes ever disagree
        class_labels = np.arange(cm.shape[0])
    class_names = [f'Class {label}' for label in class_labels]

    # Visualize confusion matrix
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Add text annotations; switch to white text on dark (high-count) cells
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm
    }

def _prompt_number(prompt, cast, default=None, minimum=None, inclusive=True):
    """
    Ask for a number until a valid one is entered.

    Args:
        prompt (str): Text shown to the user
        cast (callable): Converter applied to the reply (int or float)
        default: Value returned when the reply is empty; None makes the input mandatory
        minimum: Lower bound for the value, or None for no bound
        inclusive (bool): Whether the value may be equal to minimum

    Returns:
        The parsed number, or default when the reply is empty
    """
    while True:
        reply = input(prompt).strip()
        if not reply:
            if default is not None:
                return default
            print("A value is required, please try again.")
            continue
        try:
            value = cast(reply)
        except ValueError:
            print(f"Invalid value '{reply}', please try again.")
            continue
        if minimum is not None and (value < minimum or (value == minimum and not inclusive)):
            bound = "at least" if inclusive else "greater than"
            print(f"Value must be {bound} {minimum}, please try again.")
            continue
        return value

def _prompt_choice(prompt, choices, default):
    """
    Ask for one of several allowed strings (matched case-insensitively).

    Args:
        prompt (str): Text shown to the user
        choices (tuple): Allowed values, in their canonical spelling
        default (str): Value returned when the reply is empty

    Returns:
        str: The canonical spelling of the chosen value
    """
    canonical = {choice.lower(): choice for choice in choices}
    while True:
        reply = input(prompt).strip()
        if not reply:
            return default
        if reply.lower() in canonical:
            return canonical[reply.lower()]
        print(f"Invalid choice '{reply}'; expected one of {list(choices)}.")

def _prompt_hidden_layers(prompt, default):
    """
    Ask for a comma-separated list of positive layer sizes.

    Args:
        prompt (str): Text shown to the user
        default (tuple): Value returned when the reply is empty

    Returns:
        tuple: Number of neurons per hidden layer
    """
    while True:
        reply = input(prompt).strip()
        if not reply:
            return default
        try:
            # Empty fragments (e.g. a trailing comma) are ignored
            sizes = tuple(int(part) for part in reply.split(',') if part.strip())
        except ValueError:
            sizes = ()
        if sizes and all(size > 0 for size in sizes):
            return sizes
        print(f"Invalid layer sizes '{reply}'; enter positive integers such as '64,32'.")

# Main function
def main():
    """
    Main function to orchestrate the binary image classification process.

    Collects the HOG and ANN settings interactively, extracts HOG features
    from the training and test folders, standardizes them, then trains and
    evaluates an MLPClassifier. Interactive inputs are validated and re-asked
    on error instead of aborting with a traceback.
    """
    print("Binary Image Classification using HOG Features and ANN (MLPClassifier)")
    print("=" * 70)

    # The image folders are fixed; show them rather than asking for input
    train_path_class1 = "./resources/classification/cats"
    train_path_class2 = "./resources/classification/dogs"
    test_path_class1 = "./resources/classification/cat_test"
    test_path_class2 = "./resources/classification/dog_test"
    image_folders = [train_path_class1, train_path_class2, test_path_class1, test_path_class2]
    print("\nUsing the following image folders:")
    for path in image_folders:
        print(f"  {path}")

    # Validate paths (report every missing folder, not just the first one)
    missing = [path for path in image_folders if not os.path.exists(path)]
    if missing:
        for path in missing:
            print(f"Error: Path {path} does not exist!")
        return

    # Step 1: Configure HOG parameters
    print("\nStep 1: Configure HOG parameters (press Enter to use defaults)")
    print("\nHOG Parameter Suggestions:")
    print("- target_size: Image size for HOG extraction, e.g., (64, 128) or (128, 128)")
    print("- orientations: Number of orientation bins (typically 9)")
    print("- pixels_per_cell: Cell size in pixels, e.g., (8, 8)")
    print("- cells_per_block: Number of cells per block, e.g., (2, 2)")
    print("- block_norm: Block normalization method, e.g., 'L2-Hys'")

    # Tuple elements are evaluated left to right, so the prompts keep their order
    target_size = (
        _prompt_number("Target width [64]: ", int, default=64, minimum=1),
        _prompt_number("Target height [128]: ", int, default=128, minimum=1),
    )
    orientations = _prompt_number("Orientations [9]: ", int, default=9, minimum=1)
    pixels_per_cell = (
        _prompt_number("Pixels per cell (width) [8]: ", int, default=8, minimum=1),
        _prompt_number("Pixels per cell (height) [8]: ", int, default=8, minimum=1),
    )
    cells_per_block = (
        _prompt_number("Cells per block (width) [2]: ", int, default=2, minimum=1),
        _prompt_number("Cells per block (height) [2]: ", int, default=2, minimum=1),
    )
    # Restrict to the normalization names skimage's hog() accepts; an unknown
    # name would otherwise make the extraction of every single image fail
    block_norm = _prompt_choice(
        "Block normalization method ['L2-Hys']: ",
        ('L1', 'L1-sqrt', 'L2', 'L2-Hys'), 'L2-Hys')

    # Collect HOG parameters
    hog_params = {
        'target_size': target_size,
        'orientations': orientations,
        'pixels_per_cell': pixels_per_cell,
        'cells_per_block': cells_per_block,
        'block_norm': block_norm
    }

    print(f"\nHOG Parameters: {hog_params}")

    # Step 2: Visualize HOG features for a sample image (optional)
    visualize_option = input("\nDo you want to visualize HOG features for a sample image? (y/n): ")
    if visualize_option.strip().lower() == 'y':
        sample_path = input("Enter the path to a sample image: ").strip()
        if os.path.exists(sample_path):
            visualize_hog_features(sample_path, hog_params)
        else:
            print(f"Error: Sample image path {sample_path} does not exist")

    # Step 3: Extract HOG features from training images
    print("\nStep 3: Extracting HOG features from training images...")
    X_train_class1, y_train_class1 = load_images_and_extract_hog(train_path_class1, 0, hog_params)
    X_train_class2, y_train_class2 = load_images_and_extract_hog(train_path_class2, 1, hog_params)

    # Combine training data (both classes are required)
    if not (X_train_class1 and X_train_class2):
        print("Error: Could not extract HOG features from training images")
        return
    X_train = np.vstack([X_train_class1, X_train_class2])
    y_train = np.array(y_train_class1 + y_train_class2)

    # Step 4: Extract HOG features from test images
    print("\nStep 4: Extracting HOG features from test images...")
    X_test_class1, y_test_class1 = load_images_and_extract_hog(test_path_class1, 0, hog_params)
    X_test_class2, y_test_class2 = load_images_and_extract_hog(test_path_class2, 1, hog_params)

    # Combine test data (both classes are required)
    if not (X_test_class1 and X_test_class2):
        print("Error: Could not extract HOG features from test images")
        return
    X_test = np.vstack([X_test_class1, X_test_class2])
    y_test = np.array(y_test_class1 + y_test_class2)

    # Print data shape information
    print(f"\nTraining data shape: {X_train.shape}, Labels shape: {y_train.shape}")
    print(f"Testing data shape: {X_test.shape}, Labels shape: {y_test.shape}")

    # Step 5: Normalize features using StandardScaler
    print("\nStep 5: Normalizing features using StandardScaler...")
    scaler = StandardScaler()
    # Fit on the training set only, then apply the same scaling to the test set
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    print("Feature normalization completed")

    # Step 6: Configure ANN parameters
    print("\nStep 6: Configure ANN parameters (press Enter to use defaults)")
    print("\nANN Parameter Suggestions:")
    print("- hidden_layer_sizes: Tuple of neurons per layer, e.g., (100,) or (64, 32)")
    print("- activation: 'relu' (default), 'tanh', 'logistic'")
    print("- solver: 'adam' (default), 'sgd', 'lbfgs'")
    print("- alpha: L2 penalty (regularization), default 0.0001")
    print("- learning_rate_init: Learning rate, default 0.001")
    print("- max_iter: Maximum iterations, default 300")
    print("- batch_size: 'auto', or integer (8, 16, 32, etc.)")
    print("- early_stopping: True/False")

    hidden_layers = _prompt_hidden_layers(
        "Hidden layer sizes (comma-separated, e.g., '100' or '64,32'): ", (100,))
    # 'identity' is not advertised in the prompt but is a valid MLPClassifier
    # activation, so it stays accepted as before
    activation = _prompt_choice(
        "Activation function ['relu', 'tanh', 'logistic']: ",
        ('identity', 'logistic', 'tanh', 'relu'), 'relu')
    solver = _prompt_choice(
        "Solver ['adam', 'sgd', 'lbfgs']: ", ('lbfgs', 'sgd', 'adam'), 'adam')
    alpha = _prompt_number("Alpha (L2 penalty) [0.0001]: ", float, default=0.0001, minimum=0.0)
    learning_rate_init = _prompt_number(
        "Learning rate [0.001]: ", float, default=0.001, minimum=0.0, inclusive=False)
    max_iter = _prompt_number("Maximum iterations [300]: ", int, default=300, minimum=1)

    # Anything other than an explicit 'false' keeps early stopping enabled
    early_stopping = input("Early stopping (True/False) [True]: ").strip().lower() != 'false'

    # Collect ANN parameters
    mlp_params = {
        'hidden_layer_sizes': hidden_layers,
        'activation': activation,
        'solver': solver,
        'alpha': alpha,
        'learning_rate_init': learning_rate_init,
        'max_iter': max_iter,
        'early_stopping': early_stopping,
        'random_state': 42,
        'validation_fraction': 0.1 if early_stopping else 0.0
    }

    # Step 7: Create and train ANN model
    print("\nStep 7: Creating and training ANN model...")
    ann_model = create_and_train_ann_model(X_train_scaled, y_train, mlp_params)

    # Step 8: Evaluate the model
    print("\nStep 8: Evaluating the model...")

    # Train set evaluation (optional)
    evaluate_train = input("Do you want to evaluate on the training set? (y/n): ")
    if evaluate_train.strip().lower() == 'y':
        print("\nTraining Set Evaluation:")
        y_train_pred = ann_model.predict(X_train_scaled)
        evaluate_model(y_train, y_train_pred)

    # Test set evaluation
    print("\nTest Set Evaluation:")
    y_test_pred = ann_model.predict(X_test_scaled)
    metrics = evaluate_model(y_test, y_test_pred)

    print("\nBinary Image Classification process completed!")
    print(f"Final F1-Score: {metrics['f1']:.4f}")

if __name__ == "__main__":
    main()

2. Pedestrian Detection using CV2 built-in function (0.5)

- Input: images/clip containing pedestrians (video (2160).mp4 / people.mp4 or your videos)

- Output: images/video frames with green bounding boxes drawn around the detected pedestrians

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import time
import imutils

def _detect_in_image(hog, file_path):
    """Detect pedestrians in one image and display it with green boxes."""
    # Reading the Image
    image = cv2.imread(file_path)
    if image is None:
        print(f"Error: Could not read image from {file_path}")
        return

    # Downscale wide images to at most 400 px wide (presumably to keep
    # detection fast); narrower images keep their width
    image = imutils.resize(image, width=min(400, image.shape[1]))

    # Detect pedestrians
    print("Detecting pedestrians in image...")
    rects, _ = hog.detectMultiScale(image, winStride=(4, 4),
                                    padding=(4, 4), scale=1.05)
    print(f"Found {len(rects)} pedestrians")

    # Draw bounding boxes around detected pedestrians
    for (x, y, w, h) in rects:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)  # Green boxes

    # Convert BGR to RGB for matplotlib display
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Display the result
    plt.figure(figsize=(10, 8))
    plt.imshow(image_rgb)
    plt.title(f'Detected {len(rects)} pedestrians')
    plt.axis('off')
    plt.tight_layout()
    plt.show()


def _detect_in_video(hog, file_path, output_path, frame_size):
    """Detect pedestrians frame by frame and write the annotated video."""
    frame_size = tuple(frame_size)
    video = cv2.VideoCapture(file_path)
    out = None
    frame_count = 0

    try:
        if not video.isOpened():
            print(f"Error: Could not open video from {file_path}")
            return

        # Reuse the source frame rate; fall back to 30 fps when the backend
        # reports none (0 or NaN), since the writer needs a positive rate
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Define codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            print(f"Error: Could not open output video {output_path} for writing")
            return

        print("Processing video frames...")
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Every frame is resized to the size the writer was opened with
            frame = cv2.resize(frame, frame_size)

            # Detect pedestrians
            boxes, _ = hog.detectMultiScale(frame, winStride=(8, 8), padding=(8, 8), scale=1.2)

            # Draw green rectangles
            for (x, y, w, h) in boxes:
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Write frame to output video
            out.write(frame)
            frame_count += 1

            # Display progress
            if frame_count % 30 == 0:
                print(f"Processed {frame_count} frames...")
    finally:
        # Release resources even when processing stops with an error
        video.release()
        if out is not None:
            out.release()

    # Print summary
    print("Video processing completed.")
    print(f"Total frames processed: {frame_count}")
    print(f"Output saved to: {output_path}")


def detect_pedestrians(file_path, option='image', output_path='output_video.avi',
                       frame_size=(640, 480)):
    """
    Detect pedestrians with OpenCV's HOG descriptor and default people detector.

    Args:
        file_path (str): Path to the input image or video
        option (str): 'image' to annotate and display a single image,
            'video' to annotate every frame and write a new video file
        output_path (str): Where the annotated video is written (video mode only)
        frame_size (tuple): (width, height) every video frame is resized to
            before detection and writing (video mode only)

    Returns:
        None. Results are displayed (image mode) or written to output_path
        (video mode); problems are reported with printed messages.
    """
    # Reject an unknown mode before building the detector
    if option not in ('image', 'video'):
        print("Invalid option. Use 'image' or 'video'")
        return

    # Initialize HOG descriptor and set default pedestrian detector
    hog = cv2.HOGDescriptor()
    hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

    if option == 'image':
        _detect_in_image(hog, file_path)
    else:
        _detect_in_video(hog, file_path, output_path, frame_size)

# Example usage:
# detect_pedestrians('./resources/img.jpg', 'image')
# NOTE: this call is a module-level statement, so it runs as soon as the
# cell/module is executed and processes the sample video in 'video' mode.
detect_pedestrians('./resources/video_1.mp4', 'video')

Hello world
(1) việt nam
(2) việt nam