### 1 Face/Full Body Detection (0.5) using Cascade Classifier (0.5)

Input: camera capture or MP4

Output: MP4 with green boxes surrounding Face(s)/Full Body(s)

In [None]:
import cv2
import os

def _load_cascade(cascade_path, label):
    """Load a Haar cascade classifier from ``cascade_path``.

    Args:
        cascade_path: Filesystem path of the cascade XML file.
        label: Capitalized human-readable name (e.g. "Face", "Eye") used in
            the status messages.

    Returns:
        The loaded ``cv2.CascadeClassifier``, or ``None`` (after printing an
        error message) when the file is missing or fails to load.
    """
    if not os.path.exists(cascade_path):
        print(f"Error: {label} cascade file not found at {cascade_path}")
        return None

    cascade = cv2.CascadeClassifier(cascade_path)
    # An existing file can still be unusable; empty() reports a failed load.
    if cascade.empty():
        print(f"Error: Failed to load {label.lower()} cascade classifier")
        return None

    print(f"{label} cascade loaded successfully from {cascade_path}")
    return cascade


def _draw_detections(frame, gray, cascade, color, **detect_params):
    """Run ``cascade`` on ``gray`` and outline every hit on ``frame`` in place.

    ``detect_params`` is forwarded unchanged to ``detectMultiScale``.
    """
    for (x, y, w, h) in cascade.detectMultiScale(gray, **detect_params):
        cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)


def main():
    """Interactively run Haar-cascade detection on a camera or video file.

    Prompts for the input source ('cam' or 'mp4'), the detection type
    ('face', 'body', 'eye' or 'all') and, for 'mp4', an output file path.
    Faces are outlined with colour (0, 255, 0) and eyes with (255, 0, 0).
    In 'cam' mode the annotated frames are shown in a window until 'q' is
    pressed; in 'mp4' mode they are written to the output file.

    Full-body detection is currently disabled: 'body' is still accepted as a
    choice, but only a warning is printed and no body boxes are drawn.

    Returns:
        None. Errors (missing cascade, unopenable source or output) are
        reported on stdout and end the function early.
    """
    # Get input source
    input_source = ""
    while input_source not in ["cam", "mp4"]:
        input_source = input("Enter input source ('cam' for camera, 'mp4' for video file): ").strip().lower()

    # The input video is fixed; it is only used when 'mp4' is selected.
    video_path = "./resources/video_detection_1.mp4"

    # Determine detection type
    detection_type = ""
    while detection_type not in ["face", "body", "eye", "all"]:
        detection_type = input("Enter detection type ('face', 'body', 'eye' or 'all'): ").strip().lower()

    # NOTE: body detection (haarcascade_fullbody.xml) is switched off. Tell
    # the user instead of silently producing frames without body boxes.
    if detection_type in ["body", "all"]:
        print("Warning: full-body detection is currently disabled; no body boxes will be drawn")

    # Get output file path if MP4 is selected as input
    output_path = None
    if input_source == "mp4":
        output_path = input("Enter output MP4 file path: ").strip()
        # Case-insensitive so "clip.MP4" is not turned into "clip.MP4.mp4".
        if not output_path.lower().endswith('.mp4'):
            output_path += '.mp4'

    # Load only the classifiers the chosen detection type needs.
    face_cascade = None
    eye_cascade = None

    if detection_type in ["face", "all"]:
        # The frontal-face cascade ships in OpenCV's data directory.
        face_cascade = _load_cascade(
            os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml'),
            "Face",
        )
        if face_cascade is None:
            return

    if detection_type in ["eye", "all"]:
        eye_cascade = _load_cascade("./resources/haarcascade_eye.xml", "Eye")
        if eye_cascade is None:
            return

    # Initialize video capture (index 0 is the default camera)
    video_capture = cv2.VideoCapture(0 if input_source == "cam" else video_path)
    video_writer = None

    try:
        if not video_capture.isOpened():
            print("Error: Could not open video source")
            return

        # Get video properties
        frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional rate (e.g. 29.97) so output timing matches the
        # input. "not fps >= 1" also catches 0 and NaN from the backend.
        fps = video_capture.get(cv2.CAP_PROP_FPS)
        if not fps >= 1:
            fps = 30.0  # Default to 30 fps

        # Initialize video writer if needed
        if input_source == "mp4":
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
            video_writer = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
            # Without this check a bad path/codec silently drops every frame.
            if not video_writer.isOpened():
                print(f"Error: Could not open output file {output_path} for writing")
                return

        # Process video frames
        frames_done = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                # End of video or error
                break
            frames_done += 1

            # Convert frame to grayscale for detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if face_cascade is not None:
                _draw_detections(
                    frame, gray, face_cascade, (0, 255, 0),
                    scaleFactor=1.1,   # How much the image size is reduced at each image scale
                    minNeighbors=5,    # How many neighbors each candidate rectangle should have
                    minSize=(30, 30),  # Minimum possible object size
                )

            # Eyes are searched over the whole frame, not only inside faces.
            if eye_cascade is not None:
                _draw_detections(
                    frame, gray, eye_cascade, (255, 0, 0),
                    scaleFactor=1.05,
                    minNeighbors=5,
                    minSize=(10, 10),
                )

            # Display or write the frame
            if input_source == "cam":
                cv2.imshow('Detection', frame)

                # Break loop on 'q' key press
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                video_writer.write(frame)

                # Show progress every 30 frames
                if frames_done % 30 == 0:
                    print(f"Processing: {frames_done} frames done")
    finally:
        # Release resources even if processing fails or is interrupted.
        video_capture.release()
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()

    if input_source == "mp4":
        print(f"Output video saved to {output_path}")

# Start the interactive detection workflow only when run as a script/cell,
# not when this code is imported.
if __name__ == "__main__":
    main()

### 2 Image Retrieval using SIFT features

Input: query image

Output: Top k (default = 5) most similar images

In [None]:
import cv2
import os
import time
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Path to the query image (the image you want to find similar images to)
QUERY_IMAGE_PATH = r"./resources/query-image.jpg"

# Path to the folder containing database images (can contain subfolders)
DATABASE_FOLDER_PATH = r"./resources/classification"

# Path of the text file the ranked results are written to
OUTPUT_FILE_PATH = r"output.txt"

# Number of most similar images to return
K_RESULTS = 5

# Minimum cosine similarity between a query descriptor and its best database
# descriptor for the pair to be counted as a match (see match_features)
COSINE_SIMILARITY_THRESHOLD = 0.8

#Computing SIFT features
def extract_sift_features(image_path):
    """Detect SIFT keypoints in an image file and compute their descriptors.

    Args:
        image_path: Path of the image to load with OpenCV.

    Returns:
        A ``(keypoints, descriptors)`` pair as produced by
        ``detectAndCompute``. ``(None, None)`` is returned, after printing an
        error, when the image cannot be read or any exception occurs.
    """
    try:
        image = cv2.imread(image_path)
        if image is not None:
            # SIFT is run on the grayscale version of the image.
            grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            # Detector is created with nfeatures=100.
            detector = cv2.SIFT_create(nfeatures = 100)
            found_keypoints, found_descriptors = detector.detectAndCompute(grayscale, None)
            return found_keypoints, found_descriptors

        # imread signals an unreadable/missing file by returning None.
        print(f"Error: Could not read image at {image_path}")
    except Exception as exc:
        print(f"Error processing {image_path}: {str(exc)}")

    return None, None

def get_all_image_paths(database_folder):
    """Collect the paths of every image file under ``database_folder``.

    The folder is walked recursively. A file counts as an image when its
    lower-cased name ends with one of the known image extensions.

    Args:
        database_folder: Root directory to scan.

    Returns:
        A list of file paths (root joined with file name) in walk order.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')

    return [
        os.path.join(current_dir, filename)
        for current_dir, _, filenames in os.walk(database_folder)
        for filename in filenames
        if filename.lower().endswith(image_suffixes)
    ]

def match_features(query_descriptors, db_descriptors):
    """Match query descriptors to database descriptors by cosine similarity.

    Both descriptor sets are cast to float32 and standardized independently
    with their own ``StandardScaler`` before the similarity matrix is
    computed. Each query descriptor is paired with its most similar database
    descriptor, and the pair is kept only when that similarity reaches
    ``COSINE_SIMILARITY_THRESHOLD``.

    Args:
        query_descriptors: Descriptors of the query image, or ``None``.
        db_descriptors: Descriptors of the database image, or ``None``.

    Returns:
        A list of ``cv2.DMatch`` objects whose distance is
        ``1 - similarity``. The list is empty when either input is ``None``
        or empty, or when the similarity computation raises.
    """
    descriptor_sets = (query_descriptors, db_descriptors)
    # Nothing to match when either side is missing or has no rows.
    if any(d is None for d in descriptor_sets) or any(len(d) == 0 for d in descriptor_sets):
        return []

    try:
        # cosine_similarity is fed float32 data.
        query_f32 = query_descriptors.astype(np.float32)
        db_f32 = db_descriptors.astype(np.float32)

        # Each set is standardized with statistics fitted on itself.
        query_scaled = StandardScaler().fit_transform(query_f32)
        db_scaled = StandardScaler().fit_transform(db_f32)

        similarities = cosine_similarity(query_scaled, db_scaled)
    except Exception as e:
        print(f"Error calculating cosine similarity: {e}")
        return []

    accepted = []
    for query_idx, similarity_row in enumerate(similarities):
        # Best database candidate for this query descriptor.
        train_idx = np.argmax(similarity_row)
        top_score = similarity_row[train_idx]

        if top_score >= COSINE_SIMILARITY_THRESHOLD:
            # Store the match with distance expressed as 1 - similarity.
            accepted.append(
                cv2.DMatch(_queryIdx=query_idx, _trainIdx=train_idx, _distance=1.0 - top_score)
            )

    return accepted

def retrieve_similar_images(query_image_path, database_folder, k=2):
    """Rank database images by how many SIFT matches they share with a query.

    Args:
        query_image_path: Path of the query image.
        database_folder: Root folder of the image database (scanned
            recursively).
        k: Maximum number of results to return.

    Returns:
        Up to ``k`` dicts with keys ``'path'``, ``'matches'`` and
        ``'filename'``, ordered by match count, highest first. An empty list
        is returned (after printing an error) when the query image or the
        database folder is missing, or when no query features are found.
    """
    # Validate both inputs before doing any feature extraction.
    if not os.path.exists(query_image_path):
        print(f"Error: Query image not found at {query_image_path}")
        return []

    if not os.path.isdir(database_folder):
        print(f"Error: Database folder not found at {database_folder}")
        return []

    print(f"Extracting features from query image: {query_image_path}")

    # Only the descriptors are needed; the keypoints are ignored.
    _, query_descriptors = extract_sift_features(query_image_path)
    if query_descriptors is None:
        print("Error: Could not extract features from query image")
        return []

    print(f"Scanning database folder: {database_folder}")
    candidates = get_all_image_paths(database_folder)
    total = len(candidates)
    print(f"Found {total} images in database")

    ranking = []
    for position, candidate in enumerate(candidates, start=1):
        # Report progress on the 1st, 11th, 21st, ... image.
        if position % 10 == 1:
            print(f"Processing image {position}/{total}: {candidate}")

        _, candidate_descriptors = extract_sift_features(candidate)
        if candidate_descriptors is not None:
            # The number of accepted matches is the similarity score.
            match_count = len(match_features(query_descriptors, candidate_descriptors))
            ranking.append({
                'path': candidate,
                'matches': match_count,
                'filename': os.path.basename(candidate)
            })

    # Highest match count first; ties keep their scan order.
    ordered = sorted(ranking, key=lambda entry: entry['matches'], reverse=True)
    return ordered[:k]

def save_results(results, output_path):
    """Save the retrieval results to a UTF-8 text file.

    The file starts with a header line giving the number of results, followed
    by one numbered line per result with its filename, match count and path.

    Args:
        results: Sequence of dicts with ``'filename'``, ``'matches'`` and
            ``'path'`` keys.
        output_path: Destination file; it is overwritten if it exists.

    Returns:
        None. Saving is best effort: any failure is reported on stdout
        instead of being raised.
    """
    try:
        report_lines = [f"Top {len(results)} similar images:\n"]
        report_lines.extend(
            f"{rank}. {result['filename']} - {result['matches']} matches - {result['path']}\n"
            for rank, result in enumerate(results, start=1)
        )
        # Explicit UTF-8 so non-ASCII file names do not depend on the
        # platform's default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.writelines(report_lines)
        print(f"Results saved to {output_path}")
    except Exception as e:
        print(f"Error saving results: {str(e)}")

def main():
    """Run the SIFT image-retrieval workflow with the module-level settings.

    Prints the configuration, retrieves the top ``K_RESULTS`` matches for
    ``QUERY_IMAGE_PATH`` from ``DATABASE_FOLDER_PATH``, saves them to
    ``OUTPUT_FILE_PATH``, echoes them to the console and finally reports the
    elapsed wall-clock time.
    """
    divider = "-----------------------------------"
    banner = (
        "Image Retrieval using SIFT features",
        divider,
        f"Query image: {QUERY_IMAGE_PATH}",
        f"Database folder: {DATABASE_FOLDER_PATH}",
        f"Number of results (k): {K_RESULTS}",
        f"Cosine Similarity Threshold: {COSINE_SIMILARITY_THRESHOLD}",
        divider,
    )
    for banner_line in banner:
        print(banner_line)

    started_at = time.time()

    top_matches = retrieve_similar_images(
        query_image_path=QUERY_IMAGE_PATH,
        database_folder=DATABASE_FOLDER_PATH,
        k=K_RESULTS
    )

    if not top_matches:
        print("No results found or an error occurred during retrieval.")
    else:
        # Persist the ranking first, then echo it to the console.
        save_results(top_matches, OUTPUT_FILE_PATH)

        print("\nTop matches:")
        for rank, match in enumerate(top_matches, start=1):
            print(f"{rank}. {match['filename']} - {match['matches']} matches")

    # The timing covers retrieval, saving and printing.
    elapsed = time.time() - started_at
    print(f"\nTotal processing time: {elapsed:.2f} seconds")

# Run the retrieval workflow only when executed as a script/cell, not when
# this code is imported.
if __name__ == "__main__":
    main()