# Eval

In [9]:
import os
import xml.etree.ElementTree as ET
import cv2
import numpy as np

from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from skimage.feature import hog

from sklearn.metrics import confusion_matrix, classification_report

from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as patches

import joblib
import json

## Eval Metrics Code

In [10]:
class EvalMetrics:
    
    def __init__(self):
        self.predictions = None
        self.ground_truths = None
        self.iou_thresholds = None
        
    def calculate_ap(self, precision, recall):
        """Calculate Average Precision using 11-point interpolation"""
        if len(precision) == 0 or len(recall) == 0:
            return 0.0
            
        ap = 0
        for t in np.arange(0, 1.1, 0.1):
            mask = recall >= t
            if not np.any(mask):
                p = 0
            else:
                p = np.max(precision[mask])
            ap += p / 11
        return ap
    

    def print_metrics(self, metrics):
        """Print evaluation metrics"""
        print("\nEvaluation Results:")
        print(f"Precision: {metrics['precision']:.1f}%")
        print(f"Recall: {metrics['recall']:.1f}%")
        print(f"mAP@50: {metrics['mAP50']:.1f}%")
        print(f"mAP@50-95: {metrics['mAP50-95']:.1f}%")
        


    def calculate_iou(self, box1, box2, debug=False):
        """Calculate IoU between two boxes with enhanced precision and debugging"""
        # Convert all coordinates to float64 for better precision
        box1 = np.array(box1, dtype=np.float64)
        box2 = np.array(box2, dtype=np.float64)
        
        # Get intersection coordinates
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
    
        # Calculate areas
        w = max(0.0, x2 - x1)
        h = max(0.0, y2 - y1)
        intersection = w * h
        
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = float(box1_area + box2_area - intersection)
        
        iou = intersection / union if union > 0 else 0.0

        return iou

    def calculate_metrics(self):
        # Single pass of precision-recall calculation
        all_tps = []
        all_fps = []
        all_scores = []
        total_gt = 0
        
        # Process each image once
        for img_idx, (pred_boxes, gt_boxes) in enumerate(zip(self.predictions, self.ground_truths)):
            total_gt += len(gt_boxes)
            if len(pred_boxes) == 0:
                continue
                
            pred_boxes = np.array(pred_boxes, dtype=np.float64)
            gt_boxes = np.array(gt_boxes, dtype=np.float64)
            
            # Track matches at each threshold
            thresholds_tps = np.zeros((len(self.iou_thresholds), len(pred_boxes)))
            thresholds_fps = np.zeros((len(self.iou_thresholds), len(pred_boxes)))
            
            # Sort by confidence
            scores = pred_boxes[:, 4]
            sort_idx = np.argsort(-scores)
            pred_boxes = pred_boxes[sort_idx]
            
            # For each prediction
            for pred_idx, pred in enumerate(pred_boxes):
                # Track best match at each threshold
                best_ious = np.zeros(len(self.iou_thresholds))
                matched_gt_indices = [-1] * len(self.iou_thresholds)
                
                # Compare with each ground truth
                for gt_idx, gt in enumerate(gt_boxes):
                    iou = self.calculate_iou(pred[:4], gt)
                    
                    # Update if better match found
                    for thresh_idx, _ in enumerate(self.iou_thresholds):
                        if iou > best_ious[thresh_idx]:
                            best_ious[thresh_idx] = iou
                            matched_gt_indices[thresh_idx] = gt_idx
                
                # Record matches meeting thresholds
                for thresh_idx, thresh in enumerate(self.iou_thresholds):
                    if best_ious[thresh_idx] >= thresh:
                        thresholds_tps[thresh_idx, pred_idx] = 1
                    else:
                        thresholds_fps[thresh_idx, pred_idx] = 1
                
                all_scores.append(pred[4])
            
            all_tps.append(thresholds_tps)
            all_fps.append(thresholds_fps)
        
        # Calculate metrics
        metrics = {}
        
        if len(all_tps) > 0:
            all_tps = np.concatenate(all_tps, axis=1)
            all_fps = np.concatenate(all_fps, axis=1)
            
            # Calculate AP for each threshold
            aps = []
            for thresh_idx in range(len(self.iou_thresholds)):
                cum_tp = np.cumsum(all_tps[thresh_idx])
                cum_fp = np.cumsum(all_fps[thresh_idx])
                
                # Calculate precision and recall
                recall = cum_tp / total_gt if total_gt > 0 else np.zeros_like(cum_tp)
                precision = cum_tp / (cum_tp + cum_fp)
                
                # Calculate AP
                ap = self.calculate_ap(precision, recall)
                aps.append(ap)
            
            # Final metrics as per paper
            metrics['mAP50'] = aps[0] * 100  # AP at IoU=0.5
            metrics['mAP50-95'] = np.mean(aps) * 100  # Mean AP across thresholds
            
            # Use IoU=0.5 threshold for precision/recall
            cum_tp = np.cumsum(all_tps[0])
            cum_fp = np.cumsum(all_fps[0])
            recall = cum_tp / total_gt if total_gt > 0 else np.zeros_like(cum_tp)
            precision = cum_tp / (cum_tp + cum_fp)
            metrics['precision'] = np.mean(precision) * 100
            metrics['recall'] = np.mean(recall) * 100
            
        else:
            metrics = {
                'precision': 0.0,
                'recall': 0.0,
                'mAP50': 0.0,
                'mAP50-95': 0.0
            }
        
        return metrics

## Detection code

In [11]:
class HOGPersonDetectorEval:

    def __init__(self, model_path):
        """Initialize evaluator with trained model"""
        self.eval = EvalMetrics()
        self.clf = joblib.load(model_path)
        
        self.predictions = []
        self.ground_truths = []
        self.iou_thresholds = np.linspace(0.5, 0.95, 10)  # For mAP50-95
        
        # Add HOG parameters
        self.hog_params = {
            'orientations': 9,
            'pixels_per_cell': (8, 8),
            'cells_per_block': (2, 2),
            'block_norm': 'L2'
        }
 
    def visualize_results(self, image, gt_boxes, pred_boxes, save_path):
        """Visualize ground truth and predictions on image"""
        try:
            vis_img = image.copy()
            
            # Draw ground truth boxes in green
            for box in gt_boxes:
                cv2.rectangle(vis_img, (box[0], box[1]), (box[2], box[3]), 
                             (0, 255, 0), 2)
                cv2.putText(vis_img, 'GT', (box[0], box[1]-5),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            
            # Draw predicted boxes in red
            for box in pred_boxes:
                x1, y1, x2, y2 = map(int, box[:4])
                conf = box[4]
                
                # Draw box
                cv2.rectangle(vis_img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                
                # Add confidence score in better position
                conf_text = f"{conf:.2f}"
                text_size = cv2.getTextSize(conf_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                # Position text to be visible
                text_x = x1
                text_y = y1 - 10 if y1 > 20 else y2 + 20
                # Add background rectangle for text
                cv2.rectangle(vis_img, 
                             (text_x, text_y - text_size[1] - 4),
                             (text_x + text_size[0], text_y + 4),
                             (0, 0, 0), -1)
                # Add text
                cv2.putText(vis_img, conf_text, (text_x, text_y),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            
            # Add image info
            h, w = image.shape[:2]
            info_text = f"Image size: {w}x{h}, Detections: {len(pred_boxes)}"
            cv2.putText(vis_img, info_text, (10, 20),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
            
            # Save image
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            cv2.imwrite(save_path, vis_img)
            
        except Exception as e:
            print(f"Error in visualization: {e}")
        
            
    def calculate_box_overlap_ratio(self, box1, box2):
        """Calculate overlap ratio relative to smaller box"""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        
        if x2 <= x1 or y2 <= y1:
            return 0.0
            
        intersection = (x2 - x1) * (y2 - y1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        
        # Use area of smaller box as denominator
        smaller_area = min(box1_area, box2_area)
        return intersection / smaller_area
    

        
    def load_image_list(self, split_file_path):
        """Load image list from train/val txt file"""
        with open(split_file_path) as f:
            return [line.strip() for line in f.readlines()]

   
    def extract_hog_features(self, image, target_size=(128, 64)):
        """Extract HOG features from an image patch"""
        if image is None or image.size == 0:
            return None
            
        try:
            resized = cv2.resize(image, target_size)
            features = hog(
                resized,
                orientations=self.hog_params['orientations'],
                pixels_per_cell=self.hog_params['pixels_per_cell'],
                cells_per_block=self.hog_params['cells_per_block'],
                block_norm=self.hog_params['block_norm'],
                feature_vector=True
            )
            return features
        except Exception as e:
            print(f"Error extracting HOG features: {e}")
            return None

    def get_person_boxes(self, xml_path):
        """Extract person bounding boxes from XML annotation"""
        tree = ET.parse(xml_path)
        root = tree.getroot()
        boxes = []
        
        for obj in root.findall('object'):
            if obj.find('name').text.lower() == 'person':
                bbox = obj.find('bndbox')
                xmin = int(float(bbox.find('xmin').text))
                ymin = int(float(bbox.find('ymin').text))
                xmax = int(float(bbox.find('xmax').text))
                ymax = int(float(bbox.find('ymax').text))
                boxes.append((xmin, ymin, xmax, ymax))
        return boxes
        
    def non_max_suppression(self, boxes, overlap_thresh):
        """Apply non-maximum suppression to avoid duplicate detections"""
        if len(boxes) == 0:
            return []
            
        # Convert to numpy array if needed
        if not isinstance(boxes, np.ndarray):
            boxes = np.array(boxes)
        
        # Get coordinates and scores
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        scores = boxes[:, 4]
        
        # Calculate areas
        areas = (x2 - x1) * (y2 - y1)
        
        # Sort by confidence score
        order = scores.argsort()[::-1]
        
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            
            if order.size == 1:
                break
                
            # Get overlaps
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            
            w = np.maximum(0.0, xx2 - xx1)
            h = np.maximum(0.0, yy2 - yy1)
            inter = w * h
            
            # Calculate IoU
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            
            # Get indices of boxes to keep
            inds = np.where(ovr <= overlap_thresh)[0]
            order = order[inds + 1]
        
        return keep
    


    def detect_persons(self, image, conf_thresh=0.5, max_post_processing_steps=1, merge_thresh=0.3, nms_thresh=0.5):
        """Detect persons in image with sliding window"""
        if len(image.shape) == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                
        # Resize large images
        max_dim = 1000
        scale_factor = 1.0
        if max(image.shape) > max_dim:
            scale_factor = max_dim / max(image.shape)
            image = cv2.resize(image, None, fx=scale_factor, fy=scale_factor)
        
        detections = []  # Keep as list initially
        window_size = (64, 128)
        stride = 32
        scales = [0.5, 0.75, 1.0, 1.25]
    
        try:
            for scale in scales:
                scaled = cv2.resize(image, None, fx=scale, fy=scale)
                
                y_steps = range(0, scaled.shape[0] - window_size[1], stride)
                x_steps = range(0, scaled.shape[1] - window_size[0], stride)
                
                batch_size = 64
                windows = []
                coordinates = []
                
                for y in y_steps:
                    for x in x_steps:
                        windows.append(scaled[y:y + window_size[1], x:x + window_size[0]])
                        coordinates.append((x, y))
                        
                        if len(windows) >= batch_size:
                            batch_features = [self.extract_hog_features(win, target_size=window_size) for win in windows]
                            batch_features = [f for f in batch_features if f is not None]
                            
                            if batch_features:
                                # Use decision_function score directly since StandardScaler is in pipeline
                                confidences = self.clf.decision_function(batch_features)
                                
                                # Add detections above threshold
                                for (x, y), confidence in zip(coordinates, confidences):
                                    if confidence > conf_thresh:
                                        x1 = int(x / scale / scale_factor)
                                        y1 = int(y / scale / scale_factor)
                                        x2 = int((x + window_size[0]) / scale / scale_factor)
                                        y2 = int((y + window_size[1]) / scale / scale_factor)
                                        detections.append([x1, y1, x2, y2, float(confidence)])
                            
                            windows = []
                            coordinates = []
                
                # Process remaining windows
                if windows:
                    batch_features = [self.extract_hog_features(win, target_size=window_size) for win in windows]
                    batch_features = [f for f in batch_features if f is not None]
                    
                    if batch_features:
                        confidences = self.clf.decision_function(batch_features)
                        
                        for (x, y), confidence in zip(coordinates, confidences):
                            if confidence > conf_thresh:
                                x1 = int(x / scale / scale_factor)
                                y1 = int(y / scale / scale_factor)
                                x2 = int((x + window_size[0]) / scale / scale_factor)
                                y2 = int((y + window_size[1]) / scale / scale_factor)
                                detections.append([x1, y1, x2, y2, float(confidence)])
            
            # Post-process detections
            if len(detections) > 0:
                detections = np.array(detections)
                
                # Track number of boxes before and after post-processing
                prev_num_boxes = -1
                for step in range(max_post_processing_steps):
                 
                    
                    curr_num_boxes = len(detections) if detections is not None else 0
                    if curr_num_boxes == prev_num_boxes:
                        break
                    
                    prev_num_boxes = curr_num_boxes
                    
                    # First adjust aspect ratios and merge/NMS
                    detections = self.post_process_detection(detections, merge_thresh, nms_thresh) 
                    if detections is None or len(detections) == 0:
                        break
                        
                return detections
            
            return []
            
        except Exception as e:
            print(f"Error in detect_persons: {e}")
            return []


    def evaluate(self, val_file_path, val_images_path, annotations_path, 
                visualization_path=None, conf_thresh=0.5, max_post_processing_steps=3,
                merge_thresh=0.3, nms_thresh=0.5, max_samples=None):
        """
        Evaluate model on validation set
        
        Args:
            val_file_path: Path to validation image list
            val_images_path: Path to validation images
            annotations_path: Path to annotation files
            visualization_path: Path to save visualizations
            conf_thresh: Confidence threshold for detections (default: 0.5)
            max_post_processing_steps: Number of post-processing iterations (default: 3)
            merge_thresh: Threshold for merging overlapping boxes (default: 0.3) 
            nms_thresh: Threshold for NMS (default: 0.5)
            max_samples: Maximum number of images to evaluate (default: None, evaluates all)
        """
        print("\nEvaluation Parameters:")
        print(f"- Confidence threshold: {conf_thresh}")
        print(f"- Max post-processing steps: {max_post_processing_steps}")
        print(f"- Merge threshold: {merge_thresh}")
        print(f"- NMS threshold: {nms_thresh}")
        print(f"- Max samples: {max_samples if max_samples else 'All'}")
        print(f"- HOG Parameters: {self.hog_params}")
        print("\nStarting evaluation...")
        
        # Load validation images
        val_images = self.load_image_list(val_file_path)
        if max_samples:
            val_images = val_images[:max_samples]
        print(f"Evaluating on {len(val_images)} images...")
        
        # Create visualization directory if needed
        if visualization_path and not os.path.exists(visualization_path):
            os.makedirs(visualization_path)
        
        # Initialize progress tracking
        processed_images = 0
        skipped_images = 0
        
        # Reset predictions and ground truths
        self.predictions = []
        self.ground_truths = []
        
        # Process each image
        for i, img_file in enumerate(tqdm(val_images)):
            try:
                img_path = os.path.join(val_images_path, img_file)
                xml_path = os.path.join(annotations_path, 
                                      os.path.splitext(img_file)[0] + '.xml')
                
                if not os.path.exists(img_path) or not os.path.exists(xml_path):
                    skipped_images += 1
                    continue
                    
                # Load and process image
                image = cv2.imread(img_path)
                if image is None:
                    skipped_images += 1
                    continue
                
                # Get ground truth boxes
                gt_boxes = self.get_person_boxes(xml_path)
                
                # Detect persons
                detections = self.detect_persons(
                    image, 
                    conf_thresh=conf_thresh,
                    max_post_processing_steps=max_post_processing_steps,
                    merge_thresh=merge_thresh,
                    nms_thresh=nms_thresh
                )
                
                # Convert detections to list if numpy array
                if isinstance(detections, np.ndarray):
                    detections = detections.tolist()
                
                self.ground_truths.append(gt_boxes)
                self.predictions.append(detections if detections is not None else [])
                

                if visualization_path and i < 5:
                    viz_path = os.path.join(visualization_path, f"eval_{i}.jpg")
                    self.visualize_results(image, gt_boxes, detections, viz_path)
                    
                    # Add HOG visualizations
                    if len(detections) > 0:
                        hog_viz_path = os.path.join(visualization_path, f"eval_{i}_hog.jpg")
                        self.visualize_detection_with_hog(image, detections[0], hog_viz_path)
                        
                    windows_viz_path = os.path.join(visualization_path, f"eval_{i}_windows.jpg")
                    self.visualize_sliding_windows_with_hog(image, save_path=windows_viz_path)
                                    
                
                processed_images += 1
                
      
                    
            except Exception as e:
                print(f"\nError processing image {img_file}: {e}")
                skipped_images += 1
                continue
        
        print(f"\nEvaluation complete:")
        print(f"- Total processed: {processed_images}")
        print(f"- Total skipped: {skipped_images}")
        
        # Set evaluation metrics object attributes
        self.eval.predictions = self.predictions
        self.eval.ground_truths = self.ground_truths
        self.eval.iou_thresholds = self.iou_thresholds
        
        # Calculate metrics
        metrics = self.eval.calculate_metrics()
        self.eval.print_metrics(metrics)
        
        return metrics
    
    def visualize_detection_with_hog(self, image, detection, save_path=None):
        """Visualize HOG features for a detected person"""
        if len(detection) < 4:
            print("Invalid detection format")
            return
            
        # Extract person window
        x1, y1, x2, y2 = map(int, detection[:4])
        person_window = image[y1:y2, x1:x2]
        
        if person_window.size == 0:
            print("Invalid window size")
            return
        
        # Create visualization with 3 subplots
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4))
        
        # Original detection
        ax1.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, 
                               linewidth=2, edgecolor='r', facecolor='none')
        ax1.add_patch(rect)
        ax1.set_title('Detection')
        ax1.axis('off')
        
        # Extracted window
        ax2.imshow(cv2.cvtColor(person_window, cv2.COLOR_BGR2RGB))
        ax2.set_title('Person Window')
        ax2.axis('off')
        
        # HOG visualization
        _, hog_image = self.visualize_hog(person_window)
        ax3.imshow(hog_image, cmap='gray')
        ax3.set_title('HOG Features')
        ax3.axis('off')
        
        plt.tight_layout()
        
        if save_path:
            # Ensure the directory exists
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()
    
    def visualize_sliding_windows_with_hog(self, image, window_size=(64, 128), 
                                         stride=32, num_windows=3, save_path=None):
        """Visualize HOG features for random sliding windows"""
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
            
        height, width = gray.shape
        windows = []
        
        # Generate random window positions while ensuring windows fit within image
        for _ in range(num_windows):
            max_x = max(0, width - window_size[0] - 1)
            max_y = max(0, height - window_size[1] - 1)
            if max_x > 0 and max_y > 0:
                x = np.random.randint(0, max_x)
                y = np.random.randint(0, max_y)
                windows.append((x, y))
        
        if not windows:
            print("Image too small for selected window size")
            return
            
        # Create visualization grid
        fig, axes = plt.subplots(len(windows), 3, figsize=(12, 4*len(windows)))
        if len(windows) == 1:
            axes = [axes]  # Make it indexable for single window case
        
        for i, (x, y) in enumerate(windows):
            try:
                # Extract window
                window = gray[y:y + window_size[1], x:x + window_size[0]]
                
                # Original window
                axes[i][0].imshow(window, cmap='gray')
                axes[i][0].set_title(f'Window {i+1}')
                axes[i][0].axis('off')
                
                # Resized window
                resized = cv2.resize(window, window_size)
                axes[i][1].imshow(resized, cmap='gray')
                axes[i][1].set_title('Resized')
                axes[i][1].axis('off')
                
                # HOG visualization
                _, hog_image = self.visualize_hog(window)
                axes[i][2].imshow(hog_image, cmap='gray')
                axes[i][2].set_title('HOG Features')
                axes[i][2].axis('off')
                
            except Exception as e:
                print(f"Error processing window {i}: {e}")
                continue
        
        plt.tight_layout()
        
        if save_path:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            plt.savefig(save_path)
            plt.close()
        else:
            plt.show()

    def visualize_hog(self, image, window_size=(64, 128), save_path=None):
        """Visualize HOG features for a given image window"""
        if image is None or image.size == 0:
            print("Invalid image")
            return None, None
            
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
            
        # Resize to window size
        try:
            resized = cv2.resize(gray, window_size)
            
            # Calculate HOG features with visualization
            features, hog_image = hog(
                resized,
                orientations=self.hog_params['orientations'],
                pixels_per_cell=self.hog_params['pixels_per_cell'],
                cells_per_block=self.hog_params['cells_per_block'],
                block_norm=self.hog_params['block_norm'],
                visualize=True
            )
            
            if save_path:
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
                
                # Original image
                ax1.imshow(resized, cmap='gray')
                ax1.set_title('Original Window')
                ax1.axis('off')
                
                # HOG visualization
                ax2.imshow(hog_image, cmap='gray')
                ax2.set_title('HOG Features')
                ax2.axis('off')
                
                plt.tight_layout()
                plt.savefig(save_path)
                plt.close()
                
            return features, hog_image
            
        except Exception as e:
            print(f"Error in visualize_hog: {e}")
            return None, None

    
    def post_process_detection(self, detections, merge_thresh=0.3, nms_thresh=0.5):
        """Enhanced iterative post-processing with progressive merging"""
        if len(detections) == 0:
            return detections
                
        detections = np.array(detections)
        
        def create_density_clusters(boxes, radius=150):  # Increased radius
            """Group boxes based on density-based clustering with larger radius"""
            centers = []
            for box in boxes:
                centers.append([
                    (box[0] + box[2])/2,  # center x
                    (box[1] + box[3])/2   # center y
                ])
            centers = np.array(centers)
            
            clusters = []
            used = set()
            
            # Sort boxes by area to prioritize larger boxes
            areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            order = np.argsort(-areas)
            
            for i in order:
                if i in used:
                    continue
                    
                cluster = []
                points_to_check = [i]
                
                while points_to_check:
                    current = points_to_check.pop(0)
                    if current in used:
                        continue
                        
                    used.add(current)
                    cluster.append(current)
                    
                    # Get current box dimensions
                    curr_box = boxes[current]
                    curr_width = curr_box[2] - curr_box[0]
                    curr_height = curr_box[3] - curr_box[1]
                    
                    # Adaptive radius based on box size
                    adaptive_radius = max(radius, max(curr_width, curr_height) * 0.5)
                    
                    # Find neighbors
                    for j in range(len(boxes)):
                        if j in used:
                            continue
                            
                        # Calculate center distance
                        dist = np.sqrt(np.sum((centers[current] - centers[j])**2))
                        
                        # Check if boxes overlap or are close
                        if dist < adaptive_radius:
                            points_to_check.append(j)
                        else:
                            # Also check if boxes overlap significantly
                            iou = self.eval.calculate_iou(boxes[current], boxes[j])
                            if iou > merge_thresh * 0.5:  # More lenient IOU threshold
                                points_to_check.append(j)
                
                if cluster:
                    clusters.append(cluster)
            
            return clusters
    
        def merge_cluster(boxes):
            """Merge boxes with enhanced size preservation and coordinate clamping"""
            if len(boxes) == 0:
                return None
                
            # Get box centers and confidences
            centers = np.array([[
                (box[0] + box[2])/2,
                (box[1] + box[3])/2
            ] for box in boxes])
            
            # Calculate density-based weights
            densities = np.zeros(len(centers))
            for i, center in enumerate(centers):
                distances = np.sqrt(np.sum((centers - center)**2, axis=1))
                densities[i] = np.sum(np.exp(-distances/100))
            
            weights = densities * boxes[:, 4]
            weights = weights / np.sum(weights)
            
            # Get weighted center
            center_x = np.average((boxes[:, 0] + boxes[:, 2])/2, weights=weights)
            center_y = np.average((boxes[:, 1] + boxes[:, 3])/2, weights=weights)
            
            # Calculate dimensions with clamped margins
            margin = 0.1
            width = np.max(boxes[:, 2]) - np.min(boxes[:, 0])
            height = np.max(boxes[:, 3]) - np.min(boxes[:, 1])
            
            margin_x = margin * width
            margin_y = margin * height
            
            x1 = max(0, np.min(boxes[:, 0]) - margin_x)
            y1 = max(0, np.min(boxes[:, 1]) - margin_y)
            x2 = np.max(boxes[:, 2]) + margin_x
            y2 = np.max(boxes[:, 3]) + margin_y
            
            # Adjust aspect ratio if needed
            width = x2 - x1
            height = y2 - y1
            aspect_ratio = width / height
            target_ratio = 0.41
            
            if aspect_ratio < target_ratio * 0.5:
                width = height * target_ratio * 0.6
                x1 = max(0, center_x - width/2)
                x2 = center_x + width/2
            elif aspect_ratio > target_ratio * 2.0:
                height = width / (target_ratio * 1.5)
                y1 = max(0, center_y - height/2)
                y2 = center_y + height/2
            
            confidence = np.max(boxes[:, 4])
            
            return np.array([x1, y1, x2, y2, confidence])
                 
    
        # Iterative merging process
        prev_num_boxes = len(detections) + 1
        
        while len(detections) < prev_num_boxes:
            prev_num_boxes = len(detections)
            
            # Create and merge clusters
            clusters = create_density_clusters(detections)
            final_boxes = []
            
            for cluster_indices in clusters:
                cluster_boxes = detections[cluster_indices]
                merged_box = merge_cluster(cluster_boxes)
                if merged_box is not None:
                    final_boxes.append(merged_box)
            
            if not final_boxes:
                break
                
            detections = np.array(final_boxes)
            
            # Apply NMS
            keep = self.non_max_suppression(detections, nms_thresh)
            detections = detections[keep]
            
            # Break if no more merging possible
            if len(detections) == prev_num_boxes:
                break
        
        return detections

# Implementation

In [12]:
evaluator = HOGPersonDetectorEval("model.pkl")


metrics = evaluator.evaluate(
    val_file_path="dataset/val_files.txt",
    val_images_path="dataset/images",
    annotations_path="dataset/voc_labels",
    visualization_path="viz",
    conf_thresh=1000.0,
    max_post_processing_steps=2,
    merge_thresh=0.3,
    nms_thresh=0.5,
    max_samples=None
)



Evaluation Parameters:
- Confidence threshold: 1000.0
- Max post-processing steps: 2
- Merge threshold: 0.3
- NMS threshold: 0.5
- Max samples: All
- HOG Parameters: {'orientations': 9, 'pixels_per_cell': (8, 8), 'cells_per_block': (2, 2), 'block_norm': 'L2'}

Starting evaluation...
Evaluating on 1620 images...


100%|██████████| 1620/1620 [2:27:30<00:00,  5.46s/it] 


Evaluation complete:
- Total processed: 1620
- Total skipped: 0

Evaluation Results:
Precision: 3.4%
Recall: 1.9%
mAP@50: 0.4%
mAP@50-95: 0.2%



