# In [None]:
def merge_contour_group(contour_group: "List") -> "Tuple[int, int, int, int]":
    """
    Return the bounding rectangle that encloses every contour in a group.

    Args:
        contour_group: Non-empty sequence of contours, each accepted by
            ``cv2.boundingRect``.

    Returns:
        ``(x, y, width, height)`` of the smallest axis-aligned rectangle that
        covers the bounding rectangles of all contours in the group.

    Raises:
        ValueError: If ``contour_group`` is empty.
    """
    # NOTE: the annotations are quoted because this definition appears before
    # the ``typing`` import further down in the file; unquoted names would be
    # evaluated at definition time and fail.
    if len(contour_group) == 0:
        raise ValueError("Cannot merge an empty contour group")

    # A single contour needs no special case: the min/max below reduce to its
    # own bounding rectangle.
    rects = [cv2.boundingRect(contour) for contour in contour_group]

    left = min(rx for rx, _, _, _ in rects)
    top = min(ry for _, ry, _, _ in rects)
    right = max(rx + rw for rx, _, rw, _ in rects)
    bottom = max(ry + rh for _, ry, _, rh in rects)

    return left, top, right - left, bottom - top


# Vision Board Box Extractor V4 - Enhanced Grouping Version
# This code extracts meaningful objects from vision board images keeping text together

import cv2
import numpy as np
import uuid
from typing import List, Dict, Any, Tuple
import base64
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
import os
from sklearn.cluster import DBSCAN

# Helper Functions
def base64_to_opencv_image(base64_string: str) -> np.ndarray:
    """
    Decode a base64 (optionally data-URL prefixed) image into a BGR array.

    Args:
        base64_string: Base64 payload; if it contains a comma, the part after
            the first comma is used (the ``data:...;base64,`` prefix case).

    Returns:
        The decoded image as an OpenCV-style BGR ``np.ndarray``.

    Raises:
        ValueError: If decoding or image parsing fails for any reason; the
            original exception is attached as ``__cause__``.
    """
    try:
        if "," in base64_string:
            base64_string = base64_string.split(",")[1]

        image_data = base64.b64decode(base64_string)
        pil_image = Image.open(BytesIO(image_data))

        # The RGB->BGR conversion below needs a 3-channel RGB array, so every
        # other mode (RGBA, grayscale, palette, ...) is normalised first.
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")

        return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    except Exception as e:
        raise ValueError(f"Invalid base64 image data: {str(e)}") from e

def image_file_to_base64(image_path: str) -> str:
    """Read an image file and return it as a JPEG-labelled base64 data URL."""
    with open(image_path, "rb") as source:
        raw_bytes = source.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:image/jpeg;base64,{payload}"

def display_base64_image(base64_string: str, title: str = "Image"):
    """Render a base64 (optionally data-URL prefixed) image in a small figure."""
    # Drop the "data:...;base64," style prefix when one is present.
    payload = base64_string.split(",")[1] if "," in base64_string else base64_string
    picture = Image.open(BytesIO(base64.b64decode(payload)))

    plt.figure(figsize=(5, 3))
    plt.imshow(picture)
    plt.title(title)
    plt.axis('off')
    plt.show()

def get_dominant_background_color(image: np.ndarray, sample_size: int = 1000) -> np.ndarray:
    """
    Estimate the background color as the mean color of the image border.

    Pixels are taken from 10-pixel-wide bands along all four edges (corner
    pixels therefore appear in two bands). If more than ``sample_size`` pixels
    are collected, a random subset of that size is used.

    Args:
        image: Array whose first two axes are height and width and whose
            pixels have 3 channels.
        sample_size: Maximum number of border pixels to average.

    Returns:
        A length-3 ``uint8`` array with the per-channel mean of the sampled
        border pixels, or ``[200, 200, 200]`` when no pixels are available.
    """
    h, w = image.shape[:2]
    border = 10

    # Top, bottom, left and right bands flattened to (N, 3) and stacked.
    edge_pixels = np.concatenate([
        image[0:border, :].reshape(-1, 3),
        image[h - border:h, :].reshape(-1, 3),
        image[:, 0:border].reshape(-1, 3),
        image[:, w - border:w].reshape(-1, 3),
    ])

    if len(edge_pixels) > sample_size:
        indices = np.random.choice(len(edge_pixels), sample_size, replace=False)
        edge_pixels = edge_pixels[indices]

    if len(edge_pixels) == 0:
        return np.array([200, 200, 200], dtype=np.uint8)  # Default gray

    # The centre of a single-cluster k-means fit is the mean of the points,
    # so the mean is computed directly instead of fitting a model.
    return edge_pixels.mean(axis=0).astype(np.uint8)

def create_object_mask(image: np.ndarray, debug: bool = False) -> np.ndarray:
    """
    Build a binary foreground mask from edge detection and adaptive thresholding.

    Args:
        image: BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
        debug: When True, plot the intermediate masks with matplotlib.

    Returns:
        Single-channel mask combining a dilated/closed Canny edge map with an
        inverted, de-noised adaptive threshold, followed by a final
        close/open cleanup.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Bilateral filter reduces noise while preserving edges.
    filtered = cv2.bilateralFilter(gray, 9, 75, 75)

    # Method 1: Canny at two threshold pairs, merged into one edge map.
    edges_combined = cv2.bitwise_or(
        cv2.Canny(filtered, 30, 80),
        cv2.Canny(filtered, 50, 120),
    )

    # Thicken the edges, then close gaps between them. No flood fill is
    # performed; regions are only filled as far as the closing reaches.
    kernel_dilate = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
    edges_dilated = cv2.dilate(edges_combined, kernel_dilate, iterations=3)

    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    mask_filled = cv2.morphologyEx(edges_dilated, cv2.MORPH_CLOSE, kernel_close, iterations=2)

    # Method 2: adaptive thresholding to catch objects with uniform colors.
    adaptive_thresh = cv2.adaptiveThreshold(
        filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 8
    )

    # Invert so objects are white, then open to remove small noise.
    adaptive_thresh_inv = cv2.bitwise_not(adaptive_thresh)
    kernel_clean = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    adaptive_thresh_clean = cv2.morphologyEx(
        adaptive_thresh_inv, cv2.MORPH_OPEN, kernel_clean, iterations=1
    )

    # Method 3: union of both masks, then close small holes and open away
    # very small blobs.
    combined_mask = cv2.bitwise_or(mask_filled, adaptive_thresh_clean)
    kernel_final = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    combined_mask = cv2.morphologyEx(combined_mask, cv2.MORPH_CLOSE, kernel_final, iterations=2)
    combined_mask = cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel_final, iterations=1)

    if debug:
        panels = [
            (edges_combined, 'Raw Edges'),
            (edges_dilated, 'Dilated Edges'),
            (mask_filled, 'Edge Mask with Closing'),
            (adaptive_thresh_clean, 'Adaptive Threshold'),
            (combined_mask, 'Final Combined Mask'),
        ]
        plt.figure(figsize=(20, 5))
        for position, (panel, panel_title) in enumerate(panels, start=1):
            plt.subplot(1, len(panels), position)
            plt.imshow(panel, cmap='gray')
            plt.title(panel_title)
            plt.axis('off')
        plt.show()

    return combined_mask

def group_nearby_contours(contours: List, min_distance: int = 30, debug: bool = False) -> List[List]:
    """
    Cluster contours whose centroids lie close together, using DBSCAN.

    Each contour is reduced to one point (its moment centroid, or the centre
    of its bounding rectangle when the zeroth moment is zero). Points are
    clustered with ``eps=min_distance`` and ``min_samples=1``, and the
    contours are returned grouped by cluster label.
    """
    if len(contours) == 0:
        return []

    def _center_of(contour):
        moments = cv2.moments(contour)
        mass = moments["m00"]
        if mass != 0:
            return [int(moments["m10"] / mass), int(moments["m01"] / mass)]
        # Degenerate contour: fall back to the bounding rect center
        bx, by, bw, bh = cv2.boundingRect(contour)
        return [bx + bw // 2, by + bh // 2]

    points = np.array([_center_of(contour) for contour in contours])

    # eps is the maximum distance between points in the same cluster
    cluster_ids = DBSCAN(eps=min_distance, min_samples=1).fit(points).labels_

    clusters = {}
    for contour, cluster_id in zip(contours, cluster_ids):
        clusters.setdefault(cluster_id, []).append(contour)

    if debug:
        print(f"Grouped {len(contours)} contours into {len(clusters)} clusters")
        for cluster_id, members in clusters.items():
            print(f"Cluster {cluster_id}: {len(members)} contours")

    return list(clusters.values())

def _is_fragment_of(small, large, margin=40, corner_radius=50, edge_band=25, min_overlap=0.3):
    """Return True if the box ``small`` looks like a fragment of the box ``large``.

    Both arguments are ``(x, y, w, h, area)`` tuples.
    """
    x, y, w, h, area = small
    lx, ly, lw, lh, larger_area = large

    # Only boxes at least twice as large can "own" a fragment.
    if larger_area < area * 2:
        return False

    center_x = x + w // 2
    center_y = y + h // 2
    right = lx + lw
    bottom = ly + lh

    # The small box's centre must fall inside the larger box grown by `margin`.
    if not (lx - margin <= center_x <= right + margin
            and ly - margin <= center_y <= bottom + margin):
        return False

    nearest_corner = min(
        ((center_x - corner_x) ** 2 + (center_y - corner_y) ** 2) ** 0.5
        for corner_x in (lx, right)
        for corner_y in (ly, bottom)
    )
    nearest_edge = min(
        abs(center_x - lx),
        abs(center_x - right),
        abs(center_y - ly),
        abs(center_y - bottom),
    )
    if nearest_corner < corner_radius or nearest_edge < edge_band:
        return True

    # Otherwise treat it as a fragment when enough of it overlaps the larger box.
    overlap_w = max(0, min(x + w, right) - max(x, lx))
    overlap_h = max(0, min(y + h, bottom) - max(y, ly))
    return (overlap_w * overlap_h) / area > min_overlap


def filter_corner_fragments(contour_groups: List[List], debug: bool = False) -> List[List]:
    """
    Remove small corner fragments that are likely parts of larger objects.

    Groups are ordered by bounding-box area, largest first. A group whose
    area is below 8000 is dropped when any larger-ranked group (including
    ones that were themselves dropped) is at least twice its area and the
    small group's centre sits near that group's corner or edge, or the
    small group overlaps it by more than 30% of its own area.

    Args:
        contour_groups: List of contour groups, each accepted by
            ``merge_contour_group``.
        debug: Accepted for signature compatibility; currently unused.

    Returns:
        The surviving groups in largest-area-first order. When one group or
        none is given, the input list itself is returned unchanged.
    """
    if len(contour_groups) <= 1:
        return contour_groups

    candidates = []
    for group in contour_groups:
        x, y, w, h = merge_contour_group(group)
        candidates.append(((x, y, w, h, w * h), group))

    # Largest first, so every earlier entry is a potential "owner".
    candidates.sort(key=lambda item: item[0][4], reverse=True)

    filtered_groups = []
    larger_rects = []
    for rect, group in candidates:
        is_fragment = rect[4] < 8000 and any(
            _is_fragment_of(rect, larger) for larger in larger_rects
        )
        if not is_fragment:
            filtered_groups.append(group)
        larger_rects.append(rect)

    return filtered_groups

def is_valid_object(x: int, y: int, w: int, h: int, image_shape: Tuple, debug: bool = False) -> bool:
    """
    Decide whether a bounding box is worth extracting as an object.

    The box is rejected, in this order, if it is smaller than 300 px² or
    larger than 70% of the image, narrower or shorter than 25 px, more
    elongated than 15:1, or within 5 px of any image border.
    ``image_shape`` is read as ``(height, width, ...)``.
    """
    def _reject(message):
        # Report the reason only in debug mode; always signal rejection.
        if debug:
            print(message)
        return False

    img_h, img_w = image_shape[0], image_shape[1]
    area = w * h
    min_area = 300
    max_area = img_h * img_w * 0.7

    if area < min_area:
        return _reject(f"Rejected: too small ({area} < {min_area})")
    if area > max_area:
        return _reject(f"Rejected: too large ({area} > {max_area})")

    # Both sides must reach the minimum length (this also keeps the ratio
    # below safe from a zero divisor).
    if w < 25 or h < 25:
        return _reject(f"Rejected: dimensions too small ({w}x{h})")

    long_side, short_side = max(w, h), min(w, h)
    aspect_ratio = long_side / short_side
    if aspect_ratio > 15:
        return _reject(f"Rejected: extreme aspect ratio ({aspect_ratio})")

    border_margin = 5
    clear_of_border = (
        x >= border_margin
        and y >= border_margin
        and x + w <= img_w - border_margin
        and y + h <= img_h - border_margin
    )
    if not clear_of_border:
        return _reject("Rejected: too close to border")

    return True

def extract_objects_from_vision_board_base64(image_base64: str, debug: bool = False) -> List[str]:
    """
    Extract meaningful objects (text groups, pictures) from a vision board image.

    Pipeline: decode the image, build a foreground mask, find external
    contours, drop contours with area <= 200, group nearby contours, remove
    corner fragments, keep groups whose bounding box passes
    ``is_valid_object``, then crop each box (with 5 px padding) and encode
    it as a JPEG data URL.

    Args:
        image_base64: Base64 image, optionally with a data-URL prefix.
        debug: When True, print progress and plot intermediate results.

    Returns:
        JPEG data URLs of the extracted regions, ordered top to bottom and
        then left to right by the box's top-left corner.
    """
    image = base64_to_opencv_image(image_base64)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Create mask to identify foreground objects
    mask = create_object_mask(image, debug=debug)

    # Find contours in the mask
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if debug:
        print(f"Found {len(contours)} initial contours")

    # Filter out very small contours early
    min_contour_area = 200
    filtered_contours = [c for c in contours if cv2.contourArea(c) > min_contour_area]

    if debug:
        print(f"After size filtering: {len(filtered_contours)} contours")

    # Group nearby contours (this handles the character splitting issue)
    contour_groups = group_nearby_contours(filtered_contours, min_distance=30, debug=debug)

    # Filter out corner fragments that are likely parts of larger objects
    contour_groups = filter_corner_fragments(contour_groups, debug=debug)

    # Create bounding boxes for each group
    object_regions = []
    for group in contour_groups:
        x, y, w, h = merge_contour_group(group)

        if is_valid_object(x, y, w, h, image_rgb.shape[:2], debug=debug):
            object_regions.append((x, y, w, h))

    # Sort by vertical position (top to bottom, then left to right). This is
    # done before the debug preview so the numbers drawn there match the
    # extraction order below.
    object_regions = sorted(object_regions, key=lambda region: (region[1], region[0]))

    if debug:
        print(f"Final valid objects: {len(object_regions)}")

        # Visualize the detection process
        plt.figure(figsize=(20, 5))

        plt.subplot(1, 4, 1)
        plt.imshow(image_rgb)
        plt.title('Original Image')
        plt.axis('off')

        plt.subplot(1, 4, 2)
        plt.imshow(mask, cmap='gray')
        plt.title('Object Detection Mask')
        plt.axis('off')

        plt.subplot(1, 4, 3)
        # Show contours on original image
        contour_img = image_rgb.copy()
        cv2.drawContours(contour_img, filtered_contours, -1, (255, 0, 0), 2)
        plt.imshow(contour_img)
        plt.title('Detected Contours')
        plt.axis('off')

        plt.subplot(1, 4, 4)
        result_img = image_rgb.copy()
        for i, (x, y, w, h) in enumerate(object_regions):
            cv2.rectangle(result_img, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv2.putText(result_img, str(i+1), (x, y-5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        plt.imshow(result_img)
        plt.title('Final Extracted Objects')
        plt.axis('off')
        plt.show()

    # Extract each object region
    object_base64_list = []
    padding = 5

    for i, (x, y, w, h) in enumerate(object_regions):
        # Add padding around the detected object, clamped to the image
        x_padded = max(0, x - padding)
        y_padded = max(0, y - padding)
        w_padded = min(image_rgb.shape[1] - x_padded, w + 2 * padding)
        h_padded = min(image_rgb.shape[0] - y_padded, h + 2 * padding)

        obj = image_rgb[y_padded:y_padded+h_padded, x_padded:x_padded+w_padded]

        if debug:
            print(f"Extracting object {i+1}: position ({x_padded}, {y_padded}), size ({w_padded}, {h_padded})")

        # Convert to base64
        pil_image = Image.fromarray(obj)
        buffer = BytesIO()
        pil_image.save(buffer, format='JPEG')
        img_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
        object_base64_list.append(f"data:image/jpeg;base64,{img_base64}")

    return object_base64_list

def process_vision_board(image_base64: str, filename: str = "image.jpg", debug: bool = False) -> Dict[str, Any]:
    """
    Process vision board image and extract meaningful objects (keeping text together).

    Args:
        image_base64: Base64 image, optionally with a data-URL prefix.
        filename: Name echoed back in the result.
        debug: Forwarded to the extraction step.

    Returns:
        Dict with ``session_id`` (a fresh UUID4 string), ``filename``,
        ``num_objects_found`` and ``objects`` (a list of
        ``{"id": 1-based index, "image": data URL}`` dicts).

    Raises:
        ValueError: If ``image_base64`` is empty.
        Exception: Wrapping any failure during extraction; the original
            exception is attached as ``__cause__``.
    """
    if not image_base64:
        raise ValueError("No image data provided")

    session_id = str(uuid.uuid4())

    try:
        object_base64_list = extract_objects_from_vision_board_base64(image_base64, debug=debug)

        results = [
            {"id": position, "image": obj_base64}
            for position, obj_base64 in enumerate(object_base64_list, start=1)
        ]

        return {
            "session_id": session_id,
            "filename": filename,
            "num_objects_found": len(object_base64_list),
            "objects": results
        }

    except Exception as e:
        # Chain explicitly so the root cause stays attached to the wrapper.
        raise Exception(f"Processing error: {str(e)}") from e

def save_objects_to_files(result_data, output_dir="extracted_objects"):
    """
    Save extracted objects as individual image files.

    Args:
        result_data: Mapping with an ``'objects'`` list; each entry has an
            ``'id'`` and an ``'image'`` holding base64 data, optionally with
            a data-URL prefix.
        output_dir: Directory to write into; created if it does not exist.

    Returns:
        List of written paths, formatted as ``{output_dir}/object_{id}.jpg``.
    """
    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(output_dir, exist_ok=True)

    saved_files = []

    for obj in result_data['objects']:
        # Remove data URL prefix
        encoded = obj['image']
        if "," in encoded:
            encoded = encoded.split(",")[1]

        filename = f"{output_dir}/object_{obj['id']}.jpg"

        with open(filename, "wb") as f:
            f.write(base64.b64decode(encoded))

        saved_files.append(filename)

    return saved_files

# Testing Section
if __name__ == "__main__":
    # Manual smoke test: load one image from disk, run the full pipeline with
    # debug output enabled, and display every extracted object.
    print("Vision Board Object Extractor V4 - Enhanced Grouping")
    print("=" * 55)
    
    # Update this path to your vision board image
    # NOTE(review): this is a machine-specific absolute path.
    image_path = "/Users/angelo/downloads/Coding3/refined-extract-boxes/images/IMG_9859.jpeg"
    
    try:
        # Convert image to base64
        image_base64 = image_file_to_base64(image_path)
        
        # Display original image
        print("Original Vision Board:")
        display_base64_image(image_base64, "Original Vision Board")
        
        # Process the image with debug enabled
        result = process_vision_board(image_base64, "vision_board.jpg", debug=True)
        
        print(f"\nProcessing Results:")
        print(f"Session ID: {result['session_id']}")
        print(f"Number of objects found: {result['num_objects_found']}")
        
        # Display all extracted objects
        if result['num_objects_found'] > 0:
            print(f"\nDisplaying {result['num_objects_found']} extracted objects:")
            
            for obj in result['objects']:
                display_base64_image(obj['image'], f"Extracted Object {obj['id']}")
        else:
            print("No objects detected. Try adjusting the parameters.")
        
        # Optionally save objects to files
        # saved_files = save_objects_to_files(result)
        # print(f"\nSaved {len(saved_files)} objects to files")
        
    except FileNotFoundError:
        # Raised when image_path does not exist (the file is opened in
        # image_file_to_base64).
        print(f"Error: Could not find image file at {image_path}")
        print("Please update the image_path variable with the correct path to your image.")
    except Exception as e:
        # Any other failure is reported as a message instead of a traceback.
        print(f"Error: {e}")

def test_functionality():
    """
    Report which of the expected pipeline functions exist in module globals.

    Prints one line per expected name. The closing success message is printed
    only when none of them is missing; otherwise the missing names are listed.
    """
    functions = [
        'base64_to_opencv_image',
        'image_file_to_base64', 
        'display_base64_image',
        'get_dominant_background_color',
        'create_object_mask',
        'group_nearby_contours',
        'filter_corner_fragments',
        'merge_contour_group',
        'is_valid_object',
        'extract_objects_from_vision_board_base64',
        'process_vision_board',
        'save_objects_to_files'
    ]

    available = globals()
    missing = []

    print("Function availability check:")
    for func_name in functions:
        if func_name in available:
            print(f"✓ {func_name} - Available")
        else:
            print(f"✗ {func_name} - Missing")
            missing.append(func_name)

    # Only claim success when every expected function was actually found.
    if missing:
        print(f"\n{len(missing)} function(s) missing: {', '.join(missing)}")
        return

    print("\nAll functions loaded successfully!")
    print("Ready to process vision board images with enhanced object grouping!")

# Run the test
test_functionality()