<a href="https://colab.research.google.com/github/ayagup/stablediffusion/blob/main/image_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# The `kagglehub` module imported here is also what the dataset-download cell
# calls, so this cell has to run first.
# NOTE(review): login() presumably prompts for Kaggle credentials -- confirm
# against the kagglehub documentation.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Fetch the 'maygup123/dataset-image-segmentation' dataset through kagglehub
# and keep whatever the call returns (presumably the local directory the
# dataset was downloaded to -- confirm against the kagglehub documentation).
# NOTE(review): no other visible cell reads this variable; the segmentation
# cell hard-codes a /kaggle/input/... path instead.
maygup123_dataset_image_segmentation_path = kagglehub.dataset_download('maygup123/dataset-image-segmentation')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory once and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
"""
Simple Image Segmentation Pipeline
Segment images using HuggingFace Transformers models
"""

import torch
from transformers import AutoImageProcessor, AutoModelForSemanticSegmentation
from PIL import Image
import numpy as np
import os
import gc
import time
from typing import Optional
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Runtime configuration passed to the ML libraries through environment variables:
#   TF_CPP_MIN_LOG_LEVEL     - set to '3' to suppress warnings (log verbosity).
#   PYTORCH_CUDA_ALLOC_CONF  - PyTorch CUDA allocator option string.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True',
})


def print_header():
    """Print the pipeline banner: a blank line, then the title framed by two 70-character rules."""
    rule = "=" * 70
    print(f"\n{rule}")
    print("üé® Image Segmentation Pipeline")
    print(rule)


def clear_gpu_memory():
    """Release cached GPU memory when CUDA is available; do nothing otherwise.

    Python garbage collection runs *before* the CUDA cache is emptied so that
    memory held by objects the collector frees is already back in the caching
    allocator when ``torch.cuda.empty_cache()`` is called. In the opposite
    order those freshly freed blocks would stay cached until the next call.
    """
    if not torch.cuda.is_available():
        return

    gc.collect()
    torch.cuda.empty_cache()
    print("üßπ GPU memory cache cleared")


def load_image(image_path: str, max_size: Optional[int] = None) -> Image.Image:
    """Load an image as RGB from a local path or an HTTP(S) URL, optionally downscaling it.

    Args:
        image_path: Local file path, or a URL starting with ``http://`` or
            ``https://``.
        max_size: Upper bound, in pixels, for the longer image side. The image
            is shrunk (aspect ratio kept) only when it exceeds this bound.
            ``None`` or ``0`` disables resizing.

    Returns:
        The loaded (and possibly resized) image in RGB mode.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        OSError: If the file cannot be opened or is not a readable image.
    """
    print(f"Loading image: {image_path}")

    if image_path.startswith(('http://', 'https://')):
        import requests
        from io import BytesIO
        # A timeout keeps a stalled server from hanging the notebook, and
        # raise_for_status() reports HTTP errors directly instead of letting
        # PIL fail later on an HTML error page.
        response = requests.get(image_path, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')
    else:
        # convert() returns an independent, fully loaded image, so the file
        # handle can be closed as soon as the conversion is done.
        with Image.open(image_path) as source:
            image = source.convert('RGB')

    original_size = image.size
    print(f"Original size: {original_size[0]}x{original_size[1]}")

    # Resize if needed
    if max_size and max(image.size) > max_size:
        ratio = max_size / max(image.size)
        # Clamp to 1 px so an extreme aspect ratio cannot truncate a side to 0,
        # which resize() rejects.
        new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
        image = image.resize(new_size, Image.LANCZOS)
        print(f"Resized to: {new_size[0]}x{new_size[1]}")

    return image


def create_segmentation_visualization(
    image: Image.Image,
    segmentation_map: np.ndarray,
    id2label: dict,
    output_path: str,
    alpha: float = 0.6
):
    """Save a three-panel figure: original image, colored class map, and overlay.

    Every class id is colored with ``tab20[class_id % 20]`` in both the plot
    and the legend. The class map is converted to an explicit RGB image before
    plotting: passing the raw id map to ``imshow(cmap='tab20')`` would rescale
    ids by the map's min/max, so plotted colors would not match a legend built
    from ``class_id % 20``.

    Args:
        image: The image that was segmented.
        segmentation_map: 2-D integer array of per-pixel class ids, with the
            same height/width as ``image``.
        id2label: Mapping from class id to class name; ids missing from it are
            drawn but left out of the legend.
        output_path: File path the figure is written to.
        alpha: Opacity of the class colors in the overlay panel (0-1).
    """
    # tab20 has 20 entries, so models with more classes reuse colors (ids wrap
    # modulo 20). Drop the alpha channel; overlay opacity comes from `alpha`.
    palette = plt.cm.tab20(np.linspace(0, 1, 20))[:, :3]
    class_ids = np.asarray(segmentation_map).astype(np.intp)
    color_mask = palette[class_ids % len(palette)]

    # Create figure with subplots
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Original image
    axes[0].imshow(image)
    axes[0].set_title('Original Image', fontsize=14, fontweight='bold')
    axes[0].axis('off')

    # Segmentation map
    axes[1].imshow(color_mask)
    axes[1].set_title('Segmentation Map', fontsize=14, fontweight='bold')
    axes[1].axis('off')

    # Overlay
    axes[2].imshow(image)
    axes[2].imshow(color_mask, alpha=alpha)
    axes[2].set_title('Overlay', fontsize=14, fontweight='bold')
    axes[2].axis('off')

    # One legend entry per class present in the map, using the plotted color.
    # tolist() yields plain Python ints for the id2label lookup.
    patches = [
        mpatches.Patch(color=palette[label_id % len(palette)], label=id2label[label_id])
        for label_id in np.unique(class_ids).tolist()
        if label_id in id2label
    ]

    # Add legend
    if patches:
        fig.legend(handles=patches, loc='center', bbox_to_anchor=(0.5, -0.05),
                   ncol=min(len(patches), 5), fontsize=10)

    fig.tight_layout()
    fig.savefig(output_path, bbox_inches='tight', dpi=150)
    # Close this specific figure rather than whichever one pyplot considers current.
    plt.close(fig)

    print(f"‚úì Segmentation visualization saved: {output_path}")


def segment_image(
    image_path: str,
    model_name: str = "nvidia/segformer-b0-finetuned-ade-512-512",
    output_path: str = "segmented_image.png",
    max_image_size: Optional[int] = 1024,
    overlay_alpha: float = 0.6,
) -> tuple:
    """
    Segment image using semantic segmentation model

    Loads the image, loads the model and its image processor, runs one forward
    pass, upsamples the logits to the (possibly resized) image resolution,
    takes the per-pixel argmax, prints per-class pixel statistics and writes a
    visualization to ``output_path``.

    Args:
        image_path: Path to input image or URL
        model_name: HuggingFace model identifier
        output_path: Output path for visualization
        max_image_size: Maximum image dimension
        overlay_alpha: Transparency for overlay (0-1)

    Returns:
        Tuple of (segmentation_map, class_counts, id2label), where
        ``segmentation_map`` is a 2-D array of class ids, ``class_counts`` maps
        class name to ``{'pixels': int, 'percentage': float}``, and
        ``id2label`` is the model's id-to-name mapping.
    """

    print_header()
    clear_gpu_memory()

    # Device setup
    device = "cuda" if torch.cuda.is_available() else "cpu"

    if device == "cuda":
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
        print(f"\nüñ•Ô∏è  Using GPU: {gpu_name}")
        print(f"   Memory: {gpu_memory:.1f} GB\n")
    else:
        print("\nüíª Using CPU\n")

    # Load image
    image = load_image(image_path, max_image_size)

    # Load model
    print(f"\nLoading model: {model_name}")
    start_time = time.time()

    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModelForSemanticSegmentation.from_pretrained(model_name)
    model = model.to(device)
    model.eval()

    load_time = time.time() - start_time
    print(f"‚úì Model loaded in {load_time:.2f}s")

    # Get label mapping
    id2label = model.config.id2label
    num_classes = len(id2label)
    print(f"‚úì Model supports {num_classes} classes")

    # Print segmentation parameters
    print("\n" + "="*70)
    print("üé¨ Segmentation Parameters")
    print("="*70)
    print(f"Input image: {image_path}")
    print(f"Image size: {image.size[0]}x{image.size[1]}")
    print(f"Model: {model_name}")
    print(f"Device: {device}")
    print(f"Number of classes: {num_classes}")
    print("="*70)

    # Prepare inputs
    print("\nüé® Segmenting image...")
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference
    inference_start = time.time()
    with torch.no_grad():
        outputs = model(**inputs)

    inference_time = time.time() - inference_start
    print(f"‚úì Inference completed in {inference_time:.3f}s")

    # Post-process
    print("\nüìä Processing segmentation map...")

    # Get logits and convert to segmentation map
    logits = outputs.logits

    # Resize to original image size
    upsampled_logits = torch.nn.functional.interpolate(
        logits,
        size=image.size[::-1],  # PIL size is (width, height); interpolate wants (height, width)
        mode='bilinear',
        align_corners=False
    )

    # Get predictions
    segmentation_map = upsampled_logits.argmax(dim=1)[0].cpu().numpy()

    # Count classes
    unique_labels, counts = np.unique(segmentation_map, return_counts=True)
    total_pixels = segmentation_map.size

    # tolist() yields plain Python ints for the id2label lookup.
    class_counts = {
        id2label[label_id]: {
            'pixels': count,
            'percentage': (count / total_pixels) * 100,
        }
        for label_id, count in zip(unique_labels.tolist(), counts.tolist())
        if label_id in id2label
    }

    # Print results
    print("\n" + "="*70)
    print("üèÜ Segmentation Results")
    print("="*70)
    print(f"\nTotal classes detected: {len(unique_labels)}")
    print(f"Image resolution: {image.size[0]}x{image.size[1]} ({total_pixels:,} pixels)")
    print("\nClass breakdown:")

    # Sort by percentage
    sorted_classes = sorted(class_counts.items(), key=lambda x: x[1]['percentage'], reverse=True)

    for class_name, stats in sorted_classes:
        print(f"  {class_name:20s}: {stats['percentage']:5.2f}% ({stats['pixels']:,} pixels)")

    print("\n" + "="*70)

    # Create visualization
    print("\nüé® Creating visualization...")
    create_segmentation_visualization(
        image, segmentation_map, id2label, output_path, overlay_alpha
    )

    # Summary
    # start_time was taken just before model loading, so this elapsed time
    # already contains load_time; adding load_time again would count model
    # loading twice in both the total and the remainder below.
    total_time = time.time() - start_time
    print("\n" + "="*70)
    print("‚úÖ Segmentation Complete!")
    print("="*70)
    print(f"Total time: {total_time:.2f}s")
    print(f"  - Model loading: {load_time:.2f}s")
    print(f"  - Inference: {inference_time:.3f}s")
    # Remainder: input preparation, upsampling/argmax, statistics and plotting.
    print(f"  - Post-processing: {total_time - load_time - inference_time:.2f}s")
    print(f"\nOutput saved: {output_path}")
    print(f"Classes found: {len(unique_labels)}")
    print("="*70 + "\n")

    # Drop the references to the model and intermediate tensors first;
    # otherwise they are still alive and the cache clear cannot release them.
    del model, inputs, outputs, logits, upsampled_logits
    clear_gpu_memory()

    return segmentation_map, class_counts, id2label




2025-10-18 19:22:09.047009: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760815329.263538      37 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760815329.341480      37 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# if __name__ == "__main__":
#     import argparse

#     parser = argparse.ArgumentParser(
#         description='Simple Image Segmentation',
#         formatter_class=argparse.RawDescriptionHelpFormatter,
#         epilog="""
# Examples:
#   # Basic segmentation with SegFormer
#   python simple_image_segmentation.py --image photo.jpg

#   # Use different model
#   python simple_image_segmentation.py --image photo.jpg --model nvidia/segformer-b2-finetuned-ade-512-512

#   # Segment from URL
#   python simple_image_segmentation.py --image https://example.com/photo.jpg

#   # Adjust overlay transparency
#   python simple_image_segmentation.py --image photo.jpg --alpha 0.4
#         """
#     )

#     parser.add_argument('--image', type=str, required=True,
#                         help='Path to input image or URL')
#     parser.add_argument('--model', type=str,
#                         default='nvidia/segformer-b0-finetuned-ade-512-512',
#                         help='HuggingFace model name')
#     parser.add_argument('--output', type=str, default='segmented_image.png',
#                         help='Output path for visualization')
#     parser.add_argument('--max-size', type=int, default=1024,
#                         help='Maximum image dimension')
#     parser.add_argument('--alpha', type=float, default=0.6,
#                         help='Overlay transparency (0-1)')

#     args = parser.parse_args()

# Fixed settings for this notebook run (stand-in for the commented-out CLI above).
run_config = {
    'image_path': '/kaggle/input/dataset-image-segmentation/detected_objects (1).jpg',
    'model_name': 'nvidia/segformer-b0-finetuned-ade-512-512',
    'output_path': '/kaggle/working/segmented_image.png',
    'max_image_size': 1024,
    'overlay_alpha': 0.6,
}

# Top-level boundary: report any failure with its traceback instead of raising.
try:
    segment_image(**run_config)
except Exception as err:
    print(f"\n‚ùå Error: {err}")
    import traceback
    traceback.print_exc()



üé® Image Segmentation Pipeline

üíª Using CPU

Loading image: /kaggle/input/dataset-image-segmentation/detected_objects (1).jpg
Original size: 5656x4244
Resized to: 1024x768

Loading model: nvidia/segformer-b0-finetuned-ade-512-512


preprocessor_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/15.0M [00:00<?, ?B/s]

‚úì Model loaded in 2.21s
‚úì Model supports 150 classes

üé¨ Segmentation Parameters
Input image: /kaggle/input/dataset-image-segmentation/detected_objects (1).jpg
Image size: 1024x768
Model: nvidia/segformer-b0-finetuned-ade-512-512
Device: cpu
Number of classes: 150

üé® Segmenting image...
‚úì Inference completed in 0.724s

üìä Processing segmentation map...

üèÜ Segmentation Results

Total classes detected: 11
Image resolution: 1024x768 (786,432 pixels)

Class breakdown:
  wall                : 82.62% (649,750 pixels)
  chair               :  8.64% (67,918 pixels)
  painting            :  5.59% (43,931 pixels)
  basket              :  2.49% (19,614 pixels)
  plate               :  0.28% (2,203 pixels)
  lamp                :  0.13% (995 pixels)
  cushion             :  0.11% (838 pixels)
  floor               :  0.06% (478 pixels)
  table               :  0.05% (380 pixels)
  box                 :  0.02% (169 pixels)
  book                :  0.02% (156 pixels)


üé® Creating 