<a href="https://colab.research.google.com/github/edwardoughton/satellite-image-analysis/blob/main/04_01_ggs416_26_02_16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🛰️ GGS416 Satellite Image Analysis Week 4 🛰️

This week we will cover:

 * Classical image segmentation approaches


# Learning objectives

By the end of this week, students will be able to:

* Explain practical foundations of classical image segmentation methods.
* Implement histogram-based segmentation using Otsu and Multi-Otsu thresholding.
* Apply clustering-based segmentation using K-means in RGB feature space.
* Generate superpixels using SLIC and interpret compactness effects.
* Perform marker-controlled watershed segmentation.
* Apply graph-based segmentation using the Felzenszwalb algorithm.
* Compare segmentation outputs across heterogeneous landscapes (urban, agricultural, forest, coastal).
* Analyze the influence of algorithm parameters on segmentation results.
* Critically evaluate strengths and weaknesses of classical segmentation approaches for satellite imagery.



# GitHub Copilot Pro

Please sign up:

https://docs.github.com/en/copilot/how-tos/manage-your-account/get-free-access-to-copilot-pro

# Visual Studio Code

Please install before next week:

https://code.visualstudio.com/

# AI advice

* Make sure you are using the newest models, where possible.
* Free apps will have more outdated models.
* Do use your educational email for free access to certain products.

In [None]:
# Example: Setup and location presets
!pip -q install pystac-client planetary-computer odc-stac rasterio requests

import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import rasterio
from rasterio.transform import from_origin
from rasterio.crs import CRS

import pystac_client
import planetary_computer
import odc.stac
from pystac.extensions.eo import EOExtension as eo

In [None]:
# Example: Location presets and search STAC
# IPython shell escape (notebook-only syntax); same install line as the setup cell.
!pip -q install pystac-client planetary-computer odc-stac rasterio requests

# STAC endpoint opened below, and the collection ID passed to catalog.search().
STAC_API_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"
COLLECTION = "landsat-c2-l2"

# 5 locations (min_lon, min_lat, max_lon, max_lat) in EPSG:4326
# Each box spans 0.30 degrees in both directions. The dictionary keys are reused
# as file-name stems when the GeoTIFFs are written.
LOCATION_PRESETS = {
    # Ames, Iowa — agriculture patterns
    "agriculture_iowa_ames": [
        -93.77, 41.88,
        -93.47, 42.18
    ],

    # Downtown Los Angeles — dense urban core
    "urban_los_angeles_downtown": [
        -118.375, 33.90,
        -118.075, 34.20
    ],

    # New Orleans — Mississippi River bends + wetlands
    "coastal_louisiana_new_orleans": [
        -90.25, 29.80,
        -89.95, 30.10
    ],

    # Phoenix — desert–urban interface
    "desert_arizona_phoenix": [
        -112.25, 33.30,
        -111.95, 33.60
    ],

    # Mount Jefferson area — Cascades terrain + forest
    "forest_oregon_mt_jefferson": [
        -121.95, 44.50,
        -121.65, 44.80
    ],
}

# Passed as the STAC `datetime` filter (start/end separated by "/").
time_of_interest = "2024-05-01/2024-09-30"
# Scenes must report eo:cloud_cover strictly below this value.
cloud_limit = 10  # percent

# Output folder for the downloaded GeoTIFFs; created if it does not exist yet.
DATA_DIR = Path("data_landsat")
DATA_DIR.mkdir(exist_ok=True)

# Open the catalog. NOTE(review): the `sign_inplace` modifier presumably signs
# asset URLs so they can be downloaded — confirm against the planetary-computer docs.
catalog = pystac_client.Client.open(
    STAC_API_URL,
    modifier=planetary_computer.sign_inplace
)

print(catalog)

In [None]:
# Example: Cropping

def save_rgb_geotiff_from_odc(data, out_path):
    """
    Write the red, green and blue variables of an odc-stac dataset to a
    3-band GeoTIFF.

    The CRS and affine transform are taken from the dataset's geobox, so the
    output stays georeferenced and can be cropped with rasterio afterwards.

    Parameters
    ----------
    data : dataset loaded with odc-stac, holding "red", "green" and "blue"
        variables for a single time step.
    out_path : destination path of the GeoTIFF.

    Returns
    -------
    The same `out_path` that was passed in.
    """
    # Stack the three variables into one (bands, rows, cols) array.
    band_stack = data[["red", "green", "blue"]].to_array().values

    # Georeferencing comes straight from the odc-stac geobox.
    geobox = data.odc.geobox

    with rasterio.open(
        out_path,
        "w",
        driver="GTiff",
        height=band_stack.shape[1],
        width=band_stack.shape[2],
        count=3,
        dtype=band_stack.dtype,
        crs=geobox.crs,
        transform=geobox.transform,
    ) as dst:
        dst.write(band_stack)

    return out_path


In [None]:
# Example: Multi-image downloading and cropping
downloaded_rgb_tifs = []

# Bands requested from every scene (the same for all locations).
bands = ["red", "green", "blue"]


def _cloud_cover(item):
    """Return the scene's cloud cover as exposed by the EO extension."""
    return eo.ext(item).cloud_cover


for location_name, bbox in LOCATION_PRESETS.items():
    print(f"\n--- {location_name} ---")

    # Query the catalog for low-cloud scenes over this bounding box.
    items = catalog.search(
        collections=[COLLECTION],
        bbox=bbox,
        datetime=time_of_interest,
        query={"eo:cloud_cover": {"lt": cloud_limit}},
    ).item_collection()

    if not items:
        print("No scenes found.")
        continue

    # Lecture 2: keep the single scene with the lowest cloud cover.
    selected_item = min(items, key=_cloud_cover)

    print(
        f"Chosen: {selected_item.id}  |  Date: {selected_item.datetime.date()}  "
        f"|  Cloud: {selected_item.properties['eo:cloud_cover']}%"
    )

    # Lecture 2: load the RGB bands with odc-stac, limited to the bbox,
    # and drop the (length-1) time dimension.
    loaded = odc.stac.stac_load([selected_item], bands=bands, bbox=bbox)
    data = loaded.isel(time=0)

    # Save as "<location>_rgb.tif" and remember the path for the crop step.
    out_tif = DATA_DIR / f"{location_name}_rgb.tif"
    save_rgb_geotiff_from_odc(data, out_tif)
    downloaded_rgb_tifs.append(out_tif)

    print("Saved:", out_tif)


In [None]:
# Example: Crop images (from Lecture 3)
from rasterio.windows import from_bounds
from rasterio.warp import transform_bounds

CROPPED_DIR = DATA_DIR / "cropped"
CROPPED_DIR.mkdir(exist_ok=True)

def crop_geotiff_to_bbox(in_path, out_path, bbox_wgs84):
    """
    Crop a GeoTIFF to a lon/lat bbox using the Lecture 3 approach.

    Parameters
    ----------
    in_path : path of the georeferenced input GeoTIFF.
    out_path : path the cropped GeoTIFF is written to.
    bbox_wgs84 : (min_lon, min_lat, max_lon, max_lat) in EPSG:4326.

    Returns
    -------
    The same `out_path` that was passed in.

    Raises
    ------
    rasterio.errors.WindowError
        If the bbox does not overlap the raster at all.
    """
    with rasterio.open(in_path) as src:
        # 1) Convert bbox from EPSG:4326 to the raster CRS (UTM for Landsat)
        minlon, minlat, maxlon, maxlat = bbox_wgs84
        minx, miny, maxx, maxy = transform_bounds(
            "EPSG:4326", src.crs, minlon, minlat, maxlon, maxlat, densify_pts=21
        )

        # 2) Lecture 3: build a raster window from bounds
        win = from_bounds(minx, miny, maxx, maxy, src.transform)

        # Keep the window inside the raster, then snap it to whole pixels.
        # from_bounds() returns fractional offsets/lengths; without snapping,
        # the transform written below would describe a slightly different
        # footprint than the pixels that are actually read.
        full_extent = rasterio.windows.Window(0, 0, src.width, src.height)
        win = win.intersection(full_extent)
        win = win.round_offsets().round_lengths()

        # 3) Read just that window
        data = src.read(window=win)

        # 4) Update metadata and write output (Lecture 3).
        # Height/width come from the array that was actually read, so the
        # profile always matches the data.
        prof = src.profile
        prof.update(
            height=data.shape[1],
            width=data.shape[2],
            transform=rasterio.windows.transform(win, src.transform),
        )

        with rasterio.open(out_path, "w", **prof) as dst:
            dst.write(data)

    return out_path

cropped_tifs = []

# Crop every downloaded scene back to the bbox it was requested with.
for tif_path in downloaded_rgb_tifs:
    # "<location>_rgb" -> "<location>", which is the LOCATION_PRESETS key.
    location_name = tif_path.stem.replace("_rgb", "")
    bbox = LOCATION_PRESETS[location_name]

    # crop_geotiff_to_bbox() hands back the output path it was given.
    out_path = crop_geotiff_to_bbox(
        tif_path,
        CROPPED_DIR / f"{location_name}_rgb_cropped.tif",
        bbox,
    )
    cropped_tifs.append(out_path)

    print("Cropped:", out_path.name)


In [None]:
# Example: Plot images (from Lecture 1)
# Overview figure: one panel per cropped scene on a 2 x 3 grid (up to 6 scenes).
fig, axes = plt.subplots(2, 3, figsize=(16, 10))
axes = axes.flatten()

for i, ax in enumerate(axes):
    # Panels without a matching image are simply blanked out.
    if i >= len(cropped_tifs):
        ax.axis("off")
        continue

    path = cropped_tifs[i]
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype("float32")
        rgb = rgb / (rgb.max() + 1e-6)          # normalize for display
        rgb = rgb.transpose(1, 2, 0)            # (rows, cols, bands)

    ax.imshow(rgb)
    ax.set_title(label, fontsize=14)
    ax.axis("off")

    # Panel letter (A, B, C...)
    ax.text(
        0.02, 0.98, chr(ord("A") + i),
        transform=ax.transAxes,
        va="top", ha="left",
        fontsize=14,
        bbox=dict(facecolor="white", alpha=0.7, edgecolor="none", pad=3)
    )

# The title previously contained a mis-encoded em dash.
fig.suptitle("Landsat RGB — Download + Crop", fontsize=18)
# Leave the top 5% of the figure free for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()


# Otsu thresholding

Segmentation approach:
- Splits a grayscale image into foreground and background regions.
- Automatically selects a threshold based on the image intensity histogram.

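Otsu selects the threshold that maximizes the between-class variance of the two groups (equivalently, it minimizes the variance within each group). Intuitively, it separates the pixels into two groups whose average intensities are as far apart as possible, weighted by the size of each group.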

Hence, we find the threshold that best separates foreground from background based on histogram structure.


Key parameters:
- No manual threshold required.
- Output quality depends on how well separated the peaks of the intensity histogram are.

Key papers:
- Otsu, N. (1979). *A Threshold Selection Method from Gray-Level Histograms*. IEEE Trans. SMC, 9(1), 62-66.
- Sezgin, M., & Sankur, B. (2004). *Survey over image thresholding techniques and quantitative performance evaluation*. J. Electronic Imaging, 13(1), 146-165.

In [None]:
# Example:
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.color import rgb2gray
from skimage.filters import threshold_otsu

# Otsu thresholding on every cropped scene: original (left) vs result (right).
image_files = cropped_tifs[:5]  # your cropped GeoTIFFs

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 12))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

for row, path in enumerate(image_files):

    # --- Load GeoTIFF properly ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype("float32")  # (3, H, W)
        rgb = rgb.transpose(1, 2, 0)                 # (H, W, 3)

    # Normalize for display
    rgb = rgb / (np.nanmax(rgb) + 1e-6)

    # --- Otsu threshold ---
    gray = rgb2gray(rgb)
    t = threshold_otsu(gray)
    mask = gray > t  # True = pixels brighter than the threshold

    # Dim background for visualization
    otsu_vis = rgb.copy()
    otsu_vis[~mask] *= 0.25

    # Label
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot original ---
    axL = axes[row, 0]
    axL.imshow(rgb)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # --- Plot Otsu result ---
    axR = axes[row, 1]
    axR.imshow(otsu_vis)
    axR.set_title(f"{label} — Otsu Threshold", fontsize=12)
    axR.axis("off")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs Otsu Segmentation (Right)", fontsize=16)
plt.show()


# Multi-Otsu thresholding (3 classes)

Segmentation approach:
- Extends Otsu to multiple classes.
- Useful for coarse land-cover partitioning (dark/mid/bright regions).



Key parameters:
- `classes=3` (can also try 4 or 5).
- Sensitive to histogram shape and contrast.

Key papers:
- Otsu, N. (1979). *A Threshold Selection Method from Gray-Level Histograms*. IEEE Trans. SMC, 9(1), 62-66.
- Liao, P.-S., Chen, T.-S., & Chung, P.-C. (2001). *A fast algorithm for multilevel thresholding*. JISE, 17(5), 713-727.



In [None]:
# Example:
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.color import rgb2gray
from skimage.filters import threshold_multiotsu

# Multi-Otsu (3 classes) on every cropped scene: original (left) vs classes (right).
image_files = cropped_tifs[:5]  # your cropped GeoTIFFs

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 12))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

for row, path in enumerate(image_files):

    # --- Load GeoTIFF properly ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype("float32")  # (3, H, W)
        rgb = rgb.transpose(1, 2, 0)                 # (H, W, 3)

    # Normalize for display
    rgb = rgb / (np.nanmax(rgb) + 1e-6)

    # --- Multi-Otsu threshold (3 classes) ---
    gray = rgb2gray(rgb)

    # thresholds will have length classes-1 = 2
    t1, t2 = threshold_multiotsu(gray, classes=3)

    # Convert grayscale into class labels: 0,1,2
    seg = np.digitize(gray, bins=[t1, t2])

    # --- Visualization: paint each class with a flat color ---
    # Start from a dimmed copy of the original. Every pixel falls into one of
    # the three classes, so the dimmed background ends up fully painted over.
    vis = (rgb * 0.35).copy()

    # Colorize each class (no need for fancy colormaps; explicit & teachable)
    # class 0 = dark pixels
    vis[seg == 0] = [0.10, 0.20, 0.80]  # blue
    # class 1 = mid pixels
    vis[seg == 1] = [0.20, 0.80, 0.20]  # green
    # class 2 = bright pixels
    vis[seg == 2] = [0.90, 0.20, 0.20]  # red

    # Label
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot original ---
    axL = axes[row, 0]
    axL.imshow(rgb)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # --- Plot Multi-Otsu result ---
    axR = axes[row, 1]
    axR.imshow(vis)
    axR.set_title(f"{label} — Multi-Otsu (3 classes)", fontsize=12)
    axR.axis("off")

    # Optional: show thresholds in the console
    print(f"{label}: t1={t1:.3f}, t2={t2:.3f}")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs Multi-Otsu 3-Class Segmentation (Right)", fontsize=16)
plt.show()


In [None]:
# Example:
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.color import rgb2gray
from skimage.filters import threshold_multiotsu

def show_new_orleans_multi_otsu(cropped_tifs):
    """
    Plot the 3-class Multi-Otsu segmentation of the New Orleans scene.

    Parameters
    ----------
    cropped_tifs : list of paths to the cropped GeoTIFFs; the first one whose
        file name contains "new_orleans" is used.

    Raises
    ------
    ValueError
        If no path in `cropped_tifs` contains "new_orleans".
    """
    # Find the New Orleans file (fail with a clear message if it is missing,
    # e.g. because no scene was found for that location).
    matches = [p for p in cropped_tifs if "new_orleans" in p.stem.lower()]
    if not matches:
        raise ValueError("Could not find a New Orleans file in cropped_tifs.")
    path = matches[0]

    # Load GeoTIFF as (rows, cols, bands)
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype("float32")
        rgb = rgb.transpose(1, 2, 0)

    # Normalize
    rgb = rgb / (np.nanmax(rgb) + 1e-6)

    # Multi-Otsu (3 classes): two thresholds -> class labels 0, 1, 2
    gray = rgb2gray(rgb)
    thresholds = threshold_multiotsu(gray, classes=3)
    seg = np.digitize(gray, bins=thresholds)

    # Plot full-size
    plt.figure(figsize=(10, 10))
    plt.imshow(seg, cmap="viridis")
    plt.title("New Orleans — Multi-Otsu (3 Classes)", fontsize=16)
    plt.axis("off")
    plt.show()

    print(f"Thresholds: {thresholds}")

show_new_orleans_multi_otsu(cropped_tifs)


# K-means (RGB clustering)

Segmentation approach:
- Clusters pixels in color space.
- Each pixel is assigned to nearest color centroid.

So, K-means segmentation groups pixels based on similarity in color space.

For RGB images:
- Each pixel is treated as a 3D vector $x_i = [R_i, G_i, B_i]$.
- Hence, pixels are clustered in this 3D space.
- Each pixel is assigned to the nearest cluster centroid.

Unlike Otsu (which uses intensity only), K-means operates in multivariate feature space.

Key parameters:
- `k`: number of clusters.
- Iterations and initialization affect stability.


Key papers:
- MacQueen, J. (1967). *Some Methods for classification and Analysis of Multivariate Observations*. Proc. 5th Berkeley Symposium.
- Lloyd, S. (1982). *Least squares quantization in PCM*. IEEE Trans. Information Theory, 28(2), 129-137.

In [None]:
# Example: K-means RGB segmentation
import numpy as np
import matplotlib.pyplot as plt
import rasterio

def kmeans_segment_rgb_uint8(rgb_uint8, k=5, iterations=18, seed=42):
    """
    Very simple K-means on RGB pixels.

    Alternates between assigning every pixel to its nearest centroid and
    moving each centroid to the mean of its pixels, for at most `iterations`
    rounds or until no centroid coordinate moves by more than 0.5.

    Parameters
    ----------
    rgb_uint8 : uint8 image in [0..255], shape (H, W, 3).
    k : number of clusters; must not exceed the number of pixels.
    iterations : maximum number of assign/update rounds. With 0, pixels are
        simply mapped to the randomly chosen starting centroids.
    seed : seed for the random choice of the starting centroids.

    Returns
    -------
    uint8 image of shape (H, W, 3) where each pixel is replaced by its
    cluster centroid color.
    """
    h, w, _ = rgb_uint8.shape
    X = rgb_uint8.reshape(-1, 3).astype(np.float32)

    # Start from k distinct pixels picked at random (reproducible via seed).
    rng = np.random.default_rng(seed)
    centers = X[rng.choice(X.shape[0], size=k, replace=False)]

    def nearest_center(current_centers):
        """Index of the closest centroid for every pixel (squared distance)."""
        d2 = np.sum((X[:, None, :] - current_centers[None, :, :]) ** 2, axis=2)
        return np.argmin(d2, axis=1)

    labels = None
    for _ in range(iterations):
        # Assign to nearest centroid
        labels = nearest_center(centers)

        # Update centroids; a cluster that lost all its pixels keeps its position
        new_centers = centers.copy()
        for j in range(k):
            pts = X[labels == j]
            if pts.size:
                new_centers[j] = pts.mean(axis=0)

        # Stop if stable
        converged = np.allclose(new_centers, centers, atol=0.5)
        centers = new_centers
        if converged:
            break

    # iterations <= 0: the loop never ran, so no assignment exists yet.
    if labels is None:
        labels = nearest_center(centers)

    out = centers[labels].reshape(h, w, 3)
    return np.clip(out, 0, 255).astype(np.uint8)


# ---- Apply to your 5 cropped GeoTIFFs and plot before/after ----
# K-means color clustering on every cropped scene: original (left) vs result (right).
image_files = cropped_tifs[:5]

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 20))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

k = 5  # number of color clusters

for row, path in enumerate(image_files):

    # Read GeoTIFF (bands, H, W) -> (H, W, 3)
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Convert to uint8 for a visible effect
    # (percentile stretch tends to look better than max-normalization)
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb01 = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)
    rgb8  = (rgb01 * 255).astype(np.uint8)

    # Replace every pixel by the color of its cluster centroid
    kmeans_rgb = kmeans_segment_rgb_uint8(rgb8, k=k, iterations=18)

    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # Left: original
    axL = axes[row, 0]
    axL.imshow(rgb8)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # Right: k-means quantized
    axR = axes[row, 1]
    axR.imshow(kmeans_rgb)
    axR.set_title(f"{label} — K-means (k={k})", fontsize=12)
    axR.axis("off")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs K-means RGB Clustering (Right)", fontsize=16)
plt.show()


In [None]:
# Example: Single-location K-means
import numpy as np
import matplotlib.pyplot as plt
import rasterio

def kmeans_segment_rgb_uint8(rgb_uint8, k=5, iterations=18, seed=42):
    """
    Quantize an RGB image to `k` colors with a basic K-means.

    Parameters
    ----------
    rgb_uint8 : uint8 image in [0..255], shape (H, W, 3).
    k : number of clusters; must not exceed the number of pixels.
    iterations : maximum number of assign/update rounds. With 0, pixels are
        simply mapped to the randomly chosen starting centroids.
    seed : seed for the random choice of the starting centroids.

    Returns
    -------
    uint8 image of shape (H, W, 3) in which every pixel carries the color of
    its cluster centroid.
    """
    h, w, _ = rgb_uint8.shape
    X = rgb_uint8.reshape(-1, 3).astype(np.float32)

    # Starting centroids: k distinct pixels drawn with a seeded generator.
    rng = np.random.default_rng(seed)
    centers = X[rng.choice(X.shape[0], size=k, replace=False)]

    def nearest_center(current_centers):
        """Index of the closest centroid for every pixel (squared distance)."""
        d2 = np.sum((X[:, None, :] - current_centers[None, :, :]) ** 2, axis=2)
        return np.argmin(d2, axis=1)

    labels = None
    for _ in range(iterations):
        labels = nearest_center(centers)

        # Move each centroid to the mean of its pixels; empty clusters stay put.
        new_centers = centers.copy()
        for j in range(k):
            pts = X[labels == j]
            if pts.size:
                new_centers[j] = pts.mean(axis=0)

        # Stop once no centroid coordinate moved by more than 0.5.
        converged = np.allclose(new_centers, centers, atol=0.5)
        centers = new_centers
        if converged:
            break

    # iterations <= 0: the loop never ran, so no assignment exists yet.
    if labels is None:
        labels = nearest_center(centers)

    out = centers[labels].reshape(h, w, 3)
    return np.clip(out, 0, 255).astype(np.uint8)


def show_la_kmeans(cropped_tifs, k=5, iterations=18, seed=42):
    """
    Plot the Los Angeles scene next to its K-means color-quantized version.

    Parameters
    ----------
    cropped_tifs : list of paths to the cropped GeoTIFFs; the first one whose
        file name contains "los_angeles" is used.
    k, iterations, seed : forwarded to `kmeans_segment_rgb_uint8`.

    Raises
    ------
    ValueError
        If no path in `cropped_tifs` contains "los_angeles".
    """
    # Find the LA file (works with your naming: "urban_los_angeles_downtown_...")
    matches = [p for p in cropped_tifs if "los_angeles" in p.stem.lower()]
    if not matches:
        raise ValueError("Could not find a Los Angeles file in cropped_tifs (look for 'los_angeles' in filename).")
    path = matches[0]

    # Load GeoTIFF RGB as (rows, cols, bands)
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch -> uint8 for nicer display and clearer k-means effect
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb01 = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)
    rgb8 = (rgb01 * 255).astype(np.uint8)

    # K-means "after"
    kmeans_rgb = kmeans_segment_rgb_uint8(rgb8, k=k, iterations=iterations, seed=seed)

    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # Full-size side-by-side
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    axes[0].imshow(rgb8)
    axes[0].set_title(f"{label} — Original", fontsize=14)
    axes[0].axis("off")

    axes[1].imshow(kmeans_rgb)
    axes[1].set_title(f"{label} — K-means (k={k})", fontsize=14)
    axes[1].axis("off")

    fig.suptitle("Los Angeles: RGB vs K-means Color Clustering", fontsize=16)
    plt.tight_layout()
    plt.show()

# Run it:
show_la_kmeans(cropped_tifs, k=5)


# Simple Linear Iterative Clustering (SLIC) Superpixels

SLIC (Simple Linear Iterative Clustering) groups pixels using a combined color + spatial distance metric.

Segmentation approach:
- Groups nearby pixels into compact superpixels.
- Balances color similarity and spatial proximity.


Key parameters:
- `n_segments`: number of superpixels.
- `compactness`: spatial regularity vs boundary adherence.


Key papers:
- Achanta, R., et al. (2012). *SLIC Superpixels Compared to State-of-the-art Superpixel Methods*. IEEE TPAMI, 34(11), 2274-2282.
- Stutz, D., Hermans, A., & Leibe, B. (2018). *Superpixels: An Evaluation of the State-of-the-Art*. CVIU, 166, 1-27.

In [None]:
# Example: Superpixels (SLIC)
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.segmentation import slic, mark_boundaries

# SLIC superpixels on every cropped scene: original (left) vs boundaries (right).
image_files = cropped_tifs[:5]

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 20))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

for row, path in enumerate(image_files):

    # --- Load GeoTIFF properly ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch -> [0,1] float (helps SLIC + display)
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- SLIC superpixels ---
    slic_labels = slic(
        rgb_float,
        n_segments=250,
        compactness=12.0,
        sigma=1.0,
        start_label=1
    )

    # Draw boundaries on top of the image (yellow boundary color)
    boundary_overlay = mark_boundaries(rgb_float, slic_labels, color=(1, 1, 0))

    # Label
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot original ---
    axL = axes[row, 0]
    axL.imshow(rgb_float)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # --- Plot superpixels overlay ---
    axR = axes[row, 1]
    axR.imshow(boundary_overlay)
    axR.set_title(f"{label} — SLIC superpixels", fontsize=12)
    axR.axis("off")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs SLIC Superpixels (Right)", fontsize=16)
plt.show()


In [None]:
# Example: Superpixels focusing on Phoenix, AZ
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.segmentation import slic, mark_boundaries

def show_phoenix_slic(cropped_tifs, n_segments=300, compactness=10.0, sigma=1.0):
    """
    Plot the Phoenix scene next to its SLIC superpixel boundaries.

    Parameters
    ----------
    cropped_tifs : list of paths to the cropped GeoTIFFs; the first one whose
        file name contains "phoenix" is used.
    n_segments, compactness, sigma : forwarded to `skimage.segmentation.slic`.
        The defaults reproduce the values that used to be hard-coded here, so
        the parameters can now be varied without editing the function.

    Raises
    ------
    ValueError
        If no path in `cropped_tifs` contains "phoenix".
    """
    # Find the Phoenix file
    matches = [p for p in cropped_tifs if "phoenix" in p.stem.lower()]
    if not matches:
        raise ValueError("Could not find a Phoenix file in cropped_tifs.")
    path = matches[0]

    # --- Load GeoTIFF as (rows, cols, bands) ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch for better contrast
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- SLIC superpixels ---
    slic_labels = slic(
        rgb_float,
        n_segments=n_segments,
        compactness=compactness,
        sigma=sigma,
        start_label=1
    )

    # Yellow superpixel boundaries drawn over the image
    overlay = mark_boundaries(rgb_float, slic_labels, color=(1, 1, 0))

    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot side-by-side ---
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    axes[0].imshow(rgb_float)
    axes[0].set_title(f"{label} — Original", fontsize=14)
    axes[0].axis("off")

    axes[1].imshow(overlay)
    axes[1].set_title(f"{label} — SLIC Superpixels", fontsize=14)
    axes[1].axis("off")

    fig.suptitle("Phoenix: RGB vs SLIC Superpixel Segmentation", fontsize=16)
    plt.tight_layout()
    plt.show()


# Run it
show_phoenix_slic(cropped_tifs)


# Watershed segmentation

Segmentation approach:
- Treats the gradient magnitude image as a topographic surface and performs region flooding from predefined markers.
- Produces region boundaries from gradient basins.


Key parameters:
- Marker strategy strongly affects output.
- `min_distance` in peak detection controls seed density.


Key papers:
- Vincent, L., & Soille, P. (1991). *Watersheds in digital spaces: An efficient algorithm based on immersion simulations*. IEEE TPAMI, 13(6), 583-598.
- Roerdink, J. B. T. M., & Meijster, A. (2000). *The Watershed Transform: Definitions, Algorithms and Parallelization Strategies*. Fundamenta Informaticae, 41, 187-228.

In [None]:
# Example: Watershed segmentation
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from scipy import ndimage as ndi
from skimage.color import rgb2gray
from skimage.feature import peak_local_max
from skimage.filters import sobel, threshold_otsu
from skimage.segmentation import watershed, mark_boundaries

# Marker-controlled watershed on every cropped scene: original (left) vs boundaries (right).
image_files = cropped_tifs[:5]

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 20))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

for row, path in enumerate(image_files):

    # --- Load GeoTIFF (RGB) ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch -> [0,1] float for display/segmentation
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- Watershed pipeline ---
    gray = rgb2gray(rgb_float)

    # Edge strength is the surface that gets flooded
    gradient = sobel(gray)
    otsu_t = threshold_otsu(gray)

    # Only pixels brighter than the Otsu threshold are segmented
    mask = gray > otsu_t
    distance = ndi.distance_transform_edt(mask)

    # local maxima of the distance map as markers
    marker_coords = peak_local_max(distance, min_distance=15, labels=mask)

    markers = np.zeros_like(gray, dtype=np.int32)
    if len(marker_coords) > 0:
        markers[tuple(marker_coords.T)] = np.arange(1, len(marker_coords) + 1)
    else:
        # No peak found: fall back to a single seed at the image centre
        h, w = gray.shape
        markers[h // 2, w // 2] = 1

    # Relabel so that touching marker pixels share one label
    markers, _ = ndi.label(markers > 0)

    ws_labels = watershed(gradient, markers, mask=mask)

    # boundaries overlay (red)
    overlay = mark_boundaries(rgb_float, ws_labels, color=(1, 0, 0))

    # Label
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot original ---
    axL = axes[row, 0]
    axL.imshow(rgb_float)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # --- Plot watershed overlay ---
    axR = axes[row, 1]
    axR.imshow(overlay)
    axR.set_title(f"{label} — Watershed", fontsize=12)
    axR.axis("off")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs Watershed Segmentation (Right)", fontsize=16)
plt.show()


In [None]:
# Example:
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from scipy import ndimage as ndi
from skimage.color import rgb2gray
from skimage.feature import peak_local_max
from skimage.filters import sobel, threshold_otsu
from skimage.segmentation import watershed, mark_boundaries


def show_oregon_watershed(cropped_tifs, min_distance=20):
    """
    Plot the Oregon scene next to its marker-controlled watershed boundaries.

    Parameters
    ----------
    cropped_tifs : list of paths to the cropped GeoTIFFs; the first one whose
        file name contains "oregon" is used.
    min_distance : forwarded to `skimage.feature.peak_local_max`, i.e. the
        minimum spacing between marker seeds. The default reproduces the
        value that used to be hard-coded here.

    Raises
    ------
    ValueError
        If no path in `cropped_tifs` contains "oregon".
    """
    # Find Oregon file
    matches = [p for p in cropped_tifs if "oregon" in p.stem.lower()]
    if not matches:
        raise ValueError("Could not find an Oregon file in cropped_tifs.")
    path = matches[0]

    # --- Load GeoTIFF as (rows, cols, bands) ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch for better contrast
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- Watershed segmentation ---
    gray = rgb2gray(rgb_float)
    gradient = sobel(gray)  # edge strength is the surface that gets flooded

    # Only pixels brighter than the Otsu threshold are segmented
    otsu_t = threshold_otsu(gray)
    mask = gray > otsu_t

    distance = ndi.distance_transform_edt(mask)

    # Local maxima of the distance map become the seeds
    marker_coords = peak_local_max(distance, min_distance=min_distance, labels=mask)

    markers = np.zeros_like(gray, dtype=np.int32)
    if len(marker_coords) > 0:
        markers[tuple(marker_coords.T)] = np.arange(1, len(marker_coords) + 1)
    else:
        # No peak found: fall back to a single seed at the image centre
        h, w = gray.shape
        markers[h // 2, w // 2] = 1

    # Relabel so that touching marker pixels share one label
    markers, _ = ndi.label(markers > 0)

    ws_labels = watershed(gradient, markers, mask=mask)

    # Red region boundaries drawn over the image
    overlay = mark_boundaries(rgb_float, ws_labels, color=(1, 0, 0))

    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot side-by-side ---
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    axes[0].imshow(rgb_float)
    axes[0].set_title(f"{label} — Original", fontsize=14)
    axes[0].axis("off")

    axes[1].imshow(overlay)
    axes[1].set_title(f"{label} — Watershed Segmentation", fontsize=14)
    axes[1].axis("off")

    fig.suptitle("Oregon Cascades: RGB vs Watershed", fontsize=16)
    plt.tight_layout()
    plt.show()

# Run it
show_oregon_watershed(cropped_tifs)


# Felzenszwalb graph segmentation

Segmentation approach:
- Builds a graph of pixels with weighted edges.
- Merges regions based on internal consistency and boundary contrast.


Key parameters:
- `scale`: larger gives larger segments.
- `min_size`: removes tiny noisy segments.

Key papers:
- Felzenszwalb, P. F., & Huttenlocher, D. P. (2004). *Efficient Graph-Based Image Segmentation*. IJCV, 59(2), 167-181.
- Shi, J., & Malik, J. (2000). *Normalized Cuts and Image Segmentation*. IEEE TPAMI, 22(8), 888-905.

In [None]:
# Example:Felzenszwalb graph-based segmentation
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.segmentation import felzenszwalb, mark_boundaries

# Felzenszwalb segmentation on every cropped scene: original (left) vs boundaries (right).
image_files = cropped_tifs[:5]

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 20))
plt.subplots_adjust(hspace=0.25, wspace=0.05)

for row, path in enumerate(image_files):

    # --- Load GeoTIFF (RGB) ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch -> [0,1] float for display/segmentation
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- Felzenszwalb segmentation ---
    felz_labels = felzenszwalb(
        rgb_float,
        scale=175,     # larger => larger segments
        sigma=0.8,     # smoothing
        min_size=80    # minimum segment size
    )

    # Boundary overlay (cyan)
    overlay = mark_boundaries(rgb_float, felz_labels, color=(0, 1, 1))

    # Label
    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot original ---
    axL = axes[row, 0]
    axL.imshow(rgb_float)
    axL.set_title(f"{label} — Original", fontsize=12)
    axL.axis("off")

    # --- Plot Felzenszwalb overlay ---
    axR = axes[row, 1]
    axR.imshow(overlay)
    axR.set_title(f"{label} — Felzenszwalb", fontsize=12)
    axR.axis("off")

# Fewer than 5 scenes may exist (the download step skips locations with no
# match); blank the leftover rows instead of showing empty framed axes.
for ax in axes[len(image_files):].ravel():
    ax.axis("off")

fig.suptitle("Landsat RGB (Left) vs Felzenszwalb Graph Segmentation (Right)", fontsize=16)
plt.show()


In [None]:
# Example: Single-location Felzenszwalb
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from skimage.segmentation import felzenszwalb, mark_boundaries

def show_iowa_felzenszwalb(cropped_tifs, scale=175, sigma=0.8, min_size=80):
    """
    Plot the Iowa scene next to its Felzenszwalb segment boundaries.

    Parameters
    ----------
    cropped_tifs : list of paths to the cropped GeoTIFFs; the first one whose
        file name contains "iowa" is used.
    scale, sigma, min_size : forwarded to `skimage.segmentation.felzenszwalb`.

    Raises
    ------
    ValueError
        If no path in `cropped_tifs` contains "iowa".
    """
    # Find the Iowa file
    matches = [p for p in cropped_tifs if "iowa" in p.stem.lower()]
    if not matches:
        raise ValueError("Could not find an Iowa file in cropped_tifs (look for 'iowa' in filename).")
    path = matches[0]

    # --- Load GeoTIFF (RGB) as (rows, cols, bands) ---
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    # Percentile stretch -> [0,1] float
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb_float = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

    # --- Felzenszwalb segmentation ---
    felz_labels = felzenszwalb(rgb_float, scale=scale, sigma=sigma, min_size=min_size)
    overlay = mark_boundaries(rgb_float, felz_labels, color=(0, 1, 1))  # cyan boundaries

    label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # --- Plot before/after ---
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    axes[0].imshow(rgb_float)
    axes[0].set_title(f"{label} — Original", fontsize=14)
    axes[0].axis("off")

    axes[1].imshow(overlay)
    axes[1].set_title(f"{label} — Felzenszwalb", fontsize=14)
    axes[1].axis("off")

    fig.suptitle("Iowa: RGB vs Felzenszwalb Graph Segmentation", fontsize=16)
    plt.tight_layout()
    plt.show()

# Run it
show_iowa_felzenszwalb(cropped_tifs)


In [None]:
# Example: Putting all our techniques together
import numpy as np
import matplotlib.pyplot as plt
import rasterio

from scipy import ndimage as ndi
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu, threshold_multiotsu, sobel
from skimage.feature import peak_local_max
from skimage.segmentation import slic, felzenszwalb, watershed, mark_boundaries

# --- Simple helpers (kept short for teaching) ---
def stretch01(rgb):
    """
    Linearly rescale `rgb` so that its 2nd percentile maps to 0 and its 98th
    percentile maps to 1, clipping everything outside that range.

    The percentiles are taken over the whole array (NaNs ignored). A small
    constant in the denominator avoids dividing by zero on flat input.
    """
    low, high = np.nanpercentile(rgb, (2, 98))
    span = high - low + 1e-6
    scaled = (rgb - low) / span
    return np.clip(scaled, 0, 1)

def kmeans_segment_rgb_uint8(rgb_uint8, k=5, iterations=18, seed=42):
    """Quantize an RGB image to ``k`` colours with a small NumPy k-means.

    Every pixel is treated as a point in RGB space. Centres start as ``k``
    distinct pixels drawn with a seeded generator, then the usual
    assign/update steps run until the centres move by at most 0.5 (per
    channel) or ``iterations`` rounds have been done. A cluster that receives
    no pixels keeps its previous centre.

    Args:
        rgb_uint8: Array of shape (height, width, 3).
        k: Number of colour clusters. Must not exceed the number of pixels,
            because the initial centres are sampled without replacement.
        iterations: Maximum number of assign/update rounds. With 0 (or a
            negative value) no update is performed and pixels are simply
            mapped to the nearest initial centre.
        seed: Seed for ``np.random.default_rng`` so results are repeatable.

    Returns:
        A uint8 array of shape (height, width, 3) in which each pixel is
        replaced by the colour of its cluster centre.
    """
    h, w, _ = rgb_uint8.shape
    X = rgb_uint8.reshape(-1, 3).astype(np.float32)

    def nearest_center(current_centers):
        # Squared Euclidean distance from every pixel to every centre: (n_pixels, k).
        d2 = np.sum((X[:, None, :] - current_centers[None, :, :]) ** 2, axis=2)
        return np.argmin(d2, axis=1)

    rng = np.random.default_rng(seed)
    centers = X[rng.choice(X.shape[0], size=k, replace=False)]

    labels = None
    for _ in range(iterations):
        labels = nearest_center(centers)
        new_centers = centers.copy()
        for j in range(k):
            pts = X[labels == j]
            if pts.size:  # empty clusters keep their old centre
                new_centers[j] = pts.mean(axis=0)
        converged = np.allclose(new_centers, centers, atol=0.5)
        centers = new_centers
        if converged:
            break

    # The loop body never ran (iterations <= 0): previously `labels` was
    # unbound here and the function crashed. Assign once to the initial centres.
    if labels is None:
        labels = nearest_center(centers)

    out = centers[labels].reshape(h, w, 3)
    return np.clip(out, 0, 255).astype(np.uint8)

# --- Segmentation methods, in the left-to-right column order of the figure ---
ordered_methods = [
    "otsu_thresholding",
    "multi_otsu_3class",
    "kmeans_rgb",
    "slic_superpixels",
    "watershed",
    "felzenszwalb",
]

# Use at most the first five cropped GeoTIFFs (cropped_tifs must already exist)
image_files = cropped_tifs[:5]

# Grid layout: one row per image, one column for the original plus one per method
nrows = len(image_files)
ncols = len(ordered_methods) + 1

# squeeze=False makes `axes` a 2-D array even for a single row, so axes[r, c] always works
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(4 * ncols, 4 * nrows),
    squeeze=False,
)

# Fill the grid: for each image (row), draw the stretched original in column 0
# and the output of each segmentation method in the following columns.
for r, path in enumerate(image_files):

    # ---- Load + normalize RGB ----
    # read() returns (band, row, col); transpose to (row, col, band) for imshow/skimage.
    with rasterio.open(path) as src:
        rgb = src.read([1, 2, 3]).astype(np.float32).transpose(1, 2, 0)

    rgb_float = stretch01(rgb)  # [0,1]
    gray = rgb2gray(rgb_float)  # single-channel input for the threshold-based methods

    # Row label, e.g. "some_place_rgb_cropped" -> "Some Place"
    location_label = path.stem.replace("_rgb_cropped", "").replace("_", " ").title()

    # ---- Original ----
    axes[r, 0].imshow(rgb_float)
    axes[r, 0].set_title(f"{location_label}\nOriginal", fontsize=11)
    axes[r, 0].axis("off")

    # ---- 1) Otsu (binary) ----
    # Pixels at or below the threshold are dimmed to 25% brightness.
    t = threshold_otsu(gray)
    mask = gray > t
    otsu_vis = rgb_float.copy()
    otsu_vis[~mask] *= 0.25

    # ---- 2) Multi-Otsu (3 classes) ----
    # Two thresholds split the grayscale range into classes 0, 1 and 2,
    # each painted with a flat colour.
    t1, t2 = threshold_multiotsu(gray, classes=3)
    seg3 = np.digitize(gray, bins=[t1, t2])
    multi_vis = (rgb_float * 0.35).copy()
    multi_vis[seg3 == 0] = [0.10, 0.20, 0.80]  # blue
    multi_vis[seg3 == 1] = [0.20, 0.80, 0.20]  # green
    multi_vis[seg3 == 2] = [0.90, 0.20, 0.20]  # red

    # ---- 3) K-means RGB (color quantization) ----
    # The helper works on 0-255 values; scale back to [0,1] for display.
    rgb8 = (rgb_float * 255).astype(np.uint8)
    kmeans_vis = kmeans_segment_rgb_uint8(rgb8, k=5, iterations=18, seed=42) / 255.0

    # ---- 4) SLIC superpixels ----
    slic_labels = slic(rgb_float, n_segments=250, compactness=12.0, sigma=1.0, start_label=1)
    slic_vis = mark_boundaries(rgb_float, slic_labels, color=(1, 1, 0))  # yellow edges

    # ---- 5) Watershed ----
    # The foreground mask is the same Otsu mask as in step 1 (same `gray`),
    # so reuse it instead of recomputing the threshold.
    gradient = sobel(gray)
    otsu_t = t
    ws_mask = mask
    distance = ndi.distance_transform_edt(ws_mask)

    # Seeds: local maxima of the distance transform inside the mask,
    # or the image centre if no maxima are found.
    marker_coords = peak_local_max(distance, min_distance=15, labels=ws_mask)
    markers = np.zeros_like(gray, dtype=np.int32)
    if len(marker_coords) > 0:
        markers[tuple(marker_coords.T)] = np.arange(1, len(marker_coords) + 1)
    else:
        h, w = gray.shape
        markers[h // 2, w // 2] = 1
    # Relabel so that touching seed pixels share a single marker id.
    markers, _ = ndi.label(markers > 0)

    ws_labels = watershed(gradient, markers, mask=ws_mask)
    ws_vis = mark_boundaries(rgb_float, ws_labels, color=(1, 0, 0))  # red edges

    # ---- 6) Felzenszwalb ----
    felz_labels = felzenszwalb(rgb_float, scale=175, sigma=0.8, min_size=80)
    felz_vis = mark_boundaries(rgb_float, felz_labels, color=(0, 1, 1))  # cyan edges

    # Collect results keyed by the names used in ordered_methods
    results_row = {
        "otsu_thresholding": otsu_vis,
        "multi_otsu_3class": multi_vis,
        "kmeans_rgb": kmeans_vis,
        "slic_superpixels": slic_vis,
        "watershed": ws_vis,
        "felzenszwalb": felz_vis,
    }

    # ---- Plot each method ----
    # Column 0 holds the original, so method columns start at 1.
    for c, method in enumerate(ordered_methods, start=1):
        axes[r, c].imshow(results_row[method])
        axes[r, c].set_title(method.replace("_", " "), fontsize=11)
        axes[r, c].axis("off")

# The method count in the title follows ordered_methods rather than being hard-coded.
fig.suptitle(
    f"Landsat: Original + {len(ordered_methods)} Segmentation Methods (Rows = Locations)",
    fontsize=18,
    y=0.995,
)
# Leave a little room at the top for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.98])
plt.show()
