# Manual labeling of patches

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
import pandas as pd

In [None]:
# Read the stacked patch array from disk and record how many patches it holds.
PATCHES_PATH = '/home/ubuntu/mucilage_pipeline/patches_tqr.npy'

patches = np.load(PATCHES_PATH)
N = patches.shape[0]  # size of the first axis, used as the patch count below

## Functions

In [None]:
def show_rgb(patch, eps=1e-6, ax=None):
    """Draw a patch as a contrast-stretched RGB image (B4, B3, B2).

    Channels 2, 1 and 0 of ``patch`` are displayed as red, green and blue.
    The three channels are stretched together between their joint 2nd and
    98th percentiles (NaNs ignored) and clipped to [0, 1].

    Parameters
    ----------
    patch : array indexed as ``patch[:, :, channel]`` with at least 3 channels.
    eps : float, optional
        Added to the stretch denominator so a constant patch does not
        divide by zero.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted the current pyplot axes are used, so
        existing ``show_rgb(patch)`` calls keep drawing on the active figure.

    Returns
    -------
    The Axes the image was drawn on.
    """
    rgb = patch[:, :, [2, 1, 0]]
    p2, p98 = np.nanpercentile(rgb, (2, 98))
    rgb = np.clip((rgb - p2) / (p98 - p2 + eps), 0, 1)

    if ax is None:
        ax = plt.gca()
    ax.imshow(rgb)
    ax.axis("off")
    return ax

def show_amei(patch, eps=1e-6, ax=None):
    """Draw the AMEI index of a patch as a contrast-stretched colour map.

    The index is computed per pixel as
    ``(2*red + nir - 2*swir) / (green + 0.25*swir + eps)`` from channels
    1 (green), 2 (red), 3 (NIR) and 4 (SWIR) of ``patch``. It is then
    stretched between its 2nd and 98th percentiles (NaNs ignored), clipped
    to [0, 1] and shown with the ``turbo`` colour map.

    Parameters
    ----------
    patch : array indexed as ``patch[:, :, channel]`` with at least 5 channels.
    eps : float, optional
        Added to both the index denominator and the stretch denominator to
        avoid division by zero.
    ax : matplotlib Axes, optional
        Axes to draw on; defaults to the current pyplot axes.

    Returns
    -------
    The Axes the image was drawn on.
    """
    green = patch[:, :, 1]
    red = patch[:, :, 2]
    nir = patch[:, :, 3]
    swir = patch[:, :, 4]

    amei = (2 * red + nir - 2 * swir) / (green + 0.25 * swir + eps)

    p2, p98 = np.nanpercentile(amei, (2, 98))
    # Without eps a constant index (p98 == p2) would give 0/0 and an
    # all-NaN image; this mirrors the stretch used by show_rgb.
    amei = np.clip((amei - p2) / (p98 - p2 + eps), 0, 1)

    if ax is None:
        ax = plt.gca()
    ax.imshow(amei, cmap=plt.cm.turbo)
    ax.axis("off")
    return ax

## Labeling

Pt.1

In [None]:
# Manually label the patches one by one by visual inspection.
#
# Every patch whose index is not yet in `labels_file` is shown as a stretched
# RGB image and the user types a class key. The whole label list is rewritten
# to `labels_file` after each accepted label, so the session can be stopped
# with "q" and resumed later.

classes = {
    "-1": "outlier",
    "0": "clean_water",
    "1": "mucilage",
    "2": "algae",
    "3": "cloud",
    "4": "sediment"
}

labels_file = "labels.csv"

# Build the prompt from `classes` so the keys shown to the user always match
# the labels that are actually stored.
label_prompt = ", ".join(f"{key}={name}" for key, name in classes.items())

# Load previous labels if they exist
if os.path.exists(labels_file):
    df = pd.read_csv(labels_file)
    labeled_indices = set(df['index'].tolist())
    labels = df.values.tolist()
else:
    labeled_indices = set()
    labels = []

# Walk through all patches, skipping the ones that already have a label
for i in range(N):
    if i in labeled_indices:
        continue

    show_rgb(patches[i])
    plt.title(f"Patch {i}/{N}")
    plt.show(block=False)

    print(f"Enter label: {label_prompt}")
    print("s=skip, q=quit")
    # Strip so stray whitespace around a valid key is not treated as invalid
    lbl = input(f"Label for patch {i}: ").strip()

    plt.close()

    if lbl == "q":
        print("Quitting, progress saved.")
        break
    if lbl == "s":
        continue
    if lbl not in classes:
        print("Invalid label, skipping...")
        continue

    labels.append([i, classes[lbl]])
    labeled_indices.add(i)

    # Save progress after each label
    with open(labels_file, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["index", "label"])
        writer.writerows(labels)

print(f"✅ Labeling session ended. Progress saved in {labels_file}")

In [7]:
# Count valid

labels = pd.read_csv('/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_tqr.csv')
# Count within the label column only, instead of comparing every cell of the frame
print(f"Mucilage patches: {labels['label'].eq('mucilage').sum()}")
print(f"Clean water patches: {labels['label'].eq('clean_water').sum()}")

# Drop every class that is not kept for the next step:
# outliers, algae, clouds and sediment

labels_cleaned = labels[~labels['label'].isin(['outlier','algae', 'cloud', 'sediment'])]
labels_cleaned.to_csv("/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_corrected_tqr.csv", index=False)

Mucilage patches: 164
Clean water patches: 729


Pt.2

In [None]:
# Reinspect labels for the classes of interest.
#
# Each row after REVIEW_AFTER_ROW is shown with its current label; the user
# can assign a new class, skip the row, or quit. The frame is written back
# once, after the loop ends.

# Dictionary of allowed classes
classes = {
    "0": "clean_water",
    "1": "mucilage",
    "2": "algae"
}

# Rows with a position up to and including this value are not shown.
# NOTE(review): presumably the point where a previous review session stopped;
# set to -1 to review everything.
REVIEW_AFTER_ROW = 870

# Build the prompt from `classes` so every accepted key is listed
label_prompt = ", ".join(f"{key}={name}" for key, name in classes.items())

labels_corrected = pd.read_csv('/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_corrected.csv')

for idx, row in labels_corrected.iterrows():
    if idx <= REVIEW_AFTER_ROW:
        continue

    patch_id = int(row["index"])
    current_label = row["label"]

    plt.figure(figsize=(3,3))
    show_rgb(patches[patch_id])
    plt.title(f"Patch {patch_id} | Current: {current_label}")
    plt.show(block=False)

    print(f"Enter new label [{label_prompt}, s=skip, q=quit]")
    lbl = input("New label: ").strip()

    plt.close()

    if lbl == "q":
        print("Stopping review, progress saved.")
        break
    elif lbl == "s" or lbl == "":
        continue
    elif lbl in classes:
        labels_corrected.at[idx, "label"] = classes[lbl]
    else:
        print("Invalid input, keeping old label.")

# Save corrected labels
# NOTE(review): the frame is loaded from an absolute path but saved to a path
# relative to the working directory; the two only coincide when the notebook
# runs from the directory that holds the loaded file. Confirm this is intended.
labels_corrected.to_csv("labels_corrected.csv", index=False)
print("✅ Saved to labels_corrected.csv")

In [None]:
# Summarise the corrected label file: total rows and rows per class of interest
labels_corrected = pd.read_csv('/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_corrected.csv')
print(len(labels_corrected))
# Count within the label column only, instead of comparing every cell of the frame
print(f"Mucilage patches: {labels_corrected['label'].eq('mucilage').sum()}")
print(f"Clean water patches: {labels_corrected['label'].eq('clean_water').sum()}")
print(f"Algae patches: {labels_corrected['label'].eq('algae').sum()}")

# Check AMEI in mucilage patches

In [None]:
# Show the AMEI map of every patch labelled "mucilage" in a grid of small panels
labels_corrected = pd.read_csv("/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_corrected.csv")
mucilage = labels_corrected[labels_corrected['label'] == 'mucilage']

# Grid parameters
ncols = 10
# Keep at least one row so plt.subplots does not fail when no patch has this label
nrows = max(1, int(np.ceil(len(mucilage) / ncols)))

# squeeze=False always returns a 2-D array of Axes, whatever the grid size
fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2), squeeze=False)

for ax, (row_idx, row) in zip(axes.ravel(), mucilage.iterrows()):
    patch_id = int(row["index"])
    show_amei(patches[patch_id], ax=ax)
    # row_idx is the row label in labels_corrected, i.e. the key for mucilage.loc[...]
    ax.set_title(f"id:{patch_id} - ax:{row_idx}", fontsize=8)

# Hide empty subplots if any
for ax in axes.ravel()[len(mucilage):]:
    ax.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Investigate RGB vs AMEI of single mucilage patches

def show_rgb(patch, eps=1e-6, ax=None):
    """
    Draw a patch as a contrast-stretched RGB image.

    Channels 2, 1 and 0 of ``patch`` are used as red, green and blue
    (labelled B4, B3 and B2 in this notebook). The stacked image is
    stretched between its joint 2nd and 98th percentiles (NaNs ignored),
    with ``eps`` added to the denominator, and clipped to [0, 1].

    The image is drawn on ``ax``, or on the current pyplot axes when ``ax``
    is None, with the axis decorations turned off. Returns the Axes used.
    """
    # Red, green, blue in display order
    composite = np.dstack([patch[:, :, band] for band in (2, 1, 0)])

    # Percentile-based contrast stretch
    lo, hi = np.nanpercentile(composite, (2, 98))
    stretched = np.clip((composite - lo) / (hi - lo + eps), 0, 1)

    target = plt.gca() if ax is None else ax
    target.imshow(stretched)
    target.axis("off")
    return target

# Side-by-side view of one mucilage patch: stretched RGB (left) and AMEI (right)
fig, axs = plt.subplots(1, 2, figsize=(6, 6))

# 816 is a row label of `mucilage`, as printed after "ax:" in the grid titles
row = mucilage.loc[816]
patch_id = int(row["index"])
selected_patch = patches[patch_id]

show_rgb(selected_patch, ax=axs[0])
show_amei(selected_patch, ax=axs[1])

# Apply SAM on mucilage patches

In [None]:
import torch
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
import cv2

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"
print(f"Using device: {DEVICE}")

# Load the ViT-B checkpoint (the smaller model, chosen for CPU use)
# and move the model to the selected device
SAM_CHECKPOINT = "/home/ubuntu/mucilage_pipeline/sam_vit_b_01ec64.pth"
sam = sam_model_registry["vit_b"](checkpoint=SAM_CHECKPOINT)
sam.to(device=DEVICE)

In [None]:
# Run SAM's automatic mask generator on the stretched RGB view of one mucilage patch
labels_corrected = pd.read_csv("labels_corrected.csv")
mucilage = labels_corrected[labels_corrected['label'] == 'mucilage']

# Load patch (16 is a row label of `mucilage`, not a patch id)
row = mucilage.loc[16]
patch_id = int(row["index"])
patch = patches[patch_id]

red   = patch[:, :, 2]  # B4
green = patch[:, :, 1]  # B3
blue  = patch[:, :, 0]  # B2
rgb = np.dstack([red, green, blue])

# Contrast stretch to [0, 1] between the 2nd and 98th percentiles
p2, p98 = np.nanpercentile(rgb, (2, 98))
rgb = np.clip((rgb - p2) / (p98 - p2 + 1e-6), 0, 1)

# NaN pixels survive the clip and have no uint8 representation; map them to 0
image = (np.nan_to_num(rgb, nan=0.0) * 255).astype(np.uint8)

# Generate masks automatically.
# SAM expects an HWC uint8 image, so pass `image` rather than the float
# [0, 1] `rgb` array that was passed before.
mask_generator = SamAutomaticMaskGenerator(sam)
masks = mask_generator.generate(image)

print(f"Generated {len(masks)} masks")

In [None]:
import random

# Start from a copy of the uint8 RGB image and paint every SAM mask onto it
overlay = image.copy()

for entry in masks:
    region = entry['segmentation']  # boolean mask
    # One random RGB colour per mask
    tint = np.array([random.randint(0, 255) for _ in range(3)], dtype=np.uint8)
    # Blend: 10% of the underlying pixels, 90% of the mask colour
    overlay[region] = 0.1 * overlay[region] + 0.9 * tint

overlay = overlay.astype(np.uint8)

# Plot side by side: untouched image on the left, masked image on the right
fig, axes = plt.subplots(1, 2, figsize=(6, 6))
panels = [
    (image, "Original RGB Image"),
    (overlay, "RGB Image with SAM Masks"),
]
for ax, (picture, caption) in zip(axes, panels):
    ax.imshow(picture)
    ax.set_title(caption)
    ax.axis("off")

plt.show()

# Check AMEI on algae

In [None]:
# Show the AMEI map of every patch labelled "algae" in a grid of small panels
labels_corrected = pd.read_csv("/home/ubuntu/mucilage_pipeline/mucilage-detection/src/labels_corrected.csv")
algae = labels_corrected[labels_corrected['label'] == 'algae']

# Grid parameters
ncols = 10
# Keep at least one row so plt.subplots does not fail when no patch has this label
nrows = max(1, int(np.ceil(len(algae) / ncols)))

# squeeze=False always returns a 2-D array of Axes, whatever the grid size
fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*2, nrows*2), squeeze=False)

for ax, (row_idx, row) in zip(axes.ravel(), algae.iterrows()):
    patch_id = int(row["index"])
    show_amei(patches[patch_id], ax=ax)
    # row_idx is the row label in labels_corrected, i.e. the key for algae.loc[...]
    ax.set_title(f"id:{patch_id} - ax:{row_idx}", fontsize=8)

# Hide empty subplots if any
for ax in axes.ravel()[len(algae):]:
    ax.axis("off")

plt.tight_layout()
plt.show()

In [None]:
# Investigate RGB vs AMEI of single algae patches

def show_rgb(patch, eps=1e-6, ax=None):
    """
    Draw a patch as a contrast-stretched RGB image.

    Channels 2, 1 and 0 of ``patch`` are used as red, green and blue
    (labelled B4, B3 and B2 in this notebook). The stacked image is
    stretched between its joint 2nd and 98th percentiles (NaNs ignored),
    with ``eps`` added to the denominator, and clipped to [0, 1].

    The image is drawn on ``ax``, or on the current pyplot axes when ``ax``
    is None, with the axis decorations turned off. Returns the Axes used.
    """
    # Red, green, blue in display order
    channels = [patch[:, :, band] for band in (2, 1, 0)]
    composite = np.dstack(channels)

    # Percentile-based contrast stretch
    lo, hi = np.nanpercentile(composite, (2, 98))
    span = hi - lo + eps
    stretched = np.clip((composite - lo) / span, 0, 1)

    if ax is None:
        ax = plt.gca()
    ax.imshow(stretched)
    ax.axis("off")
    return ax

# Side-by-side view of one algae patch: stretched RGB (left) and AMEI (right)
fig, axs = plt.subplots(1, 2, figsize=(6, 6))

# 427 is a row label of `algae`, as printed after "ax:" in the grid titles
row = algae.loc[427]
patch_id = int(row["index"])
selected_patch = patches[patch_id]

show_rgb(selected_patch, ax=axs[0])
show_amei(selected_patch, ax=axs[1])