- Sample [segmentation report](https://wandb.ai/k-galliamov/uw_madison/reports/UW-Madison-GI-tract-image-segmentation-report--Vmlldzo0MjcxNDA0)

# Setup

## Install Dependencies

In [None]:
!pip install kaggle wandb -q

## Data

### Download

In [None]:
!kaggle competitions download -c uw-madison-gi-tract-image-segmentation

### Unzip

In [None]:
import zipfile

# Path of the downloaded competition archive, relative to the working directory
zip_file_path = "./uw-madison-gi-tract-image-segmentation.zip"

# Directory that receives the extracted files
destination_folder = "./data"

# Extract every member of the archive into the destination directory
with zipfile.ZipFile(file=zip_file_path, mode="r") as zip_ref:
    zip_ref.extractall(path=destination_folder)

# Log Masks

In [None]:
import os
import random
import numpy as np
import wandb
import glob
import pandas as pd
from PIL import Image

# Disable wandb logging
# os.environ["WANDB_MODE"] = "disabled"


# Function to decode RLE
def rle_decode(mask_rle, shape):
    """
    Decode an RLE-encoded mask into a binary mask.

    Parameters:
    mask_rle (str): whitespace-separated "start length" pairs, where each
        start is a 1-based position in the flattened mask. An empty string
        yields an all-zero mask.
    shape (tuple): two-element shape the flat mask is reshaped to before
        it is transposed

    Returns:
    numpy.ndarray: uint8 array of 0/1 values with shape (shape[1], shape[0]),
        i.e. the transpose of an array of the requested shape

    Raises:
    ValueError: if mask_rle does not contain an even number of tokens
    """
    tokens = mask_rle.split()
    if len(tokens) % 2:
        # A dangling start without a length would otherwise broadcast to an
        # empty run list (single token) and silently produce an empty mask.
        raise ValueError(
            f"RLE string must hold start/length pairs, got {len(tokens)} tokens"
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # RLE starts are 1-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction


# Function to load image
def load_image(img_id):
    """
    Load the scan belonging to an image id and rescale it to 8-bit.

    Parameters:
    img_id (str): Image identifier, e.g. "case67_day0_slice_0107"

    Returns:
    PIL.Image.Image: Scan with intensities rescaled so that the brightest
        pixel maps to 255 (an all-zero scan stays all-zero)

    Raises:
    FileNotFoundError: If no PNG matching the id exists under ./data/train/
    """

    # id: case67_day0_slice_0107
    # path: data/train/case101/case101_day20/scans/slice_0003_266_266_1.50_1.50.png
    substrings = img_id.split("_")
    case = substrings[0]
    case_day = "_".join(substrings[:2])
    slice_str = "_".join(substrings[2:])
    base_path = "./data/train/"
    img_path = os.path.join(base_path, case, case_day, "scans", slice_str)

    # The file name carries extra fields after the slice number, so the exact
    # name is not known from the id alone; sort to make the pick deterministic.
    matches = sorted(glob.glob(f"{img_path}*.png"))
    if not matches:
        raise FileNotFoundError(
            f"No scan found for id {img_id!r} (pattern: {img_path}*.png)"
        )

    # Read the pixels while the file is open so the handle is released promptly
    with Image.open(matches[0]) as img:
        image_np = np.array(img)

    # Normalize to 8-bit; guard against an all-black scan (division by zero)
    peak = np.max(image_np)
    if peak == 0:
        image_8bit = np.zeros(image_np.shape, dtype=np.uint8)
    else:
        image_8bit = (image_np / peak * 255).astype(np.uint8)

    # Convert back to a PIL image
    return Image.fromarray(image_8bit)


# Initialize wandb
wandb.init(project="gi-tract-segmentation", mode="online")

# Read CSV file
df = pd.read_csv("./data/train.csv")

# Filter out rows with NaN segmentation
df_filtered = df.dropna(subset=["segmentation"])

# Randomly select up to 22 unique ids; the count is capped because
# random.sample raises when asked for more items than are available
unique_ids = df_filtered["id"].unique()
selected_ids = random.sample(list(unique_ids), min(22, len(unique_ids)))
print(selected_ids)

# Filter the DataFrame
filtered_df = df_filtered[df_filtered["id"].isin(selected_ids)]

# Build the class-id <-> class-name mappings; id 0 is reserved for "unknown"
unique_labels = filtered_df["class"].unique()
labels = {}
labels_inverse = {}
classes = []
for i, label in enumerate(unique_labels, start=1):
    labels[i] = label
    labels_inverse[label] = i
    classes.append({"name": label, "id": i})

labels[0] = "unknown"
labels_inverse["unknown"] = 0
classes.append({"name": "unknown", "id": 0})

class_set = wandb.Classes(classes)
print(labels)
print(classes)

# Create a wandb Table
columns = ["id", "image"]
wandb_table = wandb.Table(columns=columns)

# Handle one image id at a time so that each mask is paired with its own
# image and the last id is logged as well.
for img_id, rows in filtered_df.groupby("id", sort=False):
    img = load_image(img_id)
    if img.size != (266, 266):
        # Only 266x266 scans are logged
        continue

    # Start from zeros: np.empty would leave arbitrary values in the mask
    mask = np.zeros((266, 266), dtype=np.uint8)
    for _, row in rows.iterrows():
        class_id = labels_inverse[row["class"]]
        decoded = rle_decode(row["segmentation"], img.size)
        # Assign rather than add so overlapping structures keep a valid
        # class id (the later class wins) instead of a sum of ids
        mask[decoded == 1] = class_id

    masked_image = wandb.Image(
        img,
        masks={
            "ground_truth": {"mask_data": mask, "class_labels": labels},
        },
        classes=class_set,
    )
    wandb_table.add_data(img_id, masked_image)


# Log the table to wandb
wandb.log({"segmentation_samples": wandb_table})
wandb.finish()

In [None]:
# Preview the image left in `img` by the mask-logging cell (the last scan it loaded)
img.show()