In [1]:
import yaml
import numpy as np
from numpy.typing import NDArray
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from tqdm import tqdm

from eda import git_root

In [2]:
from pathlib import Path
import numpy as np
from numpy.typing import NDArray
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from skimage import img_as_ubyte
from skimage.io import imsave


def overlay_clusters(
    cropped: NDArray[np.number],
    klabels: NDArray[np.integer],
    n_clusters: int,
    ncols: int = 5,
    figsize=(12, 12),
) -> Figure:
    """Draw one panel per cluster label with that cluster's pixels highlighted.

    For every label in ``1..n_clusters`` a copy of ``cropped`` is taken, the
    pixels where ``klabels`` equals the label get channels 0 and 1 set to the
    brightest value of the image dtype (channel 2 is left untouched), and the
    result is shown in its own subplot titled ``label=<label>``.

    Parameters
    ----------
    cropped : NDArray[np.number]
        Image to draw on. It is indexed as ``cropped[mask, channel]``, so it
        needs a trailing channel axis with at least two channels. The array
        itself is never modified. Integer dtypes are highlighted with the
        dtype's maximum value, any other dtype with ``1.0``.
    klabels : NDArray[np.integer]
        Label map compared against ``1..n_clusters``; the comparison result
        is used as a boolean mask over the leading axes of ``cropped``.
    n_clusters : int
        Number of labels to render. Labels are 1-based.
    ncols : int, optional
        Number of subplot columns, by default 5
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots``, by default (12, 12)

    Returns
    -------
    Figure
        Figure containing the panels. It is not closed here; the caller owns it.
    """
    # Integer ceiling division; at least one row so plt.subplots always gets
    # a valid grid, even when n_clusters is 0.
    nrows = max(1, -(-n_clusters // ncols))

    # squeeze=False guarantees a 2-D array of Axes, so ravel() also works for
    # a 1x1 grid (where plt.subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    # Hide every frame up front so grid cells beyond n_clusters stay blank.
    for ax in axes:
        ax.set_axis_off()

    # np.iinfo only accepts integer dtypes and raises ValueError otherwise,
    # so non-integer (float) images are highlighted with 1.0 instead.
    if np.issubdtype(cropped.dtype, np.integer):
        highlight = np.iinfo(cropped.dtype).max
    else:
        highlight = 1.0

    for ax, label in zip(axes, range(1, n_clusters + 1)):
        canvas = cropped.copy()
        mask = klabels == label

        # Saturate channels 0 and 1 (red + green for RGB input -> yellow).
        canvas[mask, 0] = highlight
        canvas[mask, 1] = highlight

        ax.imshow(canvas)
        ax.set_title(f"label={label}")
    return fig


def save_overlay(
    cropped: NDArray[np.number],
    klabels: NDArray[np.integer],
    n_clusters: int,
    output_dir: Path,
    suffix: str
) -> None:
    """Write one highlighted overlay PNG per cluster label.

    For every label in ``1..n_clusters`` a copy of ``cropped`` is taken, the
    pixels where ``klabels`` equals the label get channels 0 and 1 set to the
    brightest value of the image dtype (channel 2 is left untouched), and the
    result is converted with ``img_as_ubyte`` and saved as
    ``overlay_<suffix>_<label>.png`` inside ``output_dir``.

    Parameters
    ----------
    cropped : NDArray[np.number]
        Image to draw on. It is indexed as ``cropped[mask, channel]``, so it
        needs a trailing channel axis with at least two channels. The array
        itself is never modified. Integer dtypes are highlighted with the
        dtype's maximum value, any other dtype with ``1.0``.
    klabels : NDArray[np.integer]
        Label map compared against ``1..n_clusters``; the comparison result
        is used as a boolean mask over the leading axes of ``cropped``.
    n_clusters : int
        Number of labels to export. Labels are 1-based.
    output_dir : Path
        Directory that receives the PNG files. It is created if missing.
    suffix : str
        Identifier embedded in every file name.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # np.iinfo only accepts integer dtypes and raises ValueError otherwise,
    # so non-integer (float) images are highlighted with 1.0 instead.
    # NOTE(review): img_as_ubyte presumably expects float input within
    # [-1, 1] - confirm that float images are already scaled that way.
    if np.issubdtype(cropped.dtype, np.integer):
        highlight = np.iinfo(cropped.dtype).max
    else:
        highlight = 1.0

    for label in range(1, n_clusters + 1):
        canvas = cropped.copy()
        mask = klabels == label

        # Saturate channels 0 and 1 (red + green for RGB input -> yellow).
        canvas[mask, 0] = highlight
        canvas[mask, 1] = highlight
        imsave(output_dir / f"overlay_{suffix}_{label}.png", img_as_ubyte(canvas))

In [3]:
# Anchor every path below on the value returned by the project helper
# `git_root` (presumably the repository root - confirm in `eda`).
ROOT = git_root(absolute=True)

# Prediction config for the human k-means run. The encoding is pinned so the
# file is decoded the same way regardless of the machine's locale.
cfg_path = ROOT / "Submission/kmeans_clustering/config/models/predict_human.yaml"
with open(cfg_path, mode="r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Directories named by the config, resolved against ROOT: the cell below
# reads `cropped_*.npy` from the first and `klabels*.npy` from sub-folders
# of the second.
cropped_dir = ROOT / cfg["data"]["INPUT_DIR"]
klabels_dir = ROOT / cfg["data"]["OUTPUT_DIR"]

In [4]:
# Collect every entry directly under the clustering output directory
# (presumably one folder per parameter setting - verify against the
# pipeline that writes them) and print them for a quick visual check.
paths_sigmas = [entry for entry in klabels_dir.glob("*")]
print(paths_sigmas)

[PosixPath('/work/data/submission/clustering/human/3_7'), PosixPath('/work/data/submission/clustering/human/3_10'), PosixPath('/work/data/submission/clustering/human/3_15'), PosixPath('/work/data/submission/clustering/human/4_7'), PosixPath('/work/data/submission/clustering/human/4_10'), PosixPath('/work/data/submission/clustering/human/4_15'), PosixPath('/work/data/submission/clustering/human/5_7'), PosixPath('/work/data/submission/clustering/human/5_10'), PosixPath('/work/data/submission/clustering/human/5_15'), PosixPath('/work/data/submission/clustering/human/6_20')]


In [5]:
# Number of cluster labels rendered per tissue; used for both the summary
# figure and the per-label PNG export so the two can never disagree.
N_CLUSTERS = 30

path_sigma = klabels_dir / "3_7"
# Sorted so the processing order is reproducible across runs and machines
# (glob order is filesystem dependent).
paths_tissue = sorted(path_sigma.glob("klabels*.npy"))

for path_tissue in tqdm(paths_tissue):
    # The tissue id is the integer after the last "_" in the file stem.
    tissue_id = int(path_tissue.stem.split("_")[-1])
    # Build the path directly: a missing file now fails inside np.load with
    # the offending path in the message instead of a bare IndexError.
    path_cropped = cropped_dir / f"cropped_{tissue_id}.npy"

    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects.
    # Keep it only if these files were saved as object arrays and come from a
    # trusted pipeline; otherwise drop the flag.
    cropped = np.load(path_cropped, allow_pickle=True)
    klabels = np.load(path_tissue, allow_pickle=True)

    # Summary figure: one panel per label, saved next to the label files.
    fig = overlay_clusters(cropped, klabels, N_CLUSTERS, 5, (16, 24))
    fig.suptitle(str(path_tissue))
    fig.savefig(path_sigma / f"klabels_{tissue_id}.png")
    # Release the figure right away; each one holds N_CLUSTERS image copies.
    plt.close(fig)

    # One standalone PNG per label.
    save_overlay(cropped, klabels, N_CLUSTERS, path_sigma, str(tissue_id))

100%|██████████| 90/90 [56:55<00:00, 37.95s/it]
