### This notebook runs FlowSOM pixel-level clustering

In [1]:
# import required packages
import os
import subprocess

import numpy as np
import pandas as pd
import xarray as xr

from ark.phenotyping import som_utils
from ark.utils import io_utils, load_utils

### Set file paths and parameters

In [2]:
# root folder of the FlowSOM data; every other path below is built from it
base_dir = "../data/example_dataset/flowsom_data"

# the "input_data" and "deepcell_output" subfolders of base_dir
tiff_dir, segmentation_dir = (
    os.path.join(base_dir, subfolder)
    for subfolder in ("input_data", "deepcell_output")
)

In [3]:
# MIBItiff settings
# file-name suffix used to pick out the MIBItiff files; only read when MIBItiff is True
mibitiff_suffix = '-MassCorrected-Filtered.tiff'
# when True, fovs are listed as files in tiff_dir; when False, as folders
MIBItiff = False

In [4]:
# either get all fovs in the folder...
if MIBItiff:
    # MIBItiff input: list the files in tiff_dir, filtered by the MIBItiff suffix.
    # The suffix variable is spelled `mibitiff_suffix` (lowercase); Python names are
    # case-sensitive, so any other casing raises a NameError on this branch.
    fovs = io_utils.list_files(tiff_dir, substrs=mibitiff_suffix)
else:
    # non-MIBItiff input: list the folders found in tiff_dir
    fovs = io_utils.list_folders(tiff_dir)

# ... or optionally, select a specific set of fovs manually
# fovs = ["Point14"]

In [5]:
# channel names to subset over
channels = [
    "CD45",
    "SMA",
    "Vimentin",
    "CD31",
    "CD20",
    "CD3",
    "CD68",
]

### Load segmentation labels

In [7]:
# read the segmentation labels found in segmentation_dir, naming the extra
# dimension 'compartments' with the single channel 'whole_cell'
segmentation_labels = load_utils.load_imgs_from_dir(
    segmentation_dir,
    xr_dim_name='compartments',
    xr_channel_names=['whole_cell'],
    force_ints=True,
)

In [8]:
# rename the fov coordinates: swap the 'segmentationmask_SampleID' prefix for 'Point'
segmentation_labels = segmentation_labels.assign_coords(
    fovs=[
        fov_name.replace('segmentationmask_SampleID', 'Point')
        for fov_name in segmentation_labels.fovs.values
    ]
)

### Preprocess

In [9]:
# run FlowSOM preprocessing
# hands the selected fovs and channels, the loaded segmentation labels, and the
# base and tiff directories to som_utils.create_pixel_matrix
# NOTE(review): presumably writes the preprocessed pixel data under base_dir — confirm in som_utils
som_utils.create_pixel_matrix(fovs, channels, segmentation_labels, base_dir, tiff_dir)

### Train SOM

In [None]:
# run the SOM clustering
# uses the same fovs, channels, and base_dir as the preprocessing step
# NOTE(review): presumably reads the preprocessed data from base_dir and saves the
# trained SOM there — confirm in som_utils
som_utils.train_som(fovs, channels, base_dir)

### Assign clusters

In [None]:
# assign clusters for the selected fovs
# NOTE(review): presumably relies on the output of train_som stored under base_dir — confirm
som_utils.cluster_pixels(fovs, base_dir)

### Run consensus clustering

In [None]:
# run consensus clustering for the selected fovs and channels
# NOTE(review): presumably builds on the cluster assignments stored under base_dir — confirm
som_utils.consensus_cluster(fovs, channels, base_dir)

TODO: add post-processing pixel clustering steps