### This notebook runs FlowSOM pixel-level clustering

In [1]:
# import required packages
import os
import subprocess

import numpy as np
import pandas as pd
import xarray as xr

from ark.phenotyping import som_utils
from ark.utils import io_utils, load_utils

### Set file paths and parameters

In [2]:
# assign file paths
# base_dir is the root data directory; the other paths in this notebook are built from it
base_dir = "../data/example_dataset/flowsom_data"
# tiff_dir is the directory handed to the fov listing and image loading calls
tiff_dir = os.path.join(base_dir, "TIFs")

In [3]:
# set mibitiff parameters
# MIBItiff toggles between the mibitiff code path (True) and the folder-tree code path (False)
# when listing fovs and loading images
MIBItiff = False
# file name suffix for mibitiff files; presumably only relevant when MIBItiff is True
mibitiff_suffix = '-MassCorrected-Filtered.tiff'

In [4]:
# either get all fovs in the folder...
if MIBItiff:
    # mibitiff data: list the files in tiff_dir whose names contain mibitiff_suffix
    # (the variable is spelled in lowercase where it is defined; the name is case-sensitive)
    fovs = io_utils.list_files(tiff_dir, substrs=mibitiff_suffix)
else:
    # folder-tree data: list the sub-folders of tiff_dir
    fovs = io_utils.list_folders(tiff_dir)

# ... or optionally, select a specific set of fovs manually
# fovs = ["Point14"]

In [5]:
# channels to subset over when loading the images and running the clustering
chan_list = [
    "CD45", "SMA", "Vimentin", "CD31",
    "Keratin-pan", "E-cadherin", "MastChyTry", "MPO",
    "CD20", "CD3", "CD14", "HLA-DR-DQ-DP",
    "Foxp3", "CD16", "CD11c", "CD206",
    "CD11b", "CD68", "CD163", "CD209",
]

### Load data

In [6]:
# load the image data for the selected fovs, restricted to the channels in chan_list
if not MIBItiff:
    # folder-tree layout: images are read from the "TIFsNoAgg" sub-folder
    img_xr = load_utils.load_imgs_from_tree(
        tiff_dir,
        img_sub_folder="TIFsNoAgg",
        fovs=fovs,
        channels=chan_list,
        dtype="int16",
    )
else:
    # mibitiff layout: the entries of fovs are passed as the mibitiff files to load
    img_xr = load_utils.load_imgs_from_mibitiff(
        tiff_dir,
        mibitiff_files=fovs,
        channels=chan_list,
        dtype="int16",
    )

In [7]:
# read the segmentation labels found in base_dir, using 'compartments' as the
# dimension name with a single 'whole_cell' channel
segmentation_labels = load_utils.load_imgs_from_dir(
    base_dir,
    xr_dim_name='compartments',
    xr_channel_names=['whole_cell'],
    force_ints=True,
)

In [8]:
# to make fov names consistent, replace 'segmentationmask_SampleID' with 'Point' in the
# segmentation label fov coordinate names
# NOTE(review): presumably this makes them match the fov names of the image data - confirm
renamed_fovs = [
    fov.replace('segmentationmask_SampleID', 'Point')
    for fov in segmentation_labels.coords['fovs'].values
]
segmentation_labels = segmentation_labels.assign_coords(fovs=renamed_fovs)

### Preprocess

In [9]:
# run FlowSOM preprocessing: build the pixel matrix from the loaded image data
# and the segmentation labels
pixel_data = som_utils.create_pixel_matrix(img_xr, segmentation_labels)

In [10]:
# write data to csv, start at "Train the SOM" after this
# (assuming pixel_data is a pandas DataFrame, index=False leaves the row index out of the file)
pixel_data.to_csv(os.path.join(base_dir, 'example_pixel_matrix.csv'), index=False)

### Train the SOM

In [11]:
# run the SOM clustering over the channels in chan_list
# NOTE(review): presumably reads its input from, and writes its results to, base_dir -
# confirm the expected file names against som_utils.cluster_pixels
som_utils.cluster_pixels(base_dir, chan_list)

In [13]:
# read the resulting clustered data from 'pixel_mat_clustered.csv' in base_dir
# NOTE(review): presumably this file is produced by the clustering step - confirm
clustered_pixel_data = pd.read_csv(os.path.join(base_dir, 'pixel_mat_clustered.csv'))

TODO: add post-processing pixel clustering steps