### This notebook runs FlowSOM pixel-level clustering

In [1]:
# import required packages
import os

import sys
sys.path.append('..')
sys.path.append('../ark')

import numpy as np
import pandas as pd
import xarray as xr

from ark.flowsom import preprocess
from ark.utils import io_utils, load_utils

### Set file paths and parameters

In [2]:
# Root folder for this cohort; the two paths below are built relative to it.
base_dir = "/Users/alexkong/Downloads/granulomaCohort_allData"

# cohort-level CSV that sits directly under base_dir
all_data_path = os.path.join(base_dir, "cohortDatav.csv")

# folder that is scanned for fovs and passed to the image loaders
tiff_dir = os.path.join(base_dir, "TIFs")

In [3]:
# Suffix of MIBItiff file names; presumably meant as the substring filter when
# listing files in the MIBItiff case -- confirm against the fov-listing cell.
mibitiff_suffix = "-MassCorrected-Filtered.tiff"

# Switch between the two input layouts handled below:
# True  -> fovs are files read with the MIBItiff loader
# False -> fovs are folders read with the folder-tree loader
MIBItiff = False

In [4]:
# either get all fovs in the folder...
if MIBItiff:
    # MIBItiff input: fovs are the files in tiff_dir matching the suffix.
    # The suffix variable is spelled in lower case where it is defined;
    # the previous `MIBItiff_suffix` spelling raised NameError on this branch.
    fovs = io_utils.list_files(tiff_dir, substrs=mibitiff_suffix)
else:
    # folder input: fovs are the sub-folders of tiff_dir
    fovs = io_utils.list_folders(tiff_dir)

# ... or optionally, select a specific set of fovs manually
# fovs = ["fov1", "fov2"]

In [5]:
# Channels handed to the preprocessing step as the subset to keep.
chan_list = [
    "CD45", "SMA", "Vimentin", "CD31", "Keratin-pan",
    "E-cadherin", "MastChyTry", "MPO", "CD20", "CD3",
    "CD14", "HLA-DR-DQ-DP", "Foxp3", "CD16", "CD11c",
    "CD206", "CD11b", "CD68", "CD163", "CD209",
]

### Load data

In [6]:
# Load the image data for the selected fovs with the loader that matches the
# input layout. channels=None is passed in both cases -- presumably "load all
# channels"; confirm against load_utils.
if not MIBItiff:
    img_xr = load_utils.load_imgs_from_tree(
        tiff_dir,
        img_sub_folder="TIFsNoAgg",
        fovs=fovs,
        channels=None,
    )
else:
    img_xr = load_utils.load_imgs_from_mibitiff(
        tiff_dir,
        mibitiff_files=fovs,
        channels=None,
    )

In [7]:
# Read the cohort CSV into a DataFrame.
all_data = pd.read_csv(filepath_or_buffer=all_data_path)

In [8]:
# Load the segmentation masks found in base_dir. The result gets a
# 'compartments' dimension with the single entry 'whole_cell'; force_ints=True
# presumably casts the label images to an integer dtype -- confirm in load_utils.
segmentation_labels = load_utils.load_imgs_from_dir(
    base_dir,
    xr_dim_name='compartments',
    xr_channel_names=['whole_cell'],
    force_ints=True,
)

In [9]:
# Rewrite each fov label, replacing the 'segmentationmask_SampleID' prefix with
# 'Point' (presumably so the labels line up with the image fov names -- confirm).
renamed_fovs = [
    fov.replace('segmentationmask_SampleID', 'Point')
    for fov in segmentation_labels.coords['fovs'].values
]

# Use assign_coords rather than writing to `.values`: current xarray refuses
# in-place assignment to the values of a dimension (index) coordinate.
# Assumes 'fovs' is a dimension coordinate of segmentation_labels.
segmentation_labels = segmentation_labels.assign_coords(fovs=renamed_fovs)

### Preprocess the data

In [10]:
# Run FlowSOM preprocessing on the loaded images, restricted to chan_list.
# NOTE(review): the output recorded for this cell shows 'Point35' printed and
# then "IndexError: index 20 is out of bounds for axis 1 with size 20"; the
# cause lies inside preprocess_flowsom and is not visible from this notebook.
img_xr_proc = preprocess.preprocess_flowsom(
    img_xr,
    segmentation_labels,
    'pixel_results',
    channels=chan_list,
    blur_factor=2,
)

Point35


IndexError: index 20 is out of bounds for axis 1 with size 20

In [None]:
# Persist the preprocessed array to disk; a later cell reloads this same file.
img_xr_proc.to_netcdf(path='img_xr_proc.xr', format="NETCDF3_64BIT")

### Reload the preprocessed data and drop all-zero rows per FOV

In [None]:
# Reload the preprocessed array from the file written by the save cell.
img_xr_proc = xr.load_dataarray(filename_or_obj='img_xr_proc.xr')

In [None]:
# Keep the fov labels as a plain array before the labelled array is discarded.
fovs = img_xr_proc['fovs'].values

In [None]:
# Replace the labelled array with its raw NumPy data.
# np.asarray leaves an ndarray untouched, so re-running this cell is harmless;
# calling `.values` a second time would raise AttributeError on the ndarray.
img_xr_proc = np.asarray(img_xr_proc)

In [None]:
# For every fov, take its 2-D slice, drop the rows that sum to zero and print
# the remaining shape.
# img_xr_proc is a plain ndarray at this point (no `.loc`), so the slice is
# taken by position. `fovs` was read from the array's own 'fovs' coordinate,
# so position `index` along the first axis corresponds to fovs[index].
for index in range(len(fovs)):
    # np.asarray makes this work for an ndarray and for a labelled array alike
    img_xr_proc_sub = np.asarray(img_xr_proc[index, :, :])

    # keep only rows whose sum along axis 1 is non-zero
    img_xr_proc_sub = img_xr_proc_sub[np.sum(img_xr_proc_sub, axis=1) != 0, :]
    print(img_xr_proc_sub.shape)