### This notebook runs FlowSOM pixel-level clustering

In [1]:
# import required packages
import os

import sys
sys.path.append('..')
sys.path.append('../ark')

import numpy as np
import pandas as pd
import xarray as xr

from ark.flowsom import preprocess
from ark.utils import io_utils, load_utils

### Set file paths and parameters

In [2]:
# Root folder of the cohort; the other paths in this cell are built from it.
base_dir = "/Users/alexkong/Downloads/granulomaCohort_allData"

# Image folder and cohort CSV location, both directly under the root.
tiff_dir, all_data_path = (
    os.path.join(base_dir, leaf) for leaf in ("TIFs", "cohortDatav.csv")
)

In [3]:
# Input-format toggle (True -> MIBItiff files, False -> one folder per fov)
# together with the filename suffix for MIBItiff inputs.
MIBItiff, mibitiff_suffix = False, '-MassCorrected-Filtered.tiff'

In [4]:
# either get all fovs in the folder...
if MIBItiff:
    # MIBItiff inputs are individual files, selected by the configured filename suffix.
    # The suffix variable is spelled `mibitiff_suffix` (lowercase) where it is defined;
    # Python names are case-sensitive, so any other casing raises NameError here.
    fovs = io_utils.list_files(tiff_dir, substrs=mibitiff_suffix)
else:
    # otherwise each fov is a sub-folder of tiff_dir
    fovs = io_utils.list_folders(tiff_dir)

# ... or optionally, select a specific set of fovs manually
# fovs = ["fov1", "fov2"]

In [5]:
# channels to load from each fov; the order below is the order passed to the loaders
chan_list = [
    "CD45", "SMA", "Vimentin", "CD31",
    "Keratin-pan", "E-cadherin", "MastChyTry", "MPO",
    "CD20", "CD3", "CD14", "HLA-DR-DQ-DP",
    "Foxp3", "CD16", "CD11c", "CD206",
    "CD11b", "CD68", "CD163", "CD209",
]

### Load data

In [6]:
# Load the selected fovs/channels with the loader that matches the input layout.
# NOTE(review): the saved output of this cell ends in "ValueError: 'CD36' is not in list",
# and the printed folder listing contains CD36.tif, which is absent from chan_list --
# presumably the tree loader trips on the extra file; confirm against load_utils.
if not MIBItiff:
    img_xr = load_utils.load_imgs_from_tree(
        tiff_dir,
        img_sub_folder="TIFsNoAgg",
        fovs=fovs,
        channels=chan_list,
    )
else:
    img_xr = load_utils.load_imgs_from_mibitiff(
        tiff_dir,
        mibitiff_files=fovs,
        channels=chan_list,
    )

['CD45', 'SMA', 'Vimentin', 'CD31', 'Keratin-pan', 'E-cadherin', 'MastChyTry', 'MPO', 'CD20', 'CD3', 'CD14', 'HLA-DR-DQ-DP', 'Foxp3', 'CD16', 'CD11c', 'CD206', 'CD11b', 'CD68', 'CD163', 'CD209']
['CD14.tif', 'CD209.tif', 'CD16.tif', 'E-cadherin.tif', 'Vimentin.tif', 'HLA-DR-DQ-DP.tif', 'MastChyTry.tif', 'MPO.tif', 'CD3.tif', 'SMA.tif', 'CD11c.tif', 'CD68.tif', 'CD11b.tif', 'CD45.tif', 'CD20.tif', 'Foxp3.tif', 'CD36.tif', 'CD163.tif', 'Keratin-pan.tif', 'CD206.tif', 'CD31.tif']


ValueError: 'CD36' is not in list

In [None]:
# read the cohort table from the CSV path configured earlier in the notebook
all_data = pd.read_csv(filepath_or_buffer=all_data_path)

In [None]:
# display the values of the 'channels' coordinate on the loaded image data
img_xr.coords['channels'].values

In [None]:
# preview the first ten rows of the cohort table
all_data.head(n=10)

### Preprocess the data

In [None]:
# run the FlowSOM preprocessing helper on the loaded image data
# NOTE(review): blur_factor=2 is presumably a smoothing setting -- confirm its meaning
# and units against preprocess_flowsom before tuning
preprocess.preprocess_flowsom(img_xr, blur_factor=2)