### This notebook runs FlowSOM pixel-level clustering

In [1]:
# import required packages
import os

import sys
sys.path.append('..')
sys.path.append('../ark')

import numpy as np
import pandas as pd
import xarray as xr

from ark.flowsom import preprocess
from ark.utils import io_utils, load_utils

### Set file paths and parameters

In [2]:
# Root folder of the cohort data on the local machine.
base_dir = "/Users/alexkong/Downloads/granulomaCohort_allData"

# The image folder and the cohort CSV are both resolved relative to base_dir.
tiff_dir, all_data_path = (
    os.path.join(base_dir, leaf) for leaf in ("TIFs", "cohortDatav.csv")
)

In [3]:
# Filename suffix associated with MIBItiff inputs.
mibitiff_suffix = '-MassCorrected-Filtered.tiff'

# Input-format switch: the FOV-listing and image-loading cells branch on this flag.
MIBItiff = False

In [4]:
# either get all fovs in the folder...
if MIBItiff:
    # The suffix is defined as `mibitiff_suffix` (all lowercase); Python names are
    # case-sensitive, so any other spelling raises NameError when this branch runs.
    fovs = io_utils.list_files(tiff_dir, substrs=mibitiff_suffix)
else:
    fovs = io_utils.list_folders(tiff_dir)

# ... or optionally, select a specific set of fovs manually
# fovs = ["fov1", "fov2"]

In [5]:
# Channels requested from the image loader (passed as `channels=` when loading).
chan_list = [
    "CD45", "SMA", "Vimentin", "CD31",
    "Keratin-pan", "E-cadherin", "MastChyTry", "MPO",
    "CD20", "CD3", "CD14", "HLA-DR-DQ-DP",
    "Foxp3", "CD16", "CD11c", "CD206",
    "CD11b", "CD68", "CD163", "CD209",
]

### Load data

In [6]:
# Load the selected channels for every FOV, using the loader that matches the
# input format chosen by the MIBItiff flag.
img_xr = (
    load_utils.load_imgs_from_mibitiff(tiff_dir, mibitiff_files=fovs, channels=chan_list)
    if MIBItiff
    else load_utils.load_imgs_from_tree(
        tiff_dir, img_sub_folder="TIFsNoAgg", fovs=fovs, channels=chan_list
    )
)

['CD45', 'SMA', 'Vimentin', 'CD31', 'Keratin-pan', 'E-cadherin', 'MastChyTry', 'MPO', 'CD20', 'CD3', 'CD14', 'HLA-DR-DQ-DP', 'Foxp3', 'CD16', 'CD11c', 'CD206', 'CD11b', 'CD68', 'CD163', 'CD209']
['CD14.tif', 'CD209.tif', 'CD16.tif', 'E-cadherin.tif', 'Vimentin.tif', 'HLA-DR-DQ-DP.tif', 'MastChyTry.tif', 'MPO.tif', 'CD3.tif', 'SMA.tif', 'CD11c.tif', 'CD68.tif', 'CD11b.tif', 'CD45.tif', 'CD20.tif', 'Foxp3.tif', 'CD36.tif', 'CD163.tif', 'Keratin-pan.tif', 'CD206.tif', 'CD31.tif']


In [7]:
# Read the CSV at all_data_path into a DataFrame.
all_data = pd.read_csv(all_data_path)

In [8]:
# Display the channel labels carried by the loaded image array.
# NOTE(review): the recorded output of this cell lists 'CD36', does not list 'CD31',
# and is ordered differently from chan_list -- confirm the loader labels channels correctly.
img_xr.coords['channels'].values

array(['CD45', 'SMA', 'Vimentin', 'CD206', 'CD163', 'E-cadherin',
       'MastChyTry', 'MPO', 'CD20', 'CD3', 'CD14', 'HLA-DR-DQ-DP',
       'Foxp3', 'CD16', 'CD11c', 'Keratin-pan', 'CD11b', 'CD68', 'CD36',
       'CD209'], dtype='<U12')

In [9]:
# Preview the first 10 rows of the table read from all_data_path.
all_data.head(10)

Unnamed: 0,SampleID,cellLabelInImage,cellSize,C,Na,Si,HH3,Vimentin,SMA,Background,...,MPO,NaKATPase,HLA.Class.1,Ta,Au,Tissue,PatientID,lineage,cell_type,cell_lin
0,6,2,169,0.911187,0.813581,0.287503,0.765556,0.786841,0.0,0.098785,...,0.0,0.706893,0.823146,0.01451,0.080456,gran_lung,30.0,endothelial,endothelial,nonimmune
1,6,3,240,0.912404,0.857459,0.287738,0.881312,0.726815,0.0,0.291964,...,0.0,0.69238,0.740648,0.068144,0.076104,gran_lung,30.0,endothelial,endothelial,nonimmune
2,6,4,40,0.890122,0.838072,0.302053,0.839182,0.795838,0.0,0.202693,...,0.0,0.564155,0.71543,0.0,0.058582,gran_lung,30.0,immune,CD16_CD14_Mono,myeloid
3,6,5,66,0.900242,0.861314,0.2376,0.797691,0.509005,0.0,0.241783,...,0.0,0.403058,0.738303,0.036695,0.06988,gran_lung,30.0,immune,CD8_T,lymphocyte
4,6,6,386,0.89171,0.880452,0.249988,0.910139,0.544552,0.0,0.283618,...,0.0,0.604402,0.688543,0.066717,0.087015,gran_lung,30.0,immune,CD4_T,lymphocyte
5,6,7,120,0.878638,0.867499,0.293679,0.897407,0.610435,0.0,0.202693,...,0.0,0.574829,0.625484,0.122214,0.107297,gran_lung,30.0,immune,CD8_T,lymphocyte
6,6,8,76,0.880438,0.882904,0.285634,0.943981,0.482122,0.0,0.458296,...,0.190363,0.577581,0.524681,0.062017,0.061433,gran_lung,30.0,immune,CD4_T,lymphocyte
7,6,9,99,0.882088,0.815346,0.240405,0.784573,0.527468,0.0,0.241783,...,0.0,0.414437,0.502245,0.024661,0.047939,gran_lung,30.0,immune,CD4_T,lymphocyte
8,6,10,96,0.864591,0.783798,0.188173,0.669566,0.0,0.0,0.248577,...,0.0,0.0,0.485491,0.0,0.071843,gran_lung,30.0,immune,imm_other,other
9,6,12,163,0.851431,0.792175,0.243957,0.740396,0.531803,0.0,0.151745,...,0.0,0.0,0.294504,0.083823,0.134935,gran_lung,30.0,immune,imm_other,other


### Preprocess the data

In [10]:
# Run the FlowSOM preprocessing step on the loaded images with blur_factor=2.
# NOTE(review): the recorded run of this cell ended in
# "TypeError: verify_in_list() takes 0 positional arguments but 2 were given", and the
# call's result is not assigned to a name -- confirm against the installed ark version.
preprocess.preprocess_flowsom(img_xr, blur_factor=2)

TypeError: verify_in_list() takes 0 positional arguments but 2 were given