In [1]:
import logging
import json
import os
import re
from pathlib import Path
from collections import Counter

import anndata
import pandas as pd
import numpy as np
import tifffile
from einops import rearrange
from skimage.measure import regionprops

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
import multiplex_imaging_pipeline.utils as utils
import multiplex_imaging_pipeline.ome as ome
import multiplex_imaging_pipeline.segmentation as seg
import multiplex_imaging_pipeline.multiplex_imaging_pipeline as mip
from multiplex_imaging_pipeline.spatial_features import DEFAULT_GATING_STRATEGY

2024-01-30 16:05:26.284866: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /diskmnt/Projects/Users/estorrs/miniconda3/envs/imaging_analysis_v2/lib/python3.10/site-packages/cv2/../../lib64:/usr/local/lib:/usr/lib64:/usr/local/lib:/usr/lib64:
2024-01-30 16:05:26.284905: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [11]:
# Case under analysis and the run names used to select its rows from the
# specimen tracking table (matched against the `omero_run_name` column).
case = 'HT413C1-Th1k4A1'
runs = ['2024-01-11_HT413C1_Th1k4A1', '2024-01-10_HT413C1_Th1k4A1']

# Per-case results directory. Spatial feature outputs go into a
# `spatial_features` subdirectory, which is created here if missing.
output_dir = f'/diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/{case}'
spatial_dir = os.path.join(output_dir, 'spatial_features')
os.makedirs(spatial_dir, exist_ok=True)

In [27]:
# Take a shallow copy of the package default so that list-level edits
# (insert/pop) do not touch the list object exported by the package.
# NOTE: the individual gate dicts are still shared with the default.
gating_strategy = list(DEFAULT_GATING_STRATEGY)
gating_strategy

[{'name': 'Epithelial',
  'strategy': [{'channel': 'Pan-Cytokeratin',
    'value': 0.05,
    'direction': 'pos'}]},
 {'name': 'Epithelial',
  'strategy': [{'channel': 'E-cadherin', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'Treg',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD4', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'FOXP3', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'CD4 T cell',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD4', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'CD8 T cell',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD8', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'B cell',
  'strategy': [{'channel': 'CD20', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'Pan-Cytokeratin', 'value': 0.05, 'direction': 'neg'},
   {'channel': 'E-cadherin', 'value': 0.05, 'direction': 'neg'}]},
 {'name': 'Macrophage - M1',
  'st

In [28]:
# Start from a fresh shallow copy of the package default every time this cell
# runs. Editing the existing `gating_strategy` in place is not idempotent:
# re-executing the cell would insert the gates a second time and pop a
# different entry.
gating_strategy = list(DEFAULT_GATING_STRATEGY)

# Gates are added by list position.
# NOTE(review): list order presumably determines gating priority - confirm
# against the pipeline's cell typing code.
gating_strategy.insert(
    2,
    {
        'name': 'Hepatocyte',
        'strategy': [{'channel': 'Hep-Par-1', 'value': .05, 'direction': 'pos'}],
    },
)
gating_strategy.insert(
    9,
    {
        'name': 'Immune',
        'strategy': [{'channel': 'HLA-DR', 'value': .05, 'direction': 'pos'}],
    },
)

# Index 6 holds the 'B cell' gate once 'Hepatocyte' has been inserted at 2.
# Fail loudly if the package default layout changes, rather than silently
# dropping a different gate.
removed_gate = gating_strategy.pop(6)
assert removed_gate['name'] == 'B cell', (
    f"expected to remove the 'B cell' gate at index 6, got {removed_gate['name']!r}; "
    "DEFAULT_GATING_STRATEGY layout has changed"
)
gating_strategy

[{'name': 'Epithelial',
  'strategy': [{'channel': 'Pan-Cytokeratin',
    'value': 0.05,
    'direction': 'pos'}]},
 {'name': 'Epithelial',
  'strategy': [{'channel': 'E-cadherin', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'Hepatocyte',
  'strategy': [{'channel': 'Hep-Par-1', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'Treg',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD4', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'FOXP3', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'CD4 T cell',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD4', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'CD8 T cell',
  'strategy': [{'channel': 'CD3e', 'value': 0.05, 'direction': 'pos'},
   {'channel': 'CD8', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'Macrophage - M1',
  'strategy': [{'channel': 'CD68', 'value': 0.05, 'direction': 'pos'}]},
 {'name': 'Macrophage - M2',
  'strategy': [{'channel': 'C

In [21]:
# Load the specimen tracking table and keep only the rows whose
# `omero_run_name` is one of the runs selected for this case.
specimen_tracking = pd.read_csv('/diskmnt/Projects/Users/estorrs/sandbox/specimen_tracking.tsv', sep='\t')
metadata = specimen_tracking[specimen_tracking['omero_run_name'].isin(runs)]

# A run name with no rows would otherwise go unnoticed: the downstream loops
# would just process fewer specimens.
missing_runs = set(runs) - set(metadata['omero_run_name'])
assert not missing_runs, f'runs not found in specimen tracking table: {sorted(missing_runs)}'
metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
6,20240111_Human_mCRC_serial_sectrion_HT413C1_Th...,2024-01-11_HT413C1_Th1k4A1,20240111,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U10,700008000,/diskmnt/primary/CODEX/HTAN/20240111_Human_mCR...
7,20240111_Human_mCRC_serial_sectrion_HT413C1_Th...,2024-01-11_HT413C1_Th1k4A1,20240111,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U26,110002000020009000,/diskmnt/primary/CODEX/HTAN/20240111_Human_mCR...
8,20240111_Human_mCRC_serial_sectrion_HT413C1_Th...,2024-01-11_HT413C1_Th1k4A1,20240111,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U37,240003300010009000,/diskmnt/primary/CODEX/HTAN/20240111_Human_mCR...
9,20240111_Human_mCRC_serial_sectrion_HT413C1_Th...,2024-01-11_HT413C1_Th1k4A1,20240111,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U42,360004500010009000,/diskmnt/primary/CODEX/HTAN/20240111_Human_mCR...
10,20240110_Human_mCRC_Serial_section_HT413C1_Th1...,2024-01-10_HT413C1_Th1k4A1,20240110,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U3,900010008000,/diskmnt/primary/CODEX/HTAN/20240110_Human_mCR...
11,20240110_Human_mCRC_Serial_section_HT413C1_Th1...,2024-01-10_HT413C1_Th1k4A1,20240110,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U20,120002200008000,/diskmnt/primary/CODEX/HTAN/20240110_Human_mCR...
12,20240110_Human_mCRC_Serial_section_HT413C1_Th1...,2024-01-10_HT413C1_Th1k4A1,20240110,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U32,240003400008000,/diskmnt/primary/CODEX/HTAN/20240110_Human_mCR...
13,20240110_Human_mCRC_Serial_section_HT413C1_Th1...,2024-01-10_HT413C1_Th1k4A1,20240110,HT413C1_Th1k4A1,HT413C1_Th1k4A1_U41,370004600010009000,/diskmnt/primary/CODEX/HTAN/20240110_Human_mCR...


In [22]:
# Verify that every channel name stored in each OME-TIFF has an entry in the
# package channel mapping, then print the mapped names per file.
mapping = utils.R_CHANNEL_MAPPING
for fp in metadata['filepath']:
    channels = utils.get_ome_tiff_channels(fp)

    mapped_names = []
    for c in channels:
        # Stop at the first unmapped channel and name it in the message.
        assert c in mapping, f'channel {c} is not in channel mapping, to ensure all channel names match consider adding {c} to channel mapping'
        mapped_names.append(mapping[c])

    print(fp, mapped_names)

/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U10__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2', 'E-cadherin', 'MUC2', 'CD11b', 'SOX9', 'CD20', 'FOXP3', 'P21', 'CK7', 'CD68', 'P16', 'CD4', 'PAI1', 'CD3e', 'CK8/18']
/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U26__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2', 'E-cadherin', 'MUC2', 'CD11b', 'SOX9', 'CD20', 'FOXP3', 'P21', 'CK7', 'CD68', 'P16', 'CD4', 'PAI1', 'CD3e', 'CK8/18']
/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U37__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2',

In [23]:
# Segmentation masks found under the case output directory.
# NOTE(review): the trailing `$` suggests `listfiles` treats the pattern as a
# regex - confirm against utils.listfiles.
fps = sorted(utils.listfiles(output_dir, 'cell_segmentation.tif$'))

# specimen name -> OME-TIFF path, taken from the tracking table.
specimen_to_ome = dict(zip(metadata['specimen'], metadata['filepath']))

# specimen name -> segmentation mask path; the specimen name is the mask file
# name with the `_cell_segmentation.tif` part removed.
specimen_to_seg = {
    seg_fp.rsplit('/', 1)[-1].replace('_cell_segmentation.tif', ''): seg_fp
    for seg_fp in fps
}

# Specimens that have an OME-TIFF but no segmentation mask (empty set = none missing).
specimen_to_ome.keys() - specimen_to_seg.keys()

set()

In [24]:
# Read the per-file channel thresholds. The context manager closes the file
# handle, which the bare `json.load(open(...))` form left open.
with open(os.path.join(output_dir, 'channel_thresholds.json')) as fh:
    channel_thresholds_raw = json.load(fh)

# Re-key by the text after the last ' | ' in each key (the OME-TIFF file
# name, judging by the keys displayed below).
channel_thresholds = {k.split(' | ')[-1]: v for k, v in channel_thresholds_raw.items()}

# File names are expected to be unique; if two keys collapsed onto the same
# file name, one set of thresholds would silently overwrite the other.
assert len(channel_thresholds) == len(channel_thresholds_raw), \
    'file names in channel_thresholds.json are not unique after stripping the key prefix'
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

(dict_keys(['HT413C1_Th1k4A1_U20__20240110.ome.tiff', 'HT413C1_Th1k4A1_U32__20240110.ome.tiff', 'HT413C1_Th1k4A1_U3__20240110.ome.tiff', 'HT413C1_Th1k4A1_U41__20240110.ome.tiff', 'HT413C1_Th1k4A1_U10__20240111.ome.tiff', 'HT413C1_Th1k4A1_U26__20240111.ome.tiff', 'HT413C1_Th1k4A1_U37__20240111.ome.tiff', 'HT413C1_Th1k4A1_U42__20240111.ome.tiff']),
 [0.0,
  0.0,
  19.0,
  99.0,
  10.0,
  19.0,
  99.0,
  0.0,
  40.0,
  0.0,
  0.0,
  10.0,
  0.0,
  20.0,
  0.0,
  10.0,
  0.0,
  0.0,
  100.0,
  0.0,
  149.0,
  60.0,
  0.0,
  29.0,
  0.0,
  19.0,
  0.0])

In [None]:
# For every specimen: pair its channel names with the saved thresholds, run
# spatial feature extraction, and write the results as both .h5ad and
# tab-separated text into `spatial_dir`.
for specimen, ome_fp in specimen_to_ome.items():
    print(specimen)
    seg_fp = specimen_to_seg[specimen]

    # Thresholds are keyed by the channel names exactly as returned by
    # `get_ome_tiff_channels` (they are not passed through R_CHANNEL_MAPPING).
    channels = utils.get_ome_tiff_channels(ome_fp)
    threshold_values = channel_thresholds[os.path.basename(ome_fp)]

    # zip() stops at the shorter input, so a length mismatch would silently
    # drop channels or thresholds instead of raising.
    assert len(channels) == len(threshold_values), (
        f'{specimen}: {len(channels)} channels but {len(threshold_values)} thresholds'
    )
    thresholds = dict(zip(channels, threshold_values))

    df, a = mip.get_spatial_features(seg_fp, ome_fp, thresholds=thresholds, gating_strategy=gating_strategy)
    a.write_h5ad(os.path.join(spatial_dir, f'{specimen}_spatial_features.h5ad'))
    df.to_csv(os.path.join(spatial_dir, f'{specimen}_spatial_features.txt'), sep='\t', index=False)

2024-01-30 16:46:37,766 - extracting /diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U10__20240111.ome.tiff


HT413C1_Th1k4A1_U10


2024-01-30 16:46:44,613 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U10_cell_segmentation.tif
2024-01-30 16:46:44,949 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:46:48,801 - num cells: 58765
2024-01-30 16:47:23,619 - cell typing key is: fraction
2024-01-30 16:47:23,620 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadher

HT413C1_Th1k4A1_U26


2024-01-30 16:47:35,938 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U26_cell_segmentation.tif
2024-01-30 16:47:36,341 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:47:40,519 - num cells: 63966
2024-01-30 16:48:17,805 - cell typing key is: fraction
2024-01-30 16:48:17,808 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadher

HT413C1_Th1k4A1_U37


2024-01-30 16:48:32,935 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U37_cell_segmentation.tif
2024-01-30 16:48:33,357 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:48:37,295 - num cells: 54981
2024-01-30 16:49:09,831 - cell typing key is: fraction
2024-01-30 16:49:09,833 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadher

HT413C1_Th1k4A1_U42


2024-01-30 16:49:22,835 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U42_cell_segmentation.tif
2024-01-30 16:49:23,282 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:49:27,171 - num cells: 52184
2024-01-30 16:49:57,661 - cell typing key is: fraction
2024-01-30 16:49:57,663 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadher

HT413C1_Th1k4A1_U3


2024-01-30 16:50:09,908 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U3_cell_segmentation.tif
2024-01-30 16:50:10,311 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:50:15,349 - num cells: 70234
2024-01-30 16:50:56,413 - cell typing key is: fraction
2024-01-30 16:50:56,417 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadheri

HT413C1_Th1k4A1_U20


2024-01-30 16:51:11,089 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U20_cell_segmentation.tif
2024-01-30 16:51:11,517 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:51:16,300 - num cells: 68068
2024-01-30 16:51:55,697 - cell typing key is: fraction
2024-01-30 16:51:55,701 - gating cells with the following markers: ['CCL2', 'CD11b', 'CD20', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadher

HT413C1_Th1k4A1_U32


2024-01-30 16:52:10,991 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/serial_sections/HT413C1-Th1k4A1/segmentation/HT413C1_Th1k4A1_U32_cell_segmentation.tif
2024-01-30 16:52:11,515 - thresholds detected: [('DAPI', 0.0), ('Podoplanin', 0.0), ('PanCytokeratin', 19.0), ('HLA-DR', 99.0), ('CD8', 10.0), ('SMA (D)', 19.0), ('CD45', 99.0), ('Ki67', 0.0), ('Hep-Par-1 (D)', 40.0), ('GLUT1 (D)', 0.0), ('CK14', 0.0), ('CD31', 10.0), ('CCL2', 0.0), ('E-cadherin', 20.0), ('MUC2', 0.0), ('CD11b', 10.0), ('SOX9', 0.0), ('CD20 (D)', 0.0), ('FoxP3', 100.0), ('P21 (D)', 0.0), ('CK7', 149.0), ('CD68 (D)', 60.0), ('P16 (Dnew)', 0.0), ('CD4 (D)', 29.0), ('PAI1 (D)', 0.0), ('CD3e (D)', 19.0), ('CK8/18', 0.0)]
2024-01-30 16:52:16,211 - num cells: 60658


In [30]:
1

1