In [24]:
import logging
import json
import os
import re
from pathlib import Path
from collections import Counter

import anndata
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import tifffile
from einops import rearrange
from skimage.measure import regionprops

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
import multiplex_imaging_pipeline.utils as utils
import multiplex_imaging_pipeline.ome as ome
import multiplex_imaging_pipeline.segmentation as seg
import multiplex_imaging_pipeline.multiplex_imaging_pipeline as mip
from multiplex_imaging_pipeline.spatial_features import DEFAULT_GATING_STRATEGY

2024-02-14 08:28:57.892569: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /diskmnt/Projects/Users/estorrs/miniconda3/envs/imaging_analysis_v2/lib/python3.10/site-packages/cv2/../../lib64:/usr/local/lib:/usr/lib64:/usr/local/lib:/usr/lib64:
2024-02-14 08:28:57.892608: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [63]:
# Root results directory; each case gets its own sub-directory (output_dir) underneath it.
parent_dir = '/diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom'
# Tab-separated specimen tracking sheet that every per-case setup cell reads and filters.
specimen_metadata_fp = '/diskmnt/Projects/Users/estorrs/sandbox/specimen_tracking.tsv'

## Define inputs

In [64]:
def qc_channel_names():
    """Check that every channel of the current case is in the channel mapping.

    Iterates over the OME-TIFF paths in the notebook-level
    ``metadata['filepath']`` column, reads each file's channel names with
    ``utils.get_ome_tiff_channels`` and asserts that each name is a key of
    ``utils.R_CHANNEL_MAPPING``. For every file that passes, the path and its
    mapped channel names are printed.

    Raises:
        AssertionError: at the first channel that is missing from the mapping.
    """
    known = utils.R_CHANNEL_MAPPING
    for ome_path in metadata['filepath']:
        names = utils.get_ome_tiff_channels(ome_path)
        for name in names:
            assert name in known, f'channel {name} is not in channel mapping, to ensure all channel names match consider adding {name} to channel mapping. If you do not it may impact cell gating.'

        # Show the file together with the names its channels map to.
        print(ome_path, [known[name] for name in names])
        
def get_channel_thresholds(directory=None):
    """Load the per-file channel thresholds saved for a case.

    Reads ``channel_thresholds.json`` and re-keys every entry by the text
    after the last ``' | '`` in its original key.

    Args:
        directory: Folder containing ``channel_thresholds.json``. Defaults to
            the notebook-level ``output_dir`` so existing zero-argument calls
            behave as before.

    Returns:
        dict mapping each shortened key to that entry's value from the JSON.

    Raises:
        ValueError: if two entries shorten to the same key; one of them would
            otherwise be silently overwritten.
    """
    if directory is None:
        directory = output_dir

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(directory, 'channel_thresholds.json')) as fh:
        raw_thresholds = json.load(fh)

    channel_thresholds = {}
    for key, value in raw_thresholds.items():
        short_key = key.split(' | ')[-1]
        # fps should be unique -- enforce it instead of dropping an entry.
        if short_key in channel_thresholds:
            raise ValueError(
                f'duplicate channel threshold entry for {short_key!r} '
                f'in {os.path.join(directory, "channel_thresholds.json")}'
            )
        channel_thresholds[short_key] = value
    return channel_thresholds

###### HT413C1-Th1k4A1

In [104]:
# NOTE: every case section rebinds case / runs / metadata / output_dir / spatial_dir.
# Run exactly one case section before moving on to the gating cells.
case = 'HT413C1-Th1k4A1'
runs = [
    '2024-01-11_HT413C1_Th1k4A1',
    '2024-01-10_HT413C1_Th1k4A1'
]

# Keep only the specimen rows whose omero_run_name is one of this case's runs.
metadata = pd.read_csv(specimen_metadata_fp, sep='\t')
metadata = metadata[metadata['omero_run_name'].isin(runs)]

# Per-case results folder; the spatial-features sub-directory is created on demand.
output_dir = os.path.join(parent_dir, case)
spatial_dir = os.path.join(output_dir, 'spatial_features')
Path(spatial_dir).mkdir(parents=True, exist_ok=True)

In [105]:
# Fail fast if any channel in this case's OME-TIFFs is missing from the channel mapping.
qc_channel_names()

/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U10__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2', 'E-cadherin', 'MUC2', 'CD11b', 'SOX9', 'CD20', 'FOXP3', 'P21', 'CK7', 'CD68', 'P16', 'CD4', 'PAI1', 'CD3e', 'CK8/18']
/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U26__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2', 'E-cadherin', 'MUC2', 'CD11b', 'SOX9', 'CD20', 'FOXP3', 'P21', 'CK7', 'CD68', 'P16', 'CD4', 'PAI1', 'CD3e', 'CK8/18']
/diskmnt/primary/CODEX/HTAN/20240111_Human_mCRC_serial_sectrion_HT413C1_Th1k4A1_Slide_8/HT413C1_Th1k4A1_U37__20240111.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Hep-Par-1', 'GLUT1', 'CK14', 'CD31', 'CCL2',

In [106]:
# Load this case's thresholds, then preview the per-file keys and the first file's values.
channel_thresholds = get_channel_thresholds()
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

(dict_keys(['HT413C1_Th1k4A1_U20__20240110.ome.tiff', 'HT413C1_Th1k4A1_U32__20240110.ome.tiff', 'HT413C1_Th1k4A1_U3__20240110.ome.tiff', 'HT413C1_Th1k4A1_U41__20240110.ome.tiff', 'HT413C1_Th1k4A1_U10__20240111.ome.tiff', 'HT413C1_Th1k4A1_U26__20240111.ome.tiff', 'HT413C1_Th1k4A1_U37__20240111.ome.tiff', 'HT413C1_Th1k4A1_U42__20240111.ome.tiff']),
 {'DAPI': 0.0,
  'Podoplanin': 0.0,
  'PanCytokeratin': 19.0,
  'HLA-DR': 99.0,
  'CD8': 10.0,
  'SMA (D)': 19.0,
  'CD45': 99.0,
  'Ki67': 0.0,
  'Hep-Par-1 (D)': 40.0,
  'GLUT1 (D)': 0.0,
  'CK14': 0.0,
  'CD31': 10.0,
  'CCL2': 0.0,
  'E-cadherin': 20.0,
  'MUC2': 0.0,
  'CD11b': 10.0,
  'SOX9': 0.0,
  'CD20 (D)': 0.0,
  'FoxP3': 100.0,
  'P21 (D)': 0.0,
  'CK7': 149.0,
  'CD68 (D)': 60.0,
  'P16 (Dnew)': 0.0,
  'CD4 (D)': 29.0,
  'PAI1 (D)': 0.0,
  'CD3e (D)': 19.0,
  'CK8/18': 0.0})

In [107]:
val = .05

# One row per gate, in list order: (cell type, [(channel, direction), ...]).
# A cell type may appear on several rows (e.g. 'Immune'); each row stays its own entry.
# NOTE(review): list order is preserved exactly -- presumably it matters to
# mip.get_spatial_features; confirm before reordering.
# NOTE(review): CD163 is not among the channels printed by qc_channel_names() for this
# case -- confirm the pipeline ignores gates on absent channels.
_gates = [
    ('Hepatocyte', [('Hep-Par-1', 'pos')]),
    ('Epithelial', [('Pan-Cytokeratin', 'pos')]),
    ('Epithelial', [('E-cadherin', 'pos')]),
    ('Treg', [('CD3e', 'pos'), ('FOXP3', 'pos')]),
    ('CD4 T cell', [('CD3e', 'pos'), ('CD4', 'pos')]),
    ('CD8 T cell', [('CD3e', 'pos'), ('CD8', 'pos')]),
    ('T cell', [('CD3e', 'pos')]),
    ('Dendritic', [('CD11b', 'pos'), ('CD3e', 'neg')]),
    ('B cell', [('CD20', 'pos'), ('Pan-Cytokeratin', 'neg'), ('E-cadherin', 'neg')]),
    ('Endothelial', [('CD31', 'pos')]),
    ('Macrophage', [('CD68', 'pos')]),
    ('Macrophage - M2', [('CD68', 'pos'), ('CD163', 'pos')]),
    ('Immune', [('CD45', 'pos')]),
    ('Immune', [('CD11b', 'pos')]),
    ('Immune', [('CD4', 'pos')]),
    ('Immune', [('CD163', 'pos')]),
    ('Immune', [('HLA-DR', 'pos')]),
    ('Fibroblast', [('SMA', 'pos')]),
    ('Fibroblast', [('Podoplanin', 'pos')]),
]

# Expand the table into the list-of-dicts layout passed to mip.get_spatial_features;
# every marker uses the same cut-off `val`.
gating_strategy = [
    {'name': name,
     'strategy': [{'channel': channel, 'value': val, 'direction': direction}
                  for channel, direction in markers]}
    for name, markers in _gates
]

###### HT397B1

In [79]:
# NOTE: every case section rebinds case / runs / metadata / output_dir / spatial_dir.
# Run exactly one case section before moving on to the gating cells.
case = 'HT397B1'
runs = [
    '2023-03-15_HT397B1',
    '2023-03-16_HT397B1',
    '2023-04-07_HT397B1',
    '2023-04-13_HT397B1'
]

# Keep only the specimen rows whose omero_run_name is one of this case's runs.
metadata = pd.read_csv(specimen_metadata_fp, sep='\t')
metadata = metadata[metadata['omero_run_name'].isin(runs)]

# Per-case results folder; the spatial-features sub-directory is created on demand.
output_dir = os.path.join(parent_dir, case)
spatial_dir = os.path.join(output_dir, 'spatial_features')
Path(spatial_dir).mkdir(parents=True, exist_ok=True)

metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
165,HT397B1-S1H3A1-U22_Scan1,2023-04-13_HT397B1,20230413,HT397B1,HT397B1-S1H3A1-U22,,/diskmnt/primary/CODEX/HTAN/041223_BRCA_HT397B...
166,HT397B1-U2_Scan1,2023-03-15_HT397B1,20230315,HT397B1,HT397B1-S1H3A1-U2,,/diskmnt/primary/CODEX/HTAN/031623_BRCA_HT397B...
167,HT397B1-S1H3A1U31_Scan2,2023-04-07_HT397B1,20230407,HT397B1,HT397B1-S1H3A1-U31,,/diskmnt/primary/CODEX/HTAN/040623_BRCA_HT397B...
168,HT397B1-U12_Scan1,2023-03-16_HT397B1,20230316,HT397B1,HT397B1-S1H3A1-U12,,/diskmnt/primary/CODEX/HTAN/03172023_BRCA_HT39...


In [81]:
# Load this case's thresholds, then preview the per-file keys and the first file's values.
channel_thresholds = get_channel_thresholds()
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

(dict_keys(['HT397B1-S1H3A1-U2__20230315.ome.tiff', 'HT397B1-S1H3A1-U12__20230316.ome.tiff', 'HT397B1-S1H3A1-U31__20230407.ome.tiff', 'HT397B1-S1H3A1-U22__20230413.ome.tiff']),
 {'DAPI': 0.0,
  'CD8': 20.0,
  'Her2 (D)': 20.0,
  'GATA3 (D)': 29.0,
  'cKIT-(D)': 20.0,
  'Pan-Cytokeratin': 30.0,
  'GLUT1-(D)': 0.0,
  'Podoplanin': 19.0,
  'CD68 (D)': 0.0,
  'HLA-DR': 19.0,
  'Keratin 14': 19.0,
  'FoxP3': 20.0,
  'MGP-(D)': 20.0,
  'CD20-(D)': 20.0,
  'SMA-(D)': 15.0,
  'Ki67': 15.0,
  'Vimentin-(D)': 19.0,
  'PR-(D)': 49.0,
  'Bap1 (D)': 0.0,
  'CD45 (D)': 20.0,
  'ER': 79.0,
  'CD31': 14.0,
  'COX6c (D)': 0.0,
  'CK19': 14.0,
  'PLAT/tPA (D)': 0.0})

In [82]:
# Fail fast if any channel in this case's OME-TIFFs is missing from the channel mapping.
qc_channel_names()

/diskmnt/primary/CODEX/HTAN/041223_BRCA_HT397B1-S1H3A1-U22/HT397B1-S1H3A1-U22__20230413.ome.tiff ['DAPI', 'CD8', 'Her2', 'GATA3', 'cKit', 'Pan-Cytokeratin', 'GLUT1', 'Podoplanin', 'CD68', 'HLA-DR', 'CK14', 'FOXP3', 'MGP', 'E-cadherin', 'SMA', 'CD3e', 'Ki67', 'Vimentin', 'CD11b', 'PR', 'Bap1', 'CD45', 'ER', 'CD31', 'COX6c', 'CK19', 'PLAT/tPA']
/diskmnt/primary/CODEX/HTAN/031623_BRCA_HT397B1-U2/HT397B1-S1H3A1-U2__20230315.ome.tiff ['DAPI', 'CD8', 'Her2', 'GATA3', 'cKit', 'Pan-Cytokeratin', 'GLUT1', 'Podoplanin', 'CD68', 'HLA-DR', 'CK14', 'FOXP3', 'MGP', 'CD20', 'SMA', 'Ki67', 'Vimentin', 'PR', 'Bap1', 'CD45', 'ER', 'CD31', 'COX6c', 'CK19', 'PLAT/tPA']
/diskmnt/primary/CODEX/HTAN/040623_BRCA_HT397B1-U31/HT397B1-S1H3A1-U31__20230407.ome.tiff ['DAPI', 'CD8', 'Her2', 'GATA3', 'cKit', 'Pan-Cytokeratin', 'GLUT1', 'Podoplanin', 'CD68', 'HLA-DR', 'CK14', 'FOXP3', 'MGP', 'SMA', 'CD3e', 'Ki67', 'Vimentin', 'PR', 'Bap1', 'CD45', 'ER', 'CD31', 'COX6c', 'CK19', 'PLAT/tPA']
/diskmnt/primary/CODEX/HTAN

In [94]:
val = .05

# One row per gate, in list order: (cell type, [(channel, direction), ...]).
# Differences from the other cases' panels, kept from the original cell:
#   * Treg / CD4 T cell / CD8 T cell do NOT require CD3e -- there is no CD3 in some
#     of these sections.
#   * The generic 'T cell' (CD3e+) gate is left out for the same reason.
#   * The 'Dendritic' (CD11b+) gate is left out -- not all sections have CD11b.
# NOTE(review): list order is preserved exactly -- presumably it matters to
# mip.get_spatial_features; confirm before reordering.
_gates = [
    ('Epithelial', [('Pan-Cytokeratin', 'pos')]),
    ('Epithelial', [('E-cadherin', 'pos')]),
    ('Treg', [('FOXP3', 'pos')]),
    ('CD4 T cell', [('CD4', 'pos')]),
    ('CD8 T cell', [('CD8', 'pos')]),
    ('Endothelial', [('CD31', 'pos')]),
    ('B cell', [('CD20', 'pos'), ('Pan-Cytokeratin', 'neg'), ('E-cadherin', 'neg')]),
    ('Macrophage', [('CD68', 'pos')]),
    ('Macrophage - M2', [('CD68', 'pos'), ('CD163', 'pos')]),
    ('Immune', [('CD45', 'pos')]),
    ('Immune', [('CD11b', 'pos')]),
    ('Immune', [('CD4', 'pos')]),
    ('Immune', [('CD163', 'pos')]),
    ('Immune', [('HLA-DR', 'pos')]),
    ('Fibroblast', [('SMA', 'pos')]),
    ('Fibroblast', [('Podoplanin', 'pos')]),
]

# Expand the table into the list-of-dicts layout passed to mip.get_spatial_features;
# every marker uses the same cut-off `val`.
gating_strategy = [
    {'name': name,
     'strategy': [{'channel': channel, 'value': val, 'direction': direction}
                  for channel, direction in markers]}
    for name, markers in _gates
]

###### S18-9906

In [117]:
# NOTE: every case section rebinds case / runs / metadata / output_dir / spatial_dir.
# Run exactly one case section before moving on to the gating cells.
case = 'S18-9906'
runs = [
    '2023-10-06_S18-9906'
]

# Keep only the specimen rows whose omero_run_name is one of this case's runs.
metadata = pd.read_csv(specimen_metadata_fp, sep='\t')
metadata = metadata[metadata['omero_run_name'].isin(runs)]

# Per-case results folder; the spatial-features sub-directory is created on demand.
output_dir = os.path.join(parent_dir, case)
spatial_dir = os.path.join(output_dir, 'spatial_features')
Path(spatial_dir).mkdir(parents=True, exist_ok=True)

metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
64,20231006_Prostate_Serial_S18-9906_slide_3,2023-10-06_S18-9906,20231006,S18-9906,S18-9906-U25,5000066960100018000,/diskmnt/primary/CODEX/HTAN/20231006_Prostate_...
65,20231006_Prostate_Serial_S18-9906_slide_3,2023-10-06_S18-9906,20231006,S18-9906,S18-9906-U18,3300050000200019000,/diskmnt/primary/CODEX/HTAN/20231006_Prostate_...
66,20231006_Prostate_Serial_S18-9906_slide_3,2023-10-06_S18-9906,20231006,S18-9906,S18-9906-U10,1600033000300020000,/diskmnt/primary/CODEX/HTAN/20231006_Prostate_...
67,20231006_Prostate_Serial_S18-9906_slide_3,2023-10-06_S18-9906,20231006,S18-9906,S18-9906-U3,16000200019000,/diskmnt/primary/CODEX/HTAN/20231006_Prostate_...


In [118]:
# Load this case's thresholds, then preview the per-file keys and the first file's values.
channel_thresholds = get_channel_thresholds()
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

(dict_keys(['S18-9906-U10__20231006.ome.tiff', 'S18-9906-U18__20231006.ome.tiff', 'S18-9906-U25__20231006.ome.tiff', 'S18-9906-U3__20231006.ome.tiff']),
 {'DAPI': 0.0,
  'CK14': 25.0,
  'LYVE1': 0.0,
  'CD4': 19.0,
  'CK5': 0.0,
  'STEAP4': 0.0,
  'CD68': 100.0,
  'E-Cadherin': 20.0,
  'SMA (D)': 49.0,
  'CD45(D)': 20.0,
  'p63': 0.0,
  'Pan-Cytokeratin': 20.0,
  'HLA-DR': 100.0,
  'Podoplanin': 20.0,
  'Vimentin': 20.0,
  'CD3e': 19.0,
  'CD8': 10.0,
  'CK7': 50.0,
  'CD31 (D)': 10.0,
  'CD20 (D)': 0.0,
  'Foxp3': 15.0,
  'Ki67': 10.0,
  'AR': 0.0,
  'AMACR': 0.0})

In [119]:
# Fail fast if any channel in this case's OME-TIFFs is missing from the channel mapping.
qc_channel_names()

/diskmnt/primary/CODEX/HTAN/20231006_Prostate_Serial_S18-9906_slide_3/S18-9906-U25__20231006.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD20', 'FOXP3', 'Ki67', 'AR', 'AMACR']
/diskmnt/primary/CODEX/HTAN/20231006_Prostate_Serial_S18-9906_slide_3/S18-9906-U18__20231006.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD20', 'FOXP3', 'Ki67', 'AR', 'AMACR']
/diskmnt/primary/CODEX/HTAN/20231006_Prostate_Serial_S18-9906_slide_3/S18-9906-U10__20231006.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD20', 'FOXP3', 'Ki67', 'AR', 'AMACR']
/diskmnt/primary/CODEX/HTAN/202310

In [120]:
val = .05

# One row per gate, in list order: (cell type, [(channel, direction), ...]).
# A cell type may appear on several rows (e.g. 'Immune'); each row stays its own entry.
# NOTE(review): list order is preserved exactly -- presumably it matters to
# mip.get_spatial_features; confirm before reordering.
_gates = [
    ('Epithelial', [('Pan-Cytokeratin', 'pos')]),
    ('Epithelial', [('E-cadherin', 'pos')]),
    ('Treg', [('CD3e', 'pos'), ('FOXP3', 'pos')]),
    ('CD4 T cell', [('CD3e', 'pos'), ('CD4', 'pos')]),
    ('CD8 T cell', [('CD3e', 'pos'), ('CD8', 'pos')]),
    ('T cell', [('CD3e', 'pos')]),
    ('Dendritic', [('CD11b', 'pos'), ('CD3e', 'neg')]),
    ('B cell', [('CD20', 'pos'), ('Pan-Cytokeratin', 'neg'), ('E-cadherin', 'neg')]),
    ('Endothelial', [('CD31', 'pos')]),
    ('Macrophage', [('CD68', 'pos')]),
    ('Macrophage - M2', [('CD68', 'pos'), ('CD163', 'pos')]),
    ('Immune', [('CD45', 'pos')]),
    ('Immune', [('CD11b', 'pos')]),
    ('Immune', [('CD4', 'pos')]),
    ('Immune', [('CD163', 'pos')]),
    ('Immune', [('HLA-DR', 'pos')]),
    ('Fibroblast', [('SMA', 'pos')]),
    ('Fibroblast', [('Podoplanin', 'pos')]),
]

# Expand the table into the list-of-dicts layout passed to mip.get_spatial_features;
# every marker uses the same cut-off `val`.
gating_strategy = [
    {'name': name,
     'strategy': [{'channel': channel, 'value': val, 'direction': direction}
                  for channel, direction in markers]}
    for name, markers in _gates
]

###### S18-25943-A7

In [131]:
# NOTE: every case section rebinds case / runs / metadata / output_dir / spatial_dir.
# Run exactly one case section before moving on to the gating cells.
case = 'S18-25943-A7'
runs = [
    '2023-11-22__S18-25943'
]

# Keep only the specimen rows whose omero_run_name is one of this case's runs.
metadata = pd.read_csv(specimen_metadata_fp, sep='\t')
metadata = metadata[metadata['omero_run_name'].isin(runs)]

# Per-case results folder; the spatial-features sub-directory is created on demand.
output_dir = os.path.join(parent_dir, case)
spatial_dir = os.path.join(output_dir, 'spatial_features')
Path(spatial_dir).mkdir(parents=True, exist_ok=True)

metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
37,20231122_Human_pancreatic_cancer_S18-25943-A7U...,2023-11-22__S18-25943,20231122,S18-25943,S18-25943-U16,5300066960200024960,/diskmnt/primary/CODEX/HTAN/20231122_Human_pan...
38,20231122_Human_pancreatic_cancer_S18-25943-A7U...,2023-11-22__S18-25943,20231122,S18-25943,S18-25943-U13,3300053000100024000,/diskmnt/primary/CODEX/HTAN/20231122_Human_pan...
39,20231122_Human_pancreatic_cancer_S18-25943-A7U...,2023-11-22__S18-25943,20231122,S18-25943,S18-25943-U9,1200033000100024960,/diskmnt/primary/CODEX/HTAN/20231122_Human_pan...
40,20231122_Human_pancreatic_cancer_S18-25943-A7U...,2023-11-22__S18-25943,20231122,S18-25943,S18-25943-U2,10000100024960,/diskmnt/primary/CODEX/HTAN/20231122_Human_pan...


In [132]:
# Load this case's thresholds, then preview the per-file keys and the first file's values.
channel_thresholds = get_channel_thresholds()
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

(dict_keys(['S18-25943-U13__20231122.ome.tiff', 'S18-25943-U16__20231122.ome.tiff', 'S18-25943-U2__20231122.ome.tiff', 'S18-25943-U9__20231122.ome.tiff']),
 {'DAPI': 0.0,
  'E-cadherin': 19.0,
  'SMA': 20.0,
  'PGC (D)': 0.0,
  'CFTR (D)': 0.0,
  'Vimentin': 20.0,
  'CD4': 20.0,
  'Podoplanin': 20.0,
  'REG3A (D)': 0.0,
  'CD68': 20.0,
  'CD8': 20.0,
  'LAMC2 (D)': 0.0,
  'CD45': 20.0,
  'Amylase (D)': 0.0,
  'INS (D)': 20.0,
  'CD11b (D)': 20.0,
  'CRP (D)': 0.0,
  'TFF1 (D)': 0.0,
  'HLA-DR': 50.0,
  'CD163': 0.0,
  'CD74 (D)': 0.0,
  'CD3e': 20.0,
  'Ki67': 0.0,
  'MUC5AC (D)': 0.0,
  'CD31 (D)': 19.0,
  'CD20 (D)': 0.0})

In [133]:
# Fail fast if any channel in this case's OME-TIFFs is missing from the channel mapping.
qc_channel_names()

/diskmnt/primary/CODEX/HTAN/20231122_Human_pancreatic_cancer_S18-25943-A7Us1_2__Us1_9__Us1_13__Us1_16/S18-25943-U16__20231122.ome.tiff ['DAPI', 'E-cadherin', 'SMA', 'PGC', 'CFTR', 'Vimentin', 'CD4', 'Podoplanin', 'REG3A', 'CD68', 'CD8', 'LAMC2', 'CD45', 'a-Amylase', 'INS', 'CD11b', 'CRP', 'TFF1', 'HLA-DR', 'CD163', 'CD74', 'CD3e', 'Ki67', 'MUC5AC', 'CD31', 'CD20']
/diskmnt/primary/CODEX/HTAN/20231122_Human_pancreatic_cancer_S18-25943-A7Us1_2__Us1_9__Us1_13__Us1_16/S18-25943-U13__20231122.ome.tiff ['DAPI', 'E-cadherin', 'SMA', 'PGC', 'CFTR', 'Vimentin', 'CD4', 'Podoplanin', 'REG3A', 'CD68', 'CD8', 'LAMC2', 'CD45', 'a-Amylase', 'INS', 'CD11b', 'CRP', 'TFF1', 'HLA-DR', 'CD163', 'CD74', 'CD3e', 'Ki67', 'MUC5AC', 'CD31', 'CD20']
/diskmnt/primary/CODEX/HTAN/20231122_Human_pancreatic_cancer_S18-25943-A7Us1_2__Us1_9__Us1_13__Us1_16/S18-25943-U9__20231122.ome.tiff ['DAPI', 'E-cadherin', 'SMA', 'PGC', 'CFTR', 'Vimentin', 'CD4', 'Podoplanin', 'REG3A', 'CD68', 'CD8', 'LAMC2', 'CD45', 'a-Amylase', 

In [134]:
val = .05

# One row per gate, in list order: (cell type, [(channel, direction), ...]).
# This panel adds an 'Islet' (INS+) gate ahead of the gates shared with the other cases.
# NOTE(review): list order is preserved exactly -- presumably it matters to
# mip.get_spatial_features; confirm before reordering.
_gates = [
    ('Islet', [('INS', 'pos')]),
    ('Epithelial', [('Pan-Cytokeratin', 'pos')]),
    ('Epithelial', [('E-cadherin', 'pos')]),
    ('Treg', [('CD3e', 'pos'), ('FOXP3', 'pos')]),
    ('CD4 T cell', [('CD3e', 'pos'), ('CD4', 'pos')]),
    ('CD8 T cell', [('CD3e', 'pos'), ('CD8', 'pos')]),
    ('T cell', [('CD3e', 'pos')]),
    ('Dendritic', [('CD11b', 'pos'), ('CD3e', 'neg')]),
    ('B cell', [('CD20', 'pos'), ('Pan-Cytokeratin', 'neg'), ('E-cadherin', 'neg')]),
    ('Endothelial', [('CD31', 'pos')]),
    ('Macrophage', [('CD68', 'pos')]),
    ('Macrophage - M2', [('CD68', 'pos'), ('CD163', 'pos')]),
    ('Immune', [('CD45', 'pos')]),
    ('Immune', [('CD11b', 'pos')]),
    ('Immune', [('CD4', 'pos')]),
    ('Immune', [('CD163', 'pos')]),
    ('Immune', [('HLA-DR', 'pos')]),
    ('Fibroblast', [('SMA', 'pos')]),
    ('Fibroblast', [('Podoplanin', 'pos')]),
]

# Expand the table into the list-of-dicts layout passed to mip.get_spatial_features;
# every marker uses the same cut-off `val`.
gating_strategy = [
    {'name': name,
     'strategy': [{'channel': channel, 'value': val, 'direction': direction}
                  for channel, direction in markers]}
    for name, markers in _gates
]

###### S18-5591-C8

In [None]:
# NOTE: every case section rebinds case / runs / metadata / output_dir / spatial_dir.
# Run exactly one case section before moving on to the gating cells.
case = 'S18-5591-C8'
runs = [
    '2023-11-16__S18-5591',
    '2023-11-18__S18-5591'
]

# Keep only the specimen rows whose omero_run_name is one of this case's runs.
metadata = pd.read_csv(specimen_metadata_fp, sep='\t')
metadata = metadata[metadata['omero_run_name'].isin(runs)]

# Per-case results folder; the spatial-features sub-directory is created on demand.
output_dir = os.path.join(parent_dir, case)
spatial_dir = os.path.join(output_dir, 'spatial_features')
Path(spatial_dir).mkdir(parents=True, exist_ok=True)

metadata

In [None]:
# Load this case's thresholds, then preview the per-file keys and the first file's values.
channel_thresholds = get_channel_thresholds()
channel_thresholds.keys(), next(iter(channel_thresholds.values()))

In [None]:
# Fail fast if any channel in this case's OME-TIFFs is missing from the channel mapping.
qc_channel_names()

In [None]:
val = .05

# One row per gate, in list order: (cell type, [(channel, direction), ...]).
# Unlike the other cases, both macrophage gates here additionally require HLA-DR+.
# NOTE(review): list order is preserved exactly -- presumably it matters to
# mip.get_spatial_features; confirm before reordering.
_gates = [
    ('Epithelial', [('Pan-Cytokeratin', 'pos')]),
    ('Epithelial', [('E-cadherin', 'pos')]),
    ('Treg', [('CD3e', 'pos'), ('FOXP3', 'pos')]),
    ('CD4 T cell', [('CD3e', 'pos'), ('CD4', 'pos')]),
    ('CD8 T cell', [('CD3e', 'pos'), ('CD8', 'pos')]),
    ('T cell', [('CD3e', 'pos')]),
    ('Dendritic', [('CD11b', 'pos'), ('CD3e', 'neg')]),
    ('B cell', [('CD20', 'pos'), ('Pan-Cytokeratin', 'neg'), ('E-cadherin', 'neg')]),
    ('Endothelial', [('CD31', 'pos')]),
    ('Macrophage', [('CD68', 'pos'), ('HLA-DR', 'pos')]),
    ('Macrophage - M2', [('CD68', 'pos'), ('CD163', 'pos'), ('HLA-DR', 'pos')]),
    ('Immune', [('CD45', 'pos')]),
    ('Immune', [('CD11b', 'pos')]),
    ('Immune', [('CD4', 'pos')]),
    ('Immune', [('CD163', 'pos')]),
    ('Immune', [('HLA-DR', 'pos')]),
    ('Fibroblast', [('SMA', 'pos')]),
    ('Fibroblast', [('Podoplanin', 'pos')]),
]

# Expand the table into the list-of-dicts layout passed to mip.get_spatial_features;
# every marker uses the same cut-off `val`.
gating_strategy = [
    {'name': name,
     'strategy': [{'channel': channel, 'value': val, 'direction': direction}
                  for channel, direction in markers]}
    for name, markers in _gates
]

## Gate cells

In [135]:
# Segmentation masks found under the current case's output_dir.
fps = sorted(utils.listfiles(output_dir, 'cell_segmentation.tif$'))

# specimen -> OME-TIFF path, taken from the filtered metadata sheet.
specimen_to_ome = dict(zip(metadata['specimen'], metadata['filepath']))

# specimen -> segmentation path; the specimen id is the file name minus its suffix.
specimen_to_seg = {
    fp.rsplit('/', 1)[-1].replace('_cell_segmentation.tif', ''): fp
    for fp in fps
}

# Specimens listed in the metadata that have no segmentation file (should be empty).
set(specimen_to_ome) - set(specimen_to_seg)

set()

In [136]:
# Every OME-TIFF of the current case must have a thresholds entry and vice versa.
# Because all case sections share the same globals, this also trips when `metadata`
# and `channel_thresholds` were left over from different cases.
ome_names = sorted(fp.split('/')[-1] for fp in specimen_to_ome.values())
threshold_names = sorted(channel_thresholds.keys())
assert ome_names == threshold_names, (
    'OME-TIFFs and channel thresholds do not match: '
    f'no thresholds for {sorted(set(ome_names) - set(threshold_names))}, '
    f'no OME-TIFF for {sorted(set(threshold_names) - set(ome_names))}'
)

In [None]:
# Gate each specimen of the current case and save both outputs of
# mip.get_spatial_features: `a` via write_h5ad (presumably an AnnData -- confirm)
# and `df` as a tab-separated table without the index.
for specimen, ome_fp in specimen_to_ome.items():
    print(specimen)

    # Thresholds are keyed by the OME-TIFF file name, not by specimen id.
    ome_name = ome_fp.split('/')[-1]
    seg_fp = specimen_to_seg[specimen]
    thresholds = channel_thresholds[ome_name]

    df, a = mip.get_spatial_features(
        seg_fp,
        ome_fp,
        thresholds=thresholds,
        gating_strategy=gating_strategy,
    )

    h5ad_fp = os.path.join(spatial_dir, f'{specimen}_spatial_features.h5ad')
    table_fp = os.path.join(spatial_dir, f'{specimen}_spatial_features.txt')
    a.write_h5ad(h5ad_fp)
    df.to_csv(table_fp, sep='\t', index=False)

2024-02-14 13:49:21,877 - extracting /diskmnt/primary/CODEX/HTAN/20231122_Human_pancreatic_cancer_S18-25943-A7Us1_2__Us1_9__Us1_13__Us1_16/S18-25943-U16__20231122.ome.tiff


S18-25943-U16


2024-02-14 13:50:22,442 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/S18-25943-A7/segmentation/S18-25943-U16_cell_segmentation.tif
2024-02-14 13:50:25,483 - thresholds detected: [('DAPI', 0.0), ('E-cadherin', 19.0), ('SMA', 20.0), ('PGC (D)', 0.0), ('CFTR (D)', 0.0), ('Vimentin', 20.0), ('CD4', 20.0), ('Podoplanin', 20.0), ('REG3A (D)', 0.0), ('CD68', 20.0), ('CD8', 20.0), ('LAMC2 (D)', 0.0), ('CD45', 20.0), ('Amylase (D)', 0.0), ('INS (D)', 20.0), ('CD11b (D)', 20.0), ('CRP (D)', 0.0), ('TFF1 (D)', 0.0), ('HLA-DR', 50.0), ('CD163', 0.0), ('CD74 (D)', 0.0), ('CD3e', 20.0), ('Ki67', 0.0), ('MUC5AC (D)', 0.0), ('CD31 (D)', 19.0), ('CD20 (D)', 0.0)]
2024-02-14 13:51:08,168 - num cells: 283360
2024-02-14 13:55:17,748 - cell typing key is: fraction
2024-02-14 13:55:17,754 - gating cells with the following markers: ['CD11b', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'E-cadherin', 'HLA-DR', 'INS', 'Podoplanin', 'SMA', 'Vimentin']
2024-02-14 13:55:18,071 -

S18-25943-U13


2024-02-14 13:56:58,293 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/S18-25943-A7/segmentation/S18-25943-U13_cell_segmentation.tif
2024-02-14 13:57:03,266 - thresholds detected: [('DAPI', 0.0), ('E-cadherin', 19.0), ('SMA', 20.0), ('PGC (D)', 0.0), ('CFTR (D)', 0.0), ('Vimentin', 20.0), ('CD4', 20.0), ('Podoplanin', 20.0), ('REG3A (D)', 0.0), ('CD68', 20.0), ('CD8', 20.0), ('LAMC2 (D)', 0.0), ('CD45', 20.0), ('Amylase (D)', 0.0), ('INS (D)', 20.0), ('CD11b (D)', 20.0), ('CRP (D)', 0.0), ('TFF1 (D)', 0.0), ('HLA-DR', 50.0), ('CD163', 0.0), ('CD74 (D)', 0.0), ('CD3e', 20.0), ('Ki67', 0.0), ('MUC5AC (D)', 0.0), ('CD31 (D)', 19.0), ('CD20 (D)', 0.0)]
2024-02-14 13:57:42,709 - num cells: 478182
2024-02-14 14:02:53,688 - cell typing key is: fraction
2024-02-14 14:02:53,694 - gating cells with the following markers: ['CD11b', 'CD31', 'CD3e', 'CD45', 'CD4', 'CD68', 'CD8', 'E-cadherin', 'HLA-DR', 'INS', 'Podoplanin', 'SMA', 'Vimentin']
2024-02-14 14:02:54,162 -

S18-25943-U9


2024-02-14 14:04:58,721 - extracting /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/S18-25943-A7/segmentation/S18-25943-U9_cell_segmentation.tif
2024-02-14 14:05:03,819 - thresholds detected: [('DAPI', 0.0), ('E-cadherin', 19.0), ('SMA', 20.0), ('PGC (D)', 0.0), ('CFTR (D)', 0.0), ('Vimentin', 20.0), ('CD4', 20.0), ('Podoplanin', 20.0), ('REG3A (D)', 0.0), ('CD68', 20.0), ('CD8', 20.0), ('LAMC2 (D)', 0.0), ('CD45', 20.0), ('Amylase (D)', 0.0), ('INS (D)', 20.0), ('CD11b (D)', 20.0), ('CRP (D)', 0.0), ('TFF1 (D)', 0.0), ('HLA-DR', 50.0), ('CD163', 0.0), ('CD74 (D)', 0.0), ('CD3e', 20.0), ('Ki67', 0.0), ('MUC5AC (D)', 0.0), ('CD31 (D)', 19.0), ('CD20 (D)', 0.0)]
2024-02-14 14:05:44,362 - num cells: 431815


In [None]:
# Collect the per-specimen feature tables written to spatial_dir by the gating loop.
# NOTE(review): this rebinds `fps`, which held the segmentation paths in the cells above.
# NOTE(review): the '.' in the pattern is unescaped and there is no '$' anchor -- confirm
# utils.listfiles cannot pick up unintended files.
fps = sorted(utils.listfiles(spatial_dir, regex=r'_spatial_features.txt'))
fps

In [None]:
# Preview the feature table for the first path in the sorted `fps` list.
df = pd.read_csv(next(iter(fps)), sep='\t')
df

In [None]:
# Distinct gate names in alphabetical order, followed by an 'Unlabeled' entry
# (presumably the label given to cells that match no gate -- confirm in mip).
cell_types = sorted({gate['name'] for gate in gating_strategy})
cell_types.append('Unlabeled')

# Three palettes concatenated so there are more colours than cell types;
# pairing stops at the last cell type.
cmap = sns.color_palette('tab20') + sns.color_palette('tab20b') + sns.color_palette('tab20c')
cell_type_to_color = dict(zip(cell_types, cmap))

In [None]:
def format_axis(ax):
    """Apply the shared styling used for every panel of the cell-type grid.

    Flips the y axis, requests equal axis scaling, removes the x and y ticks,
    and sets the title font size to 8 and the y-label font size to 6.

    Args:
        ax: The axes to restyle; it is modified in place and nothing is returned.
    """
    ax.invert_yaxis()
    ax.axis('equal')

    # Tick marks add nothing to these small panels.
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])

    # Small fonts so titles and specimen labels fit the 1x1 inch panels.
    for text, size in ((ax.title, 8), (ax.yaxis.label, 6)):
        text.set_fontsize(size)

# Grid layout: one row per specimen; first column shows all cells coloured by type,
# then one column per cell type showing only that type's cells.
nrows, ncols = len(fps), len(cell_types) + 1
# squeeze=False keeps `axs` two-dimensional even for a single specimen; without it
# plt.subplots returns a 1-D array for one row and axs[i, 0] raises IndexError.
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols, nrows), squeeze=False)
for i, fp in enumerate(fps):
    sid = fp.split('/')[-1].replace('_spatial_features.txt', '')
    df = pd.read_csv(fp, sep='\t')

    # Overview panel: every cell at (col, row), coloured by its assigned cell type.
    ax = axs[i, 0]
    ax.scatter(df['col'], df['row'], c=[cell_type_to_color[x] for x in df['cell_type']], s=.1)
    ax.set_ylabel(sid, rotation=0)
    if i == 0:
        ax.set_title('cell_type', rotation=90)
    format_axis(ax)

    # Per-type panels: only the cells labelled with that type.
    for j, ct in enumerate(cell_types):
        ax = axs[i, j + 1]
        f = df[df['cell_type']==ct]
        ax.scatter(f['col'], f['row'], s=.1)

        # Column headers are only drawn on the top row.
        if i == 0:
            ax.set_title(ct, rotation=90)
        format_axis(ax)

plt.savefig(os.path.join(spatial_dir, 'cell_types.png'))

In [None]:
# Specimens x cell types table of cell counts, one row per feature table in `fps`.
data = []
sids = []
for fp in fps:
    sid = fp.split('/')[-1].replace('_spatial_features.txt', '')
    df = pd.read_csv(fp, sep='\t')
    counts = Counter(df['cell_type'])
    # Counter yields 0 for cell types that do not occur in this specimen.
    data.append([counts[ct] for ct in cell_types])
    sids.append(sid)

count_df = pd.DataFrame(data=data, columns=cell_types, index=sids)
count_df

In [None]:
# Convert counts to per-specimen fractions: divide each row by that row's total.
frac_df = count_df.div(count_df.sum(axis=1), axis=0)
frac_df