In [1]:
import logging
import json
import os
import re
from pathlib import Path
from collections import Counter

import anndata
import pandas as pd
import numpy as np
import tifffile
from einops import rearrange
from skimage.measure import regionprops

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
import multiplex_imaging_pipeline.utils as utils
import multiplex_imaging_pipeline.ome as ome
import multiplex_imaging_pipeline.segmentation as seg
import multiplex_imaging_pipeline.multiplex_imaging_pipeline as mip
from multiplex_imaging_pipeline.spatial_features import DEFAULT_GATING_STRATEGY

2024-02-13 13:55:41.880465: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /diskmnt/Projects/Users/estorrs/miniconda3/envs/imaging_analysis_v2/lib/python3.10/site-packages/cv2/../../lib64:/usr/local/lib:/usr/lib64:/usr/local/lib:/usr/lib64:
2024-02-13 13:55:41.880509: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
# Root results directory; each case cell below derives its own `output_dir`
# by joining this path with the case name.
parent_dir = '/diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom'

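## define inputs

Each case cell below assigns the same three variables (`case`, `runs`, `output_dir`), so only the most recently executed one takes effect; run just the cell for the case you want to process.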

###### S18-9906

In [6]:
# Case S18-9906. `runs` lists the run names that are matched against the
# `omero_run_name` column of the specimen tracking table further down.
case = 'S18-9906'
runs = [
    '2023-10-06_S18-9906'
]
output_dir = os.path.join(parent_dir, case)
# Create the case folder up front, as the other case cells in this notebook do.
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT397B1

In [19]:
# Case HT397B1. `runs` lists the run names that are matched against the
# `omero_run_name` column of the specimen tracking table further down.
case = 'HT397B1'
runs = [
    '2023-03-15_HT397B1',
    '2023-03-16_HT397B1',
    '2023-04-07_HT397B1',
    '2023-04-13_HT397B1'
]
output_dir = os.path.join(parent_dir, case)
# Create the case folder up front, as the other case cells in this notebook do.
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT413C1-Th1k4A1

In [24]:
# Case HT413C1-Th1k4A1. `runs` lists the run names that are matched against
# the `omero_run_name` column of the specimen tracking table further down.
case = 'HT413C1-Th1k4A1'
runs = [
    '2024-01-11_HT413C1_Th1k4A1',
    '2024-01-10_HT413C1_Th1k4A1'
]
output_dir = os.path.join(parent_dir, case)
# Create the case folder up front, as the other case cells in this notebook do.
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### S18-25943-A7

In [31]:
# Case S18-25943-A7: one run; results are written under <parent_dir>/<case>.
case = 'S18-25943-A7'
runs = ['2023-11-22__S18-25943']
output_dir = os.path.join(parent_dir, case)
# Make sure the case folder (and any missing parents) exists.
os.makedirs(output_dir, exist_ok=True)

###### S18-5591-C8

In [46]:
# Case S18-5591-C8: two runs; results are written under <parent_dir>/<case>.
case = 'S18-5591-C8'
runs = ['2023-11-16__S18-5591', '2023-11-18__S18-5591']
output_dir = os.path.join(parent_dir, case)
# Make sure the case folder (and any missing parents) exists.
os.makedirs(output_dir, exist_ok=True)

###### HT206B1-H2L1

In [None]:
# Case HT206B1: two runs; results are written under <parent_dir>/<case>.
# NOTE(review): the section heading reads 'HT206B1-H2L1' while the case name
# (and therefore the output folder) is 'HT206B1' - confirm which is intended.
case = 'HT206B1'
runs = ['2023-09-14_HT206B1', '2023-10-02_HT206B1']
output_dir = os.path.join(parent_dir, case)
# Make sure the case folder (and any missing parents) exists.
os.makedirs(output_dir, exist_ok=True)

## segmentation

In [51]:
# Load the specimen tracking table and keep only the rows whose
# `omero_run_name` is one of the runs selected for the current case.
metadata = pd.read_csv('/diskmnt/Projects/Users/estorrs/sandbox/specimen_tracking.tsv', sep='\t')
# Vectorised membership test instead of a hand-built list of booleans.
metadata = metadata[metadata['omero_run_name'].isin(runs)]
metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
41,20231118_Human_prostate_African_American_seria...,2023-11-18__S18-5591,20231118,S18-5591,S18-5591-U24,4400061200100027000,/diskmnt/primary/CODEX/HTAN/20231118_Human_pro...
42,20231118_Human_prostate_African_American_seria...,2023-11-18__S18-5591,20231118,S18-5591,S18-5591-U21,2800044000100026000,/diskmnt/primary/CODEX/HTAN/20231118_Human_pro...
43,20231118_Human_prostate_African_American_seria...,2023-11-18__S18-5591,20231118,S18-5591,S18-5591-U14,800028000200027000,/diskmnt/primary/CODEX/HTAN/20231118_Human_pro...
44,20231118_Human_prostate_African_American_seria...,2023-11-18__S18-5591,20231118,S18-5591,S18-5591-U6,8000300026000,/diskmnt/primary/CODEX/HTAN/20231118_Human_pro...
45,20231116_Human_prostate_African_American_seria...,2023-11-16__S18-5591,20231116,S18-5591,S18-5591-U23,3800056000027000,/diskmnt/primary/CODEX/HTAN/20231116_Human_pro...
46,20231116_Human_prostate_African_American_seria...,2023-11-16__S18-5591,20231116,S18-5591,S18-5591-U19,2300038000100025000,/diskmnt/primary/CODEX/HTAN/20231116_Human_pro...
47,20231116_Human_prostate_African_American_seria...,2023-11-16__S18-5591,20231116,S18-5591,S18-5591-U8,700023000200026000,/diskmnt/primary/CODEX/HTAN/20231116_Human_pro...
48,20231116_Human_prostate_African_American_seria...,2023-11-16__S18-5591,20231116,S18-5591,S18-5591-U2,7000300023000,/diskmnt/primary/CODEX/HTAN/20231116_Human_pro...


In [54]:
# Translate each file's channel names through the channel mapping, print the
# translated list per file, and collect every translated name that occurs.
# NOTE: despite its name, `common` accumulates the union of the mapped names
# across files (every name is added), not the intersection.
mapping = utils.R_CHANNEL_MAPPING
common = set()
for fp in metadata['filepath']:
    channels = utils.get_ome_tiff_channels(fp)
    mapped_names = []
    for c in channels:
        # Stop at the first channel that has no entry in the mapping.
        assert c in mapping, f'channel {c} is not in channel mapping, to ensure all channel names match consider adding {c} to channel mapping'
        mapped_name = mapping[c]
        common.add(mapped_name)
        mapped_names.append(mapped_name)

    print(fp, mapped_names)
sorted(common)

/diskmnt/primary/CODEX/HTAN/20231118_Human_prostate_African_American_serial_S18_5591_Slide_6/S18-5591-U24__20231118.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD20', 'CK8/18', 'Ki67', 'AR', 'AMACR']
/diskmnt/primary/CODEX/HTAN/20231118_Human_prostate_African_American_serial_S18_5591_Slide_6/S18-5591-U21__20231118.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD20', 'CK8/18', 'Ki67', 'AR', 'AMACR']
/diskmnt/primary/CODEX/HTAN/20231118_Human_prostate_African_American_serial_S18_5591_Slide_6/S18-5591-U14__20231118.ome.tiff ['DAPI', 'CK14', 'Lyve-1', 'CD4', 'CK5', 'STEAP4', 'CD68', 'E-cadherin', 'SMA', 'CD45', 'P63', 'Pan-Cytokeratin', 'HLA-DR', 'Podoplanin', 'Vimentin', 'CD3e', 'CD8', 'CK7', 'CD31', 'CD

['AMACR',
 'AR',
 'CD20',
 'CD31',
 'CD3e',
 'CD4',
 'CD45',
 'CD68',
 'CD8',
 'CK14',
 'CK5',
 'CK7',
 'CK8/18',
 'DAPI',
 'E-cadherin',
 'HLA-DR',
 'Ki67',
 'Lyve-1',
 'P63',
 'Pan-Cytokeratin',
 'Podoplanin',
 'SMA',
 'STEAP4',
 'Vimentin']

In [55]:
# Channel names passed to the segmentation call below as the nuclear and
# membrane inputs, respectively.
nuclei_channels = ['DAPI']

# Membrane panels previously tried and left disabled:
#   - the current panel plus 'Hep-Par-1'
#   - the current panel without 'CD68'
#   - the current panel without 'Pan-Cytokeratin'
membrane_channels = [
    'Pan-Cytokeratin',
    'HLA-DR',
    'SMA',
    'CD4',
    'CD45',
    'CD31',
    'E-cadherin',
    'CD68',
    'CD3e',
]

In [None]:
# Segment every specimen of the selected case. Each call receives the output
# prefix <output_dir>/segmentation/<specimen>.
seg_dir = os.path.join(output_dir, 'segmentation')
os.makedirs(seg_dir, exist_ok=True)
for specimen, fp in metadata[['specimen', 'filepath']].itertuples(index=False, name=None):
    print(specimen)
    prefix = os.path.join(seg_dir, specimen)
    # 25000 is the third positional argument; the logged
    # "window: 0, 25000, 0, 25000" suggests it is the tile size - TODO confirm.
    mip.segment_ome(fp, prefix, 25_000, nuclei_channels, membrane_channels)

2024-02-14 13:07:00,167 - starting segmentation for /diskmnt/primary/CODEX/HTAN/20231118_Human_prostate_African_American_serial_S18_5591_Slide_6/S18-5591-U24__20231118.ome.tiff
2024-02-14 13:07:00,169 - Checking for cached data


S18-5591-U24


2024-02-14 13:07:00,461 - Checking MultiplexSegmentation-9.tar.gz against provided file_hash...
2024-02-14 13:07:00,462 - MultiplexSegmentation-9.tar.gz with hash a1dfbce2594f927b9112f23a0a1739e0 already available.
2024-02-14 13:07:00,463 - Extracting /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz
2024-02-14 13:07:03,274 - Successfully extracted /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz into /home/estorrs/.deepcell/models




2024-02-14 13:07:10,824 - No training configuration found in save file, so the model was *not* compiled. Compile it manually.
2024-02-14 13:11:55,535 - spliting into nrows: 1, ncols: 2
2024-02-14 13:11:55,539 - 0 0
2024-02-14 13:11:55,540 - window: 0, 25000, 0, 25000
