In [1]:
import logging
import json
import os
import re
from pathlib import Path
from collections import Counter

import anndata
import pandas as pd
import numpy as np
import tifffile
from einops import rearrange
from skimage.measure import regionprops

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
import multiplex_imaging_pipeline.utils as utils
import multiplex_imaging_pipeline.ome as ome
import multiplex_imaging_pipeline.segmentation as seg
import multiplex_imaging_pipeline.multiplex_imaging_pipeline as mip
from multiplex_imaging_pipeline.spatial_features import DEFAULT_GATING_STRATEGY

2024-02-13 13:55:41.880465: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /diskmnt/Projects/Users/estorrs/miniconda3/envs/imaging_analysis_v2/lib/python3.10/site-packages/cv2/../../lib64:/usr/local/lib:/usr/lib64:/usr/local/lib:/usr/lib64:
2024-02-13 13:55:41.880509: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
parent_dir = f'/diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom'

## define inputs

###### S18-9906

In [6]:
case = 'S18-9906'
runs = [
    '2023-10-06_S18-9906'
]
output_dir = os.path.join(parent_dir, case)

###### HT397B1

In [19]:
case = 'HT397B1'
runs = [
    '2023-03-15_HT397B1',
    '2023-03-16_HT397B1',
    '2023-04-07_HT397B1',
    '2023-04-13_HT397B1'
]
output_dir = os.path.join(parent_dir, case)

###### HT413C1-Th1k4A1

In [24]:
case = 'HT413C1-Th1k4A1'
runs = [
    '2024-01-11_HT413C1_Th1k4A1',
    '2024-01-10_HT413C1_Th1k4A1'
]
output_dir = os.path.join(parent_dir, case)

###### S18-25943-A7

In [31]:
case = 'S18-25943-A7'
runs = [
    '2023-11-22__S18-25943'
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### S18-5591-C8

In [46]:
case = 'S18-5591-C8'
runs = [
    '2023-11-16__S18-5591',
    '2023-11-18__S18-5591'
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT206B1-H2L1

In [62]:
case = 'HT206B1'
runs = [
    '2023-09-14_HT206B1',
    '2023-10-02_HT206B1'
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### C3L-00982

In [64]:
case = 'C3L-00982'
runs = [
    '2024-01-17_C3L-00982',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### C3L-02551

In [80]:
case = 'C3L-02551'
runs = [
    '2024-01-18_C3L-02551',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### C3L-00970

In [111]:
case = 'C3L-00970'
runs = [
    '2023-12-20_C3L-00970__C3N-00733__PA00002352',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### C3L-01287

In [116]:
case = 'C3L-01287'
runs = [
    '2023-12-19_C3L-01287__C3N-01200',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT413C1-K2

In [121]:
case = 'HT413C1-K2'
runs = [
    '2023-04-27_HT339B2__HT413C1__HT553P1__HT565B1',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT339B2-H1

In [128]:
case = 'HT339B2-H1'
runs = [
    '2023-04-27_HT339B2__HT413C1__HT553P1__HT565B1',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT553P1-H2

In [133]:
case = 'HT553P1-H2'
runs = [
    '2023-04-27_HT339B2__HT413C1__HT553P1__HT565B1',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT565B1-H2

In [138]:
case = 'HT565B1-H2'
runs = [
    '2023-04-27_HT339B2__HT413C1__HT553P1__HT565B1',
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

###### HT448C1-Th1K1Fp1

In [151]:
case = 'HT448C1-Th1K1Fp1'
runs = [
    '2023-04-13_HT488C1',
    '2023-04-19_HT488C1'
]
output_dir = os.path.join(parent_dir, case)
Path(output_dir).mkdir(parents=True, exist_ok=True)

## segmentation

In [152]:
metadata = pd.read_csv('/diskmnt/Projects/Users/estorrs/sandbox/specimen_tracking.tsv', sep='\t')
metadata = metadata[[True if x in runs else False for x in metadata['omero_run_name']]]
# metadata = metadata[[True if case in x else False for x in metadata['specimen']]]
metadata

Unnamed: 0,run_name,omero_run_name,date,case,specimen,bbox,filepath
157,HT488C1-Th1K1Fp1_Scan1,2023-04-13_HT488C1,20230413,HT488C1,HT488C1-Th1K1Fp1,,/diskmnt/primary/CODEX/HTAN/041323_CRC_HT488C1...
169,HT488C1-Th1K1Fp1-U14_Scan1,2023-04-19_HT488C1,20230419,HT488C1,HT488C1-Th1K1Fp1-U14,,/diskmnt/primary/CODEX/HTAN/041823_CRC_HT488C1...


In [147]:
mapping = utils.R_CHANNEL_MAPPING
common = set()
for fp in metadata['filepath']:
    channels = utils.get_ome_tiff_channels(fp)
    for c in channels:
        assert c in mapping, f'channel {c} is not in channel mapping, to ensure all channel names match consider adding {c} to channel mapping'
        common.add(mapping[c])
        
    print(fp, [mapping[c] for c in channels])
sorted(common)

/diskmnt/primary/CODEX/HTAN/041323_CRC_HT488C1-Th1K1Fp1-U2/HT488C1-Th1K1Fp1__20230413.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Vimentin', 'GLUT1', 'CK19', 'Hep-Par-1', 'CD3e', 'E-cadherin', 'CK7', 'CD11b', 'CD68', 'CD31', 'FOXP3']
/diskmnt/primary/CODEX/HTAN/041823_CRC_HT488C1-Th1K1Fp1-U14/HT488C1-Th1K1Fp1-U14__20230419.ome.tiff ['DAPI', 'Podoplanin', 'Pan-Cytokeratin', 'HLA-DR', 'CD8', 'SMA', 'CD45', 'Ki67', 'Vimentin', 'GLUT1', 'CK19', 'CK7', 'CD3e', 'E-cadherin', 'CD11b', 'CD31', 'CD4', 'CD68']


['CD11b',
 'CD31',
 'CD3e',
 'CD4',
 'CD45',
 'CD68',
 'CD8',
 'CK19',
 'CK7',
 'DAPI',
 'E-cadherin',
 'FOXP3',
 'GLUT1',
 'HLA-DR',
 'Hep-Par-1',
 'Ki67',
 'Pan-Cytokeratin',
 'Podoplanin',
 'SMA',
 'Vimentin']

In [148]:
nuclei_channels = ['DAPI']
membrane_channels = [
    'Pan-Cytokeratin', 'HLA-DR', 'SMA', 'CD4', 'CD45', 'Hep-Par-1', 'CD31', 'E-cadherin', 'CD68', 'CD3e']
# membrane_channels = [
#     'Pan-Cytokeratin', 'HLA-DR', 'SMA', 'CD4', 'CD45', 'CD31', 'E-cadherin', 'CD68', 'CD3e']
# membrane_channels = [
#     'Pan-Cytokeratin', 'HLA-DR', 'SMA', 'CD4', 'CD45', 'CD31', 'E-cadherin', 'CD3e']
# membrane_channels = [
#     'HLA-DR', 'SMA', 'CD4', 'CD45', 'CD31', 'E-cadherin', 'CD68', 'CD3e']
# membrane_channels = [
#     'Pan-Cytokeratin', 'HLA-DR', 'SMA', 'CD4', 'CD45', 'CD31', 'E-cadherin', 'CD68', 'CD3e']
# membrane_channels = [
#     'SMA', 'CD4', 'CD45', 'CD31', 'CD68', 'CD3e', 'AQP1', 'AQP2', 'CA9', 'IBA1', 'LRP2']
# membrane_channels = [
#     'Pan-Cytokeratin', 'SMA', 'CD4', 'CD45', 'CD31', 'CD68', 'CD3e', 'AQP1', 'AQP2', 'CA9', 'IBA1', 'LRP2']

In [150]:
seg_dir = os.path.join(output_dir, 'segmentation')
Path(seg_dir).mkdir(parents=True, exist_ok=True)
for specimen, fp in zip(metadata['specimen'], metadata['filepath']):
    print(specimen)
    prefix = os.path.join(seg_dir, specimen)
#     prefix = os.path.join(seg_dir, case)
    mip.segment_ome(fp, prefix, 25000, nuclei_channels, membrane_channels)

2024-02-19 09:38:49,358 - starting segmentation for /diskmnt/primary/CODEX/HTAN/041323_CRC_HT488C1-Th1K1Fp1-U2/HT488C1-Th1K1Fp1__20230413.ome.tiff
2024-02-19 09:38:49,360 - Checking for cached data


HT488C1-Th1K1Fp1


2024-02-19 09:38:49,652 - Checking MultiplexSegmentation-9.tar.gz against provided file_hash...
2024-02-19 09:38:49,653 - MultiplexSegmentation-9.tar.gz with hash a1dfbce2594f927b9112f23a0a1739e0 already available.
2024-02-19 09:38:49,654 - Extracting /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz
2024-02-19 09:38:52,565 - Successfully extracted /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz into /home/estorrs/.deepcell/models




2024-02-19 09:39:01,665 - No training configuration found in save file, so the model was *not* compiled. Compile it manually.




2024-02-19 09:57:30,121 - finished segmentation
2024-02-19 09:57:30,126 - writing /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/HT448C1-Th1K1Fp1/segmentation/HT488C1-Th1K1Fp1_nuclei_segmentation.tif
2024-02-19 09:57:30,911 - writing /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/HT448C1-Th1K1Fp1/segmentation/HT488C1-Th1K1Fp1_cell_segmentation.tif
2024-02-19 09:57:31,832 - starting segmentation for /diskmnt/primary/CODEX/HTAN/041823_CRC_HT488C1-Th1K1Fp1-U14/HT488C1-Th1K1Fp1-U14__20230419.ome.tiff
2024-02-19 09:57:31,835 - Checking for cached data


HT488C1-Th1K1Fp1-U14


2024-02-19 09:57:32,135 - Checking MultiplexSegmentation-9.tar.gz against provided file_hash...
2024-02-19 09:57:32,136 - MultiplexSegmentation-9.tar.gz with hash a1dfbce2594f927b9112f23a0a1739e0 already available.
2024-02-19 09:57:32,137 - Extracting /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz
2024-02-19 09:57:35,076 - Successfully extracted /home/estorrs/.deepcell/models/MultiplexSegmentation-9.tar.gz into /home/estorrs/.deepcell/models




2024-02-19 09:57:44,917 - No training configuration found in save file, so the model was *not* compiled. Compile it manually.




2024-02-19 10:15:31,318 - finished segmentation
2024-02-19 10:15:31,321 - writing /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/HT448C1-Th1K1Fp1/segmentation/HT488C1-Th1K1Fp1-U14_nuclei_segmentation.tif
2024-02-19 10:15:31,990 - writing /diskmnt/Projects/Users/estorrs/imaging-analysis/results/mushroom/HT448C1-Th1K1Fp1/segmentation/HT488C1-Th1K1Fp1-U14_cell_segmentation.tif
