In [38]:
import logging
import os
import math
import shutil
import subprocess
from pathlib import Path
from collections import Counter

import tifffile
import scanpy as sc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF
import torch
from skimage.filters import gaussian
from skimage.morphology import binary_erosion, remove_small_objects, binary_dilation, label
from skimage.measure import regionprops_table, regionprops
from scipy.ndimage import binary_fill_holes
from skimage.segmentation import expand_labels
from skimage.exposure import rescale_intensity
from einops import rearrange


In [26]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
%autoreload 2

In [28]:
import multiplex_imaging_pipeline.utils as utils
from multiplex_imaging_pipeline.region_features import get_region_features

In [5]:
output_dir = '/diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1'
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [7]:
adata_fps = sorted(utils.listfiles(os.path.join(output_dir, 'spatial_features'), regex=r'.h5ad$'))
sample_to_adata = {fp.split('/')[-1].replace('.h5ad', ''):fp for fp in adata_fps}

In [8]:
ome_fps = sorted(utils.listfiles('/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan',
                         regex='/level_2/[^/]+.ome.tiff$'))
sample_to_ome = {fp.split('/')[-1].replace('.ome.tiff', ''):fp for fp in ome_fps}

In [11]:
sample_to_adata.keys()

dict_keys(['HT110B1_S1H4', 'HT171B1-S1H9A1-4_left_05122023', 'HT171B1-S1H9A1-4_right_05122023', 'HT171B1-S1H9A1-A4_left_05262023', 'HT171B1-S1H9A1-A4_right_05262023', 'HT206B1-S1H5_U1_07012023', 'HT206B1-S1H5_U2_07012023', 'HT206B1_H1', 'HT206B1_H1_06252022', 'HT206B1_S1H2L4_20221028', 'HT243B1-S1H4A4_04192023', 'HT243B1-S1H4A4_left_05122023', 'HT243B1-S1H4A4_left_05262023', 'HT243B1-S1H4A4_right_05122023', 'HT243B1-S1H4A4_right_05262023', 'HT243B1_S1H4', 'HT271B1-S1H6A5_04192023', 'HT271B1-S1H6A5_left_05122023', 'HT271B1-S1H6A5_left_05262023', 'HT271B1-S1H6A5_mid_05262023', 'HT271B1-S1H6A5_right_05122023', 'HT271B1-S1H6A5_right_05262023', 'HT271B1_S1H3A5_02172023', 'HT297B1-S1H1_U1_07012023', 'HT297B1-S1H1_U2_07012023', 'HT305B1_S1H1', 'HT308B1-S1H5A4_04192023', 'HT308B1-S1H5A4_left_05122023', 'HT308B1-S1H5A4_left_05262023', 'HT308B1-S1H5A4_right_05122023', 'HT308B1-S1H5A4_right_05262023', 'HT323B1_H1A1', 'HT323B1_H1A4', 'HT323B1_H1_08042022', 'HT323B1_H3', 'HT339B1_H1A1', 'HT339B1_H2

In [14]:
sorted(sample_to_ome.keys())

['20230120_PKD_K1301459',
 '20230120_PKD_K200452_2PB',
 '20230120_PKD_K2200446_4PB',
 'CRC112',
 'E16.5_female_section1_04272023',
 'E16.5_female_section2_04272023',
 'E16.5_male_section1_04272023',
 'E16.5_male_section2_04272023',
 'HS-18-11250-1B_06102023',
 'HS-18-6934-2A_06062023',
 'HS-19-14816-1J_06062023',
 'HS-19-6647-1B_06102023',
 'HT110B1-S1H4',
 'HT171B1-S1H1A1',
 'HT171B1-S1H1A4',
 'HT171B1-S1H9A1-4_04192023',
 'HT171B1-S1H9A1-4_left_05122023',
 'HT171B1-S1H9A1-4_right_05122023',
 'HT171B1-S1H9A1-A4_left_05262023',
 'HT171B1-S1H9A1-A4_right_05262023',
 'HT171B1_20221205',
 'HT171B1_S1H1A1',
 'HT171B1_S1H1A4',
 'HT171B1_S1H8A1_20221122',
 'HT206B1-H1',
 'HT206B1-S1H5_U1_07012023',
 'HT206B1-S1H5_U2_07012023',
 'HT206B1_Ctrl',
 'HT206B1_H1_06252022',
 'HT206B1_S1H2L4',
 'HT243B1-S1H4',
 'HT243B1-S1H4A4_04192023',
 'HT243B1-S1H4A4_left_05122023',
 'HT243B1-S1H4A4_left_05262023',
 'HT243B1-S1H4A4_right_05122023',
 'HT243B1-S1H4A4_right_05262023',
 'HT271B1-S1H3A5',
 'HT271B1-S

In [10]:
set(sample_to_adata.keys()).difference(set(sample_to_ome.keys()))

{'HT110B1_S1H4',
 'HT206B1_H1',
 'HT206B1_S1H2L4_20221028',
 'HT243B1_S1H4',
 'HT271B1_S1H3A5_02172023',
 'HT305B1_S1H1',
 'HT323B1_H1A1',
 'HT323B1_H1A4',
 'HT323B1_H1_08042022',
 'HT323B1_H3',
 'HT339B1_H1A1',
 'HT339B1_H2A1',
 'HT365B1_S1H1_02132023',
 'HT397B1_H2A2',
 'HT397B1_H3A1',
 'HT480B1_S1H2_R001',
 'HT480B1_S1H2_R002'}

In [15]:
name_map = {
    'HT110B1-S1H4': 'HT110B1_S1H4',
    'HT206B1-H1': 'HT206B1_H1',
    'HT206B1_S1H2L4': 'HT206B1_S1H2L4_20221028',
    'HT243B1-S1H4': 'HT243B1_S1H4',
    'HT271B1-S1H3A5': 'HT271B1_S1H3A5_02172023',
    'HT305B1-S1H1': 'HT305B1_S1H1',
    'HT323B1-H1A1': 'HT323B1_H1A1',
    'HT323B1-H1A4': 'HT323B1_H1A4',
    'HT323B1-H1-08042022': 'HT323B1_H1_08042022',
    'HT323B1-H3': 'HT323B1_H3',
    'HT339B1-H1A1': 'HT339B1_H1A1',
    'HT339B1-H2A1': 'HT339B1_H2A1',
    'HT365B1_S1H1': 'HT365B1_S1H1_02132023',
    'HT397B1-H2A2': 'HT397B1_H2A2',
    'HT397B1-H3A1': 'HT397B1_H3A1',
    'HT480B1-S1H2-R001': 'HT480B1_S1H2_R001',
    'HT480B1-S1H2-R002': 'HT480B1_S1H2_R002'
}

sample_to_ome = {name_map.get(k, k):v for k, v in sample_to_ome.items()}

In [17]:
def generate_mask(mask, sigma=1., min_area=10000):
    mask = gaussian(mask, sigma=sigma)
    mask = binary_fill_holes(mask)
    
    labeled = label(mask)
    props = regionprops(labeled)
    
    mask = labeled > 0
    for prop in props:
        if prop.area < min_area:
            r1, c1, r2, c2 = prop.bbox
            r1, c1 = max(r1 - 10, 0), max(c1 - 10, 0)
            r2, c2, = r2 + 10, c2 + 10
            mask[r1:r2, c1:c2][labeled[r1:r2, c1:c2]==prop.label] = 0
            
    return mask

In [18]:
out = os.path.join(output_dir, 'region_masks')
Path(out).mkdir(parents=True, exist_ok=True)

In [21]:
keep = ['Pan-Cytokeratin', 'E-cadherin']
sample_to_mask = {}
for sample in sample_to_adata.keys():
    print(sample)
    channel_to_img = utils.extract_ome_tiff(sample_to_ome[sample])
    channel_to_img = {utils.R_CHANNEL_MAPPING.get(k, k):img for k, img in channel_to_img.items()}

    a = sc.read_h5ad(sample_to_adata[sample])
    
    markers = [x.replace('_fraction', '') for x in a.var.index.to_list()]
    thresholds = a.uns['thresholds']
    mask = np.zeros_like(next(iter(channel_to_img.values())), dtype=bool)
    for c in keep:
        if c in markers:
            val = thresholds[markers.index(c)]
            if val > 0:
                m = channel_to_img[c] >= val
                mask |= m
        
    mask = generate_mask(mask, sigma=1., min_area=10000)
    sample_to_mask[sample] = mask
    
    tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)    

HT110B1_S1H4


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT171B1-S1H9A1-4_left_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT171B1-S1H9A1-4_right_05122023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT171B1-S1H9A1-A4_left_05262023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT171B1-S1H9A1-A4_right_05262023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT206B1-S1H5_U1_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT206B1-S1H5_U2_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT206B1_H1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT206B1_H1_06252022


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT206B1_S1H2L4_20221028


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT243B1-S1H4A4_04192023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT243B1-S1H4A4_left_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT243B1-S1H4A4_left_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT243B1-S1H4A4_right_05122023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT243B1-S1H4A4_right_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT243B1_S1H4


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT271B1-S1H6A5_04192023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT271B1-S1H6A5_left_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT271B1-S1H6A5_left_05262023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT271B1-S1H6A5_mid_05262023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT271B1-S1H6A5_right_05122023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT271B1-S1H6A5_right_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT271B1_S1H3A5_02172023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT297B1-S1H1_U1_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT297B1-S1H1_U2_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT305B1_S1H1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT308B1-S1H5A4_04192023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT308B1-S1H5A4_left_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT308B1-S1H5A4_left_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT308B1-S1H5A4_right_05122023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT308B1-S1H5A4_right_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT323B1_H1A1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT323B1_H1A4


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT323B1_H1_08042022


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT323B1_H3


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT339B1_H1A1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT339B1_H2A1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT339B2-H2_U1_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT339B2-H2_U2_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT365B1_S1H1_02132023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT397B1_H2A2


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT397B1_H3A1


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT397B1_S1H1A3U22_04122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT397B1_S1H1A3U31_04062023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT397B1_U12_03172023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT397B1_U2_03162023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


HT480B1_S1H2_R001


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT480B1_S1H2_R002


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT565B1-H2_04262023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT565B1-S1H2_U1_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)
  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


HT565B1-S1H2_U2_07012023


  tifffile.imsave(os.path.join(out, f'{sample}.tif'), mask)


In [31]:
mask_fps = sorted(utils.listfiles(os.path.join(output_dir, 'region_masks'), regex=r'.tif$'))
sample_to_mask = {fp.split('/')[-1].replace('.tif', ''):fp for fp in mask_fps}

In [32]:
a = set(sample_to_adata.keys())
b = set(sample_to_mask.keys())
c = set(sample_to_ome.keys())

len(a.intersection(b).intersection(c))

51

In [35]:
a = sc.read_h5ad(next(iter(sample_to_adata.values())))
a

AnnData object with n_obs × n_vars = 77651 × 26
    obs: 'row', 'col', 'bbox-r1', 'bbox-c1', 'bbox-r2', 'bbox-c2', 'area', 'DAPI_intensity', 'CD8_intensity', 'CD138_intensity', 'CD4_intensity', 'cKit_intensity', 'Pan-Cytokeratin_intensity', 'GLUT1_intensity', 'Podoplanin_intensity', 'CK17_intensity', 'HLA-DR_intensity', 'GATA3_intensity', 'CD36_intensity', 'MGP_intensity', 'CK14_intensity', 'SMA_intensity', 'CD68_intensity', 'CD20_intensity', 'Vimentin_intensity', 'CD3e_intensity', 'CD163_intensity', 'Ki67_intensity', 'PR_intensity', 'ER_intensity', 'P16_intensity', 'CK19_intensity', 'P21_intensity', 'default_annotation', 'cKit_annotation', 'GATA3_annotation', 'CK14_annotation', 'CK19_annotation', 'PR_annotation', 'ER_annotation', 'Her2_annotation', 'CD8_annotation', 'BCA1_annotation', 'CTLA4_annotation', 'CD68_annotation', 'CD163_annotation', 'Granzyme-B_annotation', 'pan-immune_annotation'
    uns: 'gating_strategy', 'thresholds'
    obsm: 'spatial'

In [36]:
annotation_keys = [c for c in a.obs.columns if '_annotation' in c]
annotation_keys

['default_annotation',
 'cKit_annotation',
 'GATA3_annotation',
 'CK14_annotation',
 'CK19_annotation',
 'PR_annotation',
 'ER_annotation',
 'Her2_annotation',
 'CD8_annotation',
 'BCA1_annotation',
 'CTLA4_annotation',
 'CD68_annotation',
 'CD163_annotation',
 'Granzyme-B_annotation',
 'pan-immune_annotation']

In [33]:
region_features_dir = os.path.join(output_dir, 'region_features')
region_features_masks_dir = os.path.join(output_dir, 'region_features_masks')
Path(region_features_dir).mkdir(parents=True, exist_ok=True)
Path(region_features_masks_dir).mkdir(parents=True, exist_ok=True)

In [40]:
logging.getLogger().setLevel(logging.INFO)

In [None]:
for sample in sample_to_adata.keys():
    print(sample)
    try:
        combined, labeled_dict = get_region_features(
            sample_to_adata[sample], sample_to_ome[sample], mask_fp=sample_to_mask[sample],
            annotation_keys=annotation_keys
        )
        combined.index.name = 'label'
        combined.to_csv(os.path.join(region_features_dir, f'{sample}.txt'), sep='\t', index=True)

        for name, img in labeled_dict.items():
            tifffile.imwrite(os.path.join(region_features_masks_dir, f'{sample}_{name}.tif'), img, compression='LZW')
    except:
        print(f'error in sample {sample}')

HT110B1_S1H4


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
INFO:root:reading mask from /diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_masks/HT110B1_S1H4.tif
INFO:root:generating region features for region
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 11.0, 'Pan-Cytokeratin': 10.0, 'Podoplanin': 10.0, 'CK17': 20.0, 'HLA-DR': 12.0, 'GATA3': 9.0, 'CK14': 19.0, 'SMA': 15.0, 'CD20': 12.0, 'CD3e': 15.0, 'CD163': 19.0, 'PR': 39.0, 'ER': 10.0, 'CK19': 10.0}
INFO:root:generating region features for ductal
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 11.0, 'Pan-Cytokeratin': 10.0, 'Podoplanin': 10.0, 'CK17': 20.0, 'HLA-DR': 12.0, 'GATA3': 9.0, 'CK14': 19.0

HT171B1-S1H9A1-4_left_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
INFO:root:reading mask from /diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_masks/HT171B1-S1H9A1-4_left_05122023.tif
INFO:root:generating region features for region
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 10.0, 'Her2': 10.0, 'GATA3': 29.0, 'cKit': 29.0, 'Pan-Cytokeratin': 15.0, 'GLUT1': 6.0, 'Podoplanin': 10.0, 'HLA-DR': 12.0, 'CK14': 10.0, 'E-cadherin': 212.0, 'SMA': 10.0, 'CD3e': 10.0, 'CD11b': 10.0, 'PR': 10.0, 'ER': 10.0, 'CD31': 10.0, 'CK19': 10.0, 'CK5': 8.0, 'CTLA4': 10.0, 'CD4': 14.0, 'BCA1': 10.0}
INFO:root:generating region features for ductal
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fra

HT171B1-S1H9A1-4_right_05122023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
INFO:root:reading mask from /diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_masks/HT171B1-S1H9A1-4_right_05122023.tif
INFO:root:generating region features for region
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 10.0, 'Her2': 10.0, 'GATA3': 29.0, 'cKit': 29.0, 'Pan-Cytokeratin': 15.0, 'GLUT1': 6.0, 'Podoplanin': 10.0, 'HLA-DR': 12.0, 'CK14': 10.0, 'E-cadherin': 212.0, 'SMA': 10.0, 'CD3e': 10.0, 'CD11b': 10.0, 'PR': 10.0, 'ER': 10.0, 'CD31': 10.0, 'CK19': 10.0, 'CK5': 8.0, 'CTLA4': 10.0, 'CD4': 14.0, 'BCA1': 10.0}
INFO:root:generating region features for ductal
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fr

HT171B1-S1H9A1-A4_left_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
INFO:root:reading mask from /diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_masks/HT171B1-S1H9A1-A4_left_05262023.tif
INFO:root:generating region features for region
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 10.0, 'Her2': 10.0, 'GATA3': 15.0, 'cKit': 10.0, 'Pan-Cytokeratin': 22.0, 'GLUT1': 13.0, 'Podoplanin': 10.0, 'HLA-DR': 15.0, 'CK14': 10.0, 'SMA': 10.0, 'PR': 19.0, 'ER': 10.0, 'CD31': 10.0, 'CK19': 10.0, 'CTLA4': 10.0, 'CD4': 12.0, 'BCA1': 11.0}
INFO:root:generating region features for ductal
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 10.0, 'Her2': 10.0, 'GATA3': 15.0, 'cKit': 10

HT171B1-S1H9A1-A4_right_05262023


  d = to_dict(os.fspath(xml), parser=parser, validate=validate)
INFO:root:reading mask from /diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_masks/HT171B1-S1H9A1-A4_right_05262023.tif
INFO:root:generating region features for region
INFO:root:generating cell fractions
INFO:root:generating marker intensities
INFO:root:generating marker fractions
INFO:root:Using the following threhsolds to calculate positive fraction: {'CD8': 10.0, 'Her2': 10.0, 'GATA3': 15.0, 'cKit': 10.0, 'Pan-Cytokeratin': 22.0, 'GLUT1': 13.0, 'Podoplanin': 10.0, 'HLA-DR': 15.0, 'CK14': 10.0, 'SMA': 10.0, 'PR': 19.0, 'ER': 10.0, 'CD31': 10.0, 'CK19': 10.0, 'CTLA4': 10.0, 'CD4': 12.0, 'BCA1': 11.0}


In [42]:
df = pd.read_csv(
    '/diskmnt/Projects/Users/estorrs/imaging-analysis/results/brca_subtype/revision_v1/region_features/HT110B1_S1H4.txt',
    sep='\t'
)
df

Unnamed: 0,region_r1,region_c1,region_r2,region_c2,region_area,region_perimeter,region_row,region_col,region_compactness,region_cell_fraction_default_annotation_CD8 T cell,...,expanded_marker_fraction_CK14,expanded_marker_fraction_CK17,expanded_marker_fraction_CK19,expanded_marker_fraction_ER,expanded_marker_fraction_GATA3,expanded_marker_fraction_HLA-DR,expanded_marker_fraction_PR,expanded_marker_fraction_Pan-Cytokeratin,expanded_marker_fraction_Podoplanin,expanded_marker_fraction_SMA
0,12393,1635,20321,8363,145261,29305.071068,14656.459545,3607.557810,0.002126,0.000000,...,0.000742,0.000505,0.000278,0.000701,0.000646,0.000584,0.000350,0.000486,0.000585,0.000712
1,13107,4566,13340,4692,19471,654.433550,13221.489446,4628.472035,0.571304,0.000000,...,0.000009,0.000009,0.008284,0.000167,0.000405,0.000009,0.000009,0.070268,0.000282,0.000009
2,13113,8356,13124,9323,10637,1952.000000,13118.000000,8839.000000,0.035081,,...,0.009743,0.004662,0.005114,0.015615,0.010070,0.007304,0.007596,0.007306,0.007438,0.009347
3,13833,9316,13844,10283,10637,1952.000000,13838.000000,9799.000000,0.035081,,...,0.009740,0.003962,0.005279,0.013265,0.010073,0.007304,0.007469,0.007306,0.007466,0.009446
4,15630,5176,16261,5799,183966,3108.049999,15945.783536,5494.163280,0.239316,0.006211,...,0.006817,0.000061,0.147570,0.016393,0.000098,0.081942,0.036244,0.200717,0.035409,0.025373
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,25087,13207,25256,13425,21537,951.286363,25165.226448,13318.306449,0.299070,0.000000,...,0.018457,0.000068,0.161217,0.009170,0.000010,0.068815,0.011404,0.201754,0.000476,0.019603
303,25147,13659,25343,13880,21002,835.955411,25246.970574,13754.373012,0.377663,0.000000,...,0.012991,0.000005,0.080930,0.009343,0.000103,0.076452,0.000717,0.093894,0.005152,0.018870
304,25170,13846,25283,14111,16435,863.144228,25226.698509,13975.475814,0.277213,0.000000,...,0.009155,0.000015,0.095551,0.006074,0.000643,0.186165,0.000044,0.116409,0.000805,0.060292
305,25264,15223,25436,15400,19207,891.730014,25347.975790,15310.674702,0.303531,0.000000,...,0.000038,0.000011,0.019016,0.000131,0.000925,0.001691,0.000011,0.106157,0.005501,0.117307


In [43]:
list(df.columns)

['region_r1',
 'region_c1',
 'region_r2',
 'region_c2',
 'region_area',
 'region_perimeter',
 'region_row',
 'region_col',
 'region_compactness',
 'region_cell_fraction_default_annotation_CD8 T cell',
 'region_cell_fraction_default_annotation_Epithelial',
 'region_cell_fraction_default_annotation_Immune',
 'region_cell_fraction_default_annotation_Macrophage - M1',
 'region_cell_fraction_default_annotation_Stroma',
 'region_cell_fraction_default_annotation_Unlabeled',
 'region_cell_fraction_cKit_annotation_Unlabeled',
 'region_cell_fraction_GATA3_annotation_Unlabeled',
 'region_cell_fraction_GATA3_annotation_epithelial - GATA3 positive',
 'region_cell_fraction_CK14_annotation_Unlabeled',
 'region_cell_fraction_CK14_annotation_epithelial - CK14 positive',
 'region_cell_fraction_CK19_annotation_Unlabeled',
 'region_cell_fraction_CK19_annotation_epithelial - CK19 positive',
 'region_cell_fraction_PR_annotation_Unlabeled',
 'region_cell_fraction_PR_annotation_epithelial - PR positive',
 're

In [None]:
out = os.path.join(output_dir, 'pseudo_fullres')
Path(out).mkdir(parents=True, exist_ok=True)

In [None]:
cmap = {
    'Pan-Cytokeratin': (1., 0., 0.),
    'SMA': (0., 1., 0.),
    'DAPI': (0., 0., 1.),
    'Podoplanin': (1., 1., 1.),    
}

for sample, fp in sample_to_ome.items():
    print(sample)
    channel_to_img = utils.extract_ome_tiff(fp, channels=list(cmap.keys()), flexibility='loose')

    pseudo = utils.make_pseudo(channel_to_img, cmap=cmap, contrast_pct=95.)
    pseudo *= 255.
    pseudo = pseudo.astype(np.uint8)
    
    tifffile.imsave(os.path.join(out, f'{sample}.tif'), pseudo)    