In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import scanpy as sc
import tifffile
import torch
import torchvision.transforms.functional as TF
import yaml
from einops import rearrange

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
from mushroom.data.multiplex import extract_ome_tiff, get_ome_tiff_channels, make_pseudo

In [5]:
# root directory for everything this notebook writes; created (with parents) if missing
run_dir = '/data/estorrs/mushroom/data/examples/HT397B1_v1'
os.makedirs(run_dir, exist_ok=True)

specify filepaths

In [6]:
# Each mapping below goes from section id -> path on disk for one data type.
# The section ids are the same strings listed in `order`.

# visium: spaceranger output directory per section
section_to_visium = {
    'HT397B1-U1': '/data/spatial_transcriptomics/spaceranger_outputs/breast/HT397B1-S1H3A1U1/',
    'HT397B1-U21': '/data/spatial_transcriptomics/spaceranger_outputs/breast/HT397B1-S1H3A1U21/',
}

# high-resolution H&E image (.tif) per section
section_to_he = {
    'HT397B1-U1':  '/data/spatial_transcriptomics/highres_images/breast/A1_HT397B1-S1H3A1U1.tif',
    'HT397B1-U21': '/data/spatial_transcriptomics/highres_images/breast/B1_HT397B1-S1H3A1U21.tif',
}

# codex multiplex imaging: one .ome.tiff per section
section_to_multiplex = {
    'HT397B1-U2': '/data/multiplex_imaging/codex/htan/brca/031623_BRCA_HT397B1-U2/level_2/HT397B1_U2_03162023.ome.tiff',
    'HT397B1-U12': '/data/multiplex_imaging/codex/htan/brca/03172023_BRCA_HT397B1-U12/level_2/HT397B1_U12_03172023.ome.tiff',
    'HT397B1-U22': '/data/multiplex_imaging/codex/htan/brca/041223_BRCA_HT397B1-S1H3A1-U22/level_2/HT397B1_S1H1A3U22_04122023.ome.tiff',
    'HT397B1-U31': '/data/multiplex_imaging/codex/htan/brca/040623_BRCA_HT397B1-U31/level_2/HT397B1_S1H1A3U31_04062023.ome.tiff',
}

In [7]:
# order of tissue sections
# the position of a section id in this list is the number used in its
# output file name (s<position>.tif) and in the 'ids' entry of the metadata
order = [
    'HT397B1-U1',
    'HT397B1-U2',
    'HT397B1-U12',
    'HT397B1-U21',
    'HT397B1-U22',
    'HT397B1-U31'
]

writing the images to a single directory; they will then be loaded into bigwarp to generate a ddf for each section

note that if multiple data types are present for one section (for example H&E and visium), then usually you only need to do registration on one of those data types (in this case we use H&E)

In [45]:
# <run_dir>/registration/bigwarp holds the registration inputs;
# the downsampled, unregistered tifs go in its 'unregistered_tifs' subfolder
bigwarp_dir = os.path.join(run_dir, 'registration', 'bigwarp')
output_dir = os.path.join(bigwarp_dir, 'unregistered_tifs')
os.makedirs(output_dir, exist_ok=True)

we also downsample the images so registration goes faster. note that if you do this you'll need to upscale the ddf after registration

In [9]:
# downsampling factor applied to every image before it is written for registration
scale = 0.1

writing H&E

In [13]:
def rescale(x, scale=.1):
    """Downsample a channel-last image and return it as a uint8 array.

    Args:
        x: array-like image laid out as (height, width, channels).
        scale: factor applied to both height and width; the target size
            is truncated to whole pixels with ``int``.

    Returns:
        numpy array laid out as (height, width, channels), converted to
        uint8 by ``TF.convert_image_dtype``.
    """
    # torchvision works on channel-first tensors
    chw = rearrange(torch.tensor(x), 'h w c -> c h w')

    height, width = chw.shape[-2], chw.shape[-1]
    target_size = (int(height * scale), int(width * scale))

    resized = TF.resize(chw, target_size)
    as_uint8 = TF.convert_image_dtype(resized, torch.uint8)

    # back to channel-last for writing with tifffile
    return rearrange(as_uint8.numpy(), 'c h w -> h w c')

In [14]:
# downsample each H&E image and save it as s<position in order>.tif
for section_id, he_path in section_to_he.items():
    he_small = rescale(tifffile.imread(he_path), scale=scale)

    out_path = os.path.join(output_dir, f's{order.index(section_id)}.tif')
    tifffile.imwrite(out_path, he_small, compression='LZW')

writing multiplex pseudo-color images

In [18]:
# list the channel names of the first multiplex image to see what we are working with
first_multiplex_path = next(iter(section_to_multiplex.values()))
get_ome_tiff_channels(first_multiplex_path)

  d = to_dict(os.fspath(xml), parser=parser, validate=validate)


['DAPI',
 'CD8',
 'Her2 (D)',
 'GATA3 (D)',
 'cKIT-(D)',
 'Pan-Cytokeratin',
 'GLUT1-(D)',
 'Podoplanin',
 'CD68 (D)',
 'HLA-DR',
 'Keratin 14',
 'FoxP3',
 'MGP-(D)',
 'CD20-(D)',
 'SMA-(D)',
 'Ki67',
 'Vimentin-(D)',
 'PR-(D)',
 'Bap1 (D)',
 'CD45 (D)',
 'ER',
 'CD31',
 'COX6c (D)',
 'CK19',
 'PLAT/tPA (D)']

In [26]:
# channel name -> RGB color passed to make_pseudo for the pseudo-color image
cmap = {
    'DAPI': (0., 0., 1.),
    'Pan-Cytokeratin': (1., 0., 0.),
    'CD45 (D)': (0., 1., 0.),
    'SMA-(D)': (1., 1., 1.)
}

sid_to_pseudo = {}
for section_id, ome_path in section_to_multiplex.items():
    print(section_id)
    channel_to_img = extract_ome_tiff(ome_path, channels=list(cmap))

    # rescale expects (h, w, c): add a trailing channel axis, then squeeze it away
    # NOTE(review): assumes each extracted channel image is 2-D - confirm
    downsampled = {}
    for channel, img in channel_to_img.items():
        downsampled[channel] = np.squeeze(rescale(np.expand_dims(img, -1), scale=scale))

    pseudo = make_pseudo(downsampled, cmap=cmap, contrast_pct=90.)

    # normalize so the brightest value maps to 255, then cast to uint8
    pseudo /= pseudo.max()
    pseudo *= 255
    pseudo = pseudo.astype(np.uint8)

    # file name is s<position in order>.tif
    out_path = os.path.join(output_dir, f's{order.index(section_id)}.tif')
    tifffile.imwrite(out_path, pseudo, compression='LZW')

HT397B1-U2




dict_keys(['DAPI', 'Pan-Cytokeratin', 'SMA-(D)', 'CD45 (D)'])
HT397B1-U12




dict_keys(['DAPI', 'Pan-Cytokeratin', 'SMA-(D)', 'CD45 (D)'])
HT397B1-U22




dict_keys(['DAPI', 'Pan-Cytokeratin', 'SMA-(D)', 'CD45 (D)'])
HT397B1-U31




dict_keys(['DAPI', 'Pan-Cytokeratin', 'SMA-(D)', 'CD45 (D)'])


In [38]:
# Pixels per micron for whichever data type comes first in the sectioning
# experiment (all data will be registered downstream of that section).
# Here the first slice is visium st data, so the value is read from the
# anndata object; how to obtain it will vary with the data type.
spot_diameter_microns = 65.  # each spot is 65 microns wide

adata = sc.read_visium(section_to_visium[order[0]])
spatial_entry = next(iter(adata.uns['spatial'].values()))
scalefactors = spatial_entry['scalefactors']
registered_pixels_per_micron = scalefactors['spot_diameter_fullres'] / spot_diameter_microns
registered_pixels_per_micron

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


2.3223207975307805

In [46]:
# everything needed to reproduce the registration inputs:
# source file paths per data type, section order, the s<i> ids used for the
# written tifs, the downsampling factor, and the resolution of the first section
metadata = dict(
    data=dict(
        he=section_to_he,
        visium=section_to_visium,
        multiplex=section_to_multiplex,
    ),
    order=order,
    ids=[f's{i}' for i, _ in enumerate(order)],
    scale=scale,
    registered_pixels_per_micron=registered_pixels_per_micron,
)

In [48]:
# write the run metadata into the bigwarp directory; the context manager
# makes sure the file is flushed and closed as soon as the dump finishes
# (the bare open(...) handle was previously never closed)
with open(os.path.join(bigwarp_dir, 'metadata.yaml'), 'w') as f:
    yaml.safe_dump(metadata, f)