In [1]:
import pickle
import json
import os
import sys
from copy import deepcopy
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import skimage
import tifffile
import yaml

In [2]:
import matplotlib as mpl
# Font type 42 selects TrueType fonts for PDF output (matplotlib `pdf.fonttype` rcParam).
mpl.rcParams['pdf.fonttype'] = 42

In [3]:
%load_ext autoreload

In [4]:
%autoreload 2

In [21]:
from mushroom.mushroom import Mushroom, DEFAULT_CONFIG
import mushroom.utils as utils
import mushroom.visualization.utils as vis_utils
import mushroom.data.multiplex as multiplex
import mushroom.data.xenium as xenium
import mushroom.data.cosmx as cosmx

In [6]:
# Path prefixes passed to alter_filesystem(): occurrences of `source_root` in each
# section filepath of the loaded metadata are replaced with `target_root`.
source_root = '/diskmnt/Projects/Users/estorrs/mushroom/data'
target_root = '/data/estorrs/mushroom/data'

In [7]:
# Root directory holding one sub-directory per case.
# NOTE(review): `run_dir` is not referenced by any other cell visible in this
# notebook; the `project_dir` cell repeats the same path literal.
run_dir = '/data/estorrs/mushroom/data/projects/submission_v1'

In [8]:
def alter_filesystem(config, source_root, target_root):
    """Re-root the data filepaths stored in a section config.

    For every `config['sections'][i]['data'][j]`, occurrences of `source_root`
    in its `'filepath'` string are replaced with `target_root`.

    The replacement is done in place: `config` itself is modified and the same
    object is returned.
    """
    data_entries = (
        item
        for section in config['sections']
        for item in section['data']
    )
    for item in data_entries:
        item['filepath'] = item['filepath'].replace(source_root, target_root)

    return config

In [114]:
# Case identifier: it selects the project directory and is used by write_imaris()
# as the prefix of the per-channel TIFF filenames. Swap the commented line in to
# process the other case.
case = 'HT206B1'
# case = 'HT413C1-Th1k4A1'

In [115]:
# Per-case project directory, derived from `run_dir` so the root path is defined
# in exactly one place (the resulting string is the same as the previous literal).
project_dir = os.path.join(run_dir, case)

In [116]:
# Load the registered-section metadata of the current case, then point its
# filepaths at the local filesystem.
metadata_fp = os.path.join(project_dir, 'registered', 'metadata.yaml')
with open(metadata_fp) as f:  # context manager so the handle is closed after parsing
    config = yaml.safe_load(f)
config = alter_filesystem(config, source_root, target_root)
config

{'resolution': 1.0,
 'sections': [{'data': [{'dtype': 'xenium',
     'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s0_HT206B1-U1_xenium.h5ad'}],
   'position': 0,
   'sid': 'HT206B1-U1'},
  {'data': [{'dtype': 'multiplex',
     'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s1_HT206B1-U2_multiplex.ome.tiff'}],
   'position': 5,
   'sid': 'HT206B1-U2'},
  {'data': [{'dtype': 'he',
     'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s2_HT206B1-U4_he.tif'}],
   'position': 15,
   'sid': 'HT206B1-U4'},
  {'data': [{'dtype': 'multiplex',
     'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s3_HT206B1-U5_multiplex.ome.tiff'}],
   'position': 20,
   'sid': 'HT206B1-U5'},
  {'data': [{'dtype': 'xenium',
     'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s4_HT206B1-U8_xenium.h5ad'}],
   'position': 40,
   'sid': 'HT2

In [95]:
# tiling_size = 10 # in microns per pixel

In [108]:
from einops import rearrange
def _scale_to_uint8(x):
    """Min-max scale `x` to the range [0, 255] and cast it to uint8."""
    x = x - x.min()
    peak = x.max()
    if peak != 0:  # a constant array would otherwise be divided by zero (NaNs)
        x = x / peak
    x = x * 255
    return x.astype(np.uint8)


def write_imaris(config, dtype='multiplex', channel_names=None, tiling_size=10, drop=None,
                 output_dir=None, prefix=None):
    """Export all sections of one data type as per-channel TIFF z-stacks.

    The sections of `config` whose first data entry has the requested `dtype`
    are loaded, stacked along a new leading (section) axis, converted to uint8
    if necessary and written as one BigTIFF per channel, named
    `<prefix>_C<channel index>.tif`. A `metadata.yaml` listing the channel names
    and section ids is written next to the TIFFs.

    Parameters
    ----------
    config : dict
        Section configuration with a `'sections'` list (each entry providing
        `'sid'` and `'data'[0]['dtype' / 'filepath']`) and a `'resolution'`
        value that is written as PhysicalSizeX/Y.
    dtype : str
        One of `'multiplex'`, `'xenium'` or `'cosmx'`.
    channel_names : list of str, optional
        Channels to export, in output order. Every name must be among the
        channels returned by the data type's `get_common_channels`. When omitted,
        all of those channels are exported.
    tiling_size : int
        Passed to `xenium.to_multiplex` for the `'xenium'` and `'cosmx'` types;
        unused for `'multiplex'`.
    drop : collection of str, optional
        Section ids to exclude.
    output_dir : path-like, optional
        Directory to write into. Defaults to `<project_dir>/imaris/<dtype>`,
        where `project_dir` is the notebook-level global.
    prefix : str, optional
        Filename prefix. Defaults to the notebook-level global `case`.

    Raises
    ------
    ValueError
        If `dtype` is not supported or no section is left to export.
    AssertionError
        If a requested channel is not available in all images.
    """
    sections = [s for s in config['sections'] if s['data'][0]['dtype'] == dtype]

    if drop is not None:
        sections = [s for s in sections if s['sid'] not in drop]

    print([s['sid'] for s in sections])

    fps = [s['data'][0]['filepath'] for s in sections]

    if dtype == 'multiplex':
        channels = multiplex.get_common_channels(fps)
    elif dtype == 'xenium':
        channels = xenium.get_common_channels(fps)
    elif dtype == 'cosmx':
        channels = cosmx.get_common_channels(fps)
    else:
        raise ValueError(f'{dtype} is not valid dtype')

    if channel_names is not None:
        present = [channel for channel in channel_names if channel in channels]
        assert len(present) == len(channel_names), f'{set(channel_names) - set(present)} not in all images'
        channels = channel_names
    print(channels)

    imgs = []
    target_size = None
    for fp in fps:
        if dtype == 'multiplex':
            channel_to_img = multiplex.extract_ome_tiff(fp, channels=channels, as_dict=True)
            img = np.stack([channel_to_img[c] for c in channels])
        elif dtype == 'xenium':
            adata = xenium.adata_from_xenium(fp)
            adata = adata[:, channels]
            img = xenium.to_multiplex(adata, tiling_size=tiling_size, method='grid')
            img = rearrange(img, 'h w c -> c h w')
        elif dtype == 'cosmx':
            adata = cosmx.adata_from_cosmx(fp)
            adata = adata[:, channels]
            # NOTE(review): cosmx data is rasterised with xenium.to_multiplex, as in the
            # original code — presumably the helper is platform-agnostic; confirm.
            img = xenium.to_multiplex(adata, tiling_size=tiling_size, method='grid')
            img = rearrange(img, 'h w c -> c h w')

        # The first section fixes the (h, w) size; later sections that differ are
        # passed through utils.rescale so that all of them can be stacked.
        if target_size is None:
            target_size = img.shape[-2:]

        if img.shape[-2:] != target_size:
            img = utils.rescale(img, size=target_size, target_dtype=img.dtype, dim_order='c h w')

        print(img.shape)
        imgs.append(img)

    if not imgs:
        raise ValueError(f'no {dtype} sections left to export')

    # x has axes (section, channel, h, w); the section axis becomes Z in the output.
    x = np.stack(imgs)
    print(x.shape, x.dtype)

    if x.dtype != np.uint8:
        x = _scale_to_uint8(x)

    # Fall back to the notebook-level globals only when no override was given.
    if output_dir is None:
        output_dir = os.path.join(project_dir, 'imaris', dtype)
    if prefix is None:
        prefix = case

    directory = Path(output_dir)
    directory.mkdir(parents=True, exist_ok=True)

    metadata = {
        # Axis labels must follow the order of the array handed to tif.write(),
        # which is (1, 1, z, h, w).
        'axes': 'TCZYX',
        'PhysicalSizeX': config['resolution'],
        'PhysicalSizeXUnit': 'µm',
        'PhysicalSizeY': config['resolution'],
        'PhysicalSizeYUnit': 'µm',
    }
    for c in range(x.shape[1]):
        print(c, channels[c])
        filepath = directory / f'{prefix}_C{c}.tif'
        with tifffile.TiffWriter(filepath, bigtiff=True) as tif:
            tif.write(
                rearrange(x[:, c], 'z h w -> 1 1 z h w'),
                metadata=metadata,
                compression='LZW',
            )

    meta = {
        'channels': channels,
        'sections': [entry['sid'] for entry in sections]
    }
    with open(directory / 'metadata.yaml', 'w') as f:  # close the handle so the YAML is flushed
        yaml.safe_dump(meta, f)
    
    
    

In [99]:
# Export every common multiplex channel of the current case.
# NOTE(review): the recorded output below reports 7 sections and 27 channels, while
# the HT206B1 cells further down list 6 multiplex filepaths and 33 common channels —
# the output looks like it came from a different case or an earlier run; re-run to confirm.
write_imaris(config, dtype='multiplex')

['CCL2', 'CD11b', 'CD20 (D)', 'CD31', 'CD3e (D)', 'CD4 (D)', 'CD45', 'CD68 (D)', 'CD8', 'CK14', 'CK7', 'CK8/18', 'DAPI', 'E-cadherin', 'FoxP3', 'GLUT1 (D)', 'HLA-DR', 'Hep-Par-1 (D)', 'Ki67', 'MUC2', 'P16 (Dnew)', 'P21 (D)', 'PAI1 (D)', 'PanCytokeratin', 'Podoplanin', 'SMA (D)', 'SOX9']
(27, 5085, 3640)
(27, 5085, 3640)
(27, 5085, 3640)
(27, 5085, 3640)
(27, 5085, 3640)
(27, 5085, 3640)
(27, 5085, 3640)
(7, 27, 5085, 3640) uint8
0 CCL2
1 CD11b
2 CD20 (D)
3 CD31
4 CD3e (D)
5 CD4 (D)
6 CD45
7 CD68 (D)
8 CD8
9 CK14
10 CK7
11 CK8/18
12 DAPI
13 E-cadherin
14 FoxP3
15 GLUT1 (D)
16 HLA-DR
17 Hep-Par-1 (D)
18 Ki67
19 MUC2
20 P16 (Dnew)
21 P21 (D)
22 PAI1 (D)
23 PanCytokeratin
24 Podoplanin
25 SMA (D)
26 SOX9


In [110]:
# Gene panel exported from the xenium sections; when passed as `channel_names`
# the list order becomes the channel order of the output files.
genes = [
    'EPCAM',
    'ACTA2',
    'PDPN',
    'PECAM1',
    'CD8A',
    'PTPRC',
    'IL7R',
    'CD44',
    'FN1',
    'ENTPD1',
    'TGFB1',
    'PTN',
    'PLAT',
    'CD68',
    'CD163',
    'LAG3',
    'PDCD1',
    'MGP',
    'MS4A1',
    'CD3D',
]
# Smaller alternative panel:
# genes = [
#     'EPCAM', 'ACTA2', 'PDPN', 'PECAM1', 'CD8A', 'PTPRC', 'IL7R', 'PTN', 'CD68', 'CD163', 'LAG3', 'PDCD1'
# ]

In [111]:
# Section ids excluded from the xenium export.
# NOTE(review): this sid carries the HT413C1-Th1k4A1 prefix, whereas the `case` cell
# currently selects 'HT206B1'; presumably it only has an effect when the other case
# is active — confirm before re-running.
drop = ['HT413C1-Th1k4A1-U31']

In [112]:
# Export the `genes` panel from the xenium sections with tiling_size=20, skipping `drop`.
# NOTE(review): the recorded output lists HT413C1-Th1k4A1 sections, so this cell was
# last run with that case loaded rather than the 'HT206B1' value currently set above.
write_imaris(config, dtype='xenium', channel_names=genes, tiling_size=20, drop=drop)

['HT413C1-Th1k4A1-U19', 'HT413C1-Th1k4A1-U2', 'HT413C1-Th1k4A1-U9', 'HT413C1-Th1k4A1-U25', 'HT413C1-Th1k4A1-U36']
['EPCAM', 'ACTA2', 'PDPN', 'PECAM1', 'CD8A', 'PTPRC', 'IL7R', 'CD44', 'FN1', 'ENTPD1', 'TGFB1', 'PTN', 'PLAT', 'CD68', 'CD163', 'LAG3', 'PDCD1', 'MGP', 'MS4A1', 'CD3D']
(20, 255, 183)
(20, 255, 183)
(20, 255, 183)
(20, 255, 183)
(20, 255, 183)
(5, 20, 255, 183) float64
0 EPCAM
1 ACTA2
2 PDPN
3 PECAM1
4 CD8A
5 PTPRC
6 IL7R
7 CD44
8 FN1
9 ENTPD1
10 TGFB1
11 PTN
12 PLAT
13 CD68
14 CD163
15 LAG3
16 PDCD1
17 MGP
18 MS4A1
19 CD3D


In [117]:
# Keep only the sections whose first data entry is a multiplex image.
sections = [
    section
    for section in config['sections']
    if section['data'][0]['dtype'] == 'multiplex'
]
sections

[{'data': [{'dtype': 'multiplex',
    'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s1_HT206B1-U2_multiplex.ome.tiff'}],
  'position': 5,
  'sid': 'HT206B1-U2'},
 {'data': [{'dtype': 'multiplex',
    'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s3_HT206B1-U5_multiplex.ome.tiff'}],
  'position': 20,
  'sid': 'HT206B1-U5'},
 {'data': [{'dtype': 'multiplex',
    'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s6_HT206B1-U10_multiplex.ome.tiff'}],
  'position': 50,
  'sid': 'HT206B1-U10'},
 {'data': [{'dtype': 'multiplex',
    'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s8_HT206B1-U13_multiplex.ome.tiff'}],
  'position': 65,
  'sid': 'HT206B1-U13'},
 {'data': [{'dtype': 'multiplex',
    'filepath': '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s11_HT206B1-U18_multiplex.ome.tiff'}],
  'position': 90,
  'sid': 'HT206B1-U

In [118]:
# Filepath of the first data entry of every selected section, in section order.
fps = [section['data'][0]['filepath'] for section in sections]
fps

['/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s1_HT206B1-U2_multiplex.ome.tiff',
 '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s3_HT206B1-U5_multiplex.ome.tiff',
 '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s6_HT206B1-U10_multiplex.ome.tiff',
 '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s8_HT206B1-U13_multiplex.ome.tiff',
 '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s11_HT206B1-U18_multiplex.ome.tiff',
 '/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/registered/s13_HT206B1-U21_multiplex.ome.tiff']

In [119]:
# Value written as PhysicalSizeX / PhysicalSizeY (unit 'µm') by the plain-TIFF export cell.
# NOTE(review): duplicates config['resolution'] (1.0 in the loaded metadata) —
# TODO confirm the two are meant to stay in sync.
mpp = 1.
from einops import rearrange

In [120]:
# Build a small test volume from the multiplex sections.
channels = multiplex.get_common_channels(fps)
imgs = []
for fp in fps:
    channel_to_img = multiplex.extract_ome_tiff(fp, channels=channels, as_dict=True)
    # Channel-first stack, ordered like `channels`.
    img = np.stack([channel_to_img[c] for c in channels])
    # Keep only a 1000 x 1000 window (indices 2000-2999 on both image axes);
    # presumably to keep the test export small — the offsets are hard-coded.
    img = img[:, 2000:3000, 2000:3000]
    imgs.append(img)
# Axes of x: (section, channel, h, w).
x = np.stack(imgs)
x.shape

(6, 33, 1000, 1000)

In [128]:
from ome_types import OME, model, to_xml

In [141]:


# Build the OME metadata describing `x`, whose axes are (z, c, h, w).
# The pixel type below is hard-coded, so fail early if the data does not match it.
assert x.dtype == np.uint8, f'expected uint8 pixel data, got {x.dtype}'
assert x.shape[1] == len(channels), 'channel axis of x does not match `channels`'

o = model.OME()
o.images.append(
    model.Image(
        id='Image:0',
        pixels=model.Pixels(
            dimension_order='XYCZT',
            size_c=len(channels),
            size_t=1,
            # X is the width (last axis) and Y the height (second to last axis);
            # this matches the 'z c h w -> w h c z 1' (x y c z t) layout used when writing.
            size_x=x.shape[-1],
            size_y=x.shape[-2],
            size_z=x.shape[0],
            type='uint8',
            big_endian=False,
            channels=[model.Channel(id=f'Channel:{i}', name=c) for i, c in enumerate(channels)],
            physical_size_x=config['resolution'],
            physical_size_y=config['resolution'],
            physical_size_x_unit='µm',
            physical_size_y_unit='µm'
        )
    )
)

im = o.images[0]
# List the planes in storage order for dimension order XYCZT: the channel index
# varies fastest, then z.
for j in range(x.shape[0]):
    for i in range(len(im.pixels.channels)):
        im.pixels.planes.append(model.Plane(the_c=i, the_t=0, the_z=j))
im.pixels.tiff_data_blocks.append(model.TiffData(plane_count=len(im.pixels.planes)))

o

OME(images=[<1 field_type>])

In [146]:
def write_HTAN_ome(output_fp, data, ome_model):
    """Write `data` to a BigTIFF and replace its description with `ome_model`.

    `data` is expected with axes ordered (x, y, c, z, t); it is rearranged to
    (t, z, c, y, x) and written with LZW compression through a
    `tifffile.TiffWriter` opened with `ome=True`. Afterwards the image
    description is overwritten with the XML serialisation of `ome_model`.
    """
    with tifffile.TiffWriter(output_fp, ome=True, bigtiff=True) as writer:
        writer.write(
            rearrange(data, 'x y c z t -> t z c y x'),
            metadata={'SamplesPerPixel': 1.0},
            compression='LZW',
        )
        # Swap in the hand-built OME-XML for the description written above.
        writer.overwrite_description(to_xml(ome_model).encode())

In [147]:
# Write the cropped stack as a single OME-TIFF for testing.
directory = Path(project_dir) / 'imaris' / 'multiplex'
directory.mkdir(parents=True, exist_ok=True)

# write_HTAN_ome expects (x, y, c, z, t): width first, plus a singleton time axis.
ome_input = rearrange(x, 'z c h w -> w h c z 1')
write_HTAN_ome(directory / 'test.ome.tif', ome_input, o)

In [133]:
directory

PosixPath('/data/estorrs/mushroom/data/projects/submission_v1/HT206B1/imaris/multiplex')

In [19]:
# Write the cropped stack `x` (axes z, c, h, w) as one plain TIFF z-stack per channel.
#
# The previous version of this cell used an undefined `filepath` and the einops
# pattern 'z c h w -> z h w', which is rejected because `c` only appears on one
# side. Looping over the channel axis matches the recorded output (one printed
# index per channel).
directory = Path(os.path.join(project_dir, 'imaris', 'multiplex'))
directory.mkdir(parents=True, exist_ok=True)

metadata = {
    'axes': 'ZYX',
    'PhysicalSizeX': mpp,
    'PhysicalSizeXUnit': 'µm',
    'PhysicalSizeY': mpp,
    'PhysicalSizeYUnit': 'µm',
}
for c in range(x.shape[1]):
    print(c)
    # The '_crop' suffix keeps these files apart from the full-size
    # '<case>_C<c>.tif' exports that write_imaris() puts in the same directory.
    filepath = directory / f'{case}_crop_C{c}.tif'
    with tifffile.TiffWriter(filepath) as tif:
        tif.write(
            x[:, c],  # (z, h, w) stack of a single channel
            metadata=metadata,
            compression='LZW',
        )

meta = {
    'channels': channels,
    'sections': [entry['sid'] for entry in sections]
}
# NOTE(review): this is the same metadata.yaml path that write_imaris(dtype='multiplex')
# writes to, so running this cell replaces that file.
with open(directory / 'metadata.yaml', 'w') as f:  # close the handle so the YAML is flushed
    yaml.safe_dump(meta, f)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


In [57]:
# List the common channels with 1-based numbering.
for i, c in enumerate(channels):
    print(i + 1, c)

1 BCA1 (D)
2 CD163
3 CD20
4 CD31
5 CD39
6 CD4 (D)
7 CD45 (D)
8 CD68
9 CD8
10 CK14 (D)
11 COX6C (D)
12 DAPI
13 E-cadherin
14 FOXP3
15 GATA3 (D)
16 Granzyme B
17 HER2 (D)
18 HIF1a
19 HLA-DR
20 Keratin 5
21 Ki67
22 MGP (D)
23 PD-1
24 PLAT/tPA (D)
25 PR (D)
26 Pan-Cytokeratin
27 Podoplanin (D)
28 SLC39A6 (D)
29 SMA (D)
30 TCF-1
31 TFF1 (D)
32 Vimentin
33 cd11b


In [None]:
def tile_img(adata, tiling_size, size):
    """Sum `adata.X` over square tiles to build a (rows, cols, features) image.

    Parameters
    ----------
    adata
        Object exposing `obsm['spatial']` (one coordinate pair per observation),
        `X`, `shape` and boolean-mask indexing over observations.
    tiling_size
        Edge length of a tile, in the same units as the coordinates.
    size
        Sequence whose last two entries are the extent along the row and
        column axes.

    Returns
    -------
    numpy.ndarray
        Array of shape `(size[-2] // tiling_size + 1, size[-1] // tiling_size + 1,
        adata.shape[1])`. Each cell holds the column sums of `X` over the
        observations falling in that tile; observations outside the grid are ignored.
    """
    grid_h = size[-2] // tiling_size + 1
    grid_w = size[-1] // tiling_size + 1

    # Swap the two spatial columns so that column 0 is compared against row
    # bounds and column 1 against column bounds.
    coords = adata.obsm['spatial'][:, [1, 0]]

    tiled = np.zeros((grid_h, grid_w, adata.shape[1]))
    for row in range(grid_h):
        top = row * tiling_size
        bottom = (row + 1) * tiling_size
        in_row = (coords[:, 0] >= top) & (coords[:, 0] < bottom)
        # Restrict to this band of rows once, then split it into columns.
        band = adata[in_row]
        band_coords = coords[in_row]
        for col in range(grid_w):
            left = col * tiling_size
            right = (col + 1) * tiling_size
            in_col = (band_coords[:, 1] >= left) & (band_coords[:, 1] < right)
            tiled[row, col] = band[in_col].X.sum(0)
    return tiled


In [None]:
# Tile every entry of `sid_to_adata` into an image, keyed like the input mapping.
# NOTE(review): `tiling_size`, `size` and `sid_to_adata` are not assigned by any cell
# visible in this notebook (the only `tiling_size` assignment is commented out) and
# this cell has no execution count — it will raise NameError on a fresh kernel.
# TODO define these names or remove the cell.
sid_to_img = {
    k:tile_img(a, tiling_size, size) for k, a in sid_to_adata.items()
}