In [40]:
import re
import os

import tifffile
from tifffile import TiffFile
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import matplotlib.pyplot as plt
import mgitools.os_helpers as os_helpers

from skimage.io import imread
from skimage import exposure


In [20]:
from imctools.io.mcd.mcdparser import McdParser

In [17]:
def normalize_sample_name(name):
    """Return ``name`` with every space character swapped for an underscore."""
    # Splitting on the literal ' ' (not on generic whitespace) keeps runs of
    # spaces as runs of underscores, exactly like a character-wise replace.
    return '_'.join(name.split(' '))

In [36]:
# !pip install ome-types

In [297]:
batch_folder = 'b1_01042020'

## generate ome.tiff from hyperion .mcd

In [277]:
# Pair each path listed under this batch's mcd/ folder with a sample name
# derived from its file name ('.mcd' removed, spaces -> underscores).
mcd_dir = f'../data/hyperion/{batch_folder}/mcd/'
fps = os_helpers.listfiles(mcd_dir)
# fps = [fp for fp in fps if 'HT056' in fp]
sample_fp_tups = [
    (normalize_sample_name(fp.rsplit('/', 1)[-1].replace('.mcd', '')), fp)
    for fp in fps
]
sample_fp_tups

[('HT122P1_S1H4_090320',
  '../data/hyperion/b3_09012020/mcd/HT122P1 S1H4 090320.mcd'),
 ('HT168P1_S1H4_091420',
  '../data/hyperion/b3_09012020/mcd/HT168P1 S1H4 091420.mcd'),
 ('JH_hu_spleen_090320',
  '../data/hyperion/b3_09012020/mcd/JH hu spleen 090320.mcd'),
 ('Human_spleen_091120',
  '../data/hyperion/b3_09012020/mcd/Human spleen 091120.mcd'),
 ('HT168P1_S1H3_091420',
  '../data/hyperion/b3_09012020/mcd/HT168P1 S1H3 091420.mcd'),
 ('HT122P1_S1H9_090320',
  '../data/hyperion/b3_09012020/mcd/HT122P1 S1H9 090320.mcd'),
 ('HT122P1_S1H5_090320',
  '../data/hyperion/b3_09012020/mcd/HT122P1 S1H5 090320.mcd'),
 ('HT122P1_S1H3_L1_L4_090320',
  '../data/hyperion/b3_09012020/mcd/HT122P1 S1H3 L1 L4 090320.mcd'),
 ('HT168P1_S1H2', '../data/hyperion/b3_09012020/mcd/HT168P1 S1H2.mcd'),
 ('HT122P1_S1H3_L1_L4_rescan_092820',
  '../data/hyperion/b3_09012020/mcd/HT122P1 S1H3 L1 L4 rescan 092820.mcd'),
 ('HT123P1_S1H5_091120',
  '../data/hyperion/b3_09012020/mcd/HT123P1 S1H5 091120.mcd')]

In [278]:
from pathlib import Path

# Folder that receives the raw per-ROI OME-TIFF exports; created on demand.
out_dir = f'../data/hyperion/{batch_folder}/ome-tiff'
Path(out_dir).mkdir(parents=True, exist_ok=True)

In [279]:
def mcd_to_ome(sample_id, fp, out_dir):
    """Export every acquisition of a Hyperion .mcd file as an OME-TIFF.

    One file named ``{sample_id}_{roi}.ome.tiff`` is written into ``out_dir``
    per acquisition, where ``roi`` is the acquisition's ``Description``
    metadata entry with spaces replaced by underscores.

    Parameters
    ----------
    sample_id : str
        Prefix used for the output file names.
    fp : str
        Path to the .mcd file.
    out_dir : str
        Existing directory that receives the OME-TIFF files.

    Returns
    -------
    None
        The function only writes files and prints progress.

    Notes
    -----
    Acquisitions for which loading/saving raises ``ValueError`` are reported
    on stdout and skipped so the remaining acquisitions are still exported.
    Two acquisitions sharing the same ``Description`` map to the same output
    path; the later one overwrites the earlier one.
    """
    parser = McdParser(fp)
    try:
        for a_id in parser.session.acquisition_ids:
            try:
                ac_data = parser.get_acquisition_data(a_id)
                metadata = ac_data.acquisition.metadata
                roi = metadata['Description'].replace(' ', '_')
                print(f'{sample_id}_{roi}.ome.tiff')
                ac_data.save_ome_tiff(os.path.join(out_dir, f'{sample_id}_{roi}.ome.tiff'))
            except ValueError:
                print(f'error loading data for {sample_id}, {fp}, {a_id}')
    finally:
        # Release the handle on the .mcd file even if an export fails;
        # previously the parser was left open for every processed file.
        parser.close()
        
        

In [280]:
# Export every listed .mcd file into out_dir, one OME-TIFF per acquisition.
for s, fp in sample_fp_tups:
    mcd_to_ome(sample_id=s, fp=fp, out_dir=out_dir)

HT122P1_S1H4_090320_ROI_001.ome.tiff
HT122P1_S1H4_090320_ROI_002.ome.tiff
HT168P1_S1H4_091420_ROI_001.ome.tiff
HT168P1_S1H4_091420_ROI_002.ome.tiff
JH_hu_spleen_090320_ROI_001.ome.tiff
Human_spleen_091120_ROI_001.ome.tiff
HT168P1_S1H3_091420_ROI_L1.ome.tiff
HT168P1_S1H3_091420_ROI_L4.ome.tiff
HT168P1_S1H3_091420_ROI_L1_full.ome.tiff
HT122P1_S1H9_090320_ROI_L1.ome.tiff
HT122P1_S1H9_090320_ROI_L4.ome.tiff
HT122P1_S1H5_090320_ROI_L1.ome.tiff
HT122P1_S1H5_090320_ROI_L4.ome.tiff
HT122P1_S1H3_L1_L4_090320_ROI_1_L1.ome.tiff
HT122P1_S1H3_L1_L4_090320_ROI_002.ome.tiff
HT168P1_S1H2_ROI_L1.ome.tiff
HT168P1_S1H2_ROI_L4.ome.tiff
HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1.ome.tiff
HT122P1_S1H3_L1_L4_rescan_092820_ROI_L4.ome.tiff
HT123P1_S1H5_091120_ROI_L1.ome.tiff
HT123P1_S1H5_091120_ROI_L4.ome.tiff


## generate level 2 ome.tiffs

In [281]:
def parse_channel_name_from_raw(c):
    """Extract the target/marker part of a raw channel label.

    Raw labels look like ``'<metal>-<target>'`` (e.g. ``'143Nd-Vimentin'``).
    Everything after the first ``'-'`` is returned, so targets that contain
    dashes themselves (``'150Nd-PD-L1'`` -> ``'PD-L1'``) stay intact.

    Parameters
    ----------
    c : str or None
        Raw channel name. ``None`` (a channel without a name) is accepted.

    Returns
    -------
    str or None
        The target name, ``'Histone H3'`` for labels containing the dash-less
        ``'176YbHistoneH3'`` spelling, or ``None`` when the label carries no
        target (no dash, nothing after the dash, or no name at all).
    """
    if not isinstance(c, str):
        # Unnamed channels have nothing to parse; treat them as "no target"
        # instead of failing on the membership test below.
        return None
    if '-' in c:
        # Evaluate the pattern once and reuse the result.
        target = re.sub(r'^[^-]*-(.*)$', r'\1', c)
        if target:
            return target
    # Special case: this label is written without the '-' separator.
    if '176YbHistoneH3' in c:
        return 'Histone H3'
    return None

In [290]:
from ome_types import from_tiff, from_xml, to_xml
from ome_types.model.simple_types import UnitsLength

# Output channel name -> spellings of that marker found in raw channel labels.
d = dict([
    ('Vimentin', ['vimentin']),
    ('Pan Keratin', ['PanCK', 'PanKeratin', 'PanK']),
    ('PD-L2', ['PDL2']),
    ('PD-L1', ['PDL1']),
    ('PD-1', ['PD1']),
    ('FOXP3', ['FoxP3']),
    ('Granzyme B', ['GranzymeB']),
    ('T1 Collagen', ['T1Collagen', 'Type1Coll', 'Type1_collagen']),
    ('HLA-DR', ['HLADR']),
    ('Histone H3', ['HistoneH3']),
    ('Podoplanin', ['podoplanin']),
    ('Alpha Amylase', ['AlphaAmylase']),
])
# Inverted lookup: raw spelling -> output channel name.
CHANNEL_MAP = dict(
    (alias, output_name)
    for output_name, aliases in d.items()
    for alias in aliases
)

def convert_hyperion_ome_to_htan_ome(fp, out_fp):
    """Rewrite a Hyperion OME-TIFF keeping only channels that carry a target.

    The OME-XML of ``fp`` is parsed (after stripping the ``AcquisitionDate``
    element), physical-size attributes are filled in, channels whose raw name
    yields no target via ``parse_channel_name_from_raw`` are dropped, and the
    remaining channels are renamed through ``CHANNEL_MAP``. The matching image
    planes plus the edited OME-XML are written to ``out_fp``.

    Parameters
    ----------
    fp : str
        Path of the source OME-TIFF.
    out_fp : str
        Path of the OME-TIFF to create.

    Returns
    -------
    None
        When no channel survives the filter a message is printed and no file
        is written.
    """
    with TiffFile(fp) as tif:
        m = re.sub(r'<Acquis.*AcquisitionDate>', r'', tif.ome_metadata)
        ome = from_xml(m)
        im = ome.images[0]
        # NOTE(review): OME defines PhysicalSizeX/Y as the size of ONE pixel,
        # but the value stored here is the image width/height in pixels.
        # Left unchanged on purpose - confirm the intended value.
        im.pixels.physical_size_x = float(im.pixels.size_x)
        im.pixels.physical_size_y = float(im.pixels.size_y)
        im.pixels.physical_size_x_unit = 'µm'
        im.pixels.physical_size_y_unit = 'µm'
        im.pixels.tiff_data_blocks[0].plane_count = 1
        im.pixels.type = 'float'

        # Walk the channels in FILE order so that the recorded index is the
        # index of the corresponding TIFF page (assumes one page per channel,
        # stored in channel order). Indexing an alphabetically sorted name
        # list instead selects pages that belong to other channels.
        keep_idxs = set()
        kept_channels = []
        for idx, channel in enumerate(im.pixels.channels):
            raw_name = channel.name
            target = None if raw_name is None else parse_channel_name_from_raw(raw_name)
            if target is None:
                continue
            channel.name = CHANNEL_MAP.get(target, target)
            keep_idxs.add(idx)
            kept_channels.append(channel)
#         print([c.name for c in kept_channels])

        if not kept_channels:
            print(f'{fp} has no channels')
            return

        im.pixels.channels = kept_channels
        # Keep SizeC in sync with the number of channels/planes written.
        im.pixels.size_c = len(kept_channels)

        xml_str = to_xml(ome)
#         print(xml_str)
        with tifffile.TiffWriter(out_fp, ome=True) as out_tif:
            for i, p in enumerate(tif.pages):
                if i in keep_idxs:
                    out_tif.write(p.asarray())
            out_tif.overwrite_description(xml_str.encode())

In [283]:
# Pair each path listed under this batch's ome-tiff/ folder with its file
# name stripped of the '.ome.tiff' suffix.
ome_dir = f'../data/hyperion/{batch_folder}/ome-tiff'
fps = os_helpers.listfiles(ome_dir)
sample_fp_tups = [
    (fp.rsplit('/', 1)[-1].replace('.ome.tiff', ''), fp)
    for fp in fps
]
sample_fp_tups

[('HT122P1_S1H9_090320_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H9_090320_ROI_L1.ome.tiff'),
 ('HT168P1_S1H3_091420_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H3_091420_ROI_L1.ome.tiff'),
 ('Human_spleen_091120_ROI_001',
  '../data/hyperion/b3_09012020/ome-tiff/Human_spleen_091120_ROI_001.ome.tiff'),
 ('HT168P1_S1H2_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H2_ROI_L1.ome.tiff'),
 ('HT122P1_S1H9_090320_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H9_090320_ROI_L4.ome.tiff'),
 ('HT122P1_S1H3_L1_L4_090320_ROI_002',
  '../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H3_L1_L4_090320_ROI_002.ome.tiff'),
 ('HT123P1_S1H5_091120_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff/HT123P1_S1H5_091120_ROI_L4.ome.tiff'),
 ('HT168P1_S1H3_091420_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H3_091420_ROI_L4.ome.tiff'),
 ('HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H3_L1_

In [291]:
# Preview, for the first listed image, how each raw channel name is renamed
# (None means the channel has no target and would be dropped).
first_fp = sample_fp_tups[0][1]
tif = TiffFile(first_fp)
m = re.sub(r'<Acquis.*AcquisitionDate>', r'', tif.ome_metadata)
ome = from_xml(m)
im = ome.images[0]
names = sorted(set(c.name for c in im.pixels.channels))
[
    (n, CHANNEL_MAP.get(parse_channel_name_from_raw(n), parse_channel_name_from_raw(n)))
    for n in names
]

[('100Ru', None),
 ('101Ru', None),
 ('102Ru', None),
 ('104Ru', None),
 ('113In', None),
 ('115In', None),
 ('127I', None),
 ('131Xe', None),
 ('138Ba', None),
 ('139La', None),
 ('141Pr-', None),
 ('142Nd-podoplanin', 'Podoplanin'),
 ('143Nd-Vimentin', 'Vimentin'),
 ('144Nd-CD14', 'CD14'),
 ('145Nd-AlphaAmylase', 'Alpha Amylase'),
 ('146Nd-CD16', 'CD16'),
 ('147Sm-CD11b', 'CD11b'),
 ('148Nd-PanK', 'Pan Keratin'),
 ('149Sm-', None),
 ('150Nd-PD-L1', 'PD-L1'),
 ('151Eu-CD31', 'CD31'),
 ('152Sm-', None),
 ('153Eu-Lag3', 'Lag3'),
 ('155Gd-FOXP3', 'FOXP3'),
 ('156Gd-CD4', 'CD4'),
 ('158Gd-', None),
 ('159Tb-CD68', 'CD68'),
 ('160Gd-', None),
 ('161Dy-CD20', 'CD20'),
 ('162Dy-CD8a', 'CD8a'),
 ('163Dy-CD133', 'CD133'),
 ('164Dy-CXCR4', 'CXCR4'),
 ('165Ho-PD1', 'PD-1'),
 ('166Er-', None),
 ('167Er-GranzymeB', 'Granzyme B'),
 ('168Er-', None),
 ('169Tm-Type1_collagen', 'T1 Collagen'),
 ('170Er-CD3', 'CD3'),
 ('171Yb', None),
 ('172Yb-PDL2', 'PD-L2'),
 ('173Yb-CD45RO', 'CD45RO'),
 ('174Yb-', N

In [292]:
# Folder that receives the converted (channel-filtered) OME-TIFFs.
out_dir = f'../data/hyperion/{batch_folder}/ome-tiff_htan'
os.makedirs(out_dir, exist_ok=True)

In [293]:
# Convert every listed OME-TIFF, writing the result under the same file name
# inside out_dir.
for sample, fp in sample_fp_tups:
    print(fp)
    out_fp = os.path.join(out_dir, sample + '.ome.tiff')
    convert_hyperion_ome_to_htan_ome(fp, out_fp=out_fp)

../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H9_090320_ROI_L1.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H3_091420_ROI_L1.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/Human_spleen_091120_ROI_001.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H2_ROI_L1.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H9_090320_ROI_L4.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H3_L1_L4_090320_ROI_002.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT123P1_S1H5_091120_ROI_L4.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H3_091420_ROI_L4.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT122P1_S1H3_L1_L4_rescan_092820_ROI_L4.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H2_ROI_L4.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT168P1_S1H4_091420_ROI_002.ome.tiff
../data/hyperion/b3_09012020/ome-tiff/HT123P1_S1H5_091120_ROI_L1.ome.tiff
../data/hyperion/

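###### sandbox (exploratory scratch cells; they redefine `d`, `CHANNEL_MAP` and `convert_hyperion_ome_to_htan_ome`)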

In [45]:
tif = TiffFile('../data/hyperion/b1_01042020/ome-tiff/HT056P1_PA_A1_A4_ROI_01.ome.tiff')
tif

<tifffile.tifffile.TiffFile at 0x7f77013e2250>

In [46]:
tif.ome_metadata

'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<OME Creator="imctools 2.1.0" xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd"><Image ID="Image:0" Name="HT056P1_PA_A1_A4_ROI_01.ome.tiff"><Pixels BigEndian="false" DimensionOrder="XYCZT" ID="Pixels:0" Interleaved="true" SizeC="51" SizeT="1" SizeX="538" SizeY="587" SizeZ="1" Type="float"><Channel Fluor="Y89" ID="Channel:0:0" Name="89Y-" SamplesPerPixel="1" /><Channel Fluor="Ru100" ID="Channel:0:1" Name="100Ru" SamplesPerPixel="1" /><Channel Fluor="Ru101" ID="Channel:0:2" Name="101Ru" SamplesPerPixel="1" /><Channel Fluor="Ru102" ID="Channel:0:3" Name="102Ru" SamplesPerPixel="1" /><Channel Fluor="Ru104" ID="Channel:0:4" Name="104Ru" SamplesPerPixel="1" /><Channel Fluor="In113" ID="Channel:0:5" Name="113In" SamplesPerPixel="1" /><Channel Fluor="In115" ID="

In [47]:
p = tif.pages[0]
p

<tifffile.tifffile.TiffPage at 0x7f770158a590>

In [48]:
p.description

'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<OME Creator="imctools 2.1.0" xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd"><Image ID="Image:0" Name="HT056P1_PA_A1_A4_ROI_01.ome.tiff"><Pixels BigEndian="false" DimensionOrder="XYCZT" ID="Pixels:0" Interleaved="true" SizeC="51" SizeT="1" SizeX="538" SizeY="587" SizeZ="1" Type="float"><Channel Fluor="Y89" ID="Channel:0:0" Name="89Y-" SamplesPerPixel="1" /><Channel Fluor="Ru100" ID="Channel:0:1" Name="100Ru" SamplesPerPixel="1" /><Channel Fluor="Ru101" ID="Channel:0:2" Name="101Ru" SamplesPerPixel="1" /><Channel Fluor="Ru102" ID="Channel:0:3" Name="102Ru" SamplesPerPixel="1" /><Channel Fluor="Ru104" ID="Channel:0:4" Name="104Ru" SamplesPerPixel="1" /><Channel Fluor="In113" ID="Channel:0:5" Name="113In" SamplesPerPixel="1" /><Channel Fluor="In115" ID="

In [123]:
tif = TiffFile('../data/john_will_grant_imageing/S18_40270_C3_JH010721_roi2.ome.tiff')
m = re.sub(r'<Acquis.*AcquisitionDate>', r'', tif.ome_metadata)
m

'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<OME Creator="imctools 2.1.0" xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd"><Image ID="Image:0" Name="S18_40270_C3_JH010721_roi2.ome.tiff"><Pixels BigEndian="false" DimensionOrder="XYCZT" ID="Pixels:0" Interleaved="true" SizeC="196" SizeT="1" SizeX="1349" SizeY="1371" SizeZ="1" Type="float"><Channel Fluor="As75" ID="Channel:0:0" Name="75As" SamplesPerPixel="1" /><Channel Fluor="Se76" ID="Channel:0:1" Name="76Se" SamplesPerPixel="1" /><Channel Fluor="Se77" ID="Channel:0:2" Name="77Se" SamplesPerPixel="1" /><Channel Fluor="Se78" ID="Channel:0:3" Name="78Se" SamplesPerPixel="1" /><Channel Fluor="Br79" ID="Channel:0:4" Name="79Br" SamplesPerPixel="1" /><Channel Fluor="ArAr80" ID="Channel:0:5" Name="80ArAr" SamplesPerPixel="1" /><Channel Fluor="Kr80" ID="

In [138]:
from ome_types import from_tiff, from_xml, to_xml
from ome_types.model.simple_types import UnitsLength

# Output channel name -> spellings of that marker found in raw channel labels.
d = dict([
    ('Vimentin', ['vimentin']),
    ('PanKeratin', ['PanCK']),
    ('PD-L1', ['PDL1']),
    ('PD-1', ['PD1']),
])
# Inverted lookup: raw spelling -> output channel name.
CHANNEL_MAP = dict(
    (alias, output_name)
    for output_name, aliases in d.items()
    for alias in aliases
)

def convert_hyperion_ome_to_htan_ome(fp, out_fp):
    """Rewrite a Hyperion OME-TIFF keeping only ``'<metal>-<target>'`` channels.

    The OME-XML of ``fp`` is parsed (after stripping the ``AcquisitionDate``
    element), physical-size attributes are filled in, channels whose name has
    no non-empty text after the first ``'-'`` are dropped, and the remaining
    channels are renamed to that text (mapped through ``CHANNEL_MAP`` when an
    entry exists). The matching planes and the edited OME-XML are written to
    ``out_fp``.

    Parameters
    ----------
    fp : str
        Path of the source OME-TIFF.
    out_fp : str
        Path of the OME-TIFF to create.

    Returns
    -------
    None
        When no channel survives the filter a message is printed and no file
        is written.
    """
    with TiffFile(fp) as tif:
        m = re.sub(r'<Acquis.*AcquisitionDate>', r'', tif.ome_metadata)
        ome = from_xml(m)
        im = ome.images[0]
        # NOTE(review): OME defines PhysicalSizeX/Y as the size of ONE pixel,
        # but the value stored here is the image width/height in pixels.
        # Left unchanged on purpose - confirm the intended value.
        im.pixels.physical_size_x = float(im.pixels.size_x)
        im.pixels.physical_size_y = float(im.pixels.size_y)
        im.pixels.physical_size_x_unit = 'µm'
        im.pixels.physical_size_y_unit = 'µm'
        im.pixels.tiff_data_blocks[0].plane_count = 1
        im.pixels.type = 'float'

        # Walk the channels in FILE order so that the recorded index is the
        # index of the corresponding TIFF page (assumes one page per channel,
        # stored in channel order). Indexing an alphabetically sorted name
        # list instead selects pages that belong to other channels.
        keep_idxs = set()
        kept_channels = []
        for idx, channel in enumerate(im.pixels.channels):
            raw_name = channel.name
            if raw_name is None or '-' not in raw_name:
                continue
            target = re.sub(r'^[^-]*-(.*)$', r'\1', raw_name)
            if not target:
                continue
            channel.name = CHANNEL_MAP.get(target, target)
            keep_idxs.add(idx)
            kept_channels.append(channel)

        if not kept_channels:
            # Nothing to write; an empty writer has no page to attach the
            # description to.
            print(f'{fp} has no channels')
            return

        im.pixels.channels = kept_channels
        # Keep SizeC in sync with the number of channels/planes written.
        im.pixels.size_c = len(kept_channels)

        xml_str = to_xml(ome)
        with tifffile.TiffWriter(out_fp, ome=True) as out_tif:
            for i, p in enumerate(tif.pages):
                if i in keep_idxs:
                    out_tif.write(p.asarray())
            out_tif.overwrite_description(xml_str.encode())

In [124]:
t = from_xml(m)
t

OME(
   creator='imctools 2.1.0',
   images=[<1 Images>],
)

In [125]:
im = t.images[0]
im

Image(
   id='Image:0',
   name='S18_40270_C3_JH010721_roi2.ome.tiff',
   pixels=Pixels(
      id='Pixels:0',
      dimension_order='XYCZT',
      size_c=196,
      size_t=1,
      size_x=1349,
      size_y=1371,
      size_z=1,
      type='float',
      big_endian=False,
      channels=[<196 Channels>],
      interleaved=True,
      tiff_data_blocks=[<1 Tiff_Data_Blocks>],
   ),
)

In [126]:
im.pixels.physical_size_x = float(im.pixels.size_x)
im.pixels.physical_size_y = float(im.pixels.size_y)
im.pixels.physical_size_x_unit = 'µm'
im.pixels.physical_size_y_unit = 'µm'
im.pixels.tiff_data_blocks[0].plane_count = 1
im.pixels.type = 'float'
im

Image(
   id='Image:0',
   name='S18_40270_C3_JH010721_roi2.ome.tiff',
   pixels=Pixels(
      id='Pixels:0',
      dimension_order='XYCZT',
      size_c=196,
      size_t=1,
      size_x=1349,
      size_y=1371,
      size_z=1,
      type='float',
      big_endian=False,
      channels=[<196 Channels>],
      interleaved=True,
      physical_size_x=1349.0,
      physical_size_x_unit='µm',
      physical_size_y=1371.0,
      physical_size_y_unit='µm',
      tiff_data_blocks=[<1 Tiff_Data_Blocks>],
   ),
)

In [127]:
for c in im.pixels.channels:
    print(c)

Channel(
   id='Channel:0:0',
   name='75As',
   fluor='As75',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:1',
   name='76Se',
   fluor='Se76',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:2',
   name='77Se',
   fluor='Se77',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:3',
   name='78Se',
   fluor='Se78',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:4',
   name='79Br',
   fluor='Br79',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:5',
   name='80ArAr',
   fluor='ArAr80',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:6',
   name='80Kr',
   fluor='Kr80',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:7',
   name='80Se',
   fluor='Se80',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:8',
   name='81Br',
   fluor='Br81',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:9',
   name='82Kr',
   fluor='Kr82',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:10',
   name='83Kr',
   fluor='Kr83',
   samples_per_pixel=1,
)
Channel(
   id='

In [128]:
names = sorted({c.name for c in im.pixels.channels})
names

['100Mo',
 '100Ru',
 '101Ru',
 '102Pd',
 '102Ru',
 '103Rh',
 '104Pd',
 '104Ru',
 '105Pd',
 '106Cd',
 '106Pd',
 '107Ag',
 '108Cd',
 '108Pd',
 '109Ag',
 '110Cd',
 '110Pd',
 '111Cd',
 '112Cd',
 '112Sn',
 '113Cd',
 '113In',
 '114Cd',
 '114Sn',
 '115In-',
 '115Sn',
 '116Cd',
 '116Sn',
 '117Sn',
 '118Sn',
 '119Sn',
 '120Sn',
 '120Te',
 '121Sb',
 '122Sn',
 '122Te',
 '123Sb',
 '123Te',
 '124Sn',
 '124Te',
 '124Xe',
 '125Te',
 '126Te',
 '126Xe',
 '127I',
 '128Te',
 '128Xe',
 '129Xe',
 '130Ba',
 '130Te',
 '130Xe',
 '131Xe',
 '132Ba',
 '132Xe',
 '133Cs',
 '134Ba',
 '134Xe',
 '135Ba',
 '136Ba',
 '136Ce',
 '136Xe',
 '137Ba',
 '138Ba',
 '138Ce',
 '138La',
 '139La-',
 '140Ce',
 '141Pr-SMA',
 '142Ce',
 '142Nd-',
 '143Nd-vimentin',
 '144Nd-',
 '144Sm',
 '145Nd-',
 '146Nd-',
 '147Sm-',
 '148Nd-PanCK',
 '148Sm',
 '149Sm-',
 '150Nd-PDL1',
 '150Sm-',
 '151Eu-',
 '152Gd',
 '152Sm-',
 '153Eu-',
 '154Gd',
 '155Gd-FOXP3',
 '156Dy',
 '156Gd-CD4',
 '157Gd-',
 '158Dy',
 '158Gd-Ecadherin',
 '159Tb-CD68',
 '160Dy',

In [129]:
# Output channel name -> spellings of that marker found in raw channel labels.
d = dict([
    ('Vimentin', ['vimentin']),
    ('PanKeratin', ['PanCK']),
    ('PD-L1', ['PDL1']),
    ('PD-1', ['PD1']),
])
# Inverted lookup: raw spelling -> output channel name.
CHANNEL_MAP = dict(
    (alias, output_name)
    for output_name, aliases in d.items()
    for alias in aliases
)

In [130]:
# Raw names that have non-empty text after the first '-', in sorted order.
keep = [n for n in names if '-' in n if re.sub(r'^[^-]*-(.*)$', r'\1', n)]
# Page indices must follow the channel order of the FILE, not the sorted
# `names` list: positions in `names` do not correspond to TIFF pages.
# Run this cell before the cell that filters/renames `im.pixels.channels`,
# otherwise the enumeration below no longer reflects the original file.
keep_idxs = [i for i, c in enumerate(im.pixels.channels) if c.name in set(keep)]
new = [CHANNEL_MAP.get(re.sub(r'^[^-]*-(.*)$', r'\1', n), re.sub(r'^[^-]*-(.*)$', r'\1', n))
      for n in keep]
name_to_identifier = {k:n for k, n in zip(keep, new)}
list(zip(keep, new))

[('141Pr-SMA', 'SMA'),
 ('143Nd-vimentin', 'Vimentin'),
 ('148Nd-PanCK', 'PanKeratin'),
 ('150Nd-PDL1', 'PD-L1'),
 ('155Gd-FOXP3', 'FOXP3'),
 ('156Gd-CD4', 'CD4'),
 ('158Gd-Ecadherin', 'Ecadherin'),
 ('159Tb-CD68', 'CD68'),
 ('161Dy-CD20', 'CD20'),
 ('162Dy-CD8a', 'CD8a'),
 ('165Ho-PD1', 'PD-1'),
 ('167Er-GranzymeB', 'GranzymeB'),
 ('168Er-Ki67', 'Ki67'),
 ('169Tm-CollagenT1', 'CollagenT1'),
 ('170Er-CD3', 'CD3'),
 ('173Yb-CD45RO', 'CD45RO'),
 ('176Yb-HistoneH3', 'HistoneH3'),
 ('191Ir-DNA1', 'DNA1'),
 ('193Ir-DNA2', 'DNA2'),
 ('195Pt-cellseg1', 'cellseg1'),
 ('196Pt-cellseg2', 'cellseg2'),
 ('198Pt-cellseg3', 'cellseg3')]

In [131]:
im.pixels.channels = [c for c in im.pixels.channels if c.name in keep]
for c in im.pixels.channels: c.name = name_to_identifier[c.name]
for c in im.pixels.channels: print(c)

Channel(
   id='Channel:0:100',
   name='SMA',
   fluor='Pr141',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:103',
   name='Vimentin',
   fluor='Nd143',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:109',
   name='PanKeratin',
   fluor='Nd148',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:112',
   name='PD-L1',
   fluor='Nd150',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:120',
   name='FOXP3',
   fluor='Gd155',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:122',
   name='CD4',
   fluor='Gd156',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:125',
   name='Ecadherin',
   fluor='Gd158',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:126',
   name='CD68',
   fluor='Tb159',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:129',
   name='CD20',
   fluor='Dy161',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:130',
   name='CD8a',
   fluor='Dy162',
   samples_per_pixel=1,
)
Channel(
   id='Channel:0:135',
   name='PD-1',
   fluor='Ho165',

In [132]:
xml_str = to_xml(t)
xml_str

'<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Creator="imctools 2.1.0" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd">\n    <Image ID="Image:0" Name="S18_40270_C3_JH010721_roi2.ome.tiff">\n        <Pixels BigEndian="false" DimensionOrder="XYCZT" ID="Pixels:0" Interleaved="true" PhysicalSizeX="1349.0" PhysicalSizeXUnit="µm" PhysicalSizeY="1371.0" PhysicalSizeYUnit="µm" SizeC="196" SizeT="1" SizeX="1349" SizeY="1371" SizeZ="1" Type="float">\n            <Channel Fluor="Pr141" ID="Channel:0:100" Name="SMA" SamplesPerPixel="1" />\n            <Channel Fluor="Nd143" ID="Channel:0:103" Name="Vimentin" SamplesPerPixel="1" />\n            <Channel Fluor="Nd148" ID="Channel:0:109" Name="PanKeratin" SamplesPerPixel="1" />\n            <Channel Fluor="Nd150" ID="Channel:0:112" Name="PD-L1" SamplesPerPixel="1" />\n            <Channel Fluor="Gd15

In [133]:
# Write the pages whose index is listed in keep_idxs to a scratch OME-TIFF,
# then replace the description with the edited OME-XML string.
with tifffile.TiffWriter('../data/test3.ome.tiff', ome=True) as out_tif:
    for i, p in enumerate(tif.pages):
        if i in keep_idxs:
            out_tif.write(p.asarray())
    # Must happen after the pages are written.
    out_tif.overwrite_description(xml_str.encode())

In [134]:
h = from_tiff('../data/test3.ome.tiff')
h

OME(
   creator='imctools 2.1.0',
   images=[<1 Images>],
)

In [135]:
h.images[0]

Image(
   id='Image:0',
   name='S18_40270_C3_JH010721_roi2.ome.tiff',
   pixels=Pixels(
      id='Pixels:0',
      dimension_order='XYCZT',
      size_c=196,
      size_t=1,
      size_x=1349,
      size_y=1371,
      size_z=1,
      type='float',
      big_endian=False,
      channels=[<22 Channels>],
      interleaved=True,
      physical_size_x=1349.0,
      physical_size_y=1371.0,
      tiff_data_blocks=[<1 Tiff_Data_Blocks>],
   ),
)

In [136]:
h = tifffile.TiffFile('../data/test3.ome.tiff').ome_metadata
h

'<OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Creator="imctools 2.1.0" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd">\n    <Image ID="Image:0" Name="S18_40270_C3_JH010721_roi2.ome.tiff">\n        <Pixels BigEndian="false" DimensionOrder="XYCZT" ID="Pixels:0" Interleaved="true" PhysicalSizeX="1349.0" PhysicalSizeXUnit="µm" PhysicalSizeY="1371.0" PhysicalSizeYUnit="µm" SizeC="196" SizeT="1" SizeX="1349" SizeY="1371" SizeZ="1" Type="float">\n            <Channel Fluor="Pr141" ID="Channel:0:100" Name="SMA" SamplesPerPixel="1" />\n            <Channel Fluor="Nd143" ID="Channel:0:103" Name="Vimentin" SamplesPerPixel="1" />\n            <Channel Fluor="Nd148" ID="Channel:0:109" Name="PanKeratin" SamplesPerPixel="1" />\n            <Channel Fluor="Nd150" ID="Channel:0:112" Name="PD-L1" SamplesPerPixel="1" />\n            <Channel Fluor="Gd15

In [34]:
tif = TiffFile('../data/test3.ome.tiff')
tif.ome_metadata

'<?xml version="1.0" encoding="UTF-8"?><OME xmlns="http://www.openmicroscopy.org/Schemas/OME/2016-06" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openmicroscopy.org/Schemas/OME/2016-06 http://www.openmicroscopy.org/Schemas/OME/2016-06/ome.xsd" UUID="urn:uuid:1fcd2f32-6c99-11eb-a15d-ac1f6b9e3060"  Creator="tifffile.py 2020.12.8"><Image ID="Image:0" Name="Image0"><Pixels ID="Pixels:0" DimensionOrder="XYCZT" Type="double" SizeX="10" SizeY="10" SizeC="1" SizeZ="1" SizeT="1"><Channel ID="Channel:0:0" SamplesPerPixel="1"><LightPath/></Channel><TiffData IFD="0" PlaneCount="1"/></Pixels></Image><Image ID="Image:1" Name="Image1"><Pixels ID="Pixels:1" DimensionOrder="XYCZT" Type="double" SizeX="10" SizeY="10" SizeC="1" SizeZ="1" SizeT="1"><Channel ID="Channel:1:0" SamplesPerPixel="1"><LightPath/></Channel><TiffData IFD="1"/></Pixels></Image></OME>'

In [None]:
def rewrite_ome_tiff_header(tif_obj, out_fp, metadata):
    """Copy the pages of ``tif_obj`` to ``out_fp`` and add metadata to its header.

    Parameters
    ----------
    tif_obj : tifffile.TiffFile
        Open source file; its pages and ``ome_metadata`` are read.
    out_fp : str
        Path of the OME-TIFF to create.
    metadata : dict
        Attribute name -> value pairs rendered as ``name="value"`` and added
        to the OME-XML header.

    Returns
    -------
    None
    """
    from xml.sax.saxutils import quoteattr

    header = tif_obj.ome_metadata

    # quoteattr adds the surrounding quotes and escapes XML-special characters.
    xml_str = ' '.join([f'{k}={quoteattr(str(v))}' for k, v in metadata.items()])
    if xml_str:
        # NOTE(review): the original stub stopped at a bare `header.replace`,
        # so the insertion point is an assumption: the attributes are added
        # to the first <Pixels> element. Attributes that already exist there
        # are not deduplicated - confirm the intended target.
        header = header.replace('<Pixels ', f'<Pixels {xml_str} ', 1)

    # Write through the writer opened on out_fp (not a hard-coded scratch
    # path or an unrelated global) and close it when done.
    with tifffile.TiffWriter(out_fp, ome=True) as w:
        for p in tif_obj.pages:
            w.write(p.asarray())
        w.overwrite_description(header.encode())
    

## generate channel level metadata

In [294]:
# The channel tables produced further down are saved as '.csv' files in this
# same folder, so keep only the '.ome.tiff' images; otherwise a re-run would
# try to open the CSVs as TIFFs.
fps = [fp for fp in os_helpers.listfiles(f'../data/hyperion/{batch_folder}/ome-tiff_htan')
       if fp.endswith('.ome.tiff')]
sample_fp_tups = [(fp.split('/')[-1].replace('.ome.tiff', ''), fp) for fp in fps]
sample_fp_tups

[('HT122P1_S1H9_090320_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H9_090320_ROI_L1.ome.tiff'),
 ('HT168P1_S1H3_091420_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H3_091420_ROI_L1.ome.tiff'),
 ('Human_spleen_091120_ROI_001',
  '../data/hyperion/b3_09012020/ome-tiff_htan/Human_spleen_091120_ROI_001.ome.tiff'),
 ('HT168P1_S1H2_ROI_L1',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H2_ROI_L1.ome.tiff'),
 ('HT122P1_S1H9_090320_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H9_090320_ROI_L4.ome.tiff'),
 ('HT122P1_S1H3_L1_L4_090320_ROI_002',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H3_L1_L4_090320_ROI_002.ome.tiff'),
 ('HT123P1_S1H5_091120_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT123P1_S1H5_091120_ROI_L4.ome.tiff'),
 ('HT168P1_S1H3_091420_ROI_L4',
  '../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H3_091420_ROI_L4.ome.tiff'),
 ('HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1',
  '../data/hyperi

In [295]:
# Folder holding the converted OME-TIFFs; created if it does not exist yet.
out_dir = f'../data/hyperion/{batch_folder}/ome-tiff_htan'
os.makedirs(out_dir, exist_ok=True)

In [296]:
# Write one '<image>.csv' table (Channel ID, Channel Name) next to each image.
channel_id, channel_name = [], []
for sample, fp in sample_fp_tups:
    if not fp.endswith('.ome.tiff'):
        # Not an image (e.g. a .csv written by an earlier run of this cell).
        continue
    print(sample, fp)
    # Only the header is needed, so the file is closed right after reading it.
    with TiffFile(fp) as tif:
        m = re.sub(r'<Acquis.*AcquisitionDate>', r'', tif.ome_metadata)
    ome = from_xml(m)
    im = ome.images[0]
    # Rebuild both lists for every image; accumulating across iterations
    # would put the channels of all previously processed images into each CSV.
    channel_id = [channel.id for channel in im.pixels.channels]
    channel_name = [channel.name for channel in im.pixels.channels]

    df = pd.DataFrame.from_dict({
        'Channel ID': channel_id,
        'Channel Name': channel_name
    })
    df.to_csv(fp.replace('.ome.tiff', '.csv'), sep=',', index=False)
        

HT122P1_S1H9_090320_ROI_L1 ../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H9_090320_ROI_L1.ome.tiff
HT168P1_S1H3_091420_ROI_L1 ../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H3_091420_ROI_L1.ome.tiff
Human_spleen_091120_ROI_001 ../data/hyperion/b3_09012020/ome-tiff_htan/Human_spleen_091120_ROI_001.ome.tiff
HT168P1_S1H2_ROI_L1 ../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H2_ROI_L1.ome.tiff
HT122P1_S1H9_090320_ROI_L4 ../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H9_090320_ROI_L4.ome.tiff
HT122P1_S1H3_L1_L4_090320_ROI_002 ../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H3_L1_L4_090320_ROI_002.ome.tiff
HT123P1_S1H5_091120_ROI_L4 ../data/hyperion/b3_09012020/ome-tiff_htan/HT123P1_S1H5_091120_ROI_L4.ome.tiff
HT168P1_S1H3_091420_ROI_L4 ../data/hyperion/b3_09012020/ome-tiff_htan/HT168P1_S1H3_091420_ROI_L4.ome.tiff
HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1 ../data/hyperion/b3_09012020/ome-tiff_htan/HT122P1_S1H3_L1_L4_rescan_092820_ROI_L1.ome.tiff
HT122P1_S1H3_L1_L4