In [2]:
import os
import pandas as pd
import numpy as np
import anndata
import tifffile
from skimage.measure import regionprops

In [25]:
%load_ext autoreload

In [26]:
%autoreload 2

In [27]:
from mip.utils import extract_ome_tiff

In [None]:
def get_spatial_features(label_fp, ome_fp):
    """Build a per-cell table of shape and channel-intensity features.

    Parameters
    ----------
    label_fp : path-like
        Label image readable by ``tifffile.imread``; each labelled region
        becomes one row of the result.
    ome_fp : path-like
        OME-TIFF passed to ``extract_ome_tiff``, which is used here as a
        mapping of channel name -> image.
        NOTE(review): assumes every channel image is 2D with the same shape
        as the label image -- confirm against ``extract_ome_tiff``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``cell_id`` (the region label). Columns are area,
        perimeter, bounding box, centroid and eccentricity, followed by
        ``'<channel> intensity max'``, ``'<channel> intensity mean'`` and
        ``'<channel> intensity min'`` for each channel.
    """
    label_img = tifffile.imread(label_fp)

    channel_to_img = extract_ome_tiff(ome_fp)
    channels = list(channel_to_img.keys())
    # Channels go on the last axis so regionprops reports one intensity
    # statistic per channel for every region.
    multichannel_img = np.stack([channel_to_img[c] for c in channels], axis=-1)

    props = regionprops(label_img, intensity_image=multichannel_img)

    columns = ['area', 'perimeter',
               'bbox_row_min', 'bbox_col_min', 'bbox_row_max', 'bbox_col_max',
               'centroid_row', 'centroid_col',
               'eccentricity']
    columns += [f'{c} intensity max' for c in channels]
    columns += [f'{c} intensity mean' for c in channels]
    columns += [f'{c} intensity min' for c in channels]

    # The index is taken from each region's own label rather than from
    # np.unique(label_img): regionprops ignores label 0, so the unique values
    # would contain one extra entry whenever 0-labelled pixels are present and
    # the DataFrame constructor would fail on the length mismatch.
    cell_ids = []
    data = []
    for p in props:
        prop_data = [p.area, p.perimeter,
                     p.bbox[0], p.bbox[1], p.bbox[2], p.bbox[3],
                     p.centroid[0], p.centroid[1],
                     p.eccentricity]
        prop_data += p.intensity_max.tolist()
        prop_data += p.intensity_mean.tolist()
        prop_data += p.intensity_min.tolist()

        cell_ids.append(p.label)
        data.append(prop_data)

    df = pd.DataFrame(data=data, columns=columns,
                      index=pd.Index(cell_ids, name='cell_id'))

    return df

In [None]:
def save_spatial_features(label_fp, ome_fp, output_prefix):
    """Compute spatial features and write them as a TSV and an .h5ad file.

    Parameters
    ----------
    label_fp, ome_fp : path-like
        Forwarded unchanged to ``get_spatial_features``.
    output_prefix : str
        Output path without extension; ``<output_prefix>.txt`` (tab
        separated) and ``<output_prefix>.h5ad`` are written.

    Returns
    -------
    pandas.DataFrame
        The full feature table that was written to ``<output_prefix>.txt``,
        so callers that assign the result get the data instead of ``None``.
    """
    df = get_spatial_features(label_fp, ome_fp)

    # Split the columns once: mean intensities form the expression matrix X,
    # everything else is stored as per-cell annotation in obs.
    mean_cols = [c for c in df.columns if 'intensity mean' in c]
    obs_cols = [c for c in df.columns if 'intensity mean' not in c]

    adata = anndata.AnnData(
        X=df[mean_cols].values,
        obs=df[obs_cols],
        # Variable names are the bare channel names.
        var=pd.DataFrame(index=[c.replace(' intensity mean', '') for c in mean_cols]))

    df.to_csv(f'{output_prefix}.txt', sep='\t')
    adata.write_h5ad(f'{output_prefix}.h5ad')

    return df

In [None]:
# Inputs for sample HT206B1-H1: the cell segmentation label image and the
# multichannel OME-TIFF it was derived from.
label_fp = '/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_3/segmentation/cell_segmentation.tif'
ome_fp = '/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_2/HT206B1-H1.ome.tiff'
# With the prefix 'output' this writes output.txt and output.h5ad relative to
# the current working directory.
df = save_spatial_features(label_fp, ome_fp, 'output')

###### sandbox

In [32]:
# Load the cell segmentation label image and check its dimensions.
label_img = tifffile.imread('/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_3/segmentation/cell_segmentation.tif')
label_img.shape

(15120, 13440)

In [43]:
# Distinct label values with the first (smallest) one dropped. The output
# shows ids starting at 1, so the dropped value is presumably the 0
# background label -- TODO confirm.
cell_ids = np.unique(label_img)[1:]
len(cell_ids), cell_ids[:10]

(221081, array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint32))

In [28]:
# Read the OME-TIFF into a mapping keyed by channel name and list the channels.
channel_to_img = extract_ome_tiff('/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_2/HT206B1-H1.ome.tiff')
channel_to_img.keys()

dict_keys(['DAPI', 'CD31', 'E-cadherin', 'CD4', 'CD20', 'Ki67', 'CD68', 'Pan-CK', 'CD8', 'Histone H3 Pho', 'CD163', 'Podoplanin', 'CD3', 'CD36', 'HIF1a', 'GLUT1', 'GATA3', 'CD45RO', 'CD45RA', 'SMA', 'CK19', 'MGP', 'Vimentin', 'Maspin', 'Foxp3', 'MMP9', 'HLA-DR', 'CK14', 'CD11c', 'CD44'])

In [31]:
# Stack the per-channel images along a new last axis, in dict key order, so
# the channel index lines up with `channels`.
channels = list(channel_to_img.keys())
multichannel_img = np.stack([channel_to_img[c] for c in channels], axis=-1)
multichannel_img.shape

(15120, 13440, 30)

In [34]:
# Per-region properties; intensity statistics are computed from the stacked
# multichannel image.
props = regionprops(label_img, intensity_image=multichannel_img, )

In [44]:
# Scratch version of the table-building logic in get_spatial_features:
# one row per region, shape features first, then per-channel intensity
# max / mean / min.
data = []
columns = ['area', 'perimeter',
           'bbox_row_min', 'bbox_col_min', 'bbox_row_max', 'bbox_col_max',
           'centroid_row', 'centroid_col',
           'eccentricity']
# Column order must match the order in which values are appended below.
columns += [f'{c} intensity max' for c in channels]
columns += [f'{c} intensity mean' for c in channels]
columns += [f'{c} intensity min' for c in channels]

for p in props:
    prop_data = [p.area, p.perimeter,
                 p.bbox[0], p.bbox[1], p.bbox[2], p.bbox[3],
                 p.centroid[0], p.centroid[1],
                 p.eccentricity]
    # Each intensity_* attribute is flattened to a list and appended
    # (30 values per statistic in the sample output shown further down).
    prop_data += p.intensity_max.tolist()
    prop_data += p.intensity_mean.tolist()
    prop_data += p.intensity_min.tolist()
    
    data.append(prop_data)

# Relies on `cell_ids` (computed in an earlier cell) having exactly one entry
# per region in `props`, in the same order.
df = pd.DataFrame(data=data, columns=columns, index=cell_ids)
df

Unnamed: 0,area,perimeter,bbox_row_min,bbox_col_min,bbox_row_max,bbox_col_max,centroid_row,centroid_col,eccentricity,DAPI intensity max,...,CK19 intensity min,MGP intensity min,Vimentin intensity min,Maspin intensity min,Foxp3 intensity min,MMP9 intensity min,HLA-DR intensity min,CK14 intensity min,CD11c intensity min,CD44 intensity min
1,520,110.840620,0,3673,25,3709,8.378846,3693.750000,0.723583,49006.0,...,0.0,803.0,1.0,454.0,205.0,0.0,8.0,1547.0,404.0,38.0
2,409,98.704581,0,3711,26,3740,8.728606,3722.951100,0.452175,44971.0,...,14.0,573.0,0.0,54.0,15.0,0.0,8.0,4518.0,61.0,29.0
3,845,150.675144,0,3739,32,3776,13.584615,3755.928994,0.368285,49170.0,...,485.0,275.0,2.0,24.0,19.0,0.0,4.0,8671.0,21.0,71.0
4,241,70.870058,0,3873,12,3902,4.082988,3886.991701,0.928244,49978.0,...,53.0,758.0,1.0,419.0,191.0,47.0,6.0,56332.0,283.0,95.0
5,733,115.639610,0,3888,35,3921,16.049113,3906.064120,0.497079,59773.0,...,127.0,819.0,2.0,24.0,23.0,13.0,3.0,4903.0,65.0,66.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221077,340,80.970563,15108,10235,15120,10268,15114.164706,10249.664706,0.935688,26368.0,...,304.0,572.0,262.0,590.0,108.0,0.0,149.0,1728.0,46.0,636.0
221078,162,62.248737,15107,9158,15120,9182,15115.290123,9168.080247,0.861327,44711.0,...,2449.0,1549.0,28.0,1145.0,1103.0,449.0,227.0,52670.0,445.0,910.0
221079,35,24.485281,15116,9213,15120,9224,15117.828571,9218.000000,0.938642,33583.0,...,1337.0,3605.0,85.0,5778.0,1359.0,1250.0,499.0,13957.0,810.0,507.0
221080,165,80.420310,15112,9224,15120,9262,15116.981818,9240.272727,0.985418,31372.0,...,1417.0,3320.0,26.0,1382.0,1042.0,513.0,586.0,54412.0,619.0,996.0


In [46]:
# Mean-intensity columns become the expression matrix X; every other column
# is kept as per-cell annotation in obs.
adata = anndata.AnnData(X=df[[c for c in df.columns if 'intensity mean' in c]].values,
                        obs=df[[c for c in df.columns if 'intensity mean' not in c]])
# Name the variables after the channels by stripping the column suffix.
adata.var.index = [c.replace(' intensity mean', '') for c in df.columns if 'intensity mean' in c]
adata

AnnData object with n_obs × n_vars = 221081 × 30
    obs: 'area', 'perimeter', 'bbox_row_min', 'bbox_col_min', 'bbox_row_max', 'bbox_col_max', 'centroid_row', 'centroid_col', 'eccentricity', 'DAPI intensity max', 'CD31 intensity max', 'E-cadherin intensity max', 'CD4 intensity max', 'CD20 intensity max', 'Ki67 intensity max', 'CD68 intensity max', 'Pan-CK intensity max', 'CD8 intensity max', 'Histone H3 Pho intensity max', 'CD163 intensity max', 'Podoplanin intensity max', 'CD3 intensity max', 'CD36 intensity max', 'HIF1a intensity max', 'GLUT1 intensity max', 'GATA3 intensity max', 'CD45RO intensity max', 'CD45RA intensity max', 'SMA intensity max', 'CK19 intensity max', 'MGP intensity max', 'Vimentin intensity max', 'Maspin intensity max', 'Foxp3 intensity max', 'MMP9 intensity max', 'HLA-DR intensity max', 'CK14 intensity max', 'CD11c intensity max', 'CD44 intensity max', 'DAPI intensity min', 'CD31 intensity min', 'E-cadherin intensity min', 'CD4 intensity min', 'CD20 intensity min

In [20]:
# Toy AnnData with ten identical variable names, to see how duplicate names
# are handled. Note: this rebinds `adata` from the previous cell.
adata = anndata.AnnData(X=np.random.rand(10, 10), obs=pd.DataFrame(data=np.random.rand(10, 5), columns=['a', 'b', 'c', 'd', 'e']),
                       var=pd.DataFrame(index=['a'] * 10))
adata

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


AnnData object with n_obs × n_vars = 10 × 10
    obs: 'a', 'b', 'c', 'd', 'e'

In [21]:
# Inspect the variable index of the toy object (all entries are 'a').
adata.var

a
a
a
a
a
a
a
a
a
a


In [22]:
# Write the toy object to disk so it can be read back in the next cell.
adata.write_h5ad('/diskmnt/Projects/Users/estorrs/sandbox/a.h5ad')

In [23]:
# Read the file back to check what the duplicate-name object looks like
# after a write/read round trip.
a = anndata.read_h5ad('/diskmnt/Projects/Users/estorrs/sandbox/a.h5ad')
a

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


AnnData object with n_obs × n_vars = 10 × 10
    obs: 'a', 'b', 'c', 'd', 'e'

In [24]:
# Variable index of the re-loaded object, for comparison with adata.var above.
a.var

a
a
a
a
a
a
a
a
a
a


In [35]:
# Take the first region from `props` to inspect its attributes interactively.
p = next(iter(props))
p

<skimage.measure._regionprops.RegionProperties at 0x7f8aae032910>

In [40]:
# Maximum intensities for this region as a plain Python list
# (30 entries in the output, matching the 30 channels).
p.intensity_max.tolist()

[49006.0,
 1424.0,
 16257.0,
 9121.0,
 3389.0,
 59061.0,
 5852.0,
 57813.0,
 5822.0,
 8731.0,
 2483.0,
 3549.0,
 2178.0,
 1530.0,
 3181.0,
 54798.0,
 1024.0,
 2380.0,
 1270.0,
 1102.0,
 57764.0,
 26634.0,
 1122.0,
 15387.0,
 3662.0,
 5291.0,
 1948.0,
 56291.0,
 3284.0,
 4855.0]

In [3]:
# Load the QiTissue per-cell feature table, using the first CSV column as the
# index. Note: this rebinds `df`, which earlier cells used for the
# regionprops-based table.
df = pd.read_csv('/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_4/qitissue_spatial_features.csv',
                 index_col=0)
df

Unnamed: 0_level_0,Nuc X,Nuc Y,Nuc Y Inv,Nucleus DNA Mean,Nucleus DNA Excess Kurtosis,DAPI Cell Exp,DAPI Nuc/Cyto Ratio,CD31 Cell Exp,CD31 Nuc/Cyto Ratio,E-cadherin Cell Exp,...,MMP9 Cell Exp,MMP9 Nuc/Cyto Ratio,HLA-DR Cell Exp,HLA-DR Nuc/Cyto Ratio,CK14 Cell Exp,CK14 Nuc/Cyto Ratio,CD11c Cell Exp,CD11c Nuc/Cyto Ratio,CD44 Cell Exp,CD44 Nuc/Cyto Ratio
Cell Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3692.270020,9.425287,15110.575195,32283.992188,-0.637072,24656.068359,3.500443,215.534622,0.364503,1721.984619,...,836.248047,1.965579,289.642303,0.475819,52670.019531,1.023708,1244.232666,0.897908,685.573059,0.645095
2,3724.271973,9.130268,15110.870117,33393.191406,-0.195382,24279.294922,4.068957,285.154022,0.143937,5018.948730,...,563.831299,3.269746,210.831299,0.508312,47831.367188,0.856714,727.938904,0.568297,884.860657,0.376165
3,3755.168701,12.611987,15107.387695,30022.921875,-0.634247,23984.521484,5.140298,288.570404,0.667963,4907.833008,...,552.828430,2.406036,379.328979,1.004636,56477.675781,1.007318,957.560974,1.322493,1133.892334,0.592331
4,3886.607666,4.876923,15115.123047,27117.162109,-0.905962,20557.472656,2.106195,263.929474,0.320742,5697.377441,...,769.016602,0.704628,170.701248,0.545165,56598.097656,0.999049,913.116211,0.705773,880.751038,0.511887
5,3906.127686,14.233813,15105.766602,31667.726562,-0.471252,26074.011719,3.724392,297.663025,0.480868,2800.372559,...,1245.723022,1.731062,283.980896,0.911356,49766.582031,0.826463,963.334229,1.093270,939.159607,0.478353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221077,10256.485352,15117.114258,2.885714,11811.400391,-0.784746,3536.964600,4.564899,32.502941,0.339167,716.744141,...,913.814697,0.473094,525.511780,1.302739,3100.744141,1.007782,401.108826,1.226893,3011.258789,0.767284
221078,9167.737305,15117.526367,2.473684,21353.816406,-0.695558,12205.851562,2.271092,360.543213,0.799652,1359.697510,...,1210.030884,1.184241,703.820984,0.984222,53611.273438,1.020003,1113.253052,0.861693,1812.771606,0.976193
221079,9217.380859,15117.238281,2.761905,22826.427734,-0.200796,22372.257812,1.052346,203.428574,0.558912,711.942871,...,2038.971436,0.859232,798.028564,0.954495,35521.687500,0.689700,1341.771484,0.766587,990.114258,1.024597
221080,9241.156250,15117.312500,2.687500,16829.359375,-0.528829,14513.278320,1.290035,740.424255,0.941241,2992.048584,...,2547.321289,1.175385,4466.357422,0.975433,55094.980469,0.999184,2168.394043,1.202150,2524.812012,1.108967


In [18]:
def features_from_qitissue(fp, version='1.1.0'):
    """Convert a QiTissue feature CSV into raw, log-scaled and AnnData forms.

    Parameters
    ----------
    fp : path-like
        CSV file whose first column is used as the row index. It must contain
        ``'Nuc X'`` and ``'Nuc Y'`` columns; every column whose name contains
        ``'Cell Exp'`` is treated as a channel intensity.
    version : str, optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    raw : pandas.DataFrame
        Integer ``X``/``Y`` coordinates plus one ``'<channel> intensity'``
        column per channel, with the values as read from the file.
    norm : pandas.DataFrame
        Same layout as ``raw``, with ``log1p`` applied to the intensities.
    adata : anndata.AnnData
        ``X`` holds the ``log1p`` intensities, ``obs`` the coordinates, and
        the variable index the bare channel names.
    """
    table = pd.read_csv(fp, index_col=0)

    # Nucleus coordinates are cast to integers.
    coords = pd.DataFrame.from_dict({
        'X': table['Nuc X'].astype(int),
        'Y': table['Nuc Y'].astype(int),
    })

    exp_cols = [name for name in table.columns if 'Cell Exp' in name]
    intensity = table[exp_cols]
    intensity.columns = [name.replace(' Cell Exp', '') + ' intensity'
                         for name in exp_cols]
    logged = np.log1p(intensity)

    # Both output tables start from the coordinates and gain one column per
    # channel.
    raw = coords.copy()
    norm = coords.copy()
    for name in intensity.columns:
        raw[name] = intensity[name]
        norm[name] = logged[name]

    adata = anndata.AnnData(X=logged.values, obs=coords)
    adata.var.index = [name.replace(' intensity', '') for name in intensity.columns]

    return raw, norm, adata

In [19]:
# Convert the QiTissue table and display the log1p-transformed version.
# Note: this rebinds `adata` from the earlier sandbox cells.
raw, norm, adata = features_from_qitissue('/diskmnt/Projects/Users/estorrs/multiplex_data/codex/htan/HT206B1-H1/level_4/qitissue_spatial_features.csv')
norm

Unnamed: 0_level_0,X,Y,DAPI intensity,CD31 intensity,E-cadherin intensity,CD4 intensity,CD20 intensity,Ki67 intensity,CD68 intensity,Pan-CK intensity,...,CK19 intensity,MGP intensity,Vimentin intensity,Maspin intensity,Foxp3 intensity,MMP9 intensity,HLA-DR intensity,CK14 intensity,CD11c intensity,CD44 intensity
Cell Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3692,9,10.112819,5.377750,7.451813,7.496782,6.564257,10.781075,6.510567,10.076103,...,9.243500,9.039124,4.957464,8.836527,6.974970,6.730120,5.672093,10.871821,7.127078,6.531713
2,3724,9,10.097420,5.656530,8.521175,7.596585,8.778914,9.512868,6.219729,10.037006,...,9.756136,9.630567,5.228385,8.653435,7.184903,6.336527,5.355790,10.775458,6.591590,6.786560
3,3755,12,10.085206,5.668398,8.498792,7.448926,8.312296,10.083555,6.573113,10.394336,...,10.278964,10.084393,5.749122,8.858783,7.108659,6.316855,5.941037,10.941618,6.865433,7.034293
4,3886,4,9.931028,5.579464,8.647937,7.579681,9.415211,10.575358,6.614670,9.650255,...,9.187308,8.530131,4.853035,8.416833,7.643912,6.646412,5.145756,10.943748,6.817958,6.781910
5,3906,14,10.168733,5.699316,7.937865,7.573392,8.716563,10.538465,6.512922,9.903500,...,8.888238,8.549433,5.067410,8.633588,7.599263,7.128274,5.652422,10.815119,6.871438,6.846050
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
221077,10256,15117,8.171307,3.511633,6.576113,6.834786,5.199325,6.510626,7.315958,4.333670,...,6.735542,7.734799,9.055353,7.464326,6.768341,6.818722,6.266274,8.039720,5.996723,8.010446
221078,9167,15117,9.409753,5.890382,7.215753,7.218059,4.612060,9.692287,6.522637,9.220130,...,8.643541,8.120974,5.308970,8.172619,7.754847,7.099227,6.557944,10.889533,7.015940,7.503164
221079,9217,15117,10.015622,5.320219,6.569401,6.707107,7.052696,10.509117,6.996420,7.795035,...,7.650074,8.891346,5.598739,8.820045,7.970958,7.620691,6.683397,10.477927,7.202491,6.898830
221080,9241,15117,9.582888,6.608573,8.004048,7.739744,6.242931,9.583509,7.157216,8.784504,...,8.460051,9.115099,5.530655,8.826845,8.878660,7.843190,8.404552,10.916832,7.682203,7.834318
