In [1]:
import numpy as np
import pandas as pd
import tifffile as tf
from glob import glob
import os
from scipy import ndimage, interpolate, spatial

In [2]:
def celllocs_read(filename):
    """Load the per-cell localization table and keep only the usable rows.

    Filtering happens in two passes: first every row with a missing value in
    any of its first five columns is removed, then every row whose
    ``Cell.Area..px.`` is 9 or less is removed. Finally the columns found at
    positions 0 and 3 through 9 are cast to ``int``.

    Parameters
    ----------
    filename : str or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The filtered table. The index is NOT reset, so it keeps the row
        labels of the rows that survived.
    """
    int_positions = [0, 3, 4, 5, 6, 7, 8, 9]

    table = pd.read_csv(filename)

    # True for rows with at least one missing value among the first 5 fields.
    has_gap = table.iloc[:, :5].isnull().values.any(axis=1)
    table = table.iloc[~has_gap]

    # Keep only cells strictly larger than 9 px.
    table = table[table['Cell.Area..px.'] > 9]

    int_columns = table.columns[np.array(int_positions)]
    return table.astype({name: int for name in int_columns})

def match_original_ndimage(celllocs, wall, label, cellnum):
    """Pair every labelled region with its nearest cell in the metadata table.

    The center of mass of ``wall`` is computed for each label ``1..cellnum``
    of ``label``. Each center, with its two axes swapped so that it lines up
    with columns 1 and 2 of ``celllocs``, is then matched to the closest
    (Euclidean) coordinate from the table. A two-line summary is printed.

    Parameters
    ----------
    celllocs : pandas.DataFrame
        Table whose columns at positions 1 and 2 hold the cell coordinates.
    wall : ndarray
        Image whose values weight the centers of mass.
    label : ndarray
        Labelled image with the same shape as ``wall``.
    cellnum : int
        Number of labels to process.

    Returns
    -------
    dcoords : ndarray
        Coordinates read from ``celllocs`` (columns 1 and 2).
    cnuclei : ndarray
        Centers of mass in array-axis order (not swapped).
    cmatches : ndarray
        For each label, the row position in ``dcoords`` of the nearest cell.
    """
    label_ids = range(1, cellnum + 1)
    centers = np.asarray(ndimage.center_of_mass(wall, label, label_ids))
    table_xy = celllocs.iloc[:, 1:3].values

    # Swap the axis order of the centers before measuring distances.
    centers_swapped = np.flip(centers, axis=1)
    distances = spatial.distance.cdist(centers_swapped, table_xy, metric='euclidean')
    nearest = distances.argmin(axis=1)

    # Several labels may share the same nearest table row; count distinct ones.
    distinct_hits = len(np.unique(nearest))
    print("Matched {} ndimage.cells to {} unique cells in the metadata".format(cellnum, distinct_hits))
    print("Out of {} cells in the metadata\n{}".format(len(celllocs), distinct_hits >= cellnum))

    return table_xy, centers, nearest

def generate_cell_metadata(label, objss, nuclei):
    """Build a per-region table of bounding boxes and pixel areas.

    Parameters
    ----------
    label : ndarray of int
        Labelled image; region ``k`` (1-based) is described by ``objss[k-1]``.
    objss : sequence of slice tuples
        One entry per region. Element 0 is the axis-0 slice (reported as
        ``y0``/``y1``) and element 1 is the axis-1 slice (``x0``/``x1``).
    nuclei : boolean mask
        Used to index ``label``; selected pixels count towards
        ``nuclei_area``.

    Returns
    -------
    pandas.DataFrame
        Columns ``x0, x1, y0, y1, length, height, total_area, nuclei_area,
        cyto_area, c2t_area_ratio``, one row per entry of ``objss``.
    """
    ncells = len(objss)
    # Integer edges 1, 2, ..., ncells + 1: one histogram bin per label value.
    edges = np.arange(1, ncells + 2)

    table = np.zeros((ncells, 8), dtype=int)
    for row, bounds in enumerate(objss):
        table[row, :4] = bounds[1].start, bounds[1].stop, bounds[0].start, bounds[0].stop

    # Bounding-box extents along x and y.
    table[:, 4] = table[:, 1] - table[:, 0]
    table[:, 5] = table[:, 3] - table[:, 2]

    # Pixel counts per label: over the whole image, then restricted to the mask.
    table[:, 6] = np.histogram(label, bins=edges)[0]
    table[:, 7] = np.histogram(label[nuclei], bins=edges)[0]

    names = ['x0', 'x1', 'y0', 'y1', 'length', 'height', 'total_area', 'nuclei_area']
    frame = pd.DataFrame(table, columns=names)
    frame['cyto_area'] = frame['total_area'] - frame['nuclei_area']
    frame['c2t_area_ratio'] = frame['cyto_area'] / frame['total_area']
    return frame

def generate_transcell_metadata(translocs, transcriptomes, cellnum, label):
    """Count, for every transcript, how many of its points land in each region.

    Parameters
    ----------
    translocs : sequence of pandas.DataFrame
        One table per transcript with integer ``X`` and ``Y`` columns; each
        point is looked up as ``label[Y, X]``.
    transcriptomes : sequence
        Transcript names, one per table; determines the number of rows.
    cellnum : int
        Number of labelled regions; labels ``1..cellnum`` are counted and
        anything else (e.g. label 0) is ignored.
    label : ndarray of int
        Labelled image.

    Returns
    -------
    pandas.DataFrame
        One row per transcript, one column per label (``1..cellnum``) plus a
        trailing ``gene`` column holding ``transcriptomes``.
    """
    edges = np.arange(1, cellnum + 2)
    counts = np.zeros((len(transcriptomes), cellnum), dtype=int)

    for row, _name in enumerate(transcriptomes):
        points = translocs[row]
        xs = points['X'].values
        ys = points['Y'].values
        # Rows of the image are indexed by Y, columns by X.
        counts[row] = np.histogram(label[ys, xs], bins=edges)[0]

    frame = pd.DataFrame(counts, columns=edges[:-1])
    frame['gene'] = transcriptomes
    return frame

In [3]:
# Input folders, relative to the notebook's working directory.
wsrc = '../cell_dams/'     # '<sample>_dams.tif' is read from here
nsrc = '../nuclear_mask/'  # '<sample>_EDT.tif' is read from here
tsrc = '../translocs/'     # '<sample>/*.csv' transcript location files
psrc = '../proc/'          # not referenced by the cells visible in this notebook
osrc = '../data/'          # '<sample>_data/' original tables
sample = 'D2'

# Output folder: one sub-folder per sample, always ending with a separator
# because later cells build file names by plain string concatenation.
dst = '../kde/'

dst += sample + os.sep
# makedirs also creates a missing parent ('../kde/') and, with exist_ok=True,
# is a no-op when the folder is already there, so the cell is safe to re-run.
os.makedirs(dst, exist_ok=True)

# Load all general data

In [3]:
# Boolean mask from the '<sample>_dams.tif' image (any non-zero pixel is True).
wall = tf.imread(wsrc + sample + '_dams.tif').astype(bool)
# Boolean mask of the pixels whose value in '<sample>_EDT.tif' is below 2.
nuclei = tf.imread(nsrc + sample + '_EDT.tif') < 2
# Label the connected True regions of `wall`; generate_binary_structure(2,1)
# is the 2-D connectivity-1 element (edge neighbours only, no diagonals).
label, cellnum = ndimage.label(wall, ndimage.generate_binary_structure(2,1))
print('Detected',cellnum,'cells')

Detected 2937 cells


In [4]:
# One CSV per transcript under '<tsrc>/<sample>/'; sorting fixes the order
# that every per-transcript list below shares.
filenames = sorted(glob(tsrc + sample + os.sep + '*.csv'))

# The transcript name is whatever follows the last '_-_' in the path once the
# extension has been stripped.
transcriptomes = [os.path.splitext(path)[0].split('_-_')[-1] for path in filenames]

# The files are read without a header row; the three columns are named here.
translocs = [pd.read_csv(path, header=None, names=['X', 'Y', 'Z']) for path in filenames]

# Number of rows (points) in each transcript table.
tsize = np.array([len(table) for table in translocs], dtype=int)

In [5]:
# Sanity check: show the source file, shape and contents of one transcript
# table (index `i` into the parallel `filenames` / `translocs` lists).
i = 0
print(filenames[i], translocs[i].shape)
translocs[i]

../translocs/D2/location_corrected_D2_-_BAC45727.csv (32171, 3)


Unnamed: 0,X,Y,Z
0,8128,414,26
1,9769,1282,25
2,11874,1991,26
3,3654,2715,17
4,4231,4180,17
...,...,...,...
32166,8960,15772,28
32167,8974,15756,29
32168,8982,15796,29
32169,11360,16088,22


# Compute metadata

In [6]:
# Per-cell metadata is cached as CSV: it is only rebuilt when the file is
# missing, so delete the file to force a recomputation.
filename = dst + sample + '_cells_metadata.csv'
print(filename)
if not os.path.isfile(filename):
    # NOTE(review): the localization table is taken from the folder of the
    # second transcript (index 1) — presumably any transcript's copy would do;
    # confirm.
    celllocs = celllocs_read(
        osrc + sample + '_data/' + transcriptomes[1] + '/' + transcriptomes[1]
        + ' - localization results by cell.csv'
    )
    dcoords, cnuclei, cmatches = match_original_ndimage(celllocs, wall, label, cellnum)
    objss = ndimage.find_objects(label)

    # Bounding boxes and areas first, then the matched coordinates from the
    # original table, the ndimage centers of mass (axes swapped to match the
    # table's column order), and finally both sets of cell IDs.
    meta = (
        generate_cell_metadata(label, objss, nuclei)
        .join(pd.DataFrame(np.round(dcoords[cmatches], 2),
                           columns=['orig_comX', 'orig_comY']))
        .join(pd.DataFrame(np.round(np.flip(cnuclei, axis=1), 2),
                           columns=['ndimage_comX', 'ndimage_comY']))
        .assign(
            orig_cellID=celllocs['Cell.ID..'].values[cmatches],
            ndimage_cellID=np.arange(1, cellnum + 1),
        )
    )
    meta.to_csv(filename, index=False)

# Always read the table back from disk, whether just written or already cached.
metacell = pd.read_csv(filename)
print(metacell.shape)
metacell.head()

../kde/D2/D2_cells_metadata.csv
(2937, 16)


Unnamed: 0,x0,x1,y0,y1,length,height,total_area,nuclei_area,cyto_area,c2t_area_ratio,orig_comX,orig_comY,ndimage_comX,ndimage_comY,orig_cellID,ndimage_cellID
0,0,17152,0,17152,17152,17152,114911591,40677,114870914,0.999646,7865.12,8160.49,7865.56,8160.76,1,1
1,8333,8935,1293,1979,602,686,222903,1456,221447,0.993468,8635.46,1576.65,8635.18,1577.03,2,2
2,8893,9151,1317,1598,258,281,42604,0,42604,1.0,9013.47,1470.26,9013.07,1469.69,3,3
3,10790,11221,1337,1591,431,254,77463,0,77463,1.0,10999.04,1460.48,10998.62,1459.99,4,4
4,7740,8048,1362,1823,308,461,86041,1006,85035,0.988308,7894.78,1596.06,7894.19,1595.83,5,5


In [9]:
# Summary statistics of the bounding-box extents, truncated to integers.
metacell[['length', 'height']].describe().astype(int).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
length,2937,309,336,77,209,278,372,17152
height,2937,309,338,81,205,278,379,17152


In [7]:
# Per-transcript totals, cached as CSV and only rebuilt when the file is missing.
filename = dst + sample + '_transcripts_metadata.csv'
print(filename)
if not os.path.isfile(filename):
    data = pd.read_csv(
        osrc + sample + '_data/32771-slide1_' + sample + '_results.txt',
        header=None, sep='\t',
    ).drop(columns=[4])

    # Occurrences of each distinct value in the last remaining column.
    # NOTE(review): np.unique returns counts in sorted-value order, which is
    # assumed to line up one-to-one with `transcriptomes` / `tsize` (sorted by
    # file name) — confirm that both orderings and lengths always agree.
    _, orig_size = np.unique(data.iloc[:, -1], return_counts=True)

    meta = pd.DataFrame({
        'total_number': orig_size,
        'cyto_number': tsize,
        'nuclei_number': orig_size - tsize,
        'ratio': tsize/orig_size,
        'gene': transcriptomes,
    })
    meta.to_csv(filename, index=False)

# Always read the table back from disk, whether just written or already cached.
metatrans = pd.read_csv(filename)
metatrans.head()

../kde/D2/D2_transcripts_metadata.csv


Unnamed: 0,total_number,cyto_number,nuclei_number,ratio,gene
0,32613,32171,442,0.986447,BAC45727
1,1162,1151,11,0.990534,BAC46169
2,1820064,1795069,24995,0.986267,BAC47034
3,1939,1923,16,0.991748,BAC47856
4,3033,2985,48,0.984174,BAC48395


In [8]:
# Transcript-by-cell count table, cached as CSV: it is only recomputed when
# the file is absent, so delete the file to force a refresh.
filename = dst + sample + '_transcells_metadata.csv'
print(filename)
if not os.path.isfile(filename):
    meta = generate_transcell_metadata(translocs, transcriptomes, cellnum, label)
    meta.to_csv(filename, index=False)
# Always read the table back from disk, whether just written or already cached.
transcell = pd.read_csv(filename)
transcell.head()

../kde/D2/D2_transcells_metadata.csv


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,2929,2930,2931,2932,2933,2934,2935,2936,2937,gene
0,31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,BAC45727
1,7,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,BAC46169
2,859,1,0,1,1,0,0,0,0,0,...,0,1,0,1,0,2,0,0,0,BAC47034
3,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,BAC47856
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,BAC48395


In [9]:
# Largest per-transcript count in each cell column from position 2926 up to
# (but excluding) the trailing 'gene' column.
transcell.iloc[:, 2926:-1].values.max(axis=0)

array([31,  0,  1, 43,  0,  3,  6,  2,  0,  0,  0])