# Compute distance to cell centers for all RNA spots

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import os
import sys
import dask
import dask.array as da
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from skimage.io import imread
import tifffile as tf
import torch

# directories and inputs

In [2]:
# Directory holding the input pickles and receiving the exported result.
# It is built with os.path.join so the separators are right on every OS, and
# it is made absolute once, relative to the directory the notebook starts in.
# os.chdir(dataPath) is called more than once further down; with a relative
# path the second call would be resolved against the already-changed working
# directory and point at the wrong place.
dataPath = os.path.abspath(os.path.join("..", "results", "spatial_statistics"))

# Non-marker columns of the merged table: spot position, cell centre and
# the FOV / cell / tissue identifiers.
idCols = ['Y', 'X', 'CellY', 'CellX', 'FOV', 'CellLabel', 'TissueSource']

# Read RNA spot data and cell centers, then merge the dataframes

In [3]:
# Work from the data directory so the pickles can be opened by bare file name.
os.chdir(dataPath)

# Load the RNA spot table and the cell-centre table.
# NOTE(review): pickle files execute code on load - only read files produced
# by this project's own earlier notebooks.
dfSpots, dfCenters = (
    pd.read_pickle(pklName)
    for pklName in ('01_all_RNA_positions.pkl', '02_cell_centers.pkl')
)

In [4]:
# Attach the cell-centre columns to every RNA spot.  No `on=` is passed, so
# pandas joins on the column names the two frames have in common.
# NOTE(review): confirm those shared columns are exactly the intended cell
# identifiers.
#
# The outer join keeps rows that exist on only one side (RNA spots without a
# corresponding mask, or masks without any RNA spot); those rows contain NaN,
# and dropping every row with at least one NaN removes them again.
dfAll = (
    dfSpots
    .merge(dfCenters, how = 'outer')
    .dropna(axis = 0, how = 'any')
)

In [5]:
# Show the merged spot / cell-centre table as the cell output.
dfAll

Unnamed: 0,EEF2,ACTB,SOX9,GAPDH,SPP1,IL8,IL6,CCL11,COL5A2,COL1A1,...,CXCR4,MKI67,NANOG,Y,X,FOV,CellLabel,TissueSource,CellY,CellX
29680,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,622.0,70.0,160.0,BM,425.115286,614.254096
29681,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,645.0,70.0,160.0,BM,425.115286,614.254096
29682,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,670.0,70.0,160.0,BM,425.115286,614.254096
29683,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,700.0,70.0,160.0,BM,425.115286,614.254096
29684,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,685.0,70.0,160.0,BM,425.115286,614.254096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3031028,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2138.0,1420.0,27.0,215.0,HCH,1035.150859,709.793703
3031029,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2141.0,1406.0,27.0,215.0,HCH,1035.150859,709.793703
3031030,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2142.0,1363.0,27.0,215.0,HCH,1035.150859,709.793703
3031031,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2142.0,1408.0,27.0,215.0,HCH,1035.150859,709.793703


# Compute distance to cell center for every spot

In [6]:
# Euclidean distance from every spot (Y, X) to the centre (CellY, CellX) stored
# on the same row of dfAll, computed for all rows at once with torch.

# Use the GPU when one is available; otherwise run the identical computation on
# the CPU so the notebook still executes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

v1 = torch.tensor(dfAll[['Y', 'X']].values, device = device)          # spot coordinates
v2 = torch.tensor(dfAll[['CellY', 'CellX']].values, device = device)  # cell-centre coordinates

# square root of the row-wise sum of squared coordinate differences
distCenters = torch.sqrt(torch.sum(torch.square(v1 - v2), dim = 1))

# Move the result back to host memory and hand it on as a plain NumPy array
# (one value per row of dfAll, in row order) so it can be stored in the
# dataframe without relying on pandas converting a torch tensor.
distCenters = distCenters.cpu().numpy()

In [7]:
# Append the distances as a new column.  The values are assigned by position
# (row order), not by index label.  np.asarray makes the conversion to a NumPy
# array explicit instead of leaving it to pandas' handling of foreign array
# types; it is a no-op when distCenters already is an ndarray.
dfAll['DistCenter'] = np.asarray(distCenters)

In [8]:
# Relabel tissue sources: 'BM' -> 'HBM' and 'UC' -> 'HUC'; every other label is
# left as it is.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on dfAll['TissueSource']: that chained in-place
# call operates on an intermediate Series, which pandas warns about and which
# leaves dfAll unchanged once Copy-on-Write is enabled.
dfAll['TissueSource'] = dfAll['TissueSource'].replace(to_replace = {'BM': 'HBM', 'UC': 'HUC'})

In [9]:
# Show the table again as the cell output to check the DistCenter column and the tissue labels.
dfAll

Unnamed: 0,EEF2,ACTB,SOX9,GAPDH,SPP1,IL8,IL6,CCL11,COL5A2,COL1A1,...,MKI67,NANOG,Y,X,FOV,CellLabel,TissueSource,CellY,CellX,DistCenter
29680,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,622.0,70.0,160.0,HBM,425.115286,614.254096,424.186014
29681,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,645.0,70.0,160.0,HBM,425.115286,614.254096,425.228276
29682,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,670.0,70.0,160.0,HBM,425.115286,614.254096,427.763231
29683,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,700.0,70.0,160.0,HBM,425.115286,614.254096,432.696355
29684,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,685.0,70.0,160.0,HBM,425.115286,614.254096,428.988960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3031028,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2138.0,1420.0,27.0,215.0,HCH,1035.150859,709.793703,1311.742815
3031029,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2141.0,1406.0,27.0,215.0,HCH,1035.150859,709.793703,1306.753814
3031030,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2142.0,1363.0,27.0,215.0,HCH,1035.150859,709.793703,1285.221182
3031031,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2142.0,1408.0,27.0,215.0,HCH,1035.150859,709.793703,1308.666135


# export dataframe

In [10]:
# Write the spot table, including the distance-to-centre column, into the data
# directory as a pickle.
# NOTE(review): the working directory was already switched to dataPath when the
# inputs were loaded; if dataPath is a relative path, this second chdir is
# resolved against that new directory - confirm dataPath is absolute.
os.chdir(dataPath)

exportName = '03_spots_wt_distCenters.pkl'
dfAll.to_pickle(exportName)