### Notebook to extract the spots whose nearest cell centroid is a given target cell

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import json

In [2]:
# Read the detected-transcripts table from disk and preview it.
spots = pd.read_csv(filepath_or_buffer='detected_transcripts.csv')
spots

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,barcode_id,global_x,global_y,global_z,x,y,fov,gene
0,0,45081,7,552.92620,10716.962,2.0,1935.27670,1013.29120,1034,Adamts19
1,1,53581,8,540.67395,10629.979,2.0,1821.83010,207.88884,1034,Adamtsl5
2,2,55122,8,552.49493,10685.430,3.0,1931.28340,721.33130,1034,Adamtsl5
3,3,57192,8,539.08124,10630.553,5.0,1807.08280,213.21092,1034,Adamtsl5
4,4,57677,8,550.93570,10714.046,5.0,1916.84640,986.29480,1034,Adamtsl5
...,...,...,...,...,...,...,...,...,...,...
75109419,75109419,2095870,313,9838.65200,9140.547,5.0,169.73628,219.52689,2202,Pdzrn3
75109420,75109420,2095885,313,9839.31500,9152.776,5.0,175.87277,332.76532,2202,Pdzrn3
75109421,75109421,2096016,313,9842.59800,9131.945,6.0,206.26532,139.88422,2202,Pdzrn3
75109422,75109422,2096029,313,9839.06800,9153.387,6.0,173.58246,338.41754,2202,Pdzrn3


In [3]:
# Per-axis means of the spot coordinates. They serve as the origin shift for
# the spots (x, y, z) and for the cell centroids (x, y) in the cells below.
x_mean, y_mean, z_mean = (
    spots[axis].mean() for axis in ('global_x', 'global_y', 'global_z')
)

In [4]:
# Re-centre the spot coordinates on their own mean.
# NOTE: the columns are overwritten in place, so running this cell a second
# time shifts the data again; re-load `spots` before re-running.
spots['global_x'] = spots['global_x'] - x_mean
spots['global_y'] = spots['global_y'] - y_mean
spots['global_z'] = spots['global_z'] - z_mean
spots

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,barcode_id,global_x,global_y,global_z,x,y,fov,gene
0,0,45081,7,-4625.761077,1306.732691,-0.715491,1935.27670,1013.29120,1034,Adamts19
1,1,53581,8,-4638.013327,1219.749691,-0.715491,1821.83010,207.88884,1034,Adamtsl5
2,2,55122,8,-4626.192347,1275.200691,0.284509,1931.28340,721.33130,1034,Adamtsl5
3,3,57192,8,-4639.606037,1220.323691,2.284509,1807.08280,213.21092,1034,Adamtsl5
4,4,57677,8,-4627.751577,1303.816691,2.284509,1916.84640,986.29480,1034,Adamtsl5
...,...,...,...,...,...,...,...,...,...,...
75109419,75109419,2095870,313,4659.964723,-269.682309,2.284509,169.73628,219.52689,2202,Pdzrn3
75109420,75109420,2095885,313,4660.627723,-257.453309,2.284509,175.87277,332.76532,2202,Pdzrn3
75109421,75109421,2096016,313,4663.910723,-278.284309,3.284509,206.26532,139.88422,2202,Pdzrn3
75109422,75109422,2096029,313,4660.380723,-256.842309,3.284509,173.58246,338.41754,2202,Pdzrn3


In [5]:
# Load the per-cell metadata and give every cell a constant z coordinate of 0
# (the centroids are later matched against 3-D spot coordinates).
cell_metadata = pd.read_csv('cell_metadata.csv').assign(center_z=0)

In [6]:
# Centroid coordinates expressed in the same centred frame as the spots.
# Only x and y are shifted; center_z stays at 0, which in the centred frame
# is the plane of the mean spot z.
centroids = cell_metadata[['center_x', 'center_y', 'center_z']].assign(
    center_x=lambda frame: frame.center_x - x_mean,
    center_y=lambda frame: frame.center_y - y_mean,
)

In [7]:
# Preview the centroid table after shifting x/y by the spot means.
centroids

Unnamed: 0,center_x,center_y,center_z
0,-4623.522981,1306.431061,0
1,-4626.222982,1292.877061,0
2,-4621.794981,1310.697061,0
3,-4636.428982,1203.998459,0
4,-4638.102982,1200.483058,0
...,...,...,...
124170,4646.175412,-215.253416,0
124171,4643.799412,-236.259417,0
124172,4644.447412,-228.753417,0
124173,4808.764016,-215.145416,0


In [8]:
# Sentinel row whose three coordinates are the most negative finite float32,
# presumably so that no real spot ever has it as its nearest neighbour.
top_row = pd.Series(
    dict.fromkeys(('center_x', 'center_y', 'center_z'), np.finfo(np.float32).min)
)

# top row will correspond to cell label=0, ie background
# NOTE: re-running this cell prepends another sentinel row.
centroids = pd.concat(objs=[top_row.to_frame().T, centroids], ignore_index=True)
centroids

Unnamed: 0,center_x,center_y,center_z
0,-3.402823e+38,-3.402823e+38,-3.402823e+38
1,-4.623523e+03,1.306431e+03,0.000000e+00
2,-4.626223e+03,1.292877e+03,0.000000e+00
3,-4.621795e+03,1.310697e+03,0.000000e+00
4,-4.636429e+03,1.203998e+03,0.000000e+00
...,...,...,...
124171,4.646175e+03,-2.152534e+02,0.000000e+00
124172,4.643799e+03,-2.362594e+02,0.000000e+00
124173,4.644447e+03,-2.287534e+02,0.000000e+00
124174,4.808764e+03,-2.151454e+02,0.000000e+00


In [9]:
# Label each centroid with its row position (0 is the background sentinel row),
# so labels coincide with the row indices returned by the neighbour search.
centroids['label'] = np.arange(len(centroids))
centroids

Unnamed: 0,center_x,center_y,center_z,label
0,-3.402823e+38,-3.402823e+38,-3.402823e+38,0
1,-4.623523e+03,1.306431e+03,0.000000e+00,1
2,-4.626223e+03,1.292877e+03,0.000000e+00,2
3,-4.621795e+03,1.310697e+03,0.000000e+00,3
4,-4.636429e+03,1.203998e+03,0.000000e+00,4
...,...,...,...,...
124171,4.646175e+03,-2.152534e+02,0.000000e+00,124171
124172,4.643799e+03,-2.362594e+02,0.000000e+00,124172
124173,4.644447e+03,-2.287534e+02,0.000000e+00,124173
124174,4.808764e+03,-2.151454e+02,0.000000e+00,124174


In [10]:
# Optional debugging aid (disabled): keep only the spots inside a 200 x 200
# window around the origin of the centred coordinates, to test on less data.
# NOTE(review): the later cells query `spots`, not `spots_min`, so enabling
# these lines alone does not shrink the nearest-neighbour query.
# # just truncate the data to do some testing
# spot_mask = (spots.global_x > -100) & (spots.global_x < 100) & (spots.global_y > -100) & (spots.global_y < 100)
# spots_min = spots[spot_mask]
# spots_min.shape

In [11]:
# Ball-tree index over the centroid coordinates (row 0 is the background
# sentinel); each query returns the single closest centroid.
nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
nbrs.fit(centroids[['center_x', 'center_y', 'center_z']].to_numpy())

In [12]:
# For every spot, find the nearest centroid. Both results have shape
# (n_spots, 1) because the index was built with n_neighbors=1;
# `cell_labels` holds row indices into the fitted centroid array.
dist, cell_labels = nbrs.kneighbors(
    X=spots[['global_x', 'global_y', 'global_z']].to_numpy(),
    return_distance=True,
)

In [13]:
# Label of the cell of interest; it is compared against the nearest-centroid
# index of every spot and against `centroids.label`.
TARGET_CELL: int = 61569

In [14]:
# Maximum spot-to-centroid distance (in the units of the centred coordinates)
# for a spot to be kept.
MAX_DIST = 10

# Select the spots whose nearest centroid is TARGET_CELL and lies within
# MAX_DIST. `kneighbors` returns (n_spots, 1) arrays, so the combined mask is
# flattened to 1-D: row-wise boolean indexing (`spots.iloc[mask]`) expects one
# flag per row and should not depend on how pandas treats a 2-D mask.
mask = ((cell_labels == TARGET_CELL) & (dist < MAX_DIST)).ravel()

In [16]:
# Materialise the selected spots as an independent (deep-copied) frame so that
# later edits to `df` cannot touch `spots`.
df = spots.iloc[mask].copy(deep=True)
df

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,barcode_id,global_x,global_y,global_z,x,y,fov,gene
35583960,35583960,20870,2,-10.296677,5.518691,0.284509,1783.4678,507.66583,1590,9630002D21Rik
35583965,35583965,23141,2,-3.975277,4.942691,1.284509,1842.0000,502.33722,1590,9630002D21Rik
35584046,35584046,57434,7,-11.362277,6.110691,0.284509,1773.6052,513.14746,1590,Adamts19
35584047,35584047,57441,7,-7.862577,7.092691,0.284509,1806.0062,522.24225,1590,Adamts19
35584149,35584149,87991,10,-12.860577,5.100691,-2.715491,1759.7285,503.79788,1590,Adcyap1
...,...,...,...,...,...,...,...,...,...,...
35664807,35664807,2225953,307,-2.213677,9.193691,-1.715491,1858.3118,541.69550,1590,Kcna1
35664991,35664991,2227756,307,-7.252677,9.147691,2.284509,1811.6530,541.27094,1590,Kcna1
35665240,35665240,2233642,310,1.793723,4.340691,-1.715491,1895.4193,496.76288,1590,Npy5r
35665258,35665258,2234160,310,-0.133277,9.511691,-0.715491,1877.5773,544.63904,1590,Npy5r


In [18]:
# Column-wise mean of the selected spots; compare global_x/global_y with the
# target cell's centroid as a sanity check.
# numeric_only=True skips the string `gene` column explicitly: without it,
# pandas 1.x emits a FutureWarning and pandas 2.x raises TypeError.
spots.iloc[mask].mean(axis=0, numeric_only=True)

  spots.iloc[mask].mean(axis=0)


Unnamed: 0.1    3.561784e+07
Unnamed: 0      1.060604e+06
barcode_id      1.449937e+02
global_x       -4.762854e+00
global_y        8.022383e+00
global_z        1.288137e-01
x               1.834709e+03
y               5.308497e+02
fov             1.590000e+03
dtype: float64

In [17]:
# Show the centroid row of the target cell for comparison with the spot means.
centroids.loc[centroids['label'] == TARGET_CELL]

Unnamed: 0,center_x,center_y,center_z,label
61569,-4.947393,7.768615,0.0,61569


In [25]:
# Export the centred coordinates of the selected spots as a JSON list of
# {"x", "y", "z"} records.
# `.rename` returns a new frame instead of assigning `.columns` on a selection.
df_out = df[['global_x', 'global_y', 'global_z']].rename(
    columns={'global_x': 'x', 'global_y': 'y', 'global_z': 'z'}
)
# The file name is derived from TARGET_CELL so it cannot go stale when the
# target cell is changed (for TARGET_CELL = 61569 this is still '61569.json').
df_out.to_json(f'{TARGET_CELL}.json', orient='records')

In [26]:
# Persist the raw nearest-neighbour results (distance and centroid index for
# every spot) so the query does not have to be repeated.
np.save(file='dist.npy', arr=dist)
np.save(file='cell_labels.npy', arr=cell_labels)

Unnamed: 0,x,y,z
35583960,-10.296677,5.518691,0.284509
35583965,-3.975277,4.942691,1.284509
35584046,-11.362277,6.110691,0.284509
35584047,-7.862577,7.092691,0.284509
35584149,-12.860577,5.100691,-2.715491
...,...,...,...
35664807,-2.213677,9.193691,-1.715491
35664991,-7.252677,9.147691,2.284509
35665240,1.793723,4.340691,-1.715491
35665258,-0.133277,9.511691,-0.715491
