In [80]:
import os
import numpy as np
import plyfile
from scout import cyto
from scout.utils import write_csv, read_csv

In [2]:
# Folder that holds all input and output files used in this notebook.
# NOTE(review): absolute, machine-specific path -- edit before running elsewhere.
working_dir = '/data/datasets/organoid_phenotyping/d35_vs_d60/d35/20190419_14_35_07_AA_org1_488LP13_561LP120_642LP60/'

# List the folder contents to see which files are available.
os.listdir(working_dir)

['syto.zarr',
 'Ex0_hist.csv',
 'Ex2_hist.csv',
 'nuclei_binary.zarr',
 'nuclei_probability.zarr',
 'nuclei_fluorescence',
 'segment_ventricles.tif',
 'org1_ventricles.gif',
 'mesh.obj',
 'Ex2_rescaled',
 'nuclei_morphologies.csv',
 'syto_down6x',
 'organoid_features.xlsx',
 'syto_down6x.tif',
 'niche_labels.npy',
 'Ex1_hist.csv',
 'Ex_2_Em_2_destriped_stitched',
 'cyto_profiles.npy',
 'nuclei_foreground.zarr',
 'celltype_names.csv',
 'centroids.npy',
 'nuclei_gating.npy',
 'cyto_profiles_sample.npy',
 'sox2.zarr',
 'niche_names.csv',
 'Ex_1_Em_1_destriped_stitched',
 'voxel_size.csv',
 'mesh_ventricles.pkl',
 'nuclei_segmentations.npz',
 'tbr1.zarr',
 'cyto_labels.npy',
 'metadata.txt',
 'cyto_names.csv',
 'niche_proximities.npy',
 'Ex0_rescaled',
 'segment_foreground.tif',
 'Ex_0_Em_0_destriped_stitched_master',
 'cyto_sample_index.npy',
 'Ex1_rescaled',
 'centroids_um.npy']

Load ventricle mesh and save OBJ

In [3]:
# Read the ventricle mesh from the working directory and show its fields.
mesh_path = os.path.join(working_dir, 'mesh_ventricles.pkl')
mesh = cyto.load_mesh(mesh_path)
list(mesh.keys())

['verts', 'faces', 'normals', 'values']

In [13]:
# Write the loaded mesh to `mesh.obj`; every entry of `mesh` is forwarded
# to the writer as a keyword argument.
mesh_obj_path = os.path.join(working_dir, 'mesh.obj')
cyto.write_obj(name=mesh_obj_path, one=True, **mesh)

File written 30%
File written 60%


Load nuclei points, subsample, and save OBJ

In [13]:
# Load the nuclei centroids and the per-nucleus gating labels.
centroids_path = os.path.join(working_dir, 'centroids_um.npy')
gating_path = os.path.join(working_dir, 'nuclei_gating.npy')
centroids_um = np.load(centroids_path)
celltype_labels = np.load(gating_path)

# Display both shapes side by side.
(centroids_um.shape, celltype_labels.shape)

((2173528, 3), (2173528, 2))

In [15]:
# Option 1 - Sample same number in each

n_points = 10_000

# Seeded generator so re-running the cell yields the same subsample.
rng = np.random.RandomState(0)


def _draw_indices(n_available, n_wanted):
    """Pick distinct row indices uniformly at random.

    Sampling is done without replacement so that no nucleus appears twice in
    the subsample. If fewer than `n_wanted` rows are available, all of them
    are returned (in random order) instead of raising.
    """
    return rng.choice(n_available, size=min(n_wanted, n_available), replace=False)


# SOX2: nuclei whose first gating column is nonzero
centroids_sox2 = centroids_um[np.where(celltype_labels[:, 0])[0]]
idx = _draw_indices(len(centroids_sox2), n_points)
points_sox2 = centroids_sox2[idx]

# TBR1: nuclei whose second gating column is nonzero
centroids_tbr1 = centroids_um[np.where(celltype_labels[:, 1])[0]]
idx = _draw_indices(len(centroids_tbr1), n_points)
points_tbr1 = centroids_tbr1[idx]

# DN: nuclei with both gating columns equal to zero
loc = np.where(np.logical_and(celltype_labels[:, 0] == 0,
                              celltype_labels[:, 1] == 0))[0]
centroids_dn = centroids_um[loc]
idx = _draw_indices(len(centroids_dn), n_points)
points_dn = centroids_dn[idx]

points_sox2.shape, points_tbr1.shape, points_dn.shape

((10000, 3), (10000, 3), (10000, 3))

In [17]:
# Option 2 - Sample overall number

n_points = 50_000

# Seeded generator so re-running the cell yields the same subsample.
rng = np.random.RandomState(0)

# Draw without replacement so no nucleus is picked twice; the size is capped
# at the number of available nuclei so the draw cannot fail.
idx = rng.choice(len(centroids_um),
                 size=min(n_points, len(centroids_um)),
                 replace=False)
centroids_sample = centroids_um[idx]
labels_sample = celltype_labels[idx]

# The two gating columns are tested independently, so a nucleus that is
# nonzero in both ends up in both `points_sox2` and `points_tbr1`.
points_sox2 = centroids_sample[np.where(labels_sample[:, 0])[0]]
points_tbr1 = centroids_sample[np.where(labels_sample[:, 1])[0]]
loc = np.where(np.logical_and(labels_sample[:, 0] == 0,
                              labels_sample[:, 1] == 0))[0]
points_dn = centroids_sample[loc]

points_sox2.shape, points_tbr1.shape, points_dn.shape

((21266, 3), (8338, 3), (20765, 3))

In [18]:
# Export each sampled population to its own file in the working directory.
for obj_name, cloud in (('points_sox2_2.obj', points_sox2),
                        ('points_tbr1_2.obj', points_tbr1),
                        ('points_dn_2.obj', points_dn)):
    cyto.write_point_cloud(os.path.join(working_dir, obj_name), cloud)

Compute cyto labels for mesh faces

In [71]:
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm_notebook as tqdm
from scipy.stats import mode

In [54]:
# Build a nearest-neighbor index over the mesh vertices.
verts = mesh['verts']
nbrs = NearestNeighbors()
nbrs.fit(verts)

# Display the fitted estimator and its parameters.
nbrs

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [68]:
# Load the cyto labels; the shapes are shown together so it is easy to check
# that there is one label per mesh vertex.
cyto_labels_path = os.path.join(working_dir, 'cyto_labels.npy')
cyto_labels = np.load(cyto_labels_path)
(cyto_labels.shape, verts.shape)

((118650,), (118650, 3))

In [77]:
# Label every mesh face with the most common cyto label among the 5 mesh
# vertices closest to the face centroid.
mesh_faces = np.asarray(mesh['faces'])

# Centroids of all faces at once: indexing the vertices with the face index
# array gives (n_faces, verts_per_face, 3); averaging over axis 1 leaves one
# 3-D point per face.
face_centroids = verts[mesh_faces].mean(axis=1)

# A single batched neighbor query replaces one query per face.
dist, indices = nbrs.kneighbors(face_centroids, n_neighbors=5)
nbrs_labels = cyto_labels[indices]  # one row of 5 neighbor labels per face

# Row-wise mode. Older SciPy returns the modes with shape (n_faces, 1), while
# SciPy >= 1.11 drops the reduced axis; np.ravel yields (n_faces,) either way.
# (Indexing `.mode[0]` on a 1-D input fails on SciPy >= 1.11, where the mode
# is a scalar.)
face_labels = np.ravel(mode(nbrs_labels, axis=1).mode)
face_labels.shape

HBox(children=(IntProgress(value=0, max=236904), HTML(value='')))

(236904,)

In [78]:
# Quick check of the label dtype and the largest label value.
(face_labels.dtype, np.max(face_labels, axis=0))

(dtype('int64'), 5)

In [79]:
# Save the per-face labels next to the other outputs.
face_labels_path = os.path.join(working_dir, 'face_labels.csv')
write_csv(face_labels_path, face_labels)

Write out point clouds by niche

In [83]:
# Load the niche names and the per-nucleus niche labels.
niche_names_path = os.path.join(working_dir, 'niche_names.csv')
niche_labels_path = os.path.join(working_dir, 'niche_labels.npy')
niche_names = read_csv(niche_names_path)
niche_labels = np.load(niche_labels_path)

# The last item checks that there is exactly one name per distinct label.
n_distinct_labels = np.unique(niche_labels).size
(niche_names, niche_labels.shape, len(niche_names) == n_distinct_labels)

(['DN', 'SOX2', 'TBR1', 'DP', 'MidTBR1', 'MidSOX2', 'MidInter'],
 (2173528,),
 True)

In [88]:
n_points = 200_000

# Seeded generator so re-running the cell yields the same subsample.
rng = np.random.RandomState(0)

# Draw without replacement so no nucleus is picked twice; the size is capped
# at the number of available nuclei so the draw cannot fail.
idx = rng.choice(len(centroids_um),
                 size=min(n_points, len(centroids_um)),
                 replace=False)
centroids_sample = centroids_um[idx]
labels_sample = niche_labels[idx]

In [96]:
# Write one point-cloud file per niche, named `<basename>_<niche name>`.
basename = 'subpopulation'

# Percentages are relative to the number of sampled nuclei.
n_sampled = len(labels_sample)

# Assumes label value `i` corresponds to `niche_names[i]` -- TODO confirm.
for i, name in enumerate(niche_names):
    idx = np.where(labels_sample == i)[0]
    print(f'{len(idx)} \t({len(idx) / n_sampled * 100:.1f}%)\t cells in {name} subpopulation')

    centroids_subpop = centroids_sample[idx]

    # NOTE(review): other calls to write_point_cloud in this notebook use an
    # '.obj' extension; confirm that '.csv' is the intended one here.
    filename = f'{basename}_{name}.csv'
    path = os.path.join(working_dir, filename)

    # Write this niche's own points to its own file. The previous version
    # rewrote `points_sox2` to 'points_sox2_2.obj' on every iteration.
    cyto.write_point_cloud(path, centroids_subpop)

22637 	(11.3%)	 cells in DN subpopulation
83177 	(41.6%)	 cells in SOX2 subpopulation
31278 	(15.6%)	 cells in TBR1 subpopulation
1489 	(0.7%)	 cells in DP subpopulation
20239 	(10.1%)	 cells in MidTBR1 subpopulation
15675 	(7.8%)	 cells in MidSOX2 subpopulation
25505 	(12.8%)	 cells in MidInter subpopulation
