In [65]:
import os
import numpy as np
import plyfile
from scout import cyto
from scout.utils import write_csv, read_csv

In [78]:
working_dir = '/data/datasets/organoid_phenotyping/analysis/zika_vs_mock/Zika/20190625_14_48_13_AA_ef24-zika1_488LP15_561LP140_642LP50/'

os.listdir(working_dir)

['face_labels.csv',
 'organoid_features.xlsx',
 'dataset',
 'individual_ventricle_cyto.xlsx',
 'cyto_labels.npy',
 'cyto_names.csv',
 'individual_ventricle_cellfreq.xlsx']

Load venctricle mesh and save OBJ

In [79]:
mesh = cyto.load_mesh(os.path.join(working_dir, 'dataset/mesh_ventricles.pkl'))
list(mesh.keys())

['verts', 'faces', 'normals', 'values']

In [80]:
cyto.write_obj(name=os.path.join(working_dir, 'dataset/mesh.obj'), one=True, **mesh)

File written 30%
File written 60%


Load nuclei points, subsample, and save OBJ

In [68]:
centroids_um = np.load(os.path.join(working_dir, 'dataset/centroids_um.npy'))
celltype_labels = np.load(os.path.join(working_dir, 'dataset/nuclei_gating.npy'))
centroids_um.shape, celltype_labels.shape

((469702, 3), (469702, 2))

In [15]:
# Option 1 - Sample same number in each

n_points = 10_000

# SOX2
centroids_sox2 = centroids_um[np.where(celltype_labels[:, 0])[0]]
idx = np.random.choice(np.arange(len(centroids_sox2)), n_points)
points_sox2 = centroids_sox2[idx]

# TBR1
centroids_tbr1 = centroids_um[np.where(celltype_labels[:, 1])[0]]
idx = np.random.choice(np.arange(len(centroids_tbr1)), n_points)
points_tbr1 = centroids_tbr1[idx]

# DN
loc = np.where(np.logical_and(celltype_labels[:, 0] == 0, 
                              celltype_labels[:, 1] == 0))[0]
centroids_dn = centroids_um[loc]
idx = np.random.choice(np.arange(len(centroids_dn)), n_points)
points_dn = centroids_dn[idx]

points_sox2.shape, points_tbr1.shape, points_dn.shape

((10000, 3), (10000, 3), (10000, 3))

In [69]:
# Option 2 - Sample overall number

n_points = 50_000

idx = np.random.choice(np.arange(len(centroids_um)), n_points)
centroids_sample = centroids_um[idx]
labels_sample = celltype_labels[idx]

points_sox2 = centroids_sample[np.where(labels_sample[:, 0])[0]]
points_tbr1 = centroids_sample[np.where(labels_sample[:, 1])[0]]
loc = np.where(np.logical_and(labels_sample[:, 0] == 0, 
                              labels_sample[:, 1] == 0))[0]
points_dn = centroids_sample[loc]

points_sox2.shape, points_tbr1.shape, points_dn.shape

((11450, 3), (287, 3), (38335, 3))

In [36]:
cyto.write_point_cloud(os.path.join(working_dir, 'dataset/points_sox2.obj'), points_sox2)
cyto.write_point_cloud(os.path.join(working_dir, 'dataset/points_tbr1.obj'), points_tbr1)
cyto.write_point_cloud(os.path.join(working_dir, 'dataset/points_dn.obj'), points_dn)

Compute cyto labels for mesh faces

In [81]:
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm_notebook as tqdm
from scipy.stats import mode

In [82]:
cyto_names = read_csv(os.path.join(working_dir, 'cyto_names.csv'))
cyto_names

['Surface-TBR1', 'Surface-DN', 'DN', 'Artifacts', 'Adjacent']

In [83]:
verts = mesh['verts']
nbrs = NearestNeighbors().fit(verts)
nbrs

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)

In [84]:
cyto_labels = np.load(os.path.join(working_dir, 'cyto_labels.npy'))
cyto_labels.shape, verts.shape, np.unique(cyto_labels)

((127829,), (127829, 3), array([0, 1, 2, 3, 4]))

In [85]:
face_labels = []
for face_idx in tqdm(mesh['faces'], total=len(mesh['faces'])):
    face_centroid = verts[face_idx].mean(axis=0)
    dist, indices = nbrs.kneighbors(face_centroid[np.newaxis], n_neighbors=5)
    nbrs_labels = cyto_labels[indices[0]]
    face_labels.append(mode(nbrs_labels).mode[0])
face_labels = np.asarray(face_labels)
face_labels.shape

HBox(children=(IntProgress(value=0, max=254842), HTML(value='')))




(254842,)

In [88]:
face_labels.dtype, face_labels.max(axis=0), np.unique(face_labels)

(dtype('int64'), 4, array([0, 1, 2, 3, 4]))

In [87]:
write_csv(os.path.join(working_dir, 'face_labels.csv'), face_labels)

Write our point clouds by niche

In [83]:
niche_names = read_csv(os.path.join(working_dir, 'niche_names.csv'))
niche_labels = np.load(os.path.join(working_dir, 'niche_labels.npy'))

niche_names, niche_labels.shape, len(niche_names) == len(np.unique(niche_labels))

(['DN', 'SOX2', 'TBR1', 'DP', 'MidTBR1', 'MidSOX2', 'MidInter'],
 (2173528,),
 True)

In [88]:
n_points = 200_000

idx = np.random.choice(np.arange(len(centroids_um)), n_points)
centroids_sample = centroids_um[idx]
labels_sample = niche_labels[idx]

In [100]:
basename = 'subpopulation'

for i, name in enumerate(niche_names):
    idx = np.where(labels_sample == i)[0]
    centroids_subpop = centroids_sample[idx]
    filename = f'{basename}_{name}.obj'
    path = os.path.join(working_dir, filename)
    cyto.write_point_cloud(path, centroids_subpop)
    print(f'Saved {len(idx)} ({len(idx) / n_points * 100:.1f}%) cells to {filename}')

Saved 22637 (11.3%) cells to subpopulation_DN.obj
Saved 83177 (41.6%) cells to subpopulation_SOX2.obj
Saved 31278 (15.6%) cells to subpopulation_TBR1.obj
Saved 1489 (0.7%) cells to subpopulation_DP.obj
Saved 20239 (10.1%) cells to subpopulation_MidTBR1.obj
Saved 15675 (7.8%) cells to subpopulation_MidSOX2.obj
Saved 25505 (12.8%) cells to subpopulation_MidInter.obj
