## Mapping Nucleus to Soma Segment IDs

In [None]:
# Path to our full segmentation.  In this data, we expect nuclei to be
# labeled separately from the cell soma that contains them; but the rest
# of the cell should be all one segment ID.
seg_path = "gs://zheng_mouse_hippocampus_scratch_30/make_cv_happy/seg/v0.2-8nm-updown3x-m0.01_18-18-45_20240525072804"
# Resolution handed to build_cv_layer as `index_resolution` and used when
# indexing the layer, i.e. the voxel grid in which cutout coordinates are
# expressed (presumably nm per voxel along x, y, z -- TODO confirm units).
seg_resolution = (36, 36, 45)   # actually, index resolution

# Path to the CSV file containing nucleus centroids.
nucleus_csv_path = "/home/joe/zheng-mouse-hippo/nucleus-centroids-filtered.csv"
# Resolution of the centroid coordinates in that CSV; the coordinates are
# rescaled by nucleus_resolution / seg_resolution before the lookup.
nucleus_resolution = (384, 384, 45)

In [None]:
from caveclient import CAVEclient
from zetta_utils.layer.volumetric.cloudvol import build_cv_layer
from zetta_utils.geometry import Vec3D
import cc3d
import numpy as np
import zetta_utils.tensor_ops.convert as convert
import pandas as pd
from scipy import ndimage

In [None]:
# Promote the plain (x, y, z) resolution tuples to Vec3D so they can be used
# directly in the coordinate arithmetic and layer indexing below.
seg_resolution, nucleus_resolution = (
    Vec3D(*seg_resolution),
    Vec3D(*nucleus_resolution),
)

In [None]:
# Read the nucleus centroid table; the loop further down reads its
# `x`, `y` and `z` columns row by row.
df = pd.read_csv(nucleus_csv_path)

# Show the first five rows as a quick sanity check of the loaded data.
print(df.head(n=5))


In [None]:
# Open the segmentation volume as a layer that is indexed at seg_resolution.
# Nearest-neighbour interpolation is requested, and slice rounding is enabled.
data_resolution = Vec3D(36, 36, 45)
cvl = build_cv_layer(
    path=seg_path,
    index_resolution=seg_resolution,
    data_resolution=data_resolution,
    interpolation_mode='nearest',
    allow_slice_rounding=True,
)

In [None]:
def findSegAtPoint(pos):
    """Return the segment ID found at `pos` in the segmentation layer.

    A box extending 256 x 256 x 64 voxels on either side of `pos` is read
    from `cvl` (indexed at `seg_resolution`), and channel 0 of that read is
    stored in the module-level `cutout` so that later steps can reuse it.

    Args:
        pos: Vec3D position, in the coordinates used to index `cvl`.

    Returns:
        The value of `cutout` at the (rounded) offset of `pos` within the box.
    """
    global cutout
    half_extent = Vec3D(256, 256, 64)
    lo = pos - half_extent
    hi = pos + half_extent
    volume = cvl[seg_resolution, lo[0]:hi[0], lo[1]:hi[1], lo[2]:hi[2]]
    cutout = volume[0]  # keep only channel 0
    # Offset of the query point relative to the low corner of the box.
    ix, iy, iz = np.round(pos - lo).astype(int)
    return cutout[ix, iy, iz]

In [None]:
def print_mask_bounds(nuc_mask):
    """Print the index arrays and per-axis extent of the set voxels in a 3-D mask.

    Intended as a sanity check: the printed ranges let the reader see whether
    the nucleus touches the edge of the cutout (it should not).  Nothing is
    returned; if the mask has no set voxels a warning line is printed instead.

    Args:
        nuc_mask: 3-D array whose entries equal to 1 (True) mark the nucleus.
    """
    indices = np.where(nuc_mask == 1)
    if indices[0].size == 0:
        print('NO NUCLEUS INDICES FOUND')
        return
    print(indices)
    # Smallest and largest occupied index along each of the three axes.
    (x_min, x_max), (y_min, y_max), (z_min, z_max) = (
        (axis.min(), axis.max()) for axis in indices[:3]
    )

    print(f"x range: {x_min} to {x_max}; y range: {y_min} to {y_max}; z range: {z_min} to {z_max}")

In [None]:
def get_ids_in_mask(mask, id_space, exclude_list=(0,)):
    """Count the IDs that occur under a boolean mask.

    Args:
        mask: Boolean array used to index `id_space`.
        id_space: Array of IDs, indexable by `mask`.
        exclude_list: IDs to drop before counting.  Defaults to `(0,)`.

    Returns:
        A `(vals, counts)` tuple as produced by
        `np.unique(..., return_counts=True)`: the sorted distinct remaining
        IDs and how many masked voxels carry each of them.
    """
    masked_ids = id_space[mask]
    kept_ids = masked_ids[~np.isin(masked_ids, exclude_list)]
    return np.unique(kept_ids, return_counts=True)

def find_soma_id(nucleus_id):
    """Return the ID of a segment lying just outside the given nucleus.

    Works on the module-level `cutout` (set by `findSegAtPoint`), so that
    function must have been called for the same nucleus beforehand.  The
    nucleus mask is dilated by one step and then by two more; the voxels
    added by the last two steps form a thin shell around the nucleus, and the
    segment IDs found in that shell are the candidates.

    Args:
        nucleus_id: Segment ID of the nucleus within `cutout`.

    Returns:
        The first positive ID in the shell, in the sorted order returned by
        `np.unique` (i.e. the lowest one), or None if the shell contains no
        positive ID other than `nucleus_id`.
    """
    # find all the nucleus voxels
    nuc_mask = np.array(cutout == nucleus_id)
    print_mask_bounds(nuc_mask)
    # enlarge 1 step, and (separately) 2 more steps on top of that
    nuc_mask_enlarg1 = ndimage.binary_dilation(nuc_mask, iterations=1)
    nuc_mask_enlarg3 = ndimage.binary_dilation(nuc_mask_enlarg1, iterations=2)
    # find the thin shell by XORing out the interior from the fully inflated
    border_nuc_mask = np.logical_xor(nuc_mask_enlarg1, nuc_mask_enlarg3)
    print(f'Generated nuclear border of {np.sum(border_nuc_mask)} voxels')

    # Exclude the nucleus itself (the original passed the builtin `id` here).
    # Background (0) is skipped by the `> 0` test below rather than via the
    # exclude list, so the exclude list keeps the same dtype as nucleus_id.
    neighbor_ids, _counts = get_ids_in_mask(border_nuc_mask, cutout, (nucleus_id,))
    # NOTE(review): the voxel counts are ignored, so the lowest positive ID
    # wins even if another segment covers far more of the shell -- confirm
    # whether the most frequent ID should be preferred instead.
    for neighbor_id in neighbor_ids:
        if neighbor_id > 0:
            return neighbor_id
    return None

In [None]:
# For every nucleus centroid in the CSV, look up the nucleus segment at that
# point and then a segment bordering it.
# NOTE: the two lookups must stay in this order -- findSegAtPoint() stores the
# cutout it loads in the global `cutout`, which find_soma_id() then reads.
results = []  # list of (nucleus_id, cell_id) pairs
for index, row in df.iterrows():
    print(f'{index}: {[row.x, row.y, row.z]}...')
    pos_in_nuc_coords = Vec3D(row.x, row.y, row.z)
    # Rescale from the centroid (nucleus) resolution to the segmentation resolution.
    pos_in_seg_coords = pos_in_nuc_coords * nucleus_resolution / seg_resolution
    nucleus_id = findSegAtPoint(pos_in_seg_coords)
    # soma_id is None when find_soma_id() finds no positive neighboring ID.
    soma_id = find_soma_id(nucleus_id)
    results.append((nucleus_id, soma_id))
    print(f'Nucleus {nucleus_id} is in cell {soma_id}')

In [None]:
# Display the collected (nucleus_id, soma_id) pairs as the cell output.
results

In [None]:
# Create a DataFrame from the list of (nucleus_id, soma_id) tuples.
# NOTE(review): the first column is labelled 'neuron_id' although it holds the
# nucleus segment ID; the name is kept because the duplicate check below (and
# possibly consumers of the CSV) refer to it -- confirm before renaming.
df_out = pd.DataFrame(results, columns=['neuron_id', 'soma_id'])

# Save the DataFrame to a CSV file (extension fixed: was '.csvs').
output_csv_path = '/home/joe/zheng-mouse-hippo/nucleus-soma-v2.csv'
df_out.to_csv(output_csv_path, index=False)

print(f"Data saved to {output_csv_path}")

In [None]:
# Report soma IDs that were assigned to more than one nucleus: keep only the
# 'soma_id' groups holding several rows, ordered by 'soma_id'.
grouped = (
    df_out
    .groupby('soma_id')
    .filter(lambda members: len(members) > 1)
    .sort_values(by='soma_id')
)

# Show the rows that share a soma.
print(grouped)


In [None]:
# Number of nucleus centroids read from the CSV (rows in df).
len(df)