# chunk_predict
download chunks
overlap is inevitable missing cell bodies is much more dangerous

In [1]:
# libraries
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import sys
import os
import pandas as pd
import csv
from tqdm import tqdm

from cloudvolume import CloudVolume, view
import cc3d
from tifffile.tifffile import imwrite

In [2]:
%pwd
sys.path.append(os.path.abspath("../segmentation"))
# to import rootID_lookup and authentication_utils

import rootID_lookup as IDlook
import authentication_utils as auth

In [3]:
# setting
cv = CloudVolume(auth.get_cv_path('Image')['url'], use_https=True, agglomerate=False)
# loading from US server to use mip0

In [4]:
# make grid to download chunked volumes
[X,Y,Z]=cv.mip_volume_size(0)
# [ 83968 223232   4390]
# print(cv.mip_volume_size(0))
# print(cv.mip_chunk_size(0))


step_xy = 128*2**4 # width of each chunk = x or y space between each chunk center in mip0
step_z = 256 # depth of each chunk = z space between each chunk center in mip0

start_xy = 128*2**(4-1) # first chunk center
start_z = 256*2**(-1) # first chunk center

centerX = np.arange(start_xy, X, step_xy)
centerY = np.arange(start_xy, Y, step_xy)
centerZ = np.arange(start_z, Z, step_z)

# looks okay but there can be only a few space < step/2 at the end of these sequences, causing error when making chunks
if (X - centerX[-1]) < start_xy:
    np.put(centerX, -1, X-start_xy)
else:
    centerX = np.append(centerX, X-start_xy)

if (Y - centerY[-1]) < start_xy:
    np.put(centerY, -1, Y-start_xy)
else:
    centerY = np.append(centerY, Y-start_xy)

if (Z - centerZ[-1]) < start_z:
    np.put(centerZ, -1, Z-start_z)
else:
    centerZ = np.append(centerZ, Z-start_z)

# make nx3 arrays of the chunk center coordinates
chunk_center = np.array(np.meshgrid(centerX, centerY, centerZ), dtype='uint32').T.reshape(-1,3)
len(chunk_center)

83160

In [5]:
nuclei_cv = CloudVolume(
    auth.get_cv_path('nuclei_map')['url'],
    progress=False,
    cache=True, # cache to disk to avoid repeated downloads
    use_https=True
)

seg = CloudVolume(auth.get_cv_path('FANC_production_segmentation')['url'], use_https=True, agglomerate=False, cache=True)

In [6]:
output=[]

x_thres = 33-10 # 50/(4.3*2^4/45) = 50/1.53
y_thres = 33-10
z_thres = 50-10

# thresholding cell body size
def mybbox(img):

    x = np.any(img, axis=(1, 2))
    y = np.any(img, axis=(0, 2))
    z = np.any(img, axis=(0, 1))

    xmin, xmax = np.where(x)[0][[0, -1]]
    ymin, ymax = np.where(y)[0][[0, -1]]
    zmin, zmax = np.where(z)[0][[0, -1]]

    return xmin, xmax, ymin, ymax, zmin, zmax

In [7]:
# for loop
# here we only use one chunk for testing
# candidate: 39644, 35021
# set in order zxy
# 35021 codes (50176, 84992,  1920)

nuclei = nuclei_cv.download_point(chunk_center[35021], mip=[68.8,68.8,45.0], size=(128, 128, 256) ) # mip0 and 4 only
# using mio4 to make it faster
# 4.3*(2**4)*128/45=196

In [None]:
# view(nuclei)

In [None]:
# nuclei_cv.cache.flush()

In [8]:
# cell body detection
# thresholding intensity
mask_temp = nuclei[:,:,:]
mask = np.where(mask_temp > 0.5, 1, 0)  

# print(mask.shape) 
# (128, 128, 256, 1)
mask_s = np.squeeze(mask)
# print(mask_s.shape) 
# (128, 128, 256)

In [None]:
# save images
# volume = mask_s.swapaxes(0, 2).astype('float32')
# imwrite('../Output/mask_s.tif', volume, imagej=True)

# check EM dataset
""" em_cv = CloudVolume(
    auth.get_cv_path('Image_Tokyo')['url'],
    progress=False,
    cache=True, # cache to disk to avoid repeated downloads
    use_https=True
)
em = em_cv.download_point(chunk_center[35021], mip=4, size=(128, 128, 256) )
view(em) """

In [None]:
# nuclei_cv.cache.flush()

In [9]:
# estimate connected components
# # 26-connectivity
# https://en.wikipedia.org/wiki/Pixel_connectivity
# https://github.com/seung-lab/connected-components-3d
cc_out, N = cc3d.connected_components(mask_s, return_N=True, connectivity=26) # free
# print(N)
# 12

only 10 but detecting 2 more

In [None]:
# thresholding cell body size

In [10]:
list=[]
for segid in range(1, N+1):
  extracted_image = cc_out * (cc_out == segid)
  bbox = mybbox(extracted_image)
  list.append(bbox)

print(list)

[(0, 55, 0, 14, 0, 45), (26, 113, 23, 111, 0, 38), (125, 126, 104, 104, 0, 1), (81, 127, 61, 125, 36, 140), (11, 88, 89, 127, 65, 167), (22, 116, 0, 65, 106, 236), (113, 127, 38, 81, 111, 183), (98, 127, 110, 127, 163, 246), (31, 105, 78, 127, 197, 255), (28, 57, 62, 73, 238, 245), (124, 127, 20, 43, 247, 255), (23, 49, 0, 4, 252, 255)]


In [11]:
list2=[]
for segid in range(0, N):
  xwidth = list[segid][1] - list[segid][0]
  ywidth = list[segid][3] - list[segid][2]
  zwidth = list[segid][5] - list[segid][4]
  if xwidth >= x_thres and ywidth >= y_thres and zwidth >= z_thres:
    center = ((list[segid][1] + list[segid][0])/2,
      (list[segid][3] + list[segid][2])/2,
      (list[segid][5] + list[segid][4])/2)
    list2.append(center)
  else:
    pass

print(list2)

[(104.0, 93.0, 88.0), (49.5, 108.0, 116.0), (69.0, 32.5, 171.0), (68.0, 102.5, 226.0)]


In [12]:
# calculate center coordinates of cell bodies
# assume bbox provides origin of the dataset...
origin = nuclei.bounds.minpt # 3072,5248,1792
cell_body_coordinates_mip4 = np.add(np.array(list2), origin)
cell_body_coordinates = cell_body_coordinates_mip4
cell_body_coordinates[:,0]  = (cell_body_coordinates_mip4[:,0] * 2**4)
cell_body_coordinates[:,1]  = (cell_body_coordinates_mip4[:,1] * 2**4)
cell_body_coordinates = cell_body_coordinates.astype('uint32')
print(cell_body_coordinates)

[[50816 85456  1880]
 [49944 85696  1908]
 [50256 84488  1963]
 [50240 85608  2018]]


In [13]:
# Lets get IDs using cell_body_coordinates
cell_body_IDs = IDlook.segIDs_from_pts_cv(pts=cell_body_coordinates, cv=seg) #mip0

Segmentation IDs: 100%|██████████| 4/4 [00:00<00:00,  5.37it/s]


In [16]:
# save
# type(cell_body_coordinates.shape)
cell_body_IDs_list = cell_body_IDs.tolist()
output.append(cell_body_IDs_list)
# df = pd.DataFrame({'cell_body_coordinates':cell_body_coordinates, 'cell_body_IDs':cell_body_IDs})
# print(df)

In [17]:
output

[[648518346490989503,
  648518346490988223,
  648518346492077650,
  648518346505454978],
 [648518346490989503,
  648518346490988223,
  648518346492077650,
  648518346505454978]]

In [19]:
sum = sum(output,[])

In [20]:
output_s = set(sum)
# output_s

In [37]:
output_str = [str(n) for n in output_s]
output_2D = np.array(output_str ).reshape(len(output_str ),1).tolist()
print(output_2D)

[['648518346490988223'], ['648518346492077650'], ['648518346505454978'], ['648518346490989503']]


In [39]:
with open('./output.csv', 'w') as result:
    writer = csv.writer(result)
    writer.writerows(output_2D)

In [44]:
Y

4

In [42]:
chunk_center[1]
nuclei_cv.mip_volume_size(0)
# [ 83968 223232   4390]
# why different

Vec(86016,225776,4390, dtype=int64)

In [47]:
nuclei_cv.exists(chunk_center[35021], mip=[68.8,68.8,45.0], size=(128, 128, 256) )

TypeError: exists() got an unexpected keyword argument 'mip'

In [52]:
nuclei_cv = CloudVolume(
    auth.get_cv_path('nuclei_map')['url'],
    progress=False,
    cache=True, # cache to disk to avoid repeated downloads
    use_https=True,
    autocrop=True,
    bounded=False
)

# for i in range(len(chunk_center)):
i=1
nucleit = nuclei_cv.download_point(chunk_center[i], mip=[68.8,68.8,45.0], size=(128, 128, 256))

In [53]:
nucleit.bounds.minpt

Vec(0,128,10, dtype=int32)

In [None]:
test = seg.download_point(segids=[648518346490989503], mip=0) 


In [None]:
cell body seg id overlap
cell body and neuron id overlap

In [None]:
nuclei_cv.cache.flush()