In [1]:
import sys
from collections.abc import Iterable
from agents import data_loader
from caveclient import CAVEclient
from cloudvolume import CloudVolume, VolumeCutout
import numpy as np
from tqdm import tqdm
from orphan_extension.utils.cast_to_bounds import cast_points_within_bounds
from tip_finding import tip_finding

In [2]:

class OrphanError(Exception):
    pass


In [3]:

class Orphans:
    def __init__(self, x_min, x_max, y_min, y_max, z_min, z_max):
        self.x_min = x_min
        self.x_max = x_max
        self.y_min = y_min
        self.y_max = y_max
        self.z_min = z_min
        self.z_max = z_max

    # Gets all the seg ids within a given subvolume and organizes by size of process. Returns list of tuples: (seg_id, size)
    def get_unique_seg_ids_em(self, coords=None) -> list:
        if (coords != None and len(coords) != 6):  # CHANGE THE ERROR THROWN!
            raise OrphanError("get_unqiue_seg_ids_em need 6 coordinates!!")
        if (coords != None):
            x_min = coords[0]
            x_max = coords[1]
            y_min = coords[2]
            y_max = coords[3]
            z_min = coords[4]
            z_max = coords[5]
        else:
            x_min = self.x_min
            x_max = self.x_max
            y_min = self.y_min
            y_max = self.y_max
            z_min = self.z_min
            z_max = self.z_max
        # Get entire EM data - uncomment after testing
        # em = CloudVolume('s3://bossdb-open-data/iarpa_microns/minnie/minnie65/seg', use_https=True, mip=0, parallel=True, fill_missing=True, progress=True)

        # Get seg ids in the specified subvolume
        seg_ids_sv = data_loader.get_seg(x_min, x_max, y_min,
                                         y_max, z_min, z_max)

        # Get rid of the 4th dimension since its magnitude is 1
        seg_ids_sv = np.squeeze(seg_ids_sv)

        # List of all unique seg ids in the 3d subvolume
        unique_seg_ids_sv = np.unique(seg_ids_sv)

        # Removing the first element (artifacts) of unique_seg_ids_sv
        """
        Drop all zeros in array instead of only first, handles edge cases
        unique_seg_ids_sv = np.delete(unique_seg_ids_sv, 0)
        """

        unique_seg_ids_sv = unique_seg_ids_sv[unique_seg_ids_sv != 0]
        unique_seg_ids_sv = unique_seg_ids_sv[unique_seg_ids_sv != None]

        # Organizing seg ids in subvolume by size
        seg_ids_by_size = {}
        for seg_id in (pbar := tqdm(unique_seg_ids_sv)):
            pbar.set_description('Organizing seg_ids by size')
            # seg_ids_by_size[seg_id] = int(em[em == seg_id].sum()) # Uncomment after testing to organize seg ids by size considering whole data
            seg_ids_by_size[seg_id] = [int(
                np.sum(seg_ids_sv == seg_id))]

        seg_ids_by_size = sorted(seg_ids_by_size.items(),
                                 key=lambda x: x[1], reverse=True)
        # Returns a list of tuples with first element as seg id, second elmenet of tuple is a list containing size
        return seg_ids_by_size  # Sorted in descending order

    # Get the list of orphans within a given subvolume organized by largest orphan in subvolume first
    def get_orphans(self, coords=None) -> dict:

        unique_seg_ids = self.get_unique_seg_ids_em(coords)

        # Getting all the orphans
        orphans = {}

        for seg_id_and_size in (pbar := tqdm(unique_seg_ids)):
            pbar.set_description('Finding orphans')
            seg_id = seg_id_and_size[0]
            if (data_loader.get_num_soma(str(seg_id)) == 0):
                orphans[seg_id] = [seg_id_and_size[1][0]]

        return orphans  # dict of seg_ids that are orphans in given subvolume

    # Input: processes is a dictionary with key = seg_id, value = list of attributes
    # Returns: updated processes so that value also includes the type of the process
    def get_process_type(self, processes: dict) -> dict:
        for seg_id, attributes in (pbar := tqdm(processes.items())):
            pbar.set_description('Finding process type')
            num_pre_synapses, num_post_synapses = data_loader.get_syn_counts(
                str(seg_id))
            if (num_pre_synapses > num_post_synapses):
                attributes.append('axon')
            elif (num_post_synapses > num_pre_synapses):
                attributes.append('dendrite')
            else:
                attributes.append('unconfirmed')

        return processes

    def get_pot_extensions(self, endpoint_coords):

        if (len(endpoint_coords) != 3):
            # FIX THIS - SHOULD BE DIFF TYPE OF ERROR
            raise OrphanError(
                "get_pot_extension needs all 3 coordinates of endpoint to extend!")

        # Get the coordinates of the bounding box around the endpoint
        endpoint_bounding_box_coords = bounding_box_coords(endpoint_coords)

        # Get a preliminary list of all seg ids within bounding box
        # pot_ex = self.get_unique_seg_ids_em(*endpoint_bounding_box_coords)
        pot_ex = self.get_unique_seg_ids_em(endpoint_bounding_box_coords)

        # Get seg id of current fragment
        curr_process_seg_id = data_loader.get_seg(
            endpoint_coords[0], endpoint_coords[0], endpoint_coords[1], endpoint_coords[1], endpoint_coords[2], endpoint_coords[2])

        # Get type of all processes
        pot_ex = dict(pot_ex)
        pot_ex = self.get_process_type(pot_ex)

        # Get type of current process
        curr_process_type = pot_ex[curr_process_seg_id][1]

        # Remove current seg id from the list of potential extensions - REWORK TO USE DELETE
        # pot_ex = pot_ex[str(pot_ex) != str(curr_process_seg_id)]
        del pot_ex[curr_process_seg_id]

        # Filter out all other processes whose type!= current process type
        pot_ex = remove_diff_types(curr_process_type, pot_ex)

        return pot_ex  # Return all potential extensions after removing confirmed other types


def bounding_box_coords(point: Iterable, boxdim: Iterable = [100, 100, 100]) -> list:
    # Data bounds not validated
    data_bounds = [26000, 220608, 30304, 161376, 14825, 27881]

    # Confirm that entry is 3dim
    if len(point) != 3:
        raise OrphanError(
            "Point passed to func bounding_box_coords() must be an iterable of length 3.")
    if len(boxdim) != 3:
        raise OrphanError(
            "Box dimensions passed to func bounding_box_coords() must be 3 dimensional")

    # Check bound validity and cast to new bounds
    casted_bounds = cast_points_within_bounds(point, data_bounds, boxdim)

    return casted_bounds


def remove_diff_types(process_type, pot_ex):
    for seg_id, attributes in pot_ex.items():
        if (process_type not in attributes or "unconfirmed" not in attributes):
            del pot_ex[seg_id]

    return pot_ex


In [4]:
bounds = bounding_box_coords([115267, 91839, 21305])
print(bounds)
orphanclass = Orphans(bounds[0], bounds[1],
                      bounds[2], bounds[3], bounds[4], bounds[5])
orphans = orphanclass.get_orphans()
print("Number of orphans:", len(orphans))
orphanclass.get_process_type(orphans)
print("Orphans", orphans)


[115167, 115367, 91739, 91939, 21205, 21405]


Shard Indices: 100%|██████████| 2/2 [00:00<00:00,  5.45it/s]
Minishard Indices: 100%|██████████| 4/4 [00:00<00:00, 11.91it/s]
100%|██████████| 29/29 [00:01<00:00, 26.72it/s]
Decompressing: 100%|██████████| 63/63 [00:00<00:00, 4729.74it/s]
Organizing seg_ids by size: 100%|██████████| 206/206 [00:02<00:00, 69.19it/s]
Finding orphans: 100%|██████████| 206/206 [02:39<00:00,  1.29it/s]


Number of orphans: 182


Finding process type:  92%|█████████▏| 167/182 [27:57<02:50, 11.38s/it]   

In [27]:
total_size = orphans.values()
total_size = list(total_size)
total_size = np.array(total_size)

In [28]:
int_array = total_size[:,0].astype(int)

np.sum(int_array)

4923179

In [34]:
unique_seg_ids_subvolume = orphanclass.get_unique_seg_ids_em(bounds)
unique_seg_ids_subvolume

Shard Indices: 100%|██████████| 2/2 [00:00<00:00,  7.78it/s]
Minishard Indices: 100%|██████████| 4/4 [00:00<00:00, 29.92it/s]
100%|██████████| 29/29 [00:00<00:00, 50.94it/s]
Decompressing: 100%|██████████| 63/63 [00:00<00:00, 4369.57it/s]
Organizing seg_ids by size: 100%|██████████| 206/206 [00:03<00:00, 66.03it/s]


[(864691136909215598, [1034168]),
 (864691135522074866, [365051]),
 (864691136914365806, [309716]),
 (864691135615954025, [294199]),
 (864691135715218970, [239326]),
 (864691135521264882, [232567]),
 (864691135918483376, [224137]),
 (864691135520270066, [198482]),
 (864691135648168388, [185896]),
 (864691135478423238, [181396]),
 (864691135455309298, [177146]),
 (864691136266240274, [170963]),
 (864691135631953092, [170541]),
 (864691136905965934, [133636]),
 (864691135368930546, [129328]),
 (864691135804594461, [123565]),
 (864691136311846205, [122233]),
 (864691135792802845, [116463]),
 (864691135099878176, [115771]),
 (864691135546903236, [109626]),
 (864691135478343235, [107608]),
 (864691136989968789, [107278]),
 (864691135012657910, [106875]),
 (864691135617729423, [103729]),
 (864691135447541746, [93188]),
 (864691135407645650, [86773]),
 (864691135553310962, [81079]),
 (864691135754538962, [72690]),
 (864691135407650002, [71564]),
 (864691135793272349, [66218]),
 (8646911354076

In [53]:
total_seg_ids_size = np.array(unique_seg_ids_subvolume)
total_seg_ids_size = total_seg_ids_size[:,1]
sum = 0
for i in total_seg_ids_size:
    # print(i)
    sum+=i[0]

print()
print(sum)


7800291


  total_seg_ids_size = np.array(unique_seg_ids_subvolume)


In [63]:
seg_ids = [864691136909215598, 864691135521264882, 864691135368930546, 864691135918483376, 864691135804594461, 864691136914365806, 864691135395943378, 864691135478343235, 864691135648168388, 864691136000419720, 864691135449037042, 864691136181973462, 864691135582201586, 864691133035107425, 864691136913731182, 864691135407650258, 864691135968211902, 864691132647778343,
           864691135401901778, 864691135104027483, 864691133716301031, 864691132647775271, 864691132647776807, 864691135648134852, 864691135407637970, 864691133035106657, 864691135648947396, 864691133456842377, 864691132647776295, 864691133716301287, 864691133035107169, 864691132647777575, 864691135804092189, 864691135793272349, 864691133456842633, 864691132647777063]

for i in seg_ids:
    print(data_loader.get_num_soma(i))

2
