In [1]:
"""
#building the dictionary to use for the random ordering
1) Iterate through all the categories
2) Iterate through all the boundaries
3) Get the index and segment_ids for all and store them

#the way to make the final list
1) Loop through each size category
2) Loop through each type category
3) Select a random index from size of list
4) Filter the list to neurons that have not been picked
5) If list is 0 then reset the list and filter the list again
6) Pick a random index from the list
    a. Add that segment to the list
    b. Add the neuron as already being picked
7) Repeat until the picked list is of a N length

8) Make sure at the end have something from every neuron by finding unique segments
"""



'\n#building the dictionary to use for the random ordering\n1) Iterate through all the categories\n2) Iterate through all the boundaries\n3) Get the index and segment_ids for all and store them\n\n#the way to make the final list\n1) Loop through each size category\n2) Loop through each type category\n3) Select a random index from size of list\n4) Filter the list to neurons that have not been picked\n5) If list is 0 then reset the list and filter the list again\n6) Pick a random index from the list\n    a. Add that segment to the list\n    b. Add the neuron as already being picked\n7) Repeat until the picked list is of a N length\n\n8) Make sure at the end have something from every neuron by finding unique segments\n'

In [2]:
import datajoint as dj
import numpy as np
import time
import seaborn as sns
import random
sns.set()

In [3]:
#setting the address and the username
dj.config['database.host'] = '10.28.0.34'
dj.config['database.user'] = 'celiib'
dj.config['database.password'] = 'newceliipass'
dj.config['safemode']=True
dj.config["display.limit"] = 20

schema = dj.schema("microns_pinky")
pinky = dj.create_virtual_module("pinky","microns_pinky")

Connecting celiib@10.28.0.34:3306


In [7]:
primary_key = dict(segmentation=3,decimation_ratio=0.35) 
all_IDs = (pinky.SpineValidationCompartmentFinalComponentFinal()  & primary_key).fetch("segment_id",order_by="process_order").tolist()  

In [4]:
ids,compartment,components = (pinky.SpineValidationCompartmentFinalComponentFinal & "decimation_ratio=0.35").fetch("segment_id","compartment_type","component_index",order_by="process_order")

In [4]:
total_compartments = ["Basal","Apical","Oblique"] 

compartment_size_boundaries = dict([(k,[]) for k in total_compartments])
#calculate the size groups for each of the compartments
for comp_type in compartment_size_boundaries.keys():
    search_key = dict(segmentation=3,decimation_ratio=0.35,compartment_type = comp_type)
    excitatory_n_vertices = (pinky.CompartmentFinal.ComponentFinal() & search_key).fetch("n_vertex_indices")
    excitatory_n_vertices_size_filtered = excitatory_n_vertices[excitatory_n_vertices>5000]

    #get the quartiles for that group
    q1 = np.percentile(excitatory_n_vertices_size_filtered, 25)
    q2 = np.percentile(excitatory_n_vertices_size_filtered, 50)
    q3 = np.percentile(excitatory_n_vertices_size_filtered, 75)
    iq_1 = q2 - q1
    iq_2 = q3 - q2
    iq_average = (iq_1 + iq_2)/2
    lower_bounds = q1 - iq_average
    upper_bounds = q3 + iq_average

    print(lower_bounds,q1,q2,q3,upper_bounds)

    #check that these numbers aren't too low or too high
    if lower_bounds < np.min(excitatory_n_vertices_size_filtered):
        print(f"lower bounds value of {lower_bounds} was too low so changing to the minimum value")
        lower_bounds = np.min(excitatory_n_vertices_size_filtered)

    if upper_bounds > np.max(excitatory_n_vertices_size_filtered):
        print(f"upper bounds value of {upper_bounds} was too low so changing to the minimum value")
        upper_bounds = np.max(excitatory_n_vertices_size_filtered)

    compartment_size_boundaries[comp_type] = [(lower_bounds,q1),
                                             (q1,q2),
                                             (q2,q3),
                                             (q3,upper_bounds)]
    
    
    #print("boundaries = " + str(boundaries))
    

-3611.625 28370.25 54778.5 92334.0 124315.875
lower bounds value of -3611.625 was too low so changing to the minimum value
-10802.75 31170.25 73118.0 115116.25 157089.25
lower bounds value of -10802.75 was too low so changing to the minimum value
1875.0 19128.75 32581.5 53636.25 70890.0
lower bounds value of 1875.0 was too low so changing to the minimum value


In [5]:
compartment_size_boundaries

{'Basal': [(5092, 28370.25),
  (28370.25, 54778.5),
  (54778.5, 92334.0),
  (92334.0, 124315.875)],
 'Apical': [(5078, 31170.25),
  (31170.25, 73118.0),
  (73118.0, 115116.25),
  (115116.25, 157089.25)],
 'Oblique': [(5031, 19128.75),
  (19128.75, 32581.5),
  (32581.5, 53636.25),
  (53636.25, 70890.0)]}

In [6]:
#go through and assemble the dictionary pull down the lists
total_sizes = ["group1","group2","group3","group4"]
compartment_size_lists = dict([(k,dict([(j,[]) for j in total_sizes]))
                              for k in total_compartments])

#interate through and build lists:
for y,size_group in enumerate(total_sizes):
    for comp_type in compartment_size_lists.keys():
    
        print(f"Working on comp {comp_type} {size_group}")
        #get the boundaries
        boundaries = compartment_size_boundaries[comp_type][y]
        print(f"Boundaries = {boundaries}")
        
        boundary_restrictions = "n_vertex_indices >= " + str(boundaries[0]) + " AND n_vertex_indices <= " + str(boundaries[1])
        print("boundary_restrictions = " + str(boundary_restrictions))

        search_key = dict(segmentation=3,decimation_ratio=0.35,compartment_type = comp_type)
        excitatory_n_vertices_boundary = pinky.CompartmentFinal.ComponentFinal() & search_key & boundary_restrictions#).fetch("n_vertex_indices")
        n_vertex_indices_with_boundaries = excitatory_n_vertices_boundary.fetch("n_vertex_indices")
        print("Min and Max = " + str((np.min(n_vertex_indices_with_boundaries),np.max(n_vertex_indices_with_boundaries))))

        """
        At this point have all the components within a size range of that specific compartment type but do not have them grouped by neurons
        """
        seg_ids,comp_index = excitatory_n_vertices_boundary.fetch("segment_id","component_index")
        total_pairs = np.stack([comp_index,seg_ids],axis=1)
        print("Total Number of pairs = " + str(total_pairs.shape[0]))
        compartment_size_lists[comp_type][size_group] = total_pairs



Working on comp Basal group1
Boundaries = (5092, 28370.25)
boundary_restrictions = n_vertex_indices >= 5092 AND n_vertex_indices <= 28370.25
Min and Max = (5092, 28329)
Total Number of pairs = 316
Working on comp Apical group1
Boundaries = (5078, 31170.25)
boundary_restrictions = n_vertex_indices >= 5078 AND n_vertex_indices <= 31170.25
Min and Max = (5078, 31015)
Total Number of pairs = 65
Working on comp Oblique group1
Boundaries = (5031, 19128.75)
boundary_restrictions = n_vertex_indices >= 5031 AND n_vertex_indices <= 19128.75
Min and Max = (5031, 19104)
Total Number of pairs = 64
Working on comp Basal group2
Boundaries = (28370.25, 54778.5)
boundary_restrictions = n_vertex_indices >= 28370.25 AND n_vertex_indices <= 54778.5
Min and Max = (28384, 54663)
Total Number of pairs = 316
Working on comp Apical group2
Boundaries = (31170.25, 73118.0)
boundary_restrictions = n_vertex_indices >= 31170.25 AND n_vertex_indices <= 73118.0
Min and Max = (31636, 72245)
Total Number of pairs = 64


In [7]:
pinky.CompartmentFinal.ComponentFinal()

segmentation  segmentation id,segment_id  segment id unique within each Segmentation,decimation_ratio,"compartment_type  Basal, Apical, spine head, etc.",component_index  Which sub-compartment of a certain label this is.,n_vertex_indices,n_triangle_indices,vertex_indices  preserved indices of each vertex of this sub-compartment,triangle_indices  preserved indices of each triangle of this sub-compartment
3,648518346341371119,0.35,Apical,0,115133,230349,=BLOB=,=BLOB=
3,648518346341371119,0.35,Axon-Soma,0,1567,3108,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,0,4231,8416,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,1,110,208,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,2,22310,44613,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,3,1021,1988,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,4,40075,80121,=BLOB=,=BLOB=
3,648518346341371119,0.35,Basal,5,514,954,=BLOB=,=BLOB=
3,648518346341371119,0.35,Cilia,0,111,216,=BLOB=,=BLOB=
3,648518346341371119,0.35,Error,0,716,1423,=BLOB=,=BLOB=


In [8]:
import copy 

In [9]:
compartment_size_lists_saved = copy.deepcopy(compartment_size_lists)

In [110]:
compartment_size_lists = copy.deepcopy(compartment_size_lists_saved)

In [112]:
segmentation = 3
decimation_ratio = 0.35

already_used = []
total_list = []
length_threshold = 400
total_compartments = ["Basal","Apical","Oblique"] 
total_sizes = ["group1","group2","group3","group4"]
counter = 0
while(len(total_list) < length_threshold):
    for size in total_sizes:
        for comp_type in total_compartments:
            repeat = True
            while repeat == True:
                
                #print(f"Working on {comp_type} {size}")
                #get the current list of neurons
                current_array = compartment_size_lists[comp_type][size][:,1]


                indices = np.where(np.invert(np.in1d(current_array,np.array(already_used))))[0]
    #             #print("indices = " + str(indices))
    #             #print("len(indices) = " + str(len(indices)))
    #             current_filtered_array = current_array[indices]

    #             #print(current_filtered_array)
    #             #check if any in the already_used:
    #             current_filtered_array_list = list(current_filtered_array)
    #             #print("current_filtered_array_list = " + str(current_filtered_array_list))
    #             #print("already_used = " + str(already_used))
    #             print("intersection = " + str(set(current_filtered_array_list).intersection(set(already_used))))
    #             for i in current_filtered_array_list:
    #                 if i in already_used:
    #                     raise Exception("Not allowed Neuron")



                if len(indices) <= 0:
                    print("Resetting the already_used list when it's current length is " + str(len(already_used)))
                    already_used = []
                    indices = np.where(np.invert(np.in1d(current_array,np.array(already_used))))[0]

                #get the random int 
                random_index = random.randint(0,len(indices) - 1)
                neuron_index = indices[random_index]

                #pick the neuron
                picked_neuron = compartment_size_lists[comp_type][size][neuron_index]

                final_dict = dict(process_order = counter,
                                      segmentation=segmentation,
                                      segment_id=picked_neuron[1],
                                      decimation_ratio=decimation_ratio,
                                      compartment_type=comp_type,
                                      component_index=picked_neuron[0])
                
                #check that this hasn't already been added to the list
                repeat = False
                for t in total_list:
                    if t == final_dict:
                        print("Found a repeat")
                        repeat = True
                        break

            
            total_list.append(final_dict)
            counter += 1
            #delete the neruon from the list
            compartment_size_lists[comp_type][size] = np.delete(compartment_size_lists[comp_type][size],random_index,axis=0)

            #add the segment_id to those already visited
            already_used.append(picked_neuron[1])
            
            #print("Length of already_used= " + str(len(np.unique(already_used))))
        

Resetting the already_used list when it's current length is 166
Resetting the already_used list when it's current length is 132
Resetting the already_used list when it's current length is 108


# HAD TO MANUALLY STOP THE ONE ABOVE

In [113]:
len(total_list)

408

In [None]:
"""
segmentation         : tinyint unsigned             # segmentation id
    segment_id           : bigint unsigned              # segment id unique within each Segmentation
    decimation_ratio     : decimal(3,2) 
    compartment_type     : varchar(16)                  # Basal, Apical, spine head, etc.
    component_index      : smallint unsigned            # Which sub-compartment of a certain label this is.
"""

In [122]:
SpineValidationCompartmentFinalComponentFinal.drop()

`microns_pinky`.`spine_validation_compartment_final_component_final` (408 tuples)


Proceed? [yes, No]:  yes


Tables dropped.  Restart kernel.


In [123]:
# create a new datajoint table to store the values
schema = dj.schema("microns_pinky")

@schema
class SpineValidationCompartmentFinalComponentFinal(dj.Manual):
    definition="""
    process_order        : bigint unsigned              # segment id unique within each Segmentation
    -> pinky.CompartmentFinal.ComponentFinal
    ---
    
    """

In [121]:
help(SpineValidationCompartmentFinalComponentFinal.insert)

Help on method insert in module datajoint.table:

insert(rows, replace=False, skip_duplicates=False, ignore_extra_fields=False, ignore_errors=False, allow_direct_insert=None) method of __main__.SpineValidationCompartmentFinalComponentFinal instance
    Insert a collection of rows.
    
    :param rows: An iterable where an element is a numpy record, a dict-like object, or an ordered sequence.
        rows may also be another relation with the same heading.
    :param replace: If True, replaces the existing tuple.
    :param skip_duplicates: If True, silently skip duplicate inserts.
    :param ignore_extra_fields: If False, fields that are not in the heading raise error.
    :param allow_direct_insert: applies only in auto-populated tables. Set True to insert outside populate calls.
    
    Example::
    >>> relation.insert([
    >>>     dict(subject_id=7, species="mouse", date_of_birth="2014-09-01"),
    >>>     dict(subject_id=8, species="mouse", date_of_birth="2014-09-02")])



In [119]:
SpineValidationCompartmentFinalComponentFinal.insert(total_list)

In [None]:
#might have to save this file as a list

In [129]:
spine_validation_table = (SpineValidationCompartmentFinalComponentFinal() * pinky.CompartmentFinal.ComponentFinal).proj("n_vertex_indices","n_triangle_indices")




In [130]:
whole_data = spine_validation_table.fetch(order_by="process_order")

In [131]:
#demonstrates that have effectively created an ordered list 
import pandas as pd
df = pd.DataFrame(whole_data)
df

Unnamed: 0,process_order,segmentation,segment_id,decimation_ratio,compartment_type,component_index,n_vertex_indices,n_triangle_indices
0,0,3,648518346349472574,0.35,Basal,3,19599,39187
1,1,3,648518346349509855,0.35,Apical,0,25997,51914
2,2,3,648518346349503766,0.35,Oblique,0,14236,28502
3,3,3,648518346349511321,0.35,Basal,2,52016,104025
4,4,3,648518346349479478,0.35,Apical,0,34679,69321
5,5,3,648518346349511983,0.35,Oblique,0,26003,52016
6,6,3,648518346349496103,0.35,Basal,4,61838,123779
7,7,3,648518346349497094,0.35,Apical,0,90660,181379
8,8,3,648518346349509410,0.35,Oblique,1,50036,100123
9,9,3,648518346349504065,0.35,Basal,0,101674,203443
