In [None]:
from caveclient import CAVEclient
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Client built without a datastack name (global_only=True); `client` is rebound to a
# datastack-specific client in a later cell.
client = CAVEclient(global_only=True)

In [None]:
# Authentication guide (how to obtain a new token):
# https://annotationframeworkclient.readthedocs.io/en/stable/guide/authentication.html#new-token
# Rebind `client` to the 'minnie65_phase3_v1' datastack and display its info record.
client = CAVEclient('minnie65_phase3_v1')
client.info.get_datastack_info()

In [None]:
#client.auth.get_new_token()

In [None]:
# "the token is saved to ~/.cloudvolume/secrets/chunkedgraph-secret.json as a string under the key token"
# pls don't regenerate
# SECURITY: a literal token was pasted on the next line. Never commit credentials to a
# notebook -- revoke the exposed token and pass the real one in at run time instead.
#client.auth.save_token(token="<YOUR_TOKEN>",overwrite=True)

In [None]:
# Display the tables exposed by the materialization service for this datastack.
client.materialize.get_tables()

In [None]:
# Display the metadata of the coarse cell-class soma table (v1).
client.materialize.get_table_metadata('allen_soma_coarse_cell_class_model_v1')

In [None]:
# Display the metadata of the nucleus table.
client.materialize.get_table_metadata('nucleus_neuron_svm')

    Summary:
    
    - allen_v1_column_types_slanted has been manually examined for cell type

    - synapses_pni_2 is a table of the locations of a target cell's synapse according to its input cell

    - allen_soma_coarse_cell_class_model_v1 and v2 are 99% pre-proofread and have the location of all cell bodies in the dataset - the difference is that v1 has specific cell types, while v2 is just excitatory/inhibitory

    - nucleus_neuron_svm is a table where each row represents a soma

    Positions can be copied and pasted into neuroglancer!
    
In order to access these tables, query_table will download the whole thing, unless any of the 3 filters are applied:

- filter_equal_dict : returns a row only if it contains specified value
- filter_in_dict : returns a row only if its value is within allowed constraint
- filter_not_in_dict : returns a row only if its value is NOT within allowed constraint

Further, you can pass select_columns = ['id','pre_pt_root_id', 'pre_pt_position'] if you only want those columns, and split_columns=True if you want each position split into x, y, and z columns (they can be recombined with concatenate_position_columns()).

In [None]:
# Materialization engine note: v120 is the most recent version (one exact timestamp).
# Tip: `client.materialize.query_table?` shows the call's docstring and arguments.
df = client.materialize.query_table(
    'allen_v1_column_types_slanted'
)
# Preview the first three rows only.
df.head(3)

In [None]:
# Sorted distinct values of the classification_system column.
np.unique(df["classification_system"])

### Cell type key:

    Number refers to layer, P refers to pyramidal (broadly), IT = intratelencephalic

output cells =
- NP = near-projecting (spooky) (spider) (im scared)
- PT = pyramidal tract
- CT = cortico-thalamic, projects to the thalamus (sensory->thalamus->cortex->loops)
- BC = basket-cell (primarily target the cell body of excitatory neurons), strategically positioned to control spiking and strongly interconnected
- BPC = bipolar cell (VIP = molecular term, dendrites goin up n down) (controls SST) (only some target E), 15% of all interneurons, gain control
- MC = Martinotti cell (SST = molecular term) (primary source of inhibition) targeting distal dendrites & apical dendrites, powerful boys, project to layer 1, mediate di-synaptic inhibition between neighboring pyramidal cells
- NGC = neurogliaform cell (instead of pre/post-synaptic, they do volume transmission) spewing GABA, no specific target, (meh categorization)

In [None]:
# Sorted distinct values of the cell_type column.
np.unique(df["cell_type"])

## Working with a Starter Neuron

    We'll be using a basket cell, as they are well-proofread with a large axon.

In [None]:
# Root id of the starter cell, kept in a list so it can be used for membership tests.
root_id = [864691135428608048]
# Row(s) of the column-types table that belong to the starter cell.
boy = df[df["pt_root_id"].isin(root_id)]
boy

    Now to look for all synapses where the pre-synaptic partner has the same root_id as our start cell...

In [None]:
# Synapses whose pre-synaptic root id equals the starter cell's root id.
syn = client.materialize.query_table(
    'synapses_pni_2',
    filter_equal_dict={'pre_pt_root_id': root_id[0]},
)
# Preview the first three rows only.
syn.head(3)

#### synapses table key:

- pre_pt_root_id = pre-synaptic (input cell) we selected for
- post_pt_root_id = post-synaptic cell that the queried cell connects to
- size = volume of synapse in units of supervoxels (most of the time is proportional to strength/weight)

#### synapse has 3 pts associated: 
- 1 is just inside the pre-syn side of the synapse (on the axon)
- 1 is just inside the post-syn
- 1 is center point

In [None]:
# Distinct post-synaptic root ids contacted by the starter cell.
unique_syn = np.unique(syn.post_pt_root_id)
# Counts are integers: format them with ':d'. The previous ':g' switches to scientific
# notation once a count needs more than six significant digits. The counted ids are the
# post-synaptic targets, so the connections go *onto* them.
print(f"There are {len(syn):d} synaptic connections onto {len(unique_syn):d} cells.")

    Now we're going to query the nuclei table, so that we have access to the position of the somas whose dendrites have the post-synaptic connections we just found.

In [None]:
# edge-list network representation
# for each (pre, post) root_id pair, how many synapses are there?
# (typographic quotes and a stray ')' fixed so the lines run if uncommented)
#edgelist = syn_df.groupby(['pre_pt_root_id', 'post_pt_root_id']).count()['valid'].reset_index()
#edgelist = syn_df.groupby(['pre_pt_root_id', 'post_pt_root_id']).count()['valid'].reset_index().rename(columns={'valid': 'num_syn'})

In [None]:
# Download the whole nucleus table. A server-side filter was tried and left disabled:
# filter_in_dict = {'pt_root_id': unique_syn}
nuclei_unmasked = client.materialize.query_table('nucleus_neuron_svm')
# new df of just neurons (no glial cells)
nuclei = nuclei_unmasked.query('cell_type == "neuron"').reset_index(drop=True)
# New column: how many neuron rows share the same root_id. Selecting 'valid' *before*
# transform counts a single column instead of every column of the table.
nuclei['num_soma'] = nuclei.groupby('pt_root_id')['valid'].transform('count')
# Throw out merged nuclei (the same root_id assigned to more than one neuron).
mask_nuclei = nuclei['num_soma'] < 2
nuclei_full = nuclei[mask_nuclei].reset_index(drop=True)

In [None]:
# Distinct root ids of the neurons kept in nuclei_full.
unique_nuc = np.unique(nuclei_full.pt_root_id)
# Rows of the coarse cell-class soma table (v1) whose root id is in that set.
soma_full = client.materialize.query_table('allen_soma_coarse_cell_class_model_v1',filter_in_dict = {'pt_root_id':unique_nuc})
# Distinct root ids that actually came back from the soma table.
unique_soma = np.unique(soma_full.pt_root_id)

In [None]:
# Row counts of the nucleus-derived table and of the soma table matched to it.
print(len(nuclei_full))
print(len(soma_full))

In [None]:
# Synapses of the starter cell whose post-synaptic root id appears in the nucleus-derived
# id set (syn_nuc) or in the soma-table id set (syn_soma).
post_ids = syn["post_pt_root_id"]
syn_nuc = syn[post_ids.isin(unique_nuc)].reset_index(drop=True)
syn_soma = syn[post_ids.isin(unique_soma)].reset_index(drop=True)
del post_ids  # temporary; keep the notebook namespace unchanged

In [None]:
# For each filtered synapse table: number of synapses, then number of distinct
# post-synaptic root ids.
print(len(syn_nuc))
print(len(np.unique(syn_nuc.post_pt_root_id)))
print(len(syn_soma))
print(len(np.unique(syn_soma.post_pt_root_id)))

In [None]:
# Distinct post-synaptic root ids in each filtered synapse table (used as query filters next).
unique_syn_nuc = np.unique(syn_nuc.post_pt_root_id)
unique_syn_soma = np.unique(syn_soma.post_pt_root_id)

In [None]:
# NOTE: don't do this - better: query the large table once for all valid nuclei, then flag
# (True/False) whether each one is connected to `boy`.
# The nucleus table has no cell types, so a table that actually has them is queried instead.
# Both queries hit the same table; they differ only in the set of root ids used as filter.
post_nuclei = client.materialize.query_table('allen_soma_coarse_cell_class_model_v1',filter_in_dict = {'pt_root_id':unique_syn_nuc})
post_somas = client.materialize.query_table('allen_soma_coarse_cell_class_model_v1',filter_in_dict = {'pt_root_id':unique_syn_soma})
# not many will be in here bc of proofreading stuff
post_slant = client.materialize.query_table('allen_v1_column_types_slanted',filter_in_dict = {'pt_root_id':unique_syn_soma})

In [None]:
def class_spitter(df):
    """Split ``df`` into one sub-frame per distinct ``classification_system`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``classification_system`` column.

    Returns
    -------
    list of pandas.DataFrame
        One frame per distinct label, in the sorted order ``np.unique`` yields.
        Each frame keeps the row index of ``df``. Rows whose label is missing
        (None/NaN) belong to no group and are left out.
    """
    labels = df["classification_system"]
    # Drop missing labels before sorting: np.unique cannot order a mix of str and None/NaN.
    classes = np.unique(labels.dropna())
    # Boolean masks instead of string-built query() expressions evaluated in an index loop.
    return [df[labels == label] for label in classes]

def type_spitter(df):
    """Split ``df`` into one sub-frame per distinct ``cell_type`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``cell_type`` column.

    Returns
    -------
    list of pandas.DataFrame
        One frame per distinct cell type, in the sorted order ``np.unique``
        yields. Each frame keeps the row index of ``df``. Rows whose cell type
        is missing (None/NaN) belong to no group and are left out.
    """
    labels = df["cell_type"]
    # Drop missing labels before sorting: np.unique cannot order a mix of str and None/NaN.
    types = np.unique(labels.dropna())
    # Boolean masks instead of string-built query() expressions evaluated in an index loop.
    return [df[labels == label] for label in types]

In [None]:
# Split each post-synaptic partner table into per-group frames:
# first by classification_system ...
post_slant_class = class_spitter(post_slant)
post_som_class = class_spitter(post_somas)
post_nuc_class = class_spitter(post_nuclei)

# ... then by cell_type.
post_slant_type = type_spitter(post_slant)
post_som_type = type_spitter(post_somas)
post_nuc_type = type_spitter(post_nuclei)

    "Distance in the y dimension (depth) is quite different from x and z (radial directions). For example, cell types often live specifically at a particular depth, but are scattered widely across the x and z dimensions."

In [None]:
def Eucdistance(pre, post, resolution=(4, 4, 40)):
    """Euclidean distance from one reference cell to every cell in ``post``.

    Parameters
    ----------
    pre : pandas.DataFrame
        Frame whose first row holds the reference cell; its ``pt_position``
        entry is an (x, y, z) triple (list, tuple or array).
    post : pandas.DataFrame
        Frame with a ``pt_position`` column of (x, y, z) triples.
    resolution : sequence of 3 numbers, optional
        Scale factor applied to x, y and z before measuring. The default
        (4, 4, 40) is the scaling that used to be hard-coded here; ``distance``
        in this notebook treats the scaled result as nanometres.

    Returns
    -------
    numpy.ndarray
        1-D float array with one distance per row of ``post``; empty when
        ``post`` has no rows.
    """
    scale = np.asarray(resolution, dtype=float)
    # Build a numeric array BEFORE scaling: multiplying an object array that holds Python
    # lists by 4 repeats each list instead of scaling its coordinates.
    origin = np.asarray(pre.pt_position.iloc[0], dtype=float) * scale
    # reshape keeps the (n, 3) layout even when post is empty.
    targets = np.array(post.pt_position.tolist(), dtype=float).reshape(-1, 3) * scale
    return np.sqrt(((targets - origin) ** 2).sum(axis=1))

def Raddistance(pre, post, resolution=(4, 4, 40)):
    """Radial (x-z plane) distance from one reference cell to every cell in ``post``.

    Same as the Euclidean distance except that the y coordinate is ignored.

    Parameters
    ----------
    pre : pandas.DataFrame
        Frame whose first row holds the reference cell; its ``pt_position``
        entry is an (x, y, z) triple (list, tuple or array).
    post : pandas.DataFrame
        Frame with a ``pt_position`` column of (x, y, z) triples.
    resolution : sequence of 3 numbers, optional
        Scale factor applied to x, y and z before measuring. The default
        (4, 4, 40) is the scaling that used to be hard-coded here; ``distance``
        in this notebook treats the scaled result as nanometres.

    Returns
    -------
    numpy.ndarray
        1-D float array with one distance per row of ``post``; empty when
        ``post`` has no rows.
    """
    scale = np.asarray(resolution, dtype=float)
    # Build a numeric array BEFORE scaling: multiplying an object array that holds Python
    # lists by 4 repeats each list instead of scaling its coordinates.
    origin = np.asarray(pre.pt_position.iloc[0], dtype=float) * scale
    # reshape keeps the (n, 3) layout even when post is empty.
    targets = np.array(post.pt_position.tolist(), dtype=float).reshape(-1, 3) * scale
    # Keep only x and z (columns 0 and 2); y is deliberately left out.
    delta = (targets - origin)[:, [0, 2]]
    return np.sqrt((delta ** 2).sum(axis=1))

def distance(pre,post):
    """Distances from ``pre`` to every group of cells in ``post``.

    ``post`` is a sequence of frames (one per group). Returns two lists of
    arrays, ``(euclidean, radial)``, each with one entry per group, obtained
    from Eucdistance / Raddistance and divided by 1,000 (nm -> microns).
    """
    nm_per_micron = 1000.
    euclidean = [Eucdistance(pre, group) / nm_per_micron for group in post]
    radial = [Raddistance(pre, group) / nm_per_micron for group in post]
    return euclidean, radial

In [None]:
# Euclidean (d) and radial (r) distances from the starter cell `boy` to each group of
# post-synaptic partners, grouped by classification_system ...
slant_d_class,slant_r_class = distance(boy,post_slant_class)
som_d_class,som_r_class = distance(boy,post_som_class)
nuc_d_class,nuc_r_class = distance(boy,post_nuc_class)

# ... and grouped by cell_type.
slant_d_type,slant_r_type = distance(boy,post_slant_type)
som_d_type,som_r_type = distance(boy,post_som_type)
nuc_d_type,nuc_r_type = distance(boy,post_nuc_type)

In [None]:
# Share of partners per class. Index 0 / 1 are the first two groups in class_spitter's
# sorted label order; the messages assume these are excitatory and inhibitory.
# A third group ("unknown", index 2) is currently not included:
#   total += len(slant_d_class[2]); print("{0:.2%} are unknown.\n".format(len(slant_d_class[2])/total))
total = len(slant_d_class[0]) + len(slant_d_class[1])
print(f"{len(slant_d_class[0]) / total:.2%} of proofread post-synaptic partners are excitatory,")
print(f"{len(slant_d_class[1]) / total:.2%} are inhibitory.\n")

total_som = len(som_d_class[0]) + len(som_d_class[1])
print(f"{len(som_d_class[0]) / total_som:.2%} of un-proofread post-synaptic partners are excitatory,")
print(f"{len(som_d_class[1]) / total_som:.2%} are inhibitory. (from soma table)\n")

total_nuc = len(nuc_d_class[0]) + len(nuc_d_class[1])
print(f"{len(nuc_d_class[0]) / total_nuc:.2%} of un-proofread post-synaptic partners are excitatory,")
print(f"{len(nuc_d_class[1]) / total_nuc:.2%} are inhibitory. (from nucleus table)")

In [None]:
# Two stacked panels for the proofread partners: Euclidean distance on top, radial below.
fig, ax = plt.subplots(2,1)
fig.set_size_inches(18,10)

# (axes, per-class distances, (bins for class 0, bins for class 1), title)
panels = (
    (ax[0], slant_d_class, (200, 100), "Euclidean Distance to Post-Synaptic Partners (Proofread)"),
    (ax[1], slant_r_class, (40, 20), "Radial Distance to Post-Synaptic Partners (Proofread)"),
)
for panel, dists, (bins_exc, bins_inh), title in panels:
    panel.hist(dists[0], bins_exc, density=False, label='Excitatory', alpha=.7)
    panel.hist(dists[1], bins_inh, density=False, label='Inhibitory', alpha=.7)
    panel.set_title(title, fontsize=16)
    panel.set_xlabel(r'$\mu$m', fontsize=12)
    panel.legend()
    panel.grid()
    # Optional log scale for the lower panel:
    #   ax[1].set_ylabel("Log Frequency", fontsize=16); ax[1].set_yscale('log')
    panel.set_xlim(-10,600)
plt.show()

In [None]:
# Two stacked panels for the un-proofread partners (nucleus-derived set):
# top = class group 0 (titled excitatory), bottom = class group 1 (titled inhibitory).
fig, ax = plt.subplots(2,1)
fig.set_size_inches(18,10)

ax[0].hist(nuc_d_class[0], 100, label='Euclidean Distance', density=True, alpha=0.6)
ax[0].hist(nuc_r_class[0], 80, label='Radial Distance', density=True, alpha=0.6)
ax[0].set_title("Distance to Excitatory Post-Synaptic Partners (Un-Proofread)", fontsize=20)
ax[0].set_xlabel(r'$\mu$m', fontsize=14)
ax[0].legend()
ax[0].grid()

# Legend label spelling fixed ("Euchlidean" -> "Euclidean") so both panels agree.
ax[1].hist(nuc_d_class[1], 80, label='Euclidean Distance', density=True, alpha=0.6)
ax[1].hist(nuc_r_class[1], 40, label='Radial Distance', density=True, alpha=0.6)
ax[1].set_title("Distance to Inhibitory Post-Synaptic Partners (Un-Proofread)", fontsize=20)
ax[1].set_xlabel(r'$\mu$m', fontsize=14)
ax[1].legend()
ax[1].grid()


#ax.set_ylabel("Log Frequency", fontsize=16)
#plt.yscale('log')

ax[0].set_xlim(-10,600)
ax[1].set_xlim(-10,600)
plt.show()

In [None]:
# One panel per post-synaptic cell type (nucleus-derived set), on a 5 x 2 grid.
fig, ax = plt.subplots(5,2)
fig.set_size_inches(23,18)

nuc_types = np.unique(post_nuclei.cell_type)
n_rows, n_cols = ax.shape
# Never index past the groups that actually exist (the loop used to assume exactly 10).
n_groups = min(len(nuc_d_type), len(nuc_types))
for i in range(n_rows * n_cols):
    # Column-major placement: groups 0-4 fill the left column, 5-9 the right one.
    panel = ax[i % n_rows, i // n_rows]
    if i >= n_groups:
        # No cell type for this slot: hide the empty axes.
        panel.set_visible(False)
        continue
    # Group 0 uses ~30 cells per bin, every other group ~5 (same bin widths as before).
    cells_per_bin = 30 if i == 0 else 5
    bins = max(1, int(np.ceil(len(nuc_d_type[i]) / cells_per_bin)))
    panel.hist(nuc_d_type[i], bins=bins, density=True, label='Euclidean Distance', alpha=0.6)
    panel.hist(nuc_r_type[i], bins=bins, density=True, label='Radial Distance', alpha=0.6)
    panel.set_title(nuc_types[i], fontsize=18)
    panel.grid()
    panel.legend()
    panel.set_xlim(-10,600)

fig.tight_layout()
plt.show()