In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
from sklearn.preprocessing import normalize

In [2]:
import fanc
client = fanc.get_caveclient()


In [3]:
client

<caveclient.frameworkclient.CAVEclientFull at 0x30a2ce1d0>

In [4]:
version_number=client.materialize.version
version_number

1036

In [5]:
#available version
#client.materialize.get_versions()

In [6]:
#confirm version_number
#version_number = 1034

In [7]:
#confirm which date version is used.
#date = '2024-08-15'

In [8]:
date=datetime.now().date()
date

datetime.date(2024, 8, 17)

In [9]:
def post_neurons_2(neuron_a, sc_limit):
    """
    Find downstream neurons of the neuron_a, which has more than 'sc_limit' synapses with neuron_a.

        Args: 
            neuron_a (int) : segment ID # of a neuron of interest.
            sc_limit (int) : minimum synaps count for cutting off.

        Return: a table of downstream neurons and synaps counts as DataFrame."""
            
    # Get synapse position table of downstream neurons of neuron_a. Each row represents one synaps position.
    post_df = client.materialize.synapse_query(pre_ids = neuron_a, materialization_version=version_number)
    if len(post_df) == 0:
        print('Check if ID# is current or No downstream neurons of ' + str(neuron_a))
    
    # Choose oly required columns.
    post_df2 = post_df[['id','pre_pt_root_id', 'post_pt_root_id']]

    # Data type of id#s will be changed from 'int' to 'str' to avoid any problem caused by transformation etc.
    post_df2[['id','pre_pt_root_id','post_pt_root_id' ]] = post_df2[['id','pre_pt_root_id','post_pt_root_id' ]].astype('str')

    # Transform # of rows to synapse count. Then add these numbers to a new column 'synaps_count' and sort them as descending.
    synaps_count = post_df2.groupby('post_pt_root_id').transform(len)['id']
    post_df2['synaps_count'] = synaps_count
    post_df3 = post_df2.sort_values('synaps_count', ascending=False)

    # Make one row represent one downstream neuron. Clean the table by dropping unnecessary columns. \
    # Cut off neurons which have syanpse count fewer than 'sc_limit'.
    post_df4 = post_df3.drop_duplicates(subset='post_pt_root_id', keep='first')
    post_df5 = post_df4.loc[post_df4['synaps_count'] >= sc_limit].drop (columns = 'id')
    
    return post_df5
    

In [10]:
def pre_neurons(neuron_a, sc_limit):
    """
    Find upstream neurons of the neuron_a, which has more than 'sc_limit' synapses with neuron_a.

        Args: 
            neuron_a (int) : segment ID # of a neuron of interest.
            sc_limit (int) : minimum synaps count for cutting off.

        Return: a table of upstream neurons and synaps counts as DataFrame."""
            
    # Get synapse position table of upstream neurons of neuron_a. Each row represents one synaps position.
    pre_df = client.materialize.synapse_query(post_ids = neuron_a, materialization_version=version_number)
    if len(pre_df) == 0:
        print('Check if ID# is current or No upstream neurons of ' + str(neuron_a))
    
    # Choose oly required columns.
    pre_df2 = pre_df[['id','pre_pt_root_id', 'post_pt_root_id']]

    # Data type of id#s will be changed from 'int' to 'str' to avoid any problem caused by transformation etc.
    pre_df2[['id','pre_pt_root_id','post_pt_root_id' ]] = pre_df2[['id','pre_pt_root_id','post_pt_root_id' ]].astype('str')

    # Transform # of rows to synapse count. Then add these numbers to a new column 'synaps_count' and sort them as descending.
    synaps_count = pre_df2.groupby('pre_pt_root_id').transform(len)['id']
    pre_df2['synaps_count'] = synaps_count
    pre_df3 = pre_df2.sort_values('synaps_count', ascending=False)

    # Make one row represent one downstream neuron. Clean the table by dropping unnecessary columns. \
    # Cut off neurons which have syanpse count fewer than 'sc_limit'.
    pre_df4 = pre_df3.drop_duplicates(subset='pre_pt_root_id', keep='first')
    pre_df5 = pre_df4.loc[pre_df4['synaps_count'] >= sc_limit].drop (columns = 'id')
    
    return pre_df5
    

In [11]:
# set up threashold of synapse count. Only more than this s.c connection is considered to be meaningful connection.
sc_limit = 10

In [12]:
neuron_1 =  648518346490605578
#IN_011_left

In [13]:
# Find downstream neurons of neuron_1 using function 'post_neurons_2'.
neuron_1_post_df = post_neurons_2(neuron_1,sc_limit )
neuron_1_post_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_df2[['id','pre_pt_root_id','post_pt_root_id' ]] = post_df2[['id','pre_pt_root_id','post_pt_root_id' ]].astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_df2['synaps_count'] = synaps_count


Unnamed: 0,pre_pt_root_id,post_pt_root_id,synaps_count
410,648518346490605578,648518346507449887,193
8606,648518346490605578,648518346517536554,120
11254,648518346490605578,648518346514740167,114
10354,648518346490605578,648518346495874250,105
564,648518346490605578,648518346475521848,94
...,...,...,...
93,648518346490605578,648518346489732460,10
14783,648518346490605578,648518346491712454,10
5891,648518346490605578,648518346502051434,10
10199,648518346490605578,648518346481310095,10


In [14]:
#Find downstream neurons of neuron_1 using function 'pre_neurons'.
neuron_1_pre_df= pre_neurons(neuron_1,sc_limit)
neuron_1_pre_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_df2[['id','pre_pt_root_id','post_pt_root_id' ]] = pre_df2[['id','pre_pt_root_id','post_pt_root_id' ]].astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pre_df2['synaps_count'] = synaps_count


Unnamed: 0,pre_pt_root_id,post_pt_root_id,synaps_count
1666,648518346493712270,648518346490605578,382
929,648518346512848030,648518346490605578,88
1723,648518346488457609,648518346490605578,70
2800,648518346490249143,648518346490605578,70
1257,648518346486928865,648518346490605578,68
...,...,...,...
2937,648518346504880243,648518346490605578,10
1678,648518346472231218,648518346490605578,10
2297,648518346496495445,648518346490605578,10
1836,648518346493383154,648518346490605578,10


In [15]:
# select downstream neurons of neuron_1 , which connects with synaps_cout >= 100.
neuron_1_post_sc100_df = neuron_1_post_df.loc[neuron_1_post_df['synaps_count'] >= 100]
neuron_1_post_sc100_df

Unnamed: 0,pre_pt_root_id,post_pt_root_id,synaps_count
410,648518346490605578,648518346507449887,193
8606,648518346490605578,648518346517536554,120
11254,648518346490605578,648518346514740167,114
10354,648518346490605578,648518346495874250,105


In [16]:
# convert pandas Series to "array" format to be able to use for looping.
post_segids = neuron_1_post_sc100_df['post_pt_root_id'].astype(np.int64).array
post_segids

<PandasArray>
[648518346507449887, 648518346517536554, 648518346514740167,
 648518346495874250]
Length: 4, dtype: int64

In [57]:
# find downstream neurons in above "array" ('648518346507449887, 648518346517536554, 648518346514740167 etc) automatically by "for loop".
#download downstream neurons dataframe automatically as "csv" in your "Downloads" folder as name 'Downstream Neurons of".....
for neuron in post_segids: 
    df = post_neurons_2(neuron, sc_limit)
    name = '~/Downloads/Downstream Neurons of'+ '_segid='+str(neuron)+'_sc>='+str(sc_limit)+'_v#='+ str(version_number)+'_'\
    + str(date)+'.csv'
    df.to_csv(name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_df2[['id','pre_pt_root_id','post_pt_root_id' ]] = post_df2[['id','pre_pt_root_id','post_pt_root_id' ]].astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_df2['synaps_count'] = synaps_count
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  post_df2[['id','pre_pt_root_id','post_pt_ro