In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import os
import time
from scipy.spatial import distance


In [2]:
# Given a neuron node and the position coordinates of its terminal endpoints (branch points with Strahler number 1),
# and using the threshold distance stored in the neuron attributes CSV file, this separates the terminal endpoints
# into axon terminals and dendritic endpoints.

def get_endpoints(node, df_neuron_attr, fpath):
    """Split a neuron's endpoints into dendritic endpoints and axon terminals.

    The neuron's table is read as CSV from ``fpath + "<node>.swc"`` and must
    provide the columns ``node_id``, ``x``, ``y``, ``z`` and ``strah_num``.

    Parameters
    ----------
    node : neuron identifier; matched against ``df_neuron_attr.id`` and used
        to build the file name.
    df_neuron_attr : pandas.DataFrame with columns ``id``, ``soma`` (node id
        of the soma, NaN when the neuron has none) and ``thresh_val``
        (distance threshold, in the same units as the coordinates).
    fpath : str, directory prefix; concatenated as-is, so it must end with a
        path separator.

    Returns
    -------
    (dendrite_xyz, axon_xyz) : tuple of two ``(k, 3)`` coordinate arrays.
        A category with no points is returned as ``np.zeros((1, 3))``, which
        callers treat as a "nothing here" placeholder.

    Raises
    ------
    IndexError
        If ``node`` is absent from ``df_neuron_attr`` or the soma id is not
        present in the neuron's table.
    """
    coord_cols = ["x", "y", "z"]
    df = pd.read_csv(fpath + "%s.swc" % str(node))

    soma_id = df_neuron_attr.loc[df_neuron_attr.id == node, "soma"].tolist()[0]

    if np.isnan(soma_id):
        # Without a soma there is no reference point for the distance split:
        # every point in the file is reported as an axon point.
        # NOTE(review): unlike the soma branch below, this does not restrict
        # the points to strah_num == 1 -- confirm that this is intended.
        dendrite_xyz = np.zeros((1, 3))
        axon_xyz = np.array(df.loc[:, coord_cols])
        if len(axon_xyz) == 0:
            axon_xyz = np.zeros((1, 3))
        return (dendrite_xyz, axon_xyz)

    soma_id = int(soma_id)
    soma_index = df[df.node_id == soma_id].index[0]
    # cdist needs 2-D input, hence the extra list level: shape (1, 3).
    soma_xyz = np.array([df.loc[soma_index, coord_cols].tolist()])

    # Keep only terminal endpoints (Strahler number 1), excluding the soma.
    df = df.drop(soma_index)
    df = df[df.strah_num == 1]
    neurites_xyz = np.array(df.loc[:, coord_cols])

    thresh_val = df_neuron_attr[df_neuron_attr.id == node]['thresh_val'].tolist()[0]

    # Endpoints within thresh_val of the soma are dendritic, the rest axonal.
    dist = distance.cdist(soma_xyz, neurites_xyz, 'euclidean')[0]
    dendrite_xyz = neurites_xyz[dist <= thresh_val]
    axon_xyz = neurites_xyz[dist > thresh_val]

    if len(dendrite_xyz) == 0:
        dendrite_xyz = np.zeros((1, 3))
    if len(axon_xyz) == 0:
        axon_xyz = np.zeros((1, 3))
    return (dendrite_xyz, axon_xyz)

In [3]:
# Consider a connection to exist between an axon terminal of neuron 1 and a dendritic endpoint of neuron 2
# only if the Euclidean distance between the two is less than or equal to the proximity range (say 5 um).
def count_connections(axon_xyz1, dendrite_xyz2, synaptic_cleft):
    """Count axon/dendrite point pairs lying within ``synaptic_cleft``.

    Parameters
    ----------
    axon_xyz1 : (m, 3) array of axon terminal coordinates of the first neuron.
    dendrite_xyz2 : (n, 3) array of dendritic endpoint coordinates of the
        second neuron.
    synaptic_cleft : maximum distance (same units as the coordinates) for a
        pair to count as a connection.

    Returns
    -------
    int or str
        The number of pairs whose distance is ``<= synaptic_cleft``, or the
        string ``"NA"`` when there are none or when either input is entirely
        zero (the all-zero array is the "no points" placeholder; an input
        made up only of points at the origin is treated the same way).
    """
    # An all-zero array on either side means there is nothing to connect.
    if np.all(axon_xyz1 == 0):
        return "NA"
    if np.all(dendrite_xyz2 == 0):
        return "NA"

    pairwise = distance.cdist(axon_xyz1, dendrite_xyz2)
    n_close = np.count_nonzero(pairwise <= synaptic_cleft)
    return n_close if n_close != 0 else "NA"

In [4]:
# parallelise using multiprocess to store edges in a text file.

import multiprocess
from functools import partial

def write_line_temp(i, pr, nodes, df_neuron_attr, path, new_swc_dir, store_dir):
    """Append the directed edges between ``nodes[i]`` and every later node.

    For each ``j > i`` both directions are evaluated with
    ``count_connections``: axon terminals of one neuron against dendritic
    endpoints of the other. Every direction with at least one connection is
    appended to ``store_dir + "network_%.1fum.txt" % pr`` as a tab-separated
    line ``source<TAB>target<TAB>weight``.

    Parameters
    ----------
    i : index into ``nodes`` of the first neuron of each pair.
    pr : proximity range; converted to coordinate units via ``pr*1000/8``.
    nodes : sequence of neuron identifiers.
    df_neuron_attr : neuron attribute table forwarded to ``get_endpoints``.
    path, new_swc_dir : concatenated to form the directory prefix of the
        per-neuron files read by ``get_endpoints``.
    store_dir : directory prefix of the output edge-list file.

    Returns
    -------
    None. The only effect is the lines appended to the output file.
    """
    node1 = nodes[i]
    # 1000/8 is a factor used to keep the um (micrometer) dimension.
    # NOTE(review): presumably the coordinates are in 8 nm units -- confirm.
    synaptic_cleft = pr*1000/8

    partner_indices = range(i + 1, len(nodes))
    if len(partner_indices) == 0:
        # Last node: no pairs left, so do not touch any file.
        return

    swc_path = path + new_swc_dir
    out_file = store_dir + "network_%.1fum.txt" % pr

    # The endpoints of node1 are the same for every partner: load them once
    # instead of re-reading the file on each iteration.
    dendrite_xyz1, axon_xyz1 = get_endpoints(node1, df_neuron_attr, swc_path)

    for j in partner_indices:
        node2 = nodes[j]
        dendrite_xyz2, axon_xyz2 = get_endpoints(node2, df_neuron_attr, swc_path)

        # (source, target, axon terminals of source, dendritic endpoints of target)
        directed_pairs = (
            (node1, node2, axon_xyz1, dendrite_xyz2),
            (node2, node1, axon_xyz2, dendrite_xyz1),
        )
        for source, target, axon_xyz, dendrite_xyz in directed_pairs:
            weight = count_connections(axon_xyz, dendrite_xyz, synaptic_cleft)
            if weight != "NA":
                line = str(source) + "\t" + str(target) + "\t" + str(weight) + "\n"
                # Open per line so each edge is flushed on its own; several
                # worker processes append to this file concurrently.
                with open(out_file, "a") as file:
                    file.write(line)



In [5]:
directory = './module_8/'
# Create the output directory if it is missing. exist_ok=True avoids the
# check-then-create race and keeps the cell safe to re-run.
os.makedirs(directory, exist_ok=True)


In [6]:
%%time
# Store the reconstructed subnetworks corresponding to communities from EM Drosophila,
# built using Strahler numbering, threshold distance and proximity range.
# One edge-list file is written per proximity range in the loop below (0.2 um to 1.2 um).

# corresponds to module 8: input files are indexed by j, the printed label is j+1
j=7
print("Comm %d"%(j+1))

# None of these inputs depends on the proximity range, so load them once.
nodes = pd.read_csv("./EM_communities/averageSubComm%d_drosophilia.csv"%j).Node.to_list()
path = "./janelia_Comm%d/"%j
new_swc_dir = "janelia dataset with strahler lastcolumn/"
df_neuron_attr=pd.read_csv(path+"neu_attrJanelia_updated.csv")
i_list = list(range(len(nodes)))

for pr in [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]:
    print("prox range: %.1f um"%pr)
    start = time.time()

    # The workers append to this file, so a leftover file from a previous
    # run would end up with every edge duplicated. Start from a clean file.
    out_file = directory + "network_%.1fum.txt" % pr
    if os.path.exists(out_file):
        os.remove(out_file)

    write_line=partial(write_line_temp, pr = pr, nodes = nodes, \
                   df_neuron_attr=df_neuron_attr, path=path, \
                       new_swc_dir=new_swc_dir, store_dir=directory)

    # The context manager tears the workers down even if map() raises;
    # close()/join() keep the graceful shutdown on the normal path.
    with multiprocess.Pool() as pool:
        pool.map(write_line, i_list)
        pool.close()
        pool.join()

    end = time.time()
    print("PR %.1fum, Total time: %.2f hrs\n"%(pr,(end-start)/3600) )
    print("\n")

Comm 8
prox range: 0.2 um
PR 0.2um, Total time: 0.23 hrs



prox range: 0.4 um
PR 0.4um, Total time: 0.24 hrs



prox range: 0.6 um
PR 0.6um, Total time: 0.28 hrs



prox range: 0.8 um
PR 0.8um, Total time: 0.31 hrs



prox range: 1.0 um
PR 1.0um, Total time: 0.32 hrs



prox range: 1.2 um
PR 1.2um, Total time: 0.32 hrs



CPU times: user 1.23 s, sys: 283 ms, total: 1.51 s
Wall time: 1h 42min 7s
