In [None]:
import os

import navis
import navis.interfaces.neuprint as neu

import seaborn as sns
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np


NC = neu.NeuronCriteria

# SECURITY: the neuPrint auth token must never be stored in the notebook.
# It is read from the NEUPRINT_APPLICATION_CREDENTIALS environment variable
# instead. Any token that was ever committed alongside this notebook should be
# treated as leaked and regenerated from the neuPrint account page.
neuprint_token = os.environ.get('NEUPRINT_APPLICATION_CREDENTIALS')
if not neuprint_token:
    raise RuntimeError(
        "Set the NEUPRINT_APPLICATION_CREDENTIALS environment variable to your "
        "neuPrint auth token before running this notebook."
    )

# Client for the hemibrain v1.1 dataset on the public neuPrint server
client = neu.Client('https://neuprint.janelia.org',
                    token=neuprint_token,
                    dataset='hemibrain:v1.1')



In [None]:

# Two neuPrint queries: one with the min_post=1 criterion, one with min_pre=1.
# Each returns a per-neuron metadata table and a per-ROI table.
all_meta1, roi1 = neu.fetch_neurons(NC(min_post=1))
all_meta2, roi2 = neu.fetch_neurons(NC(min_pre=1))

# Stack the results of both queries and keep a single row per
# (bodyId, roi) pair / per bodyId respectively.
all_roi = pd.concat([roi1, roi2], axis=0)
all_roi = all_roi.drop_duplicates(subset=['bodyId', 'roi'])

meta = pd.concat([all_meta1, all_meta2], axis=0)
meta = meta.drop_duplicates(subset='bodyId')

# Keep a copy of both tables on disk (written to the working directory)
all_roi.to_csv("all_roi.csv")
meta.to_csv("meta.csv")
print("Number of neurons: ", len(meta))


In [None]:
# Directory that holds the exported connectivity tables. Set the MBEM_EDGES_DIR
# environment variable to point somewhere else; when it is unset the original
# location is used.
export = os.environ.get('MBEM_EDGES_DIR', '/Users/jacklindsey/MBEM_navis/edges')

# A neuron is kept when it has a type annotation or a (roughly) traced status
has_type = ~meta['type'].isnull()
is_traced = meta.statusLabel.isin(['Roughly traced', 'Traced'])

# Select the rows once and reuse them for both the query criteria and the count
traced_meta = meta[has_type | is_traced]
criteria = NC(bodyId=traced_meta.bodyId)

print("Number of traced neurons: ", traced_meta.shape[0])

In [None]:
# To (re)download the connectivity data into `export`, uncomment the next line
#_, edges = neu.fetch_adjacencies(criteria, criteria, include_nonprimary=False, export_dir=export, batch_size=200)

# Load the connectivity table from the files written by a previous download
edges_dir = os.path.expanduser(export)
edges = pd.read_csv(f'{edges_dir}/total-connections.csv')

In [None]:
# The row count of `edges` is the number of connections; the synapse total is
# the sum of the `weight` column (both are reported in the summary cell below).
print("Number of connections: ", edges.shape[0])

In [None]:
# Total synaptic input onto each postsynaptic neuron, largest first
num_inputs = (
    edges.groupby('bodyId_post')['weight']
    .sum()
    .sort_values(ascending=False)
)

# Normalized synaptic strength: every connection's weight expressed as a
# fraction of all input synapses received by its postsynaptic neuron
edges['weight_normalized'] = edges['weight'].div(edges['bodyId_post'].map(num_inputs))


In [None]:
# Summary of the connectivity table: distinct body IDs on either end of a
# connection, number of rows, and summed synapse weights
n_unique = np.unique(edges[['bodyId_pre', 'bodyId_post']].to_numpy().ravel()).size
print(n_unique, 'unique neurons')
print(len(edges), 'connections')
print(edges['weight'].sum(), 'synapses')

In [None]:
# Boolean masks over `meta` selected by type-name prefix.
# na=False makes neurons without a type evaluate to False directly; passing a
# non-boolean `na` (such as '') is deprecated in recent pandas versions.
is_pam = meta.type.str.startswith('PAM', na=False).astype(bool)
is_ppl = meta.type.str.startswith('PPL', na=False).astype(bool)

# `dans`: every neuron whose type starts with PAM or PPL
dans = meta[is_pam | is_ppl]

# `mbons`: every neuron whose type starts with MBON
is_mbon = meta.type.str.startswith('MBON', na=False).astype(bool)
mbons = meta[is_mbon]

In [None]:
# Sorted array of every body ID that appears on either end of a connection
unique_neurons = np.unique(edges[['bodyId_pre', 'bodyId_post']].to_numpy().ravel())

In [None]:
# Uncomment (i.e. remove the surrounding triple quotes) to save out the files
# needed for the Matrix_alg code. The disabled code builds a dense N x N matrix
# W of normalized weights, indexed [pre, post] by position in `unique_neurons`,
# and saves W, the neuron IDs, the MBON/DAN IDs and the per-neuron types as .npy
# files in the working directory.
# NOTE: while quoted, this cell is only a string expression and runs none of it.
# NOTE: the np.where lookups scan `unique_neurons` once per edge, so the export
# is slow on the full graph.
'''
unique_neurons = np.unique(edges[['bodyId_pre', 'bodyId_post']].values.flatten())
N = len(unique_neurons)
W = np.zeros([N, N])
for e in range(len(edges)):
    #print(e, len(edges))
    n1 = np.where(unique_neurons==edges.bodyId_pre.values[e])[0][0]
    n2 = np.where(unique_neurons==edges.bodyId_post.values[e])[0][0]
    W[n1, n2] = edges.weight_normalized.values[e]
    
np.save("W.npy", W)
np.save("unique_neurons.npy", unique_neurons)
np.save("mbon_ids.npy", mbons.bodyId.values)
np.save("dan_ids.npy", dans.bodyId.values)
unique_types = []
for n in unique_neurons:
    unique_types.append((meta.type.values[np.where(meta.bodyId.values==n)[0]][0]))
np.save("unique_neurons_types.npy", unique_types)
'''

In [None]:

#Subsample graphs with specified number of nodes for scaling analyses
N_options = np.logspace(8, 12, 5, base=2).astype(int)

# Fixed seed so the subsampled graphs (and the timings measured on them) can be
# regenerated; change the seed to draw different subsamples.
subsample_rng = np.random.default_rng(0)

# Sorted pool of every node in the full graph. It does not depend on N, so it
# is built once instead of on every iteration.
all_nodes = np.unique(np.concatenate([edges['bodyId_pre'].values, edges['bodyId_post'].values]))

# The MBON and DAN body IDs are added to every subsample on top of the N
# randomly drawn nodes, so each subgraph can hold more than N nodes.
required_nodes = np.concatenate([mbons.bodyId.values, dans.bodyId.values])

edge_subsets = []

for N in N_options:
    sampled_nodes = subsample_rng.choice(all_nodes, size=N, replace=False)
    node_subset = np.concatenate([sampled_nodes, required_nodes])

    # Keep only connections whose two endpoints are both inside the subsample
    edge_mask = (np.isin(edges['bodyId_pre'].values, node_subset)
                 & np.isin(edges['bodyId_post'].values, node_subset))
    # .copy() so that adding a column below writes to an independent frame and
    # not to a slice of `edges` (avoids SettingWithCopyWarning)
    edge_subset = edges[edge_mask].copy()

    # Re-normalize weights within the subgraph. A separate name is used so the
    # full-graph `num_inputs` computed earlier is not overwritten.
    subset_inputs = edge_subset.groupby('bodyId_post').weight.sum()
    edge_subset['weight_normalized'] = edge_subset.weight / edge_subset.bodyId_post.map(subset_inputs)
    print(len(edge_subset))

    edge_subsets.append(edge_subset)
    

In [None]:
#Scaling analysis -- runtime of sampling algorithm as function of number of nodes in graph

%load_ext autoreload
%autoreload 2
import sampling_utils
import importlib
importlib.reload(sampling_utils)
import time

n_proc_options = [1, 2, 4, 8]

time_results = np.zeros([len(edge_subsets), len(n_proc_options)])
ii = -1
for edge_subset in edge_subsets:
    ii += 1
    jj = -1
    
    #vary number of processes in parallel implementation
    for n_proc in n_proc_options:
        jj += 1
        start_time = time.monotonic()

        model_edges_dan = edge_subset[['bodyId_pre', 'bodyId_post', 'weight_normalized']].rename({'bodyId_pre': 'target',
                                                                                   'bodyId_post': 'source'}, axis=1)


        paths_model_dan = sampling_utils.TraversalModel(edges=model_edges_dan,
                             seeds=dans.bodyId.values,
                             terminals=mbons.bodyId.values,
                             weights='weight_normalized',
                             max_steps=10)

        paths_model_dan.run_parallel(iterations=1000, n_cores=n_proc)
        end_time = time.monotonic()
        total_time = end_time - start_time
        print(ii, jj, total_time)
        time_results[ii, jj] = total_time

In [None]:
#Scaling analysis plotting code

def _plot_log2_runtimes(curves, x_labels, x_label, legend_labels, legend_title, out_path):
    """Plot runtime curves on a log2 y-axis, save the figure and show it.

    Parameters
    ----------
    curves : iterable of 1-D arrays
        One array of runtimes in seconds per plotted line.
    x_labels : sequence
        Tick labels for positions 0..len(x_labels)-1 along the x-axis.
    x_label : str
        x-axis label.
    legend_labels : sequence
        One legend entry per curve, in plotting order.
    legend_title : str
        Title of the legend.
    out_path : str
        File the figure is saved to.
    """
    for curve in curves:
        plt.plot(np.log2(curve))

    plt.xticks(range(len(x_labels)), x_labels, fontsize=14)
    plt.xlabel(x_label, fontsize=14)
    # Data are plotted as log2(seconds); label ticks 3..7 with the matching
    # runtimes 8..128 s.
    yticks = range(3, 8)
    plt.yticks(yticks, np.power(2, yticks).astype(int), fontsize=14)
    plt.ylabel("Runtime (s)", fontsize=14)
    plt.legend(legend_labels, title=legend_title, title_fontsize=14)
    plt.tight_layout()
    plt.savefig(out_path)
    plt.show()


# Runtime vs. graph size: one curve per process count (columns of time_results)
_plot_log2_runtimes(time_results.T, N_options, "Graph size\n(# nodes)",
                    n_proc_options, "# processes", "ScalingFig1.pdf")

# Runtime vs. process count: one curve per graph size (rows of time_results)
_plot_log2_runtimes(time_results, n_proc_options, "Number of processes",
                    N_options, "Graph size\n(# nodes)", "ScalingFig2.pdf")

In [None]:
#Run sampling algorithm on full graph

%load_ext autoreload
%autoreload 2
import sampling_utils
import importlib
importlib.reload(sampling_utils)


model_edges_dan = edges[['bodyId_pre', 'bodyId_post', 'weight_normalized']].rename({'bodyId_pre': 'target',
                                                                               'bodyId_post': 'source'}, axis=1)


paths_model_dan = sampling_utils.TraversalModel(edges=model_edges_dan,
                     seeds=dans.bodyId.values,
                     terminals=mbons.bodyId.values,
                     weights='weight_normalized',
                     max_steps=10)

# Run model with 1000 iterations using num processors = num cores
paths_model_dan.run_parallel(iterations=1000, n_cores=os.cpu_count())


In [None]:
import numpy as np

k_max = 10

# Number of entries (nodes) in every sampled path
lengths = np.array([len(res) for res in paths_model_dan.results])

# fractional_contributions[k]: share of sampled paths with exactly k entries
fractional_contributions = [(lengths == k).mean() for k in range(k_max+1)]

# Bars are drawn for path lengths 2..k_max and labelled with length - 1, i.e.
# the tick under the bar for k entries reads k-1 steps.
bar_positions = range(2, k_max+1)
plt.bar(bar_positions, fractional_contributions[2:], color="tab:blue")
plt.xticks(bar_positions, np.arange(1, k_max))
plt.xlabel("Number of steps", fontsize=16)
plt.ylabel("Fractional contribution\nto interaction", fontsize=16)

plt.tight_layout()
plt.savefig("Sampling_alg_steps_contribution.pdf")
plt.show()

In [None]:
#Code for aggregating contribution of different cell types to paths of different lengths k between sources and targets

import re

# bodyId -> type lookup, built once so that each node costs a dictionary access
# instead of a scan over all of `meta`. drop_duplicates keeps the first row per
# bodyId, matching the previous first-match lookup.
type_by_body = meta.drop_duplicates('bodyId').set_index('bodyId')['type'].to_dict()

# type_count[k]: coarse types of the intermediate nodes on all paths of length k
type_count = {k: [] for k in range(11)}

# Same coarse types pooled over all path lengths
type_count_overall = []

for path in paths_model_dan.results:
    path_length = len(path)
    # Only intermediate nodes are counted; the first and last entries of each
    # path are skipped.
    for node in path[1:-1]:
        typ = type_by_body[node]
        # A missing type may be stored as None or as NaN; skip either
        if pd.isnull(typ):
            continue
        # Coarse type = the upper-case letters of the type name, in order
        coarse_typ = "".join(ch for ch in typ if ch.isalpha() and ch.isupper())
        # setdefault tolerates path lengths outside the pre-filled 0..10 range
        type_count.setdefault(path_length, []).append(coarse_typ)

        type_count_overall.append(coarse_typ)

In [None]:
#Plot centrality analysis

from collections import Counter, OrderedDict

k = 3  # NOTE(review): not read anywhere in this cell -- confirm whether it is still needed

# Occurrences of each coarse type among intermediate path nodes, most common
# first, converted to a fraction of all counted nodes
counts = OrderedDict(Counter(type_count_overall).most_common())
df = pd.DataFrame.from_dict(counts, orient='index')
df[0] = df[0].values / np.sum(df[0].values)

# Keep the 20 most frequent coarse types
df = df[:20]

# Create the figure explicitly and pass its axes to pandas, so the bars are
# drawn on this figure and no additional empty figure is opened.
fig, ax = plt.subplots(figsize=(10, 5))
df.plot(kind='bar', ax=ax, legend=False)

ax.set_ylabel("Centrality", fontsize=16)
ax.set_xlabel("Cell type", fontsize=16)
fig.tight_layout()
fig.savefig("Centrality_fig_sampling_alg.pdf")

plt.show()
