In [None]:
"""
Purpose: To look at the initial stats of the 
graph from autoproofreading


"""

In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
# Make the meshAfterParty helper modules importable from this notebook.
# Each directory is only added when missing, so re-running the cell does not
# keep growing sys.path with duplicate entries.
import sys

for _module_dir in (
    "/meshAfterParty/meshAfterParty",
    "/meshAfterParty/meshAfterParty/graph/",
):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

In [6]:
# Configure the database connection through the project helper. `minnie` is
# used further down to reach the SynapseProofread table.
import datajoint_utils as du
minnie,schema = du.configure_minnie_vm()

INFO - 2021-02-26 04:19:29,433 - settings - Setting database.host to at-database.ad.bcm.edu
INFO - 2021-02-26 04:19:29,435 - settings - Setting database.user to celiib
INFO - 2021-02-26 04:19:29,436 - settings - Setting database.password to <REDACTED>
INFO - 2021-02-26 04:19:29,441 - settings - Setting stores to {'minnie65': {'protocol': 'file', 'location': '/mnt/dj-stor01/platinum/minnie65', 'stage': '/mnt/dj-stor01/platinum/minnie65'}, 'meshes': {'protocol': 'file', 'location': '/mnt/dj-stor01/platinum/minnie65/02/meshes', 'stage': '/mnt/dj-stor01/platinum/minnie65/02/meshes'}, 'decimated_meshes': {'protocol': 'file', 'location': '/mnt/dj-stor01/platinum/minnie65/02/decimated_meshes', 'stage': '/mnt/dj-stor01/platinum/minnie65/02/decimated_meshes'}, 'skeletons': {'protocol': 'file', 'location': '/mnt/dj-stor01/platinum/minnie65/02/skeletons'}}
INFO - 2021-02-26 04:19:29,442 - settings - Setting enable_python_native_blobs to True
INFO - 2021-02-26 04:19:29,460 - connection - Connect

Connecting celiib@at-database.ad.bcm.edu:3306


INFO - 2021-02-26 04:19:29,886 - settings - Setting enable_python_native_blobs to True
INFO - 2021-02-26 04:19:29,920 - settings - Setting enable_python_native_blobs to True
INFO - 2021-02-26 04:19:30,365 - settings - Setting enable_python_native_blobs to True


In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import networkx_utils as xu

# Getting the Statistics on the Initial Graph

In [8]:
# Pull every (presyn, postsyn) pair from the NucleusSynapse table, leaving out
# rows whose two endpoints carry the same id.
nucleus_synapse_pairs = du.m65mat.NucleusSynapse() & "presyn != postsyn"
orig_presyn, orig_postsyn = nucleus_synapse_pairs.fetch("presyn", "postsyn")

In [9]:
# One row per synapse: column 0 is the presyn id, column 1 the postsyn id.
original_edges = np.stack((orig_presyn, orig_postsyn), axis=1)
original_edges

array([[864691136056368856, 864691135462260637],
       [864691135593498923, 864691136723541757],
       [864691136951642335, 864691136816114148],
       ...,
       [864691135341031877, 864691136784133998],
       [864691136286708675, 864691135065040964],
       [864691135497618195, 864691135065040964]])

In [10]:
# Directed multigraph: every synapse row becomes its own edge, so repeated
# (presyn, postsyn) pairs are kept as parallel edges.
orig_G = nx.MultiDiGraph()
orig_G.add_edges_from(original_edges)
print(orig_G.number_of_nodes(), orig_G.number_of_edges())

# Simple undirected copy: direction and parallel edges are collapsed.
G_undirected_orig = nx.Graph(orig_G)
print(G_undirected_orig.number_of_nodes(), G_undirected_orig.number_of_edges())

91565 17133898
91565 11482430


In [None]:
# Simple *directed* copy of the original graph (parallel edges merged,
# direction kept), despite the "undirected" in the variable name.
DiG_undirected_orig = nx.DiGraph(orig_G)
print(DiG_undirected_orig.number_of_nodes(), DiG_undirected_orig.number_of_edges())

In [None]:
# Degree of every node of the original multigraph, then keep only the values
# strictly below the 99.5th percentile.
all_original_nodes = list(orig_G.nodes())
degree_distribution = np.array(xu.get_node_degree(orig_G, all_original_nodes))
degree_cutoff = np.percentile(degree_distribution, 99.5)
degree_distribution_filtered = degree_distribution[degree_distribution < degree_cutoff]

In [None]:
# (mean, median) of the trimmed degree distribution
(
    np.mean(degree_distribution_filtered),
    np.median(degree_distribution_filtered),
)

In [None]:
# Connected components are not defined for a directed graph, so the
# undirected copy of the original graph is used here.

# Node count of every connected component
conn_comp = [component for component in nx.connected_components(G_undirected_orig)]
conn_comp_size = list(map(len, conn_comp))
print(f"conn_comp_size = {conn_comp_size}")

# Pulling Down and Calculating the Initial Graph

In [None]:
# Fetch every SynapseProofread row as a list of dicts and tabulate it.
synapse_data = minnie.SynapseProofread.fetch(as_dict=True)
synapse_data_df = pd.DataFrame(synapse_data)
synapse_data_df

In [None]:
# Distinct synapse ids plus, for each one, the first row where it appears and
# how many rows carry it; inverse_indices maps every row back to the position
# of its id in the distinct list.
synapse_id_column = synapse_data_df["synapse_id"].to_numpy()
(
    uniq_synapse_ids,
    unique_synapse_indexes,
    inverse_indices,
    synapse_id_counts,
) = np.unique(
    synapse_id_column,
    return_index=True,
    return_inverse=True,
    return_counts=True,
)

In [None]:
# A synapse id that shows up on exactly two rows is treated as a direct
# connection; keep only the rows that belong to such ids.
direct_connection_synapse_mask = synapse_id_counts == 2
direct_connection_synapse_idx = np.flatnonzero(direct_connection_synapse_mask)

# Broadcast the per-id flag back onto every row of the synapse table.
row_filter = direct_connection_synapse_mask[inverse_indices]

direct_connections_df = synapse_data_df.iloc[row_filter]
direct_connections_df

In [None]:
# Combining the table's presyns and postsyns onto the same row.
#
# Each direct connection is expected to appear as one "presyn" row and one
# "postsyn" row sharing a synapse_id. The two halves are joined on that id
# rather than glued together by row position, so a synapse id whose two rows
# are of the same type (and therefore has no partner on the other side) is
# dropped instead of shifting every later row out of alignment.

df_tables = []
for synapse_type in ["presyn", "postsyn"]:
    one_side_df = (
        direct_connections_df[direct_connections_df["synapse_type"] == synapse_type]
        .sort_values(by=["synapse_id"])
        .reset_index(drop=True)
        .drop(columns="synapse_type")
    )
    # Prefix every column (synapse_id included) with the side it came from,
    # e.g. nucleus_id -> presyn_nucleus_id.
    df_tables.append(one_side_df.add_prefix(f"{synapse_type}_"))

presyn_side_df, postsyn_side_df = df_tables

# An inner join keeps the left (presyn, sorted by synapse id) row order and
# retains both key columns because they have different names.
direct_conn_df = presyn_side_df.merge(
    postsyn_side_df,
    how="inner",
    left_on="presyn_synapse_id",
    right_on="postsyn_synapse_id",
)
direct_conn_df

In [None]:
# (presyn nucleus, postsyn nucleus) for every direct connection, as an array
edge_columns = ["presyn_nucleus_id", "postsyn_nucleus_id"]
edges_df = direct_conn_df[edge_columns]
connectome_edges = edges_df.to_numpy()
connectome_edges

# Preliminary Look at the Graph

In [None]:
import networkx as nx

In [None]:
# Directed multigraph of the proofread connectome: one edge per row of
# connectome_edges, so parallel edges are preserved.
G = nx.MultiDiGraph()
G.add_edges_from(connectome_edges)
(G.number_of_nodes(), G.number_of_edges())

In [None]:
# Simple undirected copy of G: direction and parallel edges are collapsed.
G_undirected_non_multi = nx.Graph(G)
(
    G_undirected_non_multi.number_of_nodes(),
    G_undirected_non_multi.number_of_edges(),
)

In [None]:
# Simple directed copy of G: parallel edges are merged, direction is kept.
G_directed_non_multi = nx.DiGraph(G)
(
    G_directed_non_multi.number_of_nodes(),
    G_directed_non_multi.number_of_edges(),
)

In [None]:
# Connected components are not defined for a directed graph, so collapse G
# into an undirected graph first.
G_undirected = nx.Graph(G)

n_components = nx.number_connected_components(G_undirected)
print(f"Number of connected components = {n_components}")

# Node count of every connected component
conn_comp = [component for component in nx.connected_components(G_undirected)]
conn_comp_size = list(map(len, conn_comp))
print(f"conn_comp_size = {conn_comp_size}")

In [None]:
# Histogram of how many neurons each connected component contains
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(conn_comp_size)
ax.set(
    title="Number of Neurons in Connected Components",
    xlabel="Number of Neurons",
)
plt.show()

# Selecting the largest component

In [None]:
# nx.connected_components yields components in no guaranteed size order, so
# the biggest one is picked explicitly instead of assuming it comes first.
largest_component_nodes = max(conn_comp, key=len)
G_largest = G.subgraph(largest_component_nodes)
node_names_largest_comp = np.array(G_largest.nodes())
node_names_largest_comp.shape

# Degree Distribution

In [None]:
# Collapse the largest component into a simple undirected graph (unique
# edges), then strip self loops with the project helper before counting degrees.
undirectional_graph = nx.Graph(G_largest)
no_selflooped_graph = xu.remove_selfloops(undirectional_graph)
no_selflooped_graph

In [None]:
# Degree of every node of the largest component (unique edges, no self
# loops), keeping only values strictly below the 99.5th percentile.
component_degrees = xu.get_node_degree(no_selflooped_graph, node_names_largest_comp)
degree_distribution = np.array(component_degrees)
degree_cutoff = np.percentile(degree_distribution, 99.5)
degree_distribution_filtered = degree_distribution[degree_distribution < degree_cutoff]

In [None]:
# Summary statistics of the trimmed degree distribution
mean_degree = np.mean(degree_distribution_filtered)
median_degree = np.median(degree_distribution_filtered)
print(f"Average Degree of Node: {mean_degree}")
print(f"Median Degree of Node: {median_degree}")

In [None]:
# Normalised (density) histogram of the trimmed degree distribution
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(degree_distribution_filtered, bins=50, density=True)
ax.set(
    title="Degree Distribution of Neurons \n Unique Edges, No Self Loops",
    xlabel="Degree of Neuron",
    ylabel="Density",
)
plt.show()

# Degree Distribution In and Out

In [None]:
# `degree_distribution` is a NumPy array at this point in the notebook, so
# calling it raises TypeError. The degree helper lives in graph_analysis,
# which is imported here because its notebook-level import only comes later.
# NOTE(review): confirm the helper's default arguments return untrimmed degrees.
import graph_analysis as ga

(
    np.sum(ga.degree_distribution(G, degree_type="in")),
    np.sum(ga.degree_distribution(G, degree_type="out")),
)

In [None]:
# `degree_distribution` is a NumPy array at this point in the notebook, so
# calling it raises TypeError. Use the helper from graph_analysis instead,
# imported here because its notebook-level import only comes later.
# NOTE(review): confirm the helper accepts degree_type="in_and_out".
import graph_analysis as ga

np.sum(ga.degree_distribution(G, degree_type="in_and_out"))

In [None]:
import graph_analysis as ga
import graph_visualizations as gviz

In [None]:
# Re-import graph_visualizations so that edits made to the module on disk are
# picked up without restarting the kernel.
from importlib import reload
gviz = reload(gviz)

In [None]:
# In-degree of every node of G, in node iteration order
in_degree = np.array([node_in_degree for _, node_in_degree in G.in_degree()])
in_degree

In [None]:
import matplotlib.pyplot as plt
import matplotlib_utils as mu

import numpy as np

# Histogram of the in-degrees via the project plotting helper
# (bin_width=10, bin_max=700, logscale=True)
mu.histogram(
    in_degree,
    bin_max=700,
    bin_width=10,
    return_fig_ax=True,
    logscale=True,
)

In [None]:
# In-degree distribution of G from the project helper, with percentile=95
ga.degree_distribution(
    G,
    degree_type="in",
    percentile=95,
)

In [None]:
# In-degree distribution plot (percentile=100, bins of width 10 up to 600)
gviz.plot_degree_distribution(
    G,
    degree_type="in",
    percentile=100,
    bin_width=10,
    bin_max=600,
)

In [None]:
# Out-degree distribution plot (percentile=100, bins of width 10 up to 600)
gviz.plot_degree_distribution(
    G,
    degree_type="out",
    percentile=100,
    bin_width=10,
    bin_max=600,
)

In [None]:
# In-degree distribution plot (percentile=99, bins of width 5 up to 800)
gviz.plot_degree_distribution(
    G,
    degree_type="in",
    bin_width=5,
    bin_max=800,
    percentile=99,
)

In [None]:
# Out-degree distribution plot (percentile=99, bins of width 5 up to 800)
gviz.plot_degree_distribution(
    G,
    degree_type="out",
    bin_width=5,
    bin_max=800,
    percentile=99,
)

In [None]:
# Titled in-degree distribution plot (percentile=99, bins of width 5 up to 500)
gviz.plot_degree_distribution(
    G,
    degree_type="in",
    title="In-Degree Distribution",
    bin_width=5,
    bin_max=500,
    percentile=99,
)

In [None]:
# Titled out-degree distribution plot (percentile=99). The plotting helper is
# only available through the gviz module; the bare name is never defined in
# this notebook and raised NameError.
gviz.plot_degree_distribution(
    G,
    degree_type="out",
    title="Out-Degree Distribution",
    percentile=99,
)

In [None]:
# Same trimmed degree distribution as above, shown as raw counts on a log y axis
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.hist(degree_distribution_filtered, bins=50, density=False)
ax.set(
    title="Degree Distribution of Neurons \n Unique Edges, No Self Loops",
    xlabel="Degree of Neuron",
    ylabel="Count",
)
ax.set_yscale("log")
plt.show()

# Getting the Soma to Soma Distance

In [None]:
"""
Pseudocode: Pull down the Nucleus IDs and Nucleus Centers

1) Get the nucleus ids and centers
2) Adjust the centers for nm


"""

# Nucleus table restricted to rows with a positive segment_id
nucleus_restriction = "segment_id>0"
nuc_table = du.configure_nucleus_table() & nucleus_restriction
nuc_table

In [None]:
# Nucleus ids and their x / y / z center coordinates, one array per column
(
    nucleu_ids,
    nuc_x,
    nuc_y,
    nuc_z,
) = nuc_table.fetch(
    "nucleus_id",
    "nucleus_x",
    "nucleus_y",
    "nucleus_z",
)

In [None]:
# Stack x / y / z into one row per nucleus and scale each axis to nm.
# NOTE(review): (4, 4, 40) is presumably the voxel size in nm -- confirm.
voxel_scale = np.array([4, 4, 40])
nucleus_centers = np.stack((nuc_x, nuc_y, nuc_z), axis=1) * voxel_scale
nucleus_centers

In [None]:
# Lookup table: nucleus id -> scaled (x, y, z) center
nuc_id_to_center = dict(zip(nucleu_ids, nucleus_centers))

In [None]:
# Number of unique undirected connections
G_undirected_non_multi.number_of_edges()

In [None]:
# NOTE(review): leftover scratch cell; it has no effect on the analysis.
print("hi")

In [None]:
import matplotlib.pyplot as plt
def graph_to_soma_to_soma_distance(
    G,
    title="Soma to Soma Distance of Unique Direct Connections \n After Auto Proofreading",
    bins=100,
):
    """
    Plot a histogram of the soma-to-soma distance across the edges of G.

    Both endpoints of every edge are looked up in the notebook-level
    `nuc_id_to_center` mapping (nucleus id -> center coordinates). The
    Euclidean distance between the two centers is divided by 1000 and
    histogrammed as raw counts on a log y axis.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes are nucleus ids. Edges with a non-positive id at
        either end are skipped. Every entry of G.edges() contributes one
        distance, so parallel edges of a multigraph are each counted.
    title : str, optional
        Figure title. Defaults to the title this function has always used.
    bins : int, optional
        Number of histogram bins (default 100).

    Returns
    -------
    None
        The figure is displayed with plt.show(). If no edge survives the
        positive-id filter a message is printed and nothing is plotted.

    Raises
    ------
    KeyError
        If an endpoint with a positive id is missing from `nuc_id_to_center`.
    """
    # reshape keeps the array two-dimensional even for a graph without edges,
    # where np.array([]) would otherwise break the axis=1 reduction below.
    total_unique_edges = np.array(list(G.edges())).reshape(-1, 2)
    edges_mask = np.all(total_unique_edges > 0, axis=1)
    valid_edges = total_unique_edges[edges_mask]

    if len(valid_edges) == 0:
        print("No edges with positive node ids; nothing to plot")
        return

    # Shape (n_edges, 2, n_coords): the two endpoint centers for every edge
    soma_distances = np.array(
        [[nuc_id_to_center[k], nuc_id_to_center[v]] for k, v in valid_edges]
    )
    soma_distances_norm = np.linalg.norm(
        soma_distances[:, 0, :] - soma_distances[:, 1, :], axis=1
    )

    fig, ax = plt.subplots(1, 1)
    ax.hist(soma_distances_norm / 1000, bins=bins, density=False)
    ax.set_title(title)
    ax.set_xlabel("Soma to Soma Distance (um)")
    ax.set_ylabel("Counts")
    # Set the scales on the axes object itself, not on pyplot's notion of
    # the "current" axes.
    ax.set_yscale("log")
    ax.set_xscale("linear")
    plt.show()

# G is the MultiDiGraph, so every parallel edge between the same pair of
# neurons contributes its own entry to the histogram.
graph_to_soma_to_soma_distance(G)

In [None]:
# Same plot on the simple undirected graph, where each connected pair of
# neurons appears as a single edge.
graph_to_soma_to_soma_distance(G_undirected_non_multi)

In [None]:
# Unique undirected connections as an array with one (node, node) pair per row
unique_edge_list = list(G_undirected_non_multi.edges())
total_unique_edges = np.array(unique_edge_list)


In [None]:
# True for the edges whose two endpoint ids are both positive
edges_mask = (total_unique_edges > 0).all(axis=1)
edges_mask

In [None]:
# For every kept edge, the centers of its two endpoint nuclei
valid_edges = total_unique_edges[edges_mask]
soma_distances = np.array(
    [
        (nuc_id_to_center[pre_id], nuc_id_to_center[post_id])
        for pre_id, post_id in valid_edges
    ]
)
soma_distances

In [None]:
# Sanity check on the stacked centers: (number of edges, 2 endpoints, coordinates)
soma_distances.shape

In [None]:
# Euclidean distance between the two endpoint centers of every edge
center_offsets = soma_distances[:, 0, :] - soma_distances[:, 1, :]
soma_distances_norm = np.linalg.norm(center_offsets, axis=1)
soma_distances_norm.shape

In [None]:
import matplotlib.pyplot as plt
# Histogram of the soma-to-soma distances (divided by 1000), counts on a log y axis
fig, ax = plt.subplots(nrows=1, ncols=1)
distances_scaled = soma_distances_norm / 1000
ax.hist(distances_scaled, bins=100, density=False)
ax.set(
    title="Soma to Soma Distance of Unique Direct Connections \n After Auto Proofreading",
    xlabel="Soma to Soma Distance (um)",
    ylabel="Counts",
)
plt.yscale("log")
plt.xscale("linear")
plt.show()

In [None]:
alpha_level = 0.7

import matplotlib.pyplot as plt

# NOTE(review): total_distances_no_self_loops and
# total_distances_no_self_loops_auto are not assigned anywhere in the visible
# part of this notebook, so this cell fails on a fresh kernel until the cells
# that compute them are restored.
before_distances = total_distances_no_self_loops / 1000
after_distances = total_distances_no_self_loops_auto / 1000

# Bin both datasets on the same edges. Letting each hist() call choose its
# own 100 bins gives the two series different bin widths, which makes the
# overlaid counts impossible to compare bar for bar.
shared_bin_edges = np.histogram_bin_edges(
    np.concatenate([before_distances, after_distances]), bins=100
)

fig, ax = plt.subplots(1, 1)
ax.hist(
    before_distances,
    bins=shared_bin_edges,
    density=False,
    color="blue",
    label="Before Auto Proofreading",
    alpha=alpha_level,
)
ax.hist(
    after_distances,
    bins=shared_bin_edges,
    density=False,
    color="red",
    label="After Auto Proofreading",
    alpha=alpha_level,
)
ax.set_title("Soma to Soma Distance of Direct Connections")
ax.set_xlabel("Soma to Soma Distance (um)")
ax.set_ylabel("Counts")

ax.set_yscale("log")
ax.set_xscale("linear")
ax.legend()
plt.show()