## Functional analysis

**NOTE: This ipynb can only be embedded in others.**

We start by converting all spiketrains into binary vectors of the same length.

In [None]:
# functional analysis: every spiketrain becomes a fixed-length vector of 0/1 values
print("... Functional correlation analysis (functional structure)")

print("    binary spiketrains")
bst_last_bin = len(time)  # spikes falling beyond this bin are all assigned to it
bst_first_frame = int(exp_tstart/frame_duration)  # frame index of the experiment start
binary_spiketrains = np.zeros((len(spiketrains), bst_last_bin+2))
print("    shape:", binary_spiketrains.shape)
for bst_row, bst_train in enumerate(spiketrains):
    # spike time -> frame number, counted from the experiment start
    # NOTE(review): a spike earlier than exp_tstart would give a negative index,
    # which numpy counts from the end of the row - confirm this cannot happen.
    bst_bins = np.trunc(np.array(bst_train)/frame_duration).astype(int) - bst_first_frame
    bst_bins = np.minimum(bst_bins, bst_last_bin)
    # flag every bin that holds at least one spike
    binary_spiketrains[bst_row, bst_bins] = 1

Then we build an adjacency matrix based on the pairwise cross-correlation between the binary spiketrains.

In [None]:
print("    starting functional adjacency matrix")
# Entry (i, j) is element [2] of the cross-correlation (maxlag=1) between
# spiketrain i and spiketrain j; the diagonal is 0.0 (no self connections).
functional_adjacency_matrix = np.array([
    [
        0.0 if src == dst else crosscorrelation(src_train, dst_train, maxlag=1, mode='corr')[2]
        for dst, dst_train in enumerate(binary_spiketrains)
    ]
    for src, src_train in enumerate(binary_spiketrains)
])
print("    full adjacency matrix:",functional_adjacency_matrix.shape)

# Keep the matrix sparse: zero every correlation that does not exceed the
# fraction perc_corr of the strongest one (thresholding after Sadovsky and MacLean 2013).
weak_correlations = functional_adjacency_matrix <= functional_adjacency_matrix.max()*perc_corr
functional_adjacency_matrix[weak_correlations] = 0.0
np.save(exp_path+"/results/functional_adjacency_matrix_%s.npy"%(scan_id), functional_adjacency_matrix)

# heatmap of the thresholded matrix
fig = plt.figure()
plt.pcolormesh(functional_adjacency_matrix)
plt.colorbar()
fig.savefig(exp_path+'/results/adjacency_matrix_%s.png'%(scan_id), transparent=True)
plt.close()
fig.clear()
fig.clf()

### is the cross-correlation between cells significant to justify a functional connectivity analysis?
Before performing the functional connectivity analysis, let's check that the correlograms of the best-correlated cell pairs stand out from those of their shuffled surrogates.

In [None]:
print("    checking details of best cross-correlation pairs ")
# Pick strongly correlated cell pairs for closer inspection: the entries of the
# functional adjacency matrix above its 95th percentile.
top_threshold = np.percentile(functional_adjacency_matrix, 95)
highly_correlated_indices = np.where(functional_adjacency_matrix > top_threshold)

bin_size = 1 # step between plotted lags, in bins of the binary spiketrains (positive integer)
lags = np.arange(-50, 51, bin_size)

# np.convolve(a, b[::-1], mode='same') is the cross-correlogram of a and b.
# For two equal-length inputs its zero-lag term sits at index len(a)//2, so the
# plotted window must be taken around that index, not from the start of the trace.
xcorr_length = binary_spiketrains.shape[1]
xcorr_lag_idxs = xcorr_length//2 + lags
xcorr_in_range = (xcorr_lag_idxs >= 0) & (xcorr_lag_idxs < xcorr_length)
lags = lags[xcorr_in_range] # drop lags that a very short recording cannot provide
xcorr_lag_idxs = xcorr_lag_idxs[xcorr_in_range]

# Inspect at most 30 pairs, in the order returned by np.where (not sorted by strength).
num_pairs = min(30, len(highly_correlated_indices[0]))
co_occurrence_counts = []
surrogates_co_occurrence_counts = []
for cell1, cell2 in zip(highly_correlated_indices[0][:num_pairs], highly_correlated_indices[1][:num_pairs]):
    spike_times1 = binary_spiketrains[cell1].astype(int)
    spike_times2 = binary_spiketrains[cell2].astype(int)
    # observed co-occurrence count at every lag
    co_occurrence_counts.append(np.convolve(spike_times1, spike_times2[::-1], mode='same'))
    # surrogates: shuffling both trains destroys their temporal relationship while
    # keeping the number of spikes; the mean over 10 shuffles is the chance level
    surrogates_co_occurrences = [
        np.convolve(np.random.permutation(spike_times1), np.random.permutation(spike_times2)[::-1], mode='same')
        for _ in range(10)
    ]
    surrogates_co_occurrence_counts.append(np.mean(surrogates_co_occurrences, axis=0))

# grid of individual cross-correlations (observed in blue, surrogate mean in dashed red)
num_plots = len(co_occurrence_counts)
if num_plots == 0:
    # nothing above the percentile threshold: a grid with zero rows cannot be built
    print("    no highly correlated pairs found: skipping correlograms")
else:
    num_rows = int(np.sqrt(num_plots))
    num_cols = int(np.ceil(num_plots / num_rows))
    fig = plt.figure(figsize=(15, 10))
    for i, (observed, surrogate) in enumerate(zip(co_occurrence_counts, surrogates_co_occurrence_counts)):
        plt.subplot(num_rows, num_cols, i+1)
        plt.plot(lags, observed[xcorr_lag_idxs], color='blue')
        plt.plot(lags, surrogate[xcorr_lag_idxs], color='red', linestyle='dashed')
    # NOTE(review): lags are counted in spiketrain bins; the 'ms' label is only
    # right if one bin lasts 1 ms - confirm against frame_duration.
    fig.text(0.5, 0.04, 'Lag (ms)', ha='center')
    fig.text(0.04, 0.5, 'Correlation', va='center', rotation='vertical')
    plt.tight_layout()
    fig.savefig(exp_path+'/results/correlogram_%s.svg'%(scan_id), transparent=True)
    plt.close()
    fig.clear()
    fig.clf()

### creating the graph

Creating graph from functional_adjacency_matrix as in Sadovsky and MacLean 2013

In [None]:
# Binarise the matrix: every entry reaching the fraction perc_corr of the maximum
# becomes a unit-weight connection. (Zeroing of the weaker entries is not repeated
# here; it is done where the adjacency matrix is built.)
strong_correlations = functional_adjacency_matrix >= functional_adjacency_matrix.max()*perc_corr
functional_adjacency_matrix[strong_correlations] = 1.0

# directed graph with one vertex per spiketrain, drawn on a ring
dgraph = ig.Graph.Weighted_Adjacency(functional_adjacency_matrix, mode='directed')
ig.plot(
    dgraph,
    exp_path+'/results/ring_%s.png'%(scan_id),
    layout=dgraph.layout("circle"),
    edge_curved=0.2,
    edge_color='#000',
    edge_width=0.5,
    edge_arrow_size=0.1,
    vertex_size=5,
    vertex_color='#000',
    margin=50,
)

print('    preparing vertex labels for cores and others')
# each vertex carries its cell id and a 0/1 flag telling whether it is a core
dgraph.vs["ophys_cell_id"] = ophys_cell_ids
is_id_core = np.zeros(len(ophys_cell_ids), dtype=int)
is_id_core[core_indexes] = 1
dgraph.vs["is_core"] = is_id_core.tolist()

### Modularity

In a modular network, the relationship between the degree and the local clustering coefficient of the nodes should be log-linear.

In [None]:
# degree of every vertex of the functional graph
degrees = np.array(dgraph.degree())
print("    Degree distributions")
# https://igraph.org/python/api/latest/igraph._igraph.GraphBase.html#degree
# Histogram of the degrees in bins 5 degrees wide; each bin is (start, end, count).
degdist = dgraph.degree_distribution(bin_width=5)
degree_bins = list(degdist.bins())
# Plot the counts against the midpoint of each bin, so the x axis really shows
# degrees (not the position of the bin in the histogram).
degree_bin_centers = [(start + end) / 2 for start, end, _ in degree_bins]
degree_counts = [count for _, _, count in degree_bins]
fig = plt.figure()
plt.plot(degree_bin_centers, degree_counts, linewidth=3.0)
plt.ylabel('Number of vertices')
plt.xlabel('Degree')
plt.xscale('log')
plt.yscale('log')
plt.savefig(exp_path+'/results/degree_distribution_%s.png'%(scan_id), transparent=True, dpi=300)
plt.close()

# Local clustering coefficient of every vertex (cores included); vertices=None
# selects all vertices and mode="zero" is igraph's setting for vertices whose
# coefficient is undefined.
print('    Local Clustering Coefficient (cores too)')
local_clustering_coefficients = np.array(dgraph.transitivity_local_undirected(vertices=None, mode="zero"))

### Cores are the connectors between modules

High information flow nodes often arise from modular networks, characterized by a log-linear relationship between local clustering coefficient and degree. In particular, high information flow nodes have low cluster coefficient, acting as connectors between modules.

Here, on the left, we color the core units while displaying them over the hierarchical modularity plot.      
On the right, we plot the histogram of the cores' local clustering coefficients on a linear scale, to show that their distribution is skewed towards low values.

In [None]:
# local clustering coefficient and degree of the core vertices only
core_local_clustering_coefficients = np.array(dgraph.transitivity_local_undirected(vertices=core_indexes, mode="zero"))
core_degrees = np.array(dgraph.degree(vertices=core_indexes, mode="all"))

# figure: wide scatter panel on the left, narrow histogram panel on the right
fig, (hmmap, chist) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [6, 1]})

# left: LCC against degree for all vertices (dark dots), cores ringed in green
hmmap.scatter( degrees, local_clustering_coefficients, marker='o', facecolor='#111111', s=50, edgecolors='none', alpha=0.5)
hmmap.scatter( core_degrees, core_local_clustering_coefficients, marker='o', facecolor='none', s=50, edgecolors='forestgreen')
hmmap.set_yscale('log')
hmmap.set_ylim([0.02,1.1])
hmmap.set_xscale('log')
hmmap.spines['top'].set_visible(False)
hmmap.spines['right'].set_visible(False)
hmmap.set_ylabel('LCC')
hmmap.set_xlabel('degree')
# tick_params expects booleans: the strings 'on'/'off' are both truthy, so
# 'off' would leave the ticks visible instead of hiding them
hmmap.tick_params(axis='both', bottom=True, top=True, left=False, right=False)

# right: histogram of the cores' LCC, drawn as horizontal bars on a linear scale
bins = np.linspace(0.02,1,50)
barheight = (max(local_clustering_coefficients)-min(local_clustering_coefficients))/50
lcc_hist, lcc_binedges = np.histogram(core_local_clustering_coefficients, bins)
chist.barh(bins[:-1], lcc_hist, height=barheight, align='center', color='green', linewidth=0)
chist.tick_params(axis='x', which='both', bottom=True, top=False, labelsize='x-small')
chist.tick_params(axis='y', which='both', left=True, right=False, labelleft=True)
chist.set_ylim([0.01,1.1])
chist.set_ylabel('LCC')
chist.set_xlabel('count')
chist.yaxis.set_label_position("right")
chist.spines['top'].set_visible(False)
chist.spines['right'].set_visible(False)
chist.spines['bottom'].set_visible(False)

plt.tight_layout()
fig.savefig(exp_path+"/results/cores_hierarchical_modularity_%s.svg"%(scan_id), transparent=True)
plt.close()
fig.clear()
fig.clf()

In [None]:
# Flow analysis
print("... Flow Analysis")

if len(core_indexes)>1 and len(other_indexes)>1:
    # The amount of flow on an edge cannot exceed the capacity of the edge,
    # therefore edges with high capacity will be more important for the flow.
    # Here we test the hypothesis that edges towards cores have higher capacity,
    # or that the sum of edges towards cores has a higher total capacity.
    #
    # cell_total_capacity: per cell, the group-normalised max-flow values of the
    #     pairs it took part in (collected here, not plotted in this cell).
    # edges_sourcing / edges_targeting: per cell, how many cut edges start from
    #     it / end on it.
    cell_total_capacity = {cid: [] for cid in ophys_cell_ids}
    edges_sourcing = {cid: 0 for cid in ophys_cell_ids}
    edges_targeting = {cid: 0 for cid in ophys_cell_ids}

    # group membership and group sizes do not change inside the loops below
    flow_cell_ids = np.array(ophys_cell_ids)
    flow_core_ids = flow_cell_ids[core_indexes]
    flow_other_ids = flow_cell_ids[other_indexes]
    flow_n_cores = len(core_indexes)
    flow_n_others = len(other_indexes)

    for cluster_k,events_cellids in scan_clustered_spectrums.items():
        # the part of the key before the first comma names the cluster;
        # 'gray' clusters are skipped
        cluster_k = cluster_k.split(',')[0]
        if cluster_k == 'gray':
            continue

        for vnt in events_cellids:
            # walk the event as consecutive (previous, next) cell pairs
            for vidi,vidj in zip(vnt, vnt[1:]):
                source_idx = dgraph.vs.find(ophys_cell_id=vidi).index
                target_idx = dgraph.vs.find(ophys_cell_id=vidj).index

                # beginning and end must not be the same vertex
                if source_idx == target_idx:
                    continue

                # Maximum flow between the previous and the next vertex (called
                # without explicit capacities). Of the result we use:
                #   value - the value of the maximum flow between the two vertices
                #   es    - an edge selector restricted to the edges in the cut
                mfres = dgraph.maxflow(source_idx, target_idx)

                # store the flow value for source and target, each normalised by
                # the size of the group (cores or others) the cell belongs to
                for vid in (vidi, vidj):
                    group_size = flow_n_cores if vid in flow_core_ids else flow_n_others
                    cell_total_capacity[vid].append(mfres.value / group_size)

                # count, for every edge in the cut, the cell it starts from
                # and the cell it points to
                for edge in mfres.es:
                    sourceid = int(dgraph.vs[edge.source]['ophys_cell_id'])
                    targetid = int(dgraph.vs[edge.target]['ophys_cell_id'])
                    if sourceid in edges_sourcing:
                        edges_sourcing[sourceid] +=1 # just count
                    if targetid in edges_targeting:
                        edges_targeting[targetid] +=1 # just count

    # Cuts: per-cell counts of cut edges, normalised by the size of the cell's group
    flowcuts_core_sources = [edges_sourcing[cid]/flow_n_cores for cid in flow_core_ids]
    flowcuts_core_targets = [edges_targeting[cid]/flow_n_cores for cid in flow_core_ids]
    flowcuts_other_sources = [edges_sourcing[cid]/flow_n_others for cid in flow_other_ids]
    flowcuts_other_targets = [edges_targeting[cid]/flow_n_others for cid in flow_other_ids]

    # description
    print("    Cut edges sourcing from cores: "+str(stats.describe(flowcuts_core_sources)) )
    print("    Cut edges targeting cores: "+str(stats.describe(flowcuts_core_targets)) )
    print("    Cut edges sourcing from others: "+str(stats.describe(flowcuts_other_sources)) )
    print("    Cut edges targeting others: "+str(stats.describe(flowcuts_other_targets)) )
    # significance
    print("    Core targets vs sources Welch t test:  %.3f p= %.3f" % stats.ttest_ind(flowcuts_core_targets, flowcuts_core_sources, equal_var=False))
    d,_ = stats.ks_2samp(flowcuts_core_targets, flowcuts_core_sources) # non-parametric measure of effect size [0,1]
    print('    Kolmogorov-Smirnov Effect Size: %.3f' % d)

    print("    Core targets vs Other targets Welch t test:  %.3f p= %.3f" % stats.ttest_ind(flowcuts_core_targets, flowcuts_other_targets, equal_var=False))
    d,_ = stats.ks_2samp(flowcuts_core_targets, flowcuts_other_targets) # non-parametric measure of effect size [0,1]
    print('    Kolmogorov-Smirnov Effect Size: %.3f' % d)

    # jittered scatter + violin for the four groups (cores in green, others in silver)
    flowcut_groups = [
        (flowcuts_core_sources, 'forestgreen'),
        (flowcuts_core_targets, 'forestgreen'),
        (flowcuts_other_sources, 'silver'),
        (flowcuts_other_targets, 'silver'),
    ]
    fig, ax = plt.subplots()
    for position,(values,colour) in enumerate(flowcut_groups, start=1):
        xs = np.random.normal(position, 0.04, len(values)) # horizontal jitter around the group position
        plt.scatter(xs, values, alpha=0.3, c=colour)
    vp = ax.violinplot([values for values,_colour in flowcut_groups], widths=0.15, showextrema=False, showmeans=True)
    for pc in vp['bodies']:
        pc.set_edgecolor('black')
    for pc in vp['bodies'][0:2]:
        pc.set_facecolor('#228B224d')
    for pc in vp['bodies'][2:]:
        pc.set_facecolor('#D3D3D34d')
    vp['cmeans'].set_color('orange')
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_visible(False)
    plt.ylabel('Normalized edges in the cut')
    plt.xticks([1, 2, 3, 4], ["core as\nsource", "core as\ntarget", "other as\nsource", "other as\ntarget"])
    fig.savefig(exp_path+"/results/global_cores_others_cutvalue_%s.svg"%(scan_id), transparent=True)
    plt.show()

### PageRank

If a network has high flow nodes, they will also score high in the [PageRank algorithm](https://en.wikipedia.org/wiki/PageRank).    
Core neurons having both high flow values and high cut values should have a higher pagerank value than others.

In [None]:
print('... PageRank centrality')
# Personalized PageRank (reset distribution taken from the "is_core" vertex
# attribute) is computed separately for each group that has more than one member;
# a group without enough members keeps an empty list.
pr_enough_cores = len(core_indexes)>1
pr_enough_others = len(other_indexes)>1

pagerank_cores = []
if pr_enough_cores:
    pagerank_cores = np.array(dgraph.personalized_pagerank(vertices=core_indexes, directed=True, damping=0.85, reset="is_core"))
pagerank_others = []
if pr_enough_others:
    pagerank_others = np.array(dgraph.personalized_pagerank(vertices=other_indexes, directed=True, damping=0.85, reset="is_core"))

# the comparison needs both groups
if pr_enough_cores and pr_enough_others:

    # description
    print("    cores: "+str(stats.describe(pagerank_cores)) )
    print("    others: "+str(stats.describe(pagerank_others)) )
    # significance
    print("    Kruskal-Wallis test:  %.3f p= %.3f" % stats.kruskal(pagerank_cores, pagerank_others))
    pr_ks_statistic = stats.ks_2samp(pagerank_cores, pagerank_others)[0] # non-parametric measure of effect size [0,1]
    print('    Kolmogorov-Smirnov Effect Size: %.3f' % pr_ks_statistic)

    # jittered scatter of both groups with a violin (median marked) on top
    fig, ax = plt.subplots()
    for pr_position, pr_values, pr_colour in ((1, pagerank_cores, 'forestgreen'), (2, pagerank_others, 'silver')):
        pr_jitter = np.random.normal(pr_position, 0.04, len(pr_values))
        plt.scatter(pr_jitter, pr_values, alpha=0.3, c=pr_colour)
    vp = ax.violinplot([pagerank_cores,pagerank_others], widths=0.15, showextrema=False, showmedians=True)
    for pr_body, pr_face in zip(vp['bodies'], ['#228B224d','#D3D3D34d']):
        pr_body.set_edgecolor('black')
        pr_body.set_facecolor(pr_face)
    vp['cmedians'].set_color('orange')
    vp['cmedians'].set_linewidth(2.)
    for pr_side in ('top', 'bottom', 'left', 'right'):
        ax.spines[pr_side].set_visible(False)
    plt.ylabel('PageRank')
    plt.xticks([1, 2], ["core\n(n={:d})".format(len(pagerank_cores)), "other\n(n={:d})".format(len(pagerank_others))])
    fig.savefig(exp_path+"/results/global_cores_others_pagerank_%s.svg"%(scan_id), transparent=True)
    plt.show()
