Variables to be updated/configured:

In [None]:
# Set to True to run on the WES multiplex network; leave False to run on the
# larger epilepsy-autism multiplex network.
WES = False

# FIGURES_DIR: directory where figures will be saved (created if it doesn't exist).
# COMS_DIR: directory containing information on the communities in the network.
FIGURES_DIR = "./../figures_wes" if WES else "./../figures"
COMS_DIR = "./../communities_wes" if WES else "./../communities"

# Directories shared by both network variants.
GRAPH_DIR = "./../gexf_files"  # where the .gexf files are located
NET_GEN_DIR = "./../network_generation"  # directory for network generation

# Setup

In [None]:
# network packages
from cdlib import algorithms, evaluation
import igraph as ig
import louvain
import networkx as nx
from networkx.readwrite.gexf import read_gexf

# visualization packages
import matplotlib
import matplotlib.pyplot as plt

# other packages
import os
import numpy as np
import pandas as pd

In [None]:
# Create the output directories up front. exist_ok=True replaces the
# check-then-create pattern, which can fail if the directory appears between
# the check and the call. COMS_DIR is created as well because the
# resolution-scan CSV files are written into it later in the notebook.
for output_dir in (FIGURES_DIR, COMS_DIR):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Pick the input file names for the selected network variant.
(
    gene_phenotype_filename,
    gene_ppi_filename,
    gene_union_filename,
    gene_list_filename,
) = (
    ('gene-phenotype-wes-1-500.gexf', 'gene-ppi-wes-700.gexf', 'gene-union-wes.gexf', 'genes_wes.txt')
    if WES
    else ('gene-phenotype-1-1000.gexf', 'gene-ppi-700.gexf', 'gene-union.gexf', 'genes.txt')
)

# File names used for the updated copies of the two layer graphs.
gene_phenotype_filename_update = gene_phenotype_filename.replace('.gexf', '-update.gexf')
gene_ppi_filename_update = gene_ppi_filename.replace('.gexf', '-update.gexf')

# Load the two layer graphs and the gene list.
gene_phenotype = read_gexf(os.path.join(GRAPH_DIR, gene_phenotype_filename))
gene_ppi = read_gexf(os.path.join(GRAPH_DIR, gene_ppi_filename))
node_df = pd.read_csv(os.path.join(NET_GEN_DIR, gene_list_filename))

In [None]:
# Lookup tables between gene symbols and integer ids.
# gene_to_id maps each symbol to its row label in node_df; id_to_gene maps
# each row position (0, 1, 2, ...) back to the symbol in that row.
gene_to_id = dict(zip(node_df['gene_symbol'].tolist(), node_df.index.values.tolist()))
id_to_gene = dict(enumerate(node_df['gene_symbol'].values.tolist()))

In [None]:
# wrapper for communities
class Coms:
    """Minimal container holding a list of communities.

    Instances are passed to cdlib's ``evaluation.newman_girvan_modularity``
    elsewhere in this notebook.

    Attributes:
        communities: the object given to the constructor, stored as-is.
        overlap: always initialised to ``None``.
            NOTE(review): presumably present because cdlib expects it on a
            clustering object -- confirm.
    """

    def __init__(self, communities):
        self.overlap = None
        self.communities = communities
        
# get Coms class with genes from annotated networkx graph
def get_coms_from_graph(G):
    """Group the nodes of ``G`` by their ``'module'`` node attribute.

    Args:
        G: graph in which ``G.nodes[node]['module']`` is an integer module
            number for every node. Module numbers are used as 1-based list
            positions (module ``k`` goes to entry ``k - 1``).
            NOTE(review): a module number of 0 or below would index from the
            end of the list instead of failing -- confirm inputs are 1-based.

    Returns:
        A ``Coms`` whose ``communities`` is a list with one entry per module
        number from 1 to the largest one found; each entry lists that
        module's nodes in graph iteration order. Module numbers without any
        node yield an empty list, and a graph without nodes yields an empty
        partition.

    Raises:
        KeyError: if a node has no ``'module'`` attribute.
    """
    module_of = {node: G.nodes[node]['module'] for node in G.nodes}
    # default=0 keeps an empty graph from raising in max()
    partition = [[] for _ in range(max(module_of.values(), default=0))]
    for node, mod in module_of.items():
        # append in place; rebuilding the list for every node is quadratic
        partition[mod - 1].append(node)
    return Coms(partition)

# takes partition with IDs and converts to Coms class with genes
def partition_to_genes(partition):
    """Translate a partition of ids into a ``Coms`` of gene symbols.

    Args:
        partition: iterable of communities, each an iterable of ids that are
            keys of the module-level ``id_to_gene`` mapping.

    Returns:
        A ``Coms`` whose ``communities`` is a list of lists of gene symbols,
        in the same community and member order as ``partition``.

    Raises:
        KeyError: if an id is not present in ``id_to_gene``.
    """
    gene_partition = [
        [id_to_gene[gene_id] for gene_id in community]
        for community in partition
    ]
    return Coms(gene_partition)

In [None]:
# runs repeated Louvain trials at each resolution; returns per-resolution
# summary statistics and the best partition found at each resolution
def test_resolution(resolutions, G_igraph, G, num_trials=1000):
    """Scan Louvain resolutions and summarise modularity and community counts.

    For every resolution, ``louvain.find_partition`` is run ``num_trials``
    times on ``G_igraph`` with ``RBConfigurationVertexPartition``. Each
    partition is converted to gene symbols and scored on ``G`` with cdlib's
    ``newman_girvan_modularity``.

    Args:
        resolutions: iterable of resolution parameters to try.
        G_igraph: igraph graph that Louvain is run on.
        G: graph on which the modularity of each partition is evaluated.
        num_trials: number of Louvain runs per resolution.

    Returns:
        A pair ``(rows, best_partitions)``. ``rows`` holds one tuple per
        resolution: (resolution, mean modularity, std of modularity, mean
        number of communities with more than one member, std of that
        number). ``best_partitions`` holds, per resolution, the partition
        with the highest modularity seen (``None`` if no trial scored above
        -1).
    """
    summary_rows = []
    best_partitions = []

    for position, resolution in enumerate(resolutions):
        print(position, resolution)

        trial_modularities = []
        trial_com_counts = []
        top_modularity = -1
        top_partition = None

        for _ in range(num_trials):
            candidate = louvain.find_partition(
                G_igraph,
                louvain.RBConfigurationVertexPartition,
                resolution_parameter=resolution,
            )
            candidate_coms = partition_to_genes(candidate)
            score = evaluation.newman_girvan_modularity(G, candidate_coms).score

            trial_modularities.append(score)
            # singleton communities are left out of the count
            trial_com_counts.append(sum(1 for community in candidate if len(community) > 1))

            if score > top_modularity:
                top_modularity, top_partition = score, candidate

        summary_rows.append((
            resolution,
            np.mean(trial_modularities),
            np.std(trial_modularities),
            np.mean(trial_com_counts),
            np.std(trial_com_counts),
        ))
        best_partitions.append(top_partition)

    return summary_rows, best_partitions

In [None]:
# returns the best partition of a graph using Louvain and annotates the modules in the graph
def get_best_partition(resolutions, G_igraph, G, G_name, filename, num_trials=1000):
    """Run the resolution scan, save it, and annotate ``G`` with the best modules.

    The resolution with the highest mean modularity is selected, and the best
    partition found at that resolution is written to the ``'module'`` node
    attribute of ``G`` (numbered from 1).

    Args:
        resolutions: resolution parameters to scan.
        G_igraph: igraph graph that Louvain is run on.
        G: graph that is scored and then annotated in place.
        G_name: path the annotated graph is written to as GEXF.
        filename: path the scan summary is written to as CSV.
        num_trials: number of Louvain runs per resolution.

    Returns:
        ``G``, the same object that was passed in, now annotated.
    """
    scan_rows, partitions_by_resolution = test_resolution(resolutions, G_igraph, G, num_trials)

    result_df = pd.DataFrame(scan_rows)
    result_df.columns = ["resolution", "modularity_mean", "modularity_std", "num_coms_mean", "num_coms_std"]
    result_df.to_csv(filename, index=False)

    # the row label doubles as the position in partitions_by_resolution
    ranked = result_df.sort_values(by=["modularity_mean"], ascending=False)
    winning_partition = partitions_by_resolution[ranked.index.values[0]]
    winning_coms = partition_to_genes(winning_partition)

    for mod_num, members in enumerate(winning_coms.communities, start=1):
        for gene in members:
            G.nodes[gene]['module'] = mod_num

    print('Writing to', G_name)
    nx.write_gexf(G, G_name)
    return G

# PPI and phenotype network community detection

In [None]:
# networkx to igraph
# Each layer is rebuilt as an igraph graph whose vertices and edge endpoints
# are the integer gene ids from gene_to_id.
# NOTE(review): presumably igraph treats the integers given to add_edges as
# vertex positions, so this relies on each graph containing the ids
# 0..n-1 -- confirm that every gene in node_df is a node of both layers.
phenotype_vertex_ids = [gene_to_id[gene] for gene in gene_phenotype.nodes]
phenotype_edge_ids = [(gene_to_id[src], gene_to_id[dst]) for src, dst in gene_phenotype.edges]
G_phenotype = ig.Graph()
G_phenotype.add_vertices(phenotype_vertex_ids)
G_phenotype.add_edges(phenotype_edge_ids)

ppi_vertex_ids = [gene_to_id[gene] for gene in gene_ppi.nodes]
ppi_edge_ids = [(gene_to_id[src], gene_to_id[dst]) for src, dst in gene_ppi.edges]
G_ppi = ig.Graph()
G_ppi.add_vertices(ppi_vertex_ids)
G_ppi.add_edges(ppi_edge_ids)

In [None]:
# Resolution scan for the phenotype layer: the annotated graph is written to
# GRAPH_DIR and the scan summary to COMS_DIR.
resolutions = np.arange(0.2, 2.1, 0.2)
gene_phenotype = get_best_partition(
    resolutions=resolutions,
    G_igraph=G_phenotype,
    G=gene_phenotype,
    G_name=os.path.join(GRAPH_DIR, gene_phenotype_filename_update),
    filename=os.path.join(COMS_DIR, gene_phenotype_filename + '_resolution_scan.csv'),
    num_trials=1000,
)


In [None]:
# Resolution scan for the PPI layer: the annotated graph is written to
# GRAPH_DIR and the scan summary to COMS_DIR.
resolutions = np.arange(0.2, 2.1, 0.2)
gene_ppi = get_best_partition(
    resolutions=resolutions,
    G_igraph=G_ppi,
    G=gene_ppi,
    G_name=os.path.join(GRAPH_DIR, gene_ppi_filename_update),
    filename=os.path.join(COMS_DIR, gene_ppi_filename + '_resolution_scan.csv'),
    num_trials=1000,
)


### Plot resolution vs modularity and resolution vs number of modules figures

In [None]:
# Reload the saved scan summaries for both layers.
gene_phenotype_resolutions = pd.read_csv(os.path.join(COMS_DIR, gene_phenotype_filename + '_resolution_scan.csv'))
gene_ppi_resolutions = pd.read_csv(os.path.join(COMS_DIR, gene_ppi_filename + '_resolution_scan.csv'))

# Resolution with the highest mean modularity in each layer.
gene_phenotype_res = (
    gene_phenotype_resolutions
    .sort_values(by=["modularity_mean"], ascending=False)['resolution']
    .values[0]
)
gene_ppi_res = (
    gene_ppi_resolutions
    .sort_values(by=["modularity_mean"], ascending=False)['resolution']
    .values[0]
)


In [None]:
# Mean modularity (with std error bars) against Louvain resolution, one
# series per layer. The x values are taken from the phenotype scan table.
fig, ax1 = plt.subplots(figsize=(10,8))

x = list(gene_phenotype_resolutions['resolution'])
ax1.set_xlabel('Louvain resolution')
ax1.set_ylabel('Modularity')

for scan_table, layer_label in ((gene_ppi_resolutions, "PPI"), (gene_phenotype_resolutions, "Phenotype")):
    ax1.errorbar(
        x,
        scan_table['modularity_mean'],
        yerr=scan_table['modularity_std'],
        fmt='o',
        label=layer_label,
    )

plt.legend(loc='lower right')
plt.savefig(os.path.join(FIGURES_DIR, "resolution_vs_modularity.png"), dpi=300)
plt.show()

In [None]:
# Mean number of modules with at least 2 genes (with std error bars) against
# Louvain resolution, one series per layer. The x values are taken from the
# phenotype scan table.
fig, ax1 = plt.subplots(figsize=(10,8))

x = list(gene_phenotype_resolutions['resolution'])
ax1.set_xlabel('Louvain resolution')
ax1.set_ylabel('Number of modules (at least 2 genes)')

for scan_table, layer_label in ((gene_ppi_resolutions, "PPI"), (gene_phenotype_resolutions, "Phenotype")):
    ax1.errorbar(
        x,
        scan_table['num_coms_mean'],
        yerr=scan_table['num_coms_std'],
        fmt='o',
        label=layer_label,
    )

plt.legend(loc='lower right')
plt.savefig(os.path.join(FIGURES_DIR, "resolution_vs_num_coms.png"), dpi=300)
plt.show()

# Generate multiplex communities

In [None]:
def get_coms_from_membership(membership):
    """Convert a membership vector into a ``Coms`` of gene symbols.

    Args:
        membership: sequence in which entry ``i`` is the 0-based community
            number of the gene with id ``i`` (looked up in the module-level
            ``id_to_gene`` mapping).

    Returns:
        A ``Coms`` whose ``communities`` has one list per community number
        from 0 to the largest one present; each list holds the gene symbols
        of that community in id order. An empty membership yields an empty
        partition.

    Raises:
        KeyError: if a position in ``membership`` is not a key of
            ``id_to_gene``.
    """
    # default=-1 makes an empty membership produce zero communities
    partition = [[] for _ in range(max(membership, default=-1) + 1)]
    for id_, mod in enumerate(membership):
        # append in place; rebuilding the list for every gene is quadratic
        partition[mod].append(id_to_gene[id_])
    return Coms(partition)

In [None]:
# find multiplex communities
# Louvain is run num_trials times on the two-layer multiplex; the membership
# with the highest summed per-layer modularity (phenotype + PPI) is kept.
num_trials = 1000
max_mod = float("-inf")  # any real score beats the starting value
best_membership = None
for i in range(num_trials):
    if i % 100 == 0:
        print("Trial", i)

    membership, improv = louvain.find_partition_multiplex([G_phenotype, G_ppi], louvain.ModularityVertexPartition)
    trial_coms = get_coms_from_membership(membership)
    mod_phenotype = evaluation.newman_girvan_modularity(gene_phenotype, trial_coms).score
    mod_ppi = evaluation.newman_girvan_modularity(gene_ppi, trial_coms).score
    total_mod = mod_phenotype + mod_ppi
    if total_mod > max_mod:
        max_mod = total_mod
        best_membership = membership

# Later cells annotate the graphs from coms_multiplex, so it must hold the
# best-scoring trial. Previously it was left holding the last trial's
# communities and best_membership was never used.
coms_multiplex = get_coms_from_membership(best_membership)

# Annotate graph (.gexf) nodes and edges 

In [None]:
# update gexf files with module associations
def annotate_coms(coms, G, module_type, filename):
    """Store community numbers as a node attribute of ``G`` and save it as GEXF.

    Args:
        coms: iterable of communities, each an iterable of node keys of ``G``.
        G: graph annotated in place.
        module_type: name of the node attribute that receives the community
            number (communities are numbered from 1 in iteration order).
        filename: path the annotated graph is written to.

    Nodes that appear in no community are left untouched.
    """
    for module_number, members in enumerate(coms, start=1):
        for gene in members:
            node_attributes = G.nodes[gene]
            node_attributes[module_type] = module_number
    nx.write_gexf(G, filename)

In [None]:
# Reload each updated layer graph from disk and rebuild its communities from
# the 'module' node attribute stored in the file.
gene_phenotype = read_gexf(os.path.join(GRAPH_DIR, gene_phenotype_filename_update))
coms_phenotype = get_coms_from_graph(gene_phenotype)

gene_ppi = read_gexf(os.path.join(GRAPH_DIR, gene_ppi_filename_update))
coms_ppi = get_coms_from_graph(gene_ppi)

In [None]:
# Write the multiplex, PPI and phenotype module numbers onto both layer
# graphs; each annotate_coms call also rewrites the graph's updated .gexf file.
layer_module_annotations = (
    (coms_multiplex.communities, 'multiplex_module'),
    (coms_ppi.communities, 'ppi_module'),
    (coms_phenotype.communities, 'phenotype_module'),
)
for layer_graph, layer_filename in (
    (gene_ppi, gene_ppi_filename_update),
    (gene_phenotype, gene_phenotype_filename_update),
):
    for layer_communities, layer_module_type in layer_module_annotations:
        annotate_coms(
            layer_communities,
            layer_graph,
            layer_module_type,
            os.path.join(GRAPH_DIR, layer_filename),
        )

In [None]:
# Split the edges into those shared by both layers and those specific to one.
G_intersection = nx.intersection(gene_phenotype, gene_ppi)
intersecting_edges = G_intersection.edges
gene_ppi_specific = nx.difference(gene_ppi, G_intersection)
gene_phenotype_specific = nx.difference(gene_phenotype, G_intersection)

# Union graph: nodes taken from the PPI layer, edges from all three groups.
G_union = nx.Graph()
G_union.add_nodes_from(gene_ppi.nodes)
for edge_group in (intersecting_edges, gene_ppi_specific.edges, gene_phenotype_specific.edges):
    G_union.add_edges_from(edge_group)

# Label every edge with the layer(s) it comes from.
for edge_group, edge_label in (
    (gene_phenotype_specific.edges, 'phenotype'),
    (gene_ppi_specific.edges, 'ppi'),
    (intersecting_edges, 'both'),
):
    for edge in edge_group:
        G_union[edge[0]][edge[1]]['edge_type'] = edge_label


In [None]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0; where it is
# missing, print the graph itself, whose string form is a short summary.
print(nx.info(G_union) if hasattr(nx, "info") else G_union)

In [None]:
# Annotate the union graph; 'module' and 'multiplex_module' both carry the
# multiplex communities. Each annotate_coms call rewrites the union .gexf file.
union_path = os.path.join(GRAPH_DIR, gene_union_filename)
for union_communities, union_module_type in (
    (coms_multiplex.communities, 'module'),
    (coms_multiplex.communities, 'multiplex_module'),
    (coms_ppi.communities, 'ppi_module'),
    (coms_phenotype.communities, 'phenotype_module'),
):
    annotate_coms(union_communities, G_union, union_module_type, union_path)