In [None]:
import os

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from rwr_functions import *
from constants import *

from sknetwork.data import from_edge_list
from networkx import pagerank, from_pandas_edgelist


# Create supra-adjacency matrix

In [None]:
def load_multilayer_graph(selected_networks):
    """Build the multilayer (supra-adjacency) graph for the selected networks.

    Each network named in ``selected_networks`` is read from the CSV file
    registered under that name in the module-level ``network_files`` mapping
    (only the ``node1`` / ``node2`` columns are used). Every node is renamed
    to ``"<network>_<node>"`` so each layer keeps its own copy of a node, and
    copies of the same node that occur in two or more layers are linked
    pairwise by inter-network edges.

    Parameters
    ----------
    selected_networks : iterable of str
        Keys of ``network_files`` naming the layers to load.

    Returns
    -------
    graph
        The object returned by ``networkx.from_pandas_edgelist`` for the
        combined intra- and inter-network edge list.
    """
    # One prefixed edge table per layer; concatenated once at the end instead
    # of growing a DataFrame inside the loop (which re-copies all prior rows).
    layer_frames = []
    # node id (without layer prefix) -> layers, in load order, that contain it
    gene_nets = {}

    # Intra-network edges
    for net in selected_networks:
        print("Loading {} graph".format(net))
        df = pd.read_csv(network_files[net], dtype={'node1': str, 'node2': str})[["node1", "node2"]].astype(str)
        graph_nodes = set(df["node1"]).union(set(df["node2"]))
        for nd in graph_nodes:
            gene_nets.setdefault(nd, []).append(net)

        # Prefix node ids with the layer name so layers do not share nodes.
        df["node1"] = net + "_" + df["node1"]
        df["node2"] = net + "_" + df["node2"]
        layer_frames.append(df)

    # Inter-network edges: connect every pair of layer copies of the same node.
    # Nodes present in a single layer yield no pairs.
    inter_edges = [
        (nn[i] + "_" + gn, nn[j] + "_" + gn)
        for gn, nn in gene_nets.items()
        for i in range(len(nn) - 1)
        for j in range(i + 1, len(nn))
    ]
    inter_edges = pd.DataFrame(inter_edges, columns=["node1", "node2"])

    # Complete supra-adjacency edge list. `inter_edges` is always present, so
    # the concat also works when no network was selected.
    supadj_edges = pd.concat([*layer_frames, inter_edges])

    graph = from_pandas_edgelist(supadj_edges, source="node1", target="node2")
    return graph


def init_multilayer_scores(graph, data):
    """Assign a seed score to every node of the multilayer graph.

    Each node label is split at its first underscore and the remainder is
    looked up among ``data.NCBI_id``. Nodes whose identifier is found receive
    the matching ``data.Score``; all other nodes receive ``0.0``. If an
    identifier occurs several times in ``data.NCBI_id``, the last score wins.

    Parameters
    ----------
    graph : object
        Must expose ``.nodes``, an iterable of string node labels.
    data : object
        Must expose ``NCBI_id`` and ``Score`` as parallel iterables
        (e.g. DataFrame columns).

    Returns
    -------
    dict
        Mapping of every node label in ``graph.nodes`` to its seed score.
    """
    # NOTE(review): identifiers are compared as strings taken from the node
    # label, so this presumably expects ``data.NCBI_id`` to hold strings, and
    # layer names without "_" (the split is on the first underscore) — confirm.
    pegasus_scores = dict(zip(data.NCBI_id, data.Score))
    return {
        node: pegasus_scores.get(node.split("_", 1)[-1], 0.0)
        for node in graph.nodes
    }


In [None]:
# Build the multilayer graph once from the NETWORKS constant; the same graph
# object is reused for every disease and alpha in the evaluation loop.
graph = load_multilayer_graph(NETWORKS)

In [None]:
# Collect one metrics object per (disease, alpha, K) combination; they are
# concatenated into a single table at the end of the cell.
results = []

for disease in tqdm(DISEASES):
    # Per-disease inputs: they do not depend on alpha, so they are loaded once
    # outside the inner loop.
    pegasus_data = load_pegasus_results(disease)
    pagerank_seeds = init_multilayer_scores(graph, pegasus_data)
    gene_seeds_ncbi, ncbi_targets = load_seeds_and_targets(disease)
    # Remove duplicate targets (order is not preserved by the set round-trip).
    ncbi_targets = list(set(ncbi_targets))
    for alpha in tqdm(ALPHAS):
        # NOTE(review): alpha is presumably the restart/damping parameter of the
        # random walk — confirm in rwr_functions.
        rwr_scores = perform_rwr_nx(alpha, graph, pagerank_seeds)
        rwr_results = process_rwr_results_nx(rwr_scores, graph, pegasus_data, pagerank_seeds) 
        # Split the "Gene NCBI ID" values at the first underscore into a
        # "Network" part and the remaining identifier (presumably these are the
        # "<network>_<gene>" node labels of the multilayer graph — confirm).
        rwr_results[["Network", "Gene NCBI ID"]] = rwr_results["Gene NCBI ID"].str.split("_", n=1, expand=True)
        # Keep a single row per identifier. Which layer's row survives depends
        # on the row order returned by process_rwr_results_nx — TODO confirm it
        # is sorted so that the intended (e.g. best-scoring) row comes first.
        rwr_results = rwr_results.drop_duplicates("Gene NCBI ID", keep="first")
        for K in Ks:
            metrics = calculate_metrics(rwr_results, K, gene_seeds_ncbi, ncbi_targets, "Multilayer", alpha, disease, scoring="Score")
            results.append(metrics)
                
# From here on `results` is the concatenated pandas object, not the list.
results = pd.concat(results)
results

In [None]:
# Persist the metrics table. The path is built once so the directory that is
# created and the directory that is written to cannot drift apart; `os` comes
# from the explicit import in the import cell rather than from a star import.
output_path = os.path.join("outputs", "multilayer_metrics.csv")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results.to_csv(output_path, index=False)