In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from rwr_functions import *
from constants import *
import json
import os
from sknetwork.data import from_edge_list

In [None]:
def _read_json(path):
    """Parse the JSON document stored at ``path`` and return the decoded object."""
    with open(path, "r") as handle:
        return json.load(handle)


# Per-network lookup tables, one JSON file each; the node table is indexed by
# network name further down in the notebook.
networks_n_edges = _read_json("processed_data/networks/networks_n_edges.json")
networks_n_nodes = _read_json("processed_data/networks/networks_n_nodes.json")

# Display the node table as the cell output.
networks_n_nodes

In [None]:
# Average-ensemble evaluation.
#
# For every disease and every alpha, the random walk is run on each network in
# NETWORKS. Each result row contributes two values for its gene:
#   * its 1-based row position divided by networks_n_nodes[netname], and
#   * its "Final Score" divided by 1% of the summed seed values.
# Genes that collected at least two values are averaged into a rank-based and
# a score-based table, and both tables are evaluated at every K in Ks.
results = []

for disease in tqdm(DISEASES):
    pegasus_data = load_pegasus_results(disease)
    gene_seeds_ncbi, ncbi_targets = load_seeds_and_targets(disease)

    # De-duplicate the targets (the set round-trip does not preserve order).
    ncbi_targets = list(set(ncbi_targets))

    for alpha in tqdm(ALPHAS):
        genes_ranks = {}   # gene id (str) -> list of normalised ranks
        genes_scores = {}  # gene id (str) -> list of normalised scores
        for netname in NETWORKS:
            # NOTE(review): the graph is reloaded for every (disease, alpha)
            # pair. If the helpers below do not mutate it, loading each network
            # once up front would avoid the repeated work - confirm first.
            graph = load_graph_nx(netname)
            pagerank_seeds = init_rwr_scores_nx(graph, pegasus_data)
            rwr_scores = perform_rwr_nx(alpha, graph, pagerank_seeds)
            rwr_results = process_rwr_results_nx(rwr_scores, graph, pegasus_data, pagerank_seeds)

            score_norm = np.sum(list(pagerank_seeds.values())) * 0.01
            n_nodes = networks_n_nodes[netname]

            # The rank is simply the 1-based row position, so this presumably
            # relies on rwr_results already being ordered best-first - TODO
            # confirm. iterrows() is kept deliberately: it determines how the
            # gene id is stringified, and the keys must stay unchanged.
            for rank, (row_label, row) in enumerate(rwr_results.iterrows(), start=1):
                gn = str(row["Gene NCBI ID"])
                genes_ranks.setdefault(gn, []).append(rank / n_nodes)
                genes_scores.setdefault(gn, []).append(row["Final Score"] / score_norm)

        # NOTE(review): the rank records are also labelled "Method": "Score";
        # this looks like a copy-paste slip but is kept as-is because it is not
        # visible here whether calculate_metrics reads that column.
        rank_records = [
            {"Gene NCBI ID": gene, "Avg. Rank": np.mean(values), "Method": "Score", "Disease": disease, "Alpha": alpha}
            for gene, values in genes_ranks.items()
            if len(values) >= 2
        ]
        # Explicit columns keep the sort key present even when no gene
        # qualified; a column-less frame would raise KeyError in sort_values.
        genes_ranks_df = pd.DataFrame(
            rank_records,
            columns=["Gene NCBI ID", "Avg. Rank", "Method", "Disease", "Alpha"],
        ).sort_values(by="Avg. Rank", ascending=True)

        score_records = [
            {"Gene NCBI ID": gene, "Avg. Score": np.mean(values), "Method": "Score", "Disease": disease, "Alpha": alpha}
            for gene, values in genes_scores.items()
            if len(values) >= 2
        ]
        genes_scores_df = pd.DataFrame(
            score_records,
            columns=["Gene NCBI ID", "Avg. Score", "Method", "Disease", "Alpha"],
        ).sort_values(by="Avg. Score", ascending=False)

        for K in Ks:
            metrics = calculate_metrics(genes_ranks_df, K, gene_seeds_ncbi, ncbi_targets, "Multilayer", alpha, disease, scoring="Avg. Rank")
            results.append(metrics)
            metrics = calculate_metrics(genes_scores_df, K, gene_seeds_ncbi, ncbi_targets, "Multilayer", alpha, disease, scoring="Avg. Score")
            results.append(metrics)


# Stack the per-(disease, alpha, K, scoring) metric tables into one frame and
# show it as the cell output.
results = pd.concat(results)
results

In [None]:
# Write the combined metrics table to disk, creating the output folder first
# when it does not exist yet. The row index is not written.
output_dir = "outputs"
metrics_csv_path = "outputs/AVG_ensembles_metrics.csv"

os.makedirs(output_dir, exist_ok=True)
results.to_csv(metrics_csv_path, index=False)