In [1]:
import pandas as pd
from statsmodels.stats.multitest import multipletests
import seaborn as sns
import os
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import decoupler as dc
import mygene
import seaborn as sns
from scipy.stats import wilcoxon
import matplotlib.pyplot as plt
from statannotations.Annotator import Annotator

In [2]:
collectri = dc.op.collectri(organism="human")
print(len(collectri))

42990


In [113]:
tissue = "Breast"
tissue_path = f"gtex_fdr_results/{tissue}/random_100_with_shuffled_occurences/fdr_grn_nontf_100_numtf_-1.csv"
tissue_df = pd.read_csv(tissue_path)

# Filter to top 1000 highest expressed targets.
print("Num edges before filtering: ", len(tissue_df))
high_exp_file = os.path.join("gtex", tissue, f"{tissue}_highly_expressed_targets.csv")
high_df = pd.read_csv(high_exp_file)
top_targets = list(high_df['target'])[:1000]
tissue_df = tissue_df[tissue_df['target'].isin(top_targets)]
print("Num edges after subsetting to highest expressed genes: ", len(tissue_df))

# Filter top 10% of targets for each TF.
density = 1.0
tissue_df = (
    tissue_df.groupby("TF", group_keys=False)
      .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
)
print(f"Num edges after filtering to top {density} edges per TF: ", len(tissue_df))

Num edges before filtering:  1746068
Num edges after subsetting to highest expressed genes:  109710
Num edges after filtering to top 1.0 edges per TF:  109710


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))


In [114]:
# Translate ensemble gene IDs to gene symbols.
to_translate = list(set(tissue_df['TF']).union(set(tissue_df['target'])))

mg = mygene.MyGeneInfo()
# Query Ensembl IDs and return gene symbols
result = mg.querymany(to_translate, scopes='ensembl.gene', fields='symbol', species='human')

# Convert to a mapping dictionary
ensembl_to_symbol = {r['query']: r.get('symbol', None) for r in result}
# Filter out genes that have no matching gene symbol.
failure_genes = [x for x,y in ensembl_to_symbol.items() if not y]
success_genes = list(set(to_translate) - set(failure_genes))
# Subset GRN to genes that have matching translation.
tissue_df = tissue_df[tissue_df['TF'].isin(success_genes)]
tissue_df = tissue_df[tissue_df['target'].isin(success_genes)]
# Apply the translation to both columns
tissue_df['TF'] = tissue_df['TF'].map(ensembl_to_symbol)
tissue_df['target'] = tissue_df['target'].map(ensembl_to_symbol)


2025-10-28 10:02:01 | [INFO] querying 1-1000 ...
2025-10-28 10:02:03 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-10-28 10:02:04 | [INFO] querying 1001-2000 ...
2025-10-28 10:02:05 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-10-28 10:02:06 | [INFO] querying 2001-2596 ...
2025-10-28 10:02:07 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-10-28 10:02:08 | [INFO] Finished.
2025-10-28 10:02:08 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


In [121]:
# FDR control of remaining edges.
_, pvals_bh, _, _ = multipletests(tissue_df['pvalue'], method='fdr_bh')
tissue_df['pvalue_bh'] = pvals_bh
print("Minimal occuring p-value: ", min(tissue_df['pvalue_bh']))
signif_df = tissue_df[tissue_df['pvalue_bh']<=0.05].copy()
print("Number of total edges: ", len(tissue_df))
print("Number of signif. edges: ", len(signif_df))

Minimal occuring p-value:  0.012201273114412216
Number of total edges:  109677
Number of signif. edges:  20364


In [120]:
# Compute intersection of non-thresholded and thresholded edges.
all_edges = set(zip(tissue_df['TF'], tissue_df['target']))
signif_edges = set(zip(signif_df['TF'], signif_df['target']))
collectri_edges = set(zip(collectri['source'], collectri['target']))

# Find the intersection
all_common = all_edges & collectri_edges
signif_common = signif_edges & collectri_edges

# Compute precisions.
precision_all = len(all_common) / len(all_edges)
precision_signif = len(signif_common) / len(signif_edges)

print("Precision of all edges: ", precision_all)
print("Precision of significant edges: ", precision_signif)
print("Total amount of all edges: ", len(all_common))
print("Total amound of signif edges: ", len(signif_common))

Precision of all edges:  0.0008844151462932064
Precision of significant edges:  0.0015590200445434299
Total amount of all edges:  97
Total amound of signif edges:  14


In [3]:
tissues = ["Breast", "Kidney", "Testis"]
res_dict = {'tissue': [], 'type' : [], 'precision' : [], 'recall' : []}

for tissue in tissues:
    print("Processing tissue ", tissue)
    for i in range(10):
        print("Num run = ", i)
        tissue_path = f"gtex_fdr_results/{tissue}/random_robustness_check/fdr_grn_nontf_100_numtf_-1_input_{i}.csv"
        tissue_df = pd.read_csv(tissue_path)

        # Filter to top 1000 highest expressed targets.
        print("Num edges before filtering: ", len(tissue_df))
        high_exp_file = os.path.join("gtex", tissue, f"{tissue}_highly_expressed_targets.csv")
        high_df = pd.read_csv(high_exp_file)
        top_targets = list(high_df['target'])[:1000]
        tissue_df = tissue_df[tissue_df['target'].isin(top_targets)]
        print("Num edges after subsetting to highest expressed genes: ", len(tissue_df))

        # Filter top 10% of targets for each TF.
        density = 1.0
        tissue_df = (
            tissue_df.groupby("TF", group_keys=False)
            .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
        )
        print(f"Num edges after filtering to top {density} edges per TF: ", len(tissue_df))
        
        # Translate ensemble gene IDs to gene symbols.
        to_translate = list(set(tissue_df['TF']).union(set(tissue_df['target'])))

        mg = mygene.MyGeneInfo()
        # Query Ensembl IDs and return gene symbols
        result = mg.querymany(to_translate, scopes='ensembl.gene', fields='symbol', species='human')

        # Convert to a mapping dictionary
        ensembl_to_symbol = {r['query']: r.get('symbol', None) for r in result}
        # Filter out genes that have no matching gene symbol.
        failure_genes = [x for x,y in ensembl_to_symbol.items() if not y]
        success_genes = list(set(to_translate) - set(failure_genes))
        # Subset GRN to genes that have matching translation.
        tissue_df = tissue_df[tissue_df['TF'].isin(success_genes)]
        tissue_df = tissue_df[tissue_df['target'].isin(success_genes)]
        # Apply the translation to both columns
        tissue_df['TF'] = tissue_df['TF'].map(ensembl_to_symbol)
        tissue_df['target'] = tissue_df['target'].map(ensembl_to_symbol)
        
        # FDR control of remaining edges.
        _, pvals_bh, _, _ = multipletests(tissue_df['pvalue'], method='fdr_bh')
        tissue_df['pvalue_bh'] = pvals_bh
        signif_df = tissue_df[tissue_df['pvalue_bh']<=0.05].copy()
        print("Number of total edges: ", len(tissue_df))
        print("Number of signif. edges: ", len(signif_df))
        
        # Compute intersection of non-thresholded and thresholded edges.
        all_edges = set(zip(tissue_df['TF'], tissue_df['target']))
        signif_edges = set(zip(signif_df['TF'], signif_df['target']))
        collectri_edges = set(zip(collectri['source'], collectri['target']))
        num_collectri_edges = len(collectri_edges)

        # Find the intersection
        all_common = all_edges & collectri_edges
        signif_common = signif_edges & collectri_edges

        # Compute precisions.
        precision_all = len(all_common) / len(all_edges)
        precision_signif = len(signif_common) / len(signif_edges)
        
        recall_all = len(all_common) / num_collectri_edges
        recall_signif = len(signif_common) / num_collectri_edges
        
        res_dict['tissue'].append(tissue)
        res_dict['type'].append('all')
        res_dict['precision'].append(precision_all)
        res_dict['recall'].append(recall_all)
        
        res_dict['tissue'].append(tissue)
        res_dict['type'].append('signif')
        res_dict['precision'].append(precision_signif)
        res_dict['recall'].append(recall_signif)
    
    

Processing tissue  Breast
Num run =  0
Num edges before filtering:  1746068
Num edges after subsetting to highest expressed genes:  109710


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:53:37 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  109710


2025-11-07 16:53:41 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:42 | [INFO] querying 1001-2000 ...
2025-11-07 16:53:43 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:44 | [INFO] querying 2001-2596 ...
2025-11-07 16:53:45 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:46 | [INFO] Finished.
2025-11-07 16:53:46 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  109677
Number of signif. edges:  20317
Num run =  1
Num edges before filtering:  1800126
Num edges after subsetting to highest expressed genes:  115173


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:53:48 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  115173


2025-11-07 16:53:50 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:51 | [INFO] querying 1001-2000 ...
2025-11-07 16:53:52 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:53 | [INFO] querying 2001-2596 ...
2025-11-07 16:53:54 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:53:55 | [INFO] Finished.
2025-11-07 16:53:55 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  115149
Number of signif. edges:  21170
Num run =  2
Num edges before filtering:  1765320
Num edges after subsetting to highest expressed genes:  109699


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:53:58 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  109699


2025-11-07 16:54:00 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:01 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:02 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:03 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:04 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:05 | [INFO] Finished.
2025-11-07 16:54:05 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  109674
Number of signif. edges:  20371
Num run =  3
Num edges before filtering:  1765677
Num edges after subsetting to highest expressed genes:  111176


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:08 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  111176


2025-11-07 16:54:10 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:11 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:12 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:13 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:14 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:15 | [INFO] Finished.
2025-11-07 16:54:15 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  111147
Number of signif. edges:  20716
Num run =  4
Num edges before filtering:  1727254
Num edges after subsetting to highest expressed genes:  109301


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:17 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  109301


2025-11-07 16:54:19 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:20 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:21 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:22 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:23 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:24 | [INFO] Finished.
2025-11-07 16:54:24 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  109270
Number of signif. edges:  20304
Num run =  5
Num edges before filtering:  1740641
Num edges after subsetting to highest expressed genes:  110095


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:26 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  110095


2025-11-07 16:54:28 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:29 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:30 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:31 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:32 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:33 | [INFO] Finished.
2025-11-07 16:54:33 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  110075
Number of signif. edges:  20306
Num run =  6
Num edges before filtering:  1775769
Num edges after subsetting to highest expressed genes:  112796


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:35 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  112796


2025-11-07 16:54:37 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:38 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:39 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:40 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:41 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:42 | [INFO] Finished.
2025-11-07 16:54:42 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  112752
Number of signif. edges:  20919
Num run =  7
Num edges before filtering:  1775574
Num edges after subsetting to highest expressed genes:  113244


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:44 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  113244


2025-11-07 16:54:46 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:47 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:48 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:49 | [INFO] querying 2001-2596 ...
2025-11-07 16:54:50 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:51 | [INFO] Finished.
2025-11-07 16:54:51 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  113207
Number of signif. edges:  21366
Num run =  8
Num edges before filtering:  1711074
Num edges after subsetting to highest expressed genes:  106369


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:54:54 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  106369


2025-11-07 16:54:56 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:57 | [INFO] querying 1001-2000 ...
2025-11-07 16:54:58 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:54:59 | [INFO] querying 2001-2596 ...
2025-11-07 16:55:00 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:01 | [INFO] Finished.
2025-11-07 16:55:01 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  106350
Number of signif. edges:  19737
Num run =  9
Num edges before filtering:  1719606
Num edges after subsetting to highest expressed genes:  108976


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:03 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  108976


2025-11-07 16:55:05 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:06 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:08 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:09 | [INFO] querying 2001-2596 ...
2025-11-07 16:55:09 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:10 | [INFO] Finished.
2025-11-07 16:55:10 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  108924
Number of signif. edges:  19970
Processing tissue  Kidney
Num run =  0
Num edges before filtering:  2079742
Num edges after subsetting to highest expressed genes:  132867


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:13 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  132867


2025-11-07 16:55:15 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:17 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:18 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:19 | [INFO] querying 2001-2603 ...
2025-11-07 16:55:20 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:21 | [INFO] Finished.
2025-11-07 16:55:21 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  132675
Number of signif. edges:  20001
Num run =  1
Num edges before filtering:  2180651
Num edges after subsetting to highest expressed genes:  139179


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:23 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  139179


2025-11-07 16:55:25 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:26 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:28 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:29 | [INFO] querying 2001-2603 ...
2025-11-07 16:55:29 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:30 | [INFO] Finished.
2025-11-07 16:55:30 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  138935
Number of signif. edges:  22312
Num run =  2
Num edges before filtering:  2121221
Num edges after subsetting to highest expressed genes:  132457


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:33 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  132457


2025-11-07 16:55:35 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:36 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:37 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:38 | [INFO] querying 2001-2603 ...
2025-11-07 16:55:39 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:40 | [INFO] Finished.
2025-11-07 16:55:40 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  132227
Number of signif. edges:  20144
Num run =  3
Num edges before filtering:  2094371
Num edges after subsetting to highest expressed genes:  132616


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:43 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  132616


2025-11-07 16:55:44 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:45 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:46 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:47 | [INFO] querying 2001-2603 ...
2025-11-07 16:55:48 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:49 | [INFO] Finished.
2025-11-07 16:55:49 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  132458
Number of signif. edges:  20088
Num run =  4
Num edges before filtering:  2128972
Num edges after subsetting to highest expressed genes:  134239


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:55:52 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  134239


2025-11-07 16:55:54 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:55 | [INFO] querying 1001-2000 ...
2025-11-07 16:55:56 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:57 | [INFO] querying 2001-2603 ...
2025-11-07 16:55:58 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:55:59 | [INFO] Finished.
2025-11-07 16:55:59 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  134063
Number of signif. edges:  21435
Num run =  5
Num edges before filtering:  2132521
Num edges after subsetting to highest expressed genes:  135846


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:01 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  135846


2025-11-07 16:56:03 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:04 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:05 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:06 | [INFO] querying 2001-2603 ...
2025-11-07 16:56:07 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:08 | [INFO] Finished.
2025-11-07 16:56:08 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  135630
Number of signif. edges:  20471
Num run =  6
Num edges before filtering:  2170837
Num edges after subsetting to highest expressed genes:  137485


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:11 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  137485


2025-11-07 16:56:13 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:14 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:15 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:16 | [INFO] querying 2001-2603 ...
2025-11-07 16:56:17 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:18 | [INFO] Finished.
2025-11-07 16:56:18 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  137285
Number of signif. edges:  22093
Num run =  7
Num edges before filtering:  2102217
Num edges after subsetting to highest expressed genes:  132822


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:20 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  132822


2025-11-07 16:56:22 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:23 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:24 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:25 | [INFO] querying 2001-2603 ...
2025-11-07 16:56:26 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:27 | [INFO] Finished.
2025-11-07 16:56:27 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  132643
Number of signif. edges:  21371
Num run =  8
Num edges before filtering:  2042928
Num edges after subsetting to highest expressed genes:  128542


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:30 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  128542


2025-11-07 16:56:32 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:33 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:34 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:35 | [INFO] querying 2001-2603 ...
2025-11-07 16:56:36 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:37 | [INFO] Finished.
2025-11-07 16:56:37 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  128372
Number of signif. edges:  19210
Num run =  9
Num edges before filtering:  2111047
Num edges after subsetting to highest expressed genes:  134006


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:39 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  134006


2025-11-07 16:56:41 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:42 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:43 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:44 | [INFO] querying 2001-2603 ...
2025-11-07 16:56:45 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:46 | [INFO] Finished.
2025-11-07 16:56:46 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  133831
Number of signif. edges:  21428
Processing tissue  Testis
Num run =  0
Num edges before filtering:  2049277
Num edges after subsetting to highest expressed genes:  112339


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:49 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  112339


2025-11-07 16:56:51 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:52 | [INFO] querying 1001-2000 ...
2025-11-07 16:56:53 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:54 | [INFO] querying 2001-2827 ...
2025-11-07 16:56:55 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:56:56 | [INFO] Finished.
2025-11-07 16:56:56 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  112301
Number of signif. edges:  23378
Num run =  1
Num edges before filtering:  2058555
Num edges after subsetting to highest expressed genes:  113842


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:56:59 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  113842


2025-11-07 16:57:01 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:02 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:03 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:04 | [INFO] querying 2001-2827 ...
2025-11-07 16:57:05 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:06 | [INFO] Finished.
2025-11-07 16:57:06 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  113805
Number of signif. edges:  24082
Num run =  2
Num edges before filtering:  2009603
Num edges after subsetting to highest expressed genes:  108295


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:57:09 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  108295


2025-11-07 16:57:11 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:12 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:13 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:14 | [INFO] querying 2001-2827 ...
2025-11-07 16:57:15 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:16 | [INFO] Finished.
2025-11-07 16:57:16 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  108279
Number of signif. edges:  23007
Num run =  3
Num edges before filtering:  1973554
Num edges after subsetting to highest expressed genes:  109269


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:57:19 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  109269


2025-11-07 16:57:21 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:22 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:23 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:24 | [INFO] querying 2001-2827 ...
2025-11-07 16:57:25 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:26 | [INFO] Finished.
2025-11-07 16:57:26 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  109231
Number of signif. edges:  22970
Num run =  4
Num edges before filtering:  2042899
Num edges after subsetting to highest expressed genes:  110653


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:57:29 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  110653


2025-11-07 16:57:31 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:32 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:33 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:34 | [INFO] querying 2001-2826 ...
2025-11-07 16:57:35 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:36 | [INFO] Finished.
2025-11-07 16:57:36 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  110632
Number of signif. edges:  22963
Num run =  5
Num edges before filtering:  2086126
Num edges after subsetting to highest expressed genes:  116373


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:57:39 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  116373


2025-11-07 16:57:41 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:42 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:43 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:44 | [INFO] querying 2001-2827 ...
2025-11-07 16:57:45 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:46 | [INFO] Finished.
2025-11-07 16:57:46 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  116351
Number of signif. edges:  24044
Num run =  6
Num edges before filtering:  1996884
Num edges after subsetting to highest expressed genes:  109732


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:57:50 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  109732


2025-11-07 16:57:52 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:53 | [INFO] querying 1001-2000 ...
2025-11-07 16:57:54 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:55 | [INFO] querying 2001-2827 ...
2025-11-07 16:57:56 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:57:57 | [INFO] Finished.
2025-11-07 16:57:57 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  109702
Number of signif. edges:  22861
Num run =  7
Num edges before filtering:  2183466
Num edges after subsetting to highest expressed genes:  120069


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:58:00 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  120069


2025-11-07 16:58:02 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:03 | [INFO] querying 1001-2000 ...
2025-11-07 16:58:04 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:05 | [INFO] querying 2001-2827 ...
2025-11-07 16:58:06 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:07 | [INFO] Finished.
2025-11-07 16:58:07 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  120047
Number of signif. edges:  25200
Num run =  8
Num edges before filtering:  2048629
Num edges after subsetting to highest expressed genes:  111991


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:58:10 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  111991


2025-11-07 16:58:12 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:13 | [INFO] querying 1001-2000 ...
2025-11-07 16:58:15 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:16 | [INFO] querying 2001-2827 ...
2025-11-07 16:58:17 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:18 | [INFO] Finished.
2025-11-07 16:58:18 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  111952
Number of signif. edges:  23477
Num run =  9
Num edges before filtering:  2077780
Num edges after subsetting to highest expressed genes:  115130


  .apply(lambda g: g.nlargest(max(1, int(len(g) * density)), "importance"))
2025-11-07 16:58:21 | [INFO] querying 1-1000 ...


Num edges after filtering to top 1.0 edges per TF:  115130


2025-11-07 16:58:22 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:23 | [INFO] querying 1001-2000 ...
2025-11-07 16:58:25 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:26 | [INFO] querying 2001-2827 ...
2025-11-07 16:58:27 | [INFO] HTTP Request: POST https://mygene.info/v3/query/ "HTTP/1.1 200 OK"
2025-11-07 16:58:28 | [INFO] Finished.
2025-11-07 16:58:28 | [INFO] Pass "returnall=True" to return complete lists of duplicate or missing query terms.


Number of total edges:  115103
Number of signif. edges:  24277


In [4]:
result_df = pd.DataFrame(res_dict)
result_df.to_csv("precision_recall_results_against_collectri.csv", index=False)