In [1]:
import pybel
import pathway_forte
import heme_knowledge
from bio2bel_kegg.manager import Manager as KeggManager
from bio2bel_reactome.manager import Manager as ReactomeManager
from bio2bel_wikipathways.manager import Manager as WPManager

In [2]:
from pathway_forte.pathway_enrichment.over_representation import perform_hypergeometric_test

In [3]:
# Load the heme graph from the heme_knowledge package; the next cell prints its summary.
heme = heme_knowledge.get_graph()

In [4]:
# Print summary statistics for the graph (node, edge, citation and author counts,
# network density, number of components).
heme.summarize()

Heme Curation v0.0.1-dev
Number of Nodes: 764
Number of Edges: 2999
Number of Citations: 46
Number of Authors: 330
Network Density: 5.14E-03
Number of Components: 10


In [5]:
# Collect the names used in the graph with PyBEL's get_names helper.
# The result is indexed by namespace keyword in the next cell.
names = pybel.struct.summary.node_summary.get_names(heme)

In [6]:
# The names filed under the "HGNC" namespace are the query gene set that is
# passed to every hypergeometric test below.
heme_geneset = names["HGNC"]

Getting the pathway gene sets out of the KEGG, Reactome, and WikiPathways databases

In [7]:
# One Bio2BEL manager per pathway database, each constructed without arguments.
kegg_manager, reactome_manager, wp_manager = (
    KeggManager(),
    ReactomeManager(),
    WPManager(),
)

In [8]:
# Export the gene sets from each manager, in KEGG / Reactome / WikiPathways order.
kegg_gene_sets, reactome_gene_sets, wp_gene_sets = (
    manager.export_gene_sets()
    for manager in (kegg_manager, reactome_manager, wp_manager)
)

In [9]:
def preprocess_genesets(dictionary, max_size=200, database="fake_id"):
    """Drop large gene sets and re-key the rest as ``(key, database)`` tuples.

    The returned mapping is what this notebook hands to
    ``perform_hypergeometric_test``; the ``database`` label is the value that
    appears in the ``database`` column of the result tables shown below.

    :param dictionary: mapping from a pathway key to its collection of genes
        (anything supporting ``len``).
    :param max_size: exclusive upper bound on the gene set size. Gene sets with
        ``max_size`` or more members are discarded. Pass ``None`` to keep every
        gene set regardless of size.
    :param database: label stored as the second element of every new key.
    :return: a new dict mapping ``(key, database)`` to the original, unmodified
        gene set. The input mapping is not mutated.
    """
    return {
        (key, database): value
        for key, value in dictionary.items()
        # Strictly "less than": a set with exactly max_size genes is excluded.
        if max_size is None or len(value) < max_size
    }

In [10]:
# Run the same thresholded hypergeometric test of the heme gene set against the
# size-filtered gene sets of each database (KEGG, Reactome, WikiPathways).
kegg_results, reactome_results, wp_results = (
    perform_hypergeometric_test(
        heme_geneset,
        preprocess_genesets(gene_sets),
        apply_threshold=True,
    )
    for gene_sets in (kegg_gene_sets, reactome_gene_sets, wp_gene_sets)
)

In [11]:
# Show the KEGG rows with the smallest q-values (head() keeps the first five).
kegg_results.sort_values("qval").head()

Unnamed: 0,database,pathway_id,pval,qval
264,fake_id,Malaria - Homo sapiens (human),8.531881e-33,2.6278189999999998e-30
165,fake_id,Complement and coagulation cascades - Homo sap...,7.127936e-31,1.0977020000000001e-28
169,fake_id,Toll-like receptor signaling pathway - Homo sa...,1.53356e-30,1.5744550000000001e-28
272,fake_id,Influenza A - Homo sapiens (human),5.425161e-28,4.1773739999999997e-26
259,fake_id,Pertussis - Homo sapiens (human),6.3684010000000006e-27,3.922935e-25


In [12]:
# Show the Reactome rows with the smallest q-values (head() keeps the first five).
reactome_results.sort_values("qval").head()

Unnamed: 0,database,pathway_id,pval,qval
1963,fake_id,Toll-like Receptor Cascades,2.608813e-25,5.598512e-22
1385,fake_id,Platelet degranulation,4.9490280000000005e-23,5.3103069999999997e-20
1623,fake_id,Response to elevated platelet cytosolic Ca2+,9.399391e-23,6.723697999999999e-20
738,fake_id,Formation of Fibrin Clot (Clotting Cascade),2.1204229999999998e-20,1.1376070000000001e-17
1957,fake_id,Toll Like Receptor 4 (TLR4) Cascade,1.773519e-19,7.611943000000001e-17


In [13]:
# Show the WikiPathways rows with the smallest q-values (head() keeps the first five).
wp_results.sort_values("qval").head()

Unnamed: 0,database,pathway_id,pval,qval
111,fake_id,Toll-like Receptor Signaling Pathway,1.53356e-30,7.913169e-28
247,fake_id,Regulation of toll-like receptor signaling pat...,1.694942e-29,4.37295e-27
270,fake_id,Human Complement System,6.260545e-29,1.076814e-26
341,fake_id,Complement and Coagulation Cascades,1.133983e-26,1.462838e-24
64,fake_id,Selenium Micronutrient Network,8.509793e-24,8.782107e-22


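The results of the enrichment analysis highlight Toll-like receptor signaling. Ranked by q-value, it is the most enriched pathway in both Reactome ("Toll-like Receptor Cascades") and WikiPathways ("Toll-like Receptor Signaling Pathway"), and the third most enriched pathway in KEGG ("Toll-like receptor signaling pathway").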