# ORA
General over-representation analysis (ORA) with a custom gene set, plus the specific wrappers for KEGG, Reactome and GO.

In [2]:
import pandas as pd
from pypathway import Reactome, GO, KEGG, ORA
from pypathway import ColorectalCancer, IdMapping, GMTUtils
from pypathway import EnrichmentExport
import os

In [3]:
# Parse the hallmark gene sets (Entrez IDs) from a GMT file.
gmt_path = "../../pypathway/tests/gmt_file/h.all.v6.0.entrez.gmt"
gmt = GMTUtils.parse_gmt_file(gmt_path)

In [3]:
# Load the ColorectalCancer example dataset from pypathway.
# The cells below read its `deg_list` and `background` attributes.
c = ColorectalCancer()

In [4]:
# Size of the dataset: number of entries in the DEG list and in the background.
n_deg = len(c.deg_list)
n_background = len(c.background)
n_deg, n_background

(5320, 17216)

In [5]:
# Run ORA with the DEG list, the background list and the gene sets parsed from the GMT file.
res_h = ORA.run(c.deg_list, c.background, gmt)

In [6]:
# Show the first rows of the ORA result table.
res_h.table.head()

Unnamed: 0,name,mapped,number in study,p-value,fdr
0,HALLMARK_IL2_STAT5_SIGNALING,188,84,4.488864e-05,0.000107
1,HALLMARK_APOPTOSIS,146,74,4.48256e-07,1e-06
2,HALLMARK_NOTCH_SIGNALING,30,12,0.1874339,0.246624
3,HALLMARK_KRAS_SIGNALING_DN,184,43,0.9907251,0.999262
4,HALLMARK_INTERFERON_ALPHA_RESPONSE,87,31,0.1989797,0.255102


In [7]:
# Draw the ORA result as an inline bar plot.
res_h.plot()

## KEGG

In [4]:
# KEGG enrichment for a given organism; 'hsa' selects the Homo sapiens (human) pathways.
r_kg = KEGG.run(c.deg_list, c.background, 'hsa')

In [9]:
# Show the first rows of the KEGG result table.
r_kg.table.head()

Unnamed: 0,ID,Name,mapped,deg,p-value,fdr
0,hsa04215,Apoptosis - multiple species - Homo sapiens (h...,27,14,0.01854966,0.06119475
1,hsa05320,Autoimmune thyroid disease - Homo sapiens (human),41,10,0.8592329,0.9576304
2,hsa04932,Non-alcoholic fatty liver disease (NAFLD) - Ho...,132,73,4.781813e-09,3.06036e-07
3,hsa04071,Sphingolipid signaling pathway - Homo sapiens ...,104,41,0.03957681,0.1047602
4,hsa05167,Kaposi's sarcoma-associated herpesvirus infect...,168,67,0.008118105,0.03608047


In [10]:
# Plot the KEGG enrichment result.
r_kg.plot()

## Reactome

In [11]:
# Example of using the wrapper for Reactome gene set enrichment analysis

In [12]:
# Convert the Entrez IDs of the DEG list to gene symbols. For every returned
# entry whose second element is non-empty, keep the first item of that element.
conversions = IdMapping.convert(input_id=c.deg_list, organism='hsa', source='ENTREZID', target='SYMBOL')
sybs = [entry[1][0] for entry in conversions if entry[1]]

In [13]:
# Reactome takes a list of gene symbols; only the first 10 converted symbols
# are submitted in this example.
first_symbols = sybs[:10]
r = Reactome.run(first_symbols, organism='Homo sapiens')

4


In [14]:
# Show the first rows of the Reactome result table.
r.table.head()

Unnamed: 0,name,dbId,found,p-value,fdr,species
0,Acetylation,156582,2,0.000722,0.031066,Homo sapiens
1,Defective SLC6A2 causes orthostatic intoleranc...,5619109,1,0.007301,0.153318,Homo sapiens
2,Amino acid transport across the plasma membrane,352230,2,0.012057,0.168804,Homo sapiens
3,Astrocytic Glutamate-Glutamine Uptake And Meta...,210455,1,0.024136,0.18406,Homo sapiens
4,Neurotransmitter uptake and metabolism In glia...,112313,1,0.024136,0.18406,Homo sapiens


## Gene ontology

In [4]:
# Build the Entrez ID -> GO association for the background genes with IdMapping;
# the resulting dict is passed to GO.run as the association input.
# Details of IdMapping are covered in the utils section.
# NOTE(review): this rebinds `r`, which the Reactome cells use for their result
# object; re-running `r.table.head()` after this cell would hit the dict instead.
r = IdMapping.convert_to_dict(input_id=c.background, source='ENTREZID', target="GO", organism='hsa')

In [5]:
# Run GO enrichment analysis via GOATOOLS.
# The study and population inputs are lists of IDs (converted to str here);
# the association input is a dict.
# `obo` is the path of the GO .obo file itself (not its folder) and must be a
# valid filesystem path.
path = os.path.join(os.getcwd(), "go.obo")  # portable join instead of manual "/" concatenation
rg = GO.run([str(x) for x in c.deg_list], [str(x) for x in c.background], r, obo=path)

load obo file /Users/yangxu/PyPathway/examples/analysis/go.obo
/Users/yangxu/PyPathway/examples/analysis/go.obo: fmt(1.2) rel(2017-04-28) 48,589 GO Terms
fisher module not installed.  Falling back on scipy.stats.fisher_exact


Propagating term counts to parents ..


15,253 out of 17,216 population items found in association
Calculating uncorrected p-values using fisher_scipy_stats
 5,104 out of  5,320 study items found in association
Running multitest correction: local bonferroni
Running multitest correction: local sidak
Running multitest correction: local holm
  15,739 GO terms are associated with 5,104 of 5,320 study items
  20,709 GO terms are associated with 15,253 of 17,216 population items


In [8]:
# File-based inputs: study, population and association are given as file paths.
# GO.run is a wrapper around GOATOOLS.
# GitHub: https://github.com/tanghaibao/goatools
# Cite: Haibao Tang et al. (2015). GOATOOLS: Tools for Gene Ontology. Zenodo. 10.5281/zenodo.31628.

# Locate the GOATOOLS checkout under the current user's home directory instead
# of hard-coding one machine's absolute path. The trailing "" component keeps
# the final path separator so the string concatenations below still work.
path = os.path.join(os.path.expanduser("~"), "goatools", "")
rg = GO.run(path + 'data/study', path + 'data/population', path + 'data/association',
        obo=path + 'go-basic.obo')

load obo file /Users/yangxu/goatools/go-basic.obo
/Users/yangxu/goatools/go-basic.obo: fmt(1.2) rel(2017-04-28) 48,589 GO Terms
fisher module not installed.  Falling back on scipy.stats.fisher_exact


Propagating term counts to parents ..
goids not found: {'GO:0022625', 'GO:0022627'}


31,855 out of 33,239 population items found in association
Calculating uncorrected p-values using fisher_scipy_stats
   269 out of    276 study items found in association
Running multitest correction: local bonferroni
Running multitest correction: local sidak
Running multitest correction: local holm
  797 GO terms are associated with 269 of 276 study items
  6,122 GO terms are associated with 31,855 of 33,239 population items


In [8]:
# Show the first rows of the GO result table.
rg.table.head()

Unnamed: 0,ID,NS,enrichment,name,ratio_in_study,ratio_in_pop,p_uncorrected,depth,study_count,p_bonferroni,p_sidak,p_holm,hit
0,GO:0006464,BP,e,cellular protein modification process,33/276,1727/33239,8e-06,n.a.,33,0.0505,0.0492,0.0505,"AT1G13580, AT1G66610, AT1G66860, AT1G66980, AT..."
1,GO:0036211,BP,e,protein modification process,33/276,1727/33239,8e-06,n.a.,33,0.0505,0.0492,0.0505,"AT1G13580, AT1G66610, AT1G66860, AT1G66980, AT..."
2,GO:0006468,BP,e,protein phosphorylation,22/276,922/33239,1.1e-05,n.a.,22,0.066,0.0643,0.066,"AT1G66980, AT2G29220, AT2G41140, AT2G41970, AT..."
3,GO:0016310,BP,e,phosphorylation,22/276,996/33239,3.5e-05,n.a.,22,0.213,0.207,0.212,"AT1G66980, AT2G29220, AT2G41140, AT2G41970, AT..."
4,GO:0043412,BP,e,macromolecule modification,33/276,1877/33239,5.7e-05,n.a.,33,0.351,0.342,0.351,"AT1G13580, AT1G66610, AT1G66860, AT1G66980, AT..."


In [9]:
# Interactive graph of the GO result; color displays the significance.
rg.graph()

In [7]:
# Test export: pass a list of enrichment results (here the same GO result twice).
# The return value gets its own name so that `c`, the ColorectalCancer dataset
# read by the earlier cells (c.deg_list, c.background), is not overwritten.
export_result = EnrichmentExport.export([rg, rg])