In [1]:
# GO-term gene set enrichment analysis (pre-ranked GSEA) using gseapy

In [2]:
import sys
import os
from itertools import chain
from collections import defaultdict
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Display floats with 3 decimals. The fully-qualified option key is used because the bare
# 'precision' pattern no longer resolves to a single option in newer pandas releases.
pd.set_option('display.precision', 3)
import gseapy as gp
import goatools

# Parse the Gene Ontology graph once; the helper functions in this notebook look terms up
# in this module-level `obodag`. The path is relative, so go-basic.obo must be in the
# working directory.
obodag = goatools.obo_parser.GODag('go-basic.obo')

load obo file go-basic.obo
go-basic.obo: fmt(1.2) rel(2017-03-16) 48,478 GO Terms


In [3]:
# NOTE(review): hard-coded absolute path to a local checkout of `metaproteomics`; this cell
# only runs where that directory exists. Consider installing the package or making the
# path configurable.
sys.path.insert(0, "/home/gstupp/projects/metaproteomics")
from metaproteomics import utils
#from metaproteomics.analysis import build_loci

# Directory containing the input files read by this notebook.
BASE = '../out/'
# NOTE(review): the ".pkl.gz" suffix suggests a gzipped pickle — only load from a trusted source.
grouped_loci = utils.load(os.path.join(BASE,"grouped_loci_filt_norm.pkl.gz"))

In [4]:
def make_go2Gene_map(grouped_loci, ontology='MF'):
    """
    Map each GO term of one ontology to the set of locus cluster ids annotated with it.

    A locus counts towards every GO term listed in its ``annotations['go']`` and towards
    every ancestor of that term (as returned by ``get_all_parents``), provided the term
    belongs to the requested namespace. Loci without a 'go' annotation are skipped.

    Parameters
    ----------
    grouped_loci : iterable
        Objects exposing ``annotations`` (supports ``'go' in ...`` and ``['go']``) and
        ``cluster_id``.
    ontology : str
        One of 'MF', 'BP' or 'CC'.

    Returns
    -------
    dict
        GO id -> set of cluster ids.

    Raises
    ------
    KeyError
        If ``ontology`` is not one of the supported codes (raised immediately, even when
        no locus carries GO annotations), or if a GO id is missing from ``obodag``.
    """
    ontology_map = {'MF': 'molecular_function', 'BP': 'biological_process', 'CC': 'cellular_component'}
    # Resolve the namespace once, before looping: a mistyped ontology code then fails
    # right away instead of silently yielding an empty map for unannotated input.
    namespace = ontology_map[ontology]

    out = defaultdict(set)
    for locus in grouped_loci:
        if 'go' not in locus.annotations:
            continue
        for go in locus.annotations['go']:
            term = obodag[go]
            if term.namespace != namespace:
                continue
            out[go].add(locus.cluster_id)
            # Propagate the locus up the hierarchy so broader terms include it too.
            for parent in term.get_all_parents():
                if obodag[parent].namespace == namespace:
                    out[parent].add(locus.cluster_id)

    return dict(out)

def filter_go2gene_map(go_locus, max_fraction=0.5, min_size=5):
    """
    Prune a GO id -> member-set mapping down to informative, non-redundant gene sets.

    Filters are applied in this order:
      1. drop "very broad" terms whose member set covers more than ``max_fraction`` of
         all ids present in the mapping (arbitrary cut-off);
      2. drop terms with fewer than ``min_size`` members;
      3. drop child terms whose member set is identical to that of a parent term;
      4. among sibling terms with identical member sets, keep the first one encountered
         while iterating the mapping and drop the rest.

    Parameters
    ----------
    go_locus : dict
        GO id -> set of member ids. Not modified; a new dict is returned.
    max_fraction : float
        Largest allowed share of all ids for a single term (default 0.5).
    min_size : int
        Smallest allowed number of members (default 5; lowered from 10 because there are
        few proteins compared to what one would find with genes).

    Returns
    -------
    dict
        The filtered mapping.
    """
    # 1. Remove very broad gene sets. Comparing against a product instead of dividing
    # gives the same result but cannot raise ZeroDivisionError when every set is empty.
    all_ids = set(chain.from_iterable(go_locus.values()))
    max_members = max_fraction * len(all_ids)
    go_locus = {term: members for term, members in go_locus.items() if len(members) <= max_members}

    # 2. Remove terms with too few members.
    go_locus = {term: members for term, members in go_locus.items() if len(members) >= min_size}

    # 3. Remove child terms whose gene set is identical to their parent's.
    redundant = set()
    for parent in go_locus:
        for child in obodag[parent].children:
            if child.id in go_locus and go_locus[child.id] == go_locus[parent]:
                redundant.add(child.id)
    go_locus = {term: members for term, members in go_locus.items() if term not in redundant}

    # 4. Remove sibling terms with identical gene sets.
    redundant = set()
    visited = set()
    for term in go_locus:
        # Terms already visited are protected, so a sibling that is itself going to be
        # dropped cannot in turn drop the one we decided to keep.
        visited.add(term)
        for parent in obodag[term].parents:
            siblings = {child.id for child in parent.children}
            # discard() rather than remove(): a key that is not listed under its own
            # parent's children (presumably possible for alternate GO ids — to confirm)
            # must not abort the whole filtering with a KeyError.
            siblings.discard(term)
            for sibling in siblings:
                if sibling in go_locus and sibling not in visited and go_locus[sibling] == go_locus[term]:
                    redundant.add(sibling)
    go_locus = {term: members for term, members in go_locus.items() if term not in redundant}

    return go_locus

def gomap_to_csv(go2gene, out_file = 'test.tsv'):
    """
    Write a GO id -> loci mapping to ``out_file`` as tab-separated text.

    One line is written per term: the term id, the term name looked up in ``obodag``,
    and then each member converted with ``str``, all separated by tabs. The whole text
    is assembled before the file is opened, so a failed term lookup leaves no file behind.
    """
    lines = [
        "{}\t{}\t{}\n".format(term, obodag[term].name, '\t'.join(str(locus) for locus in loci))
        for term, loci in go2gene.items()
    ]
    text = "".join(lines)

    with open(out_file, 'w') as fout:
        fout.write(text)

In [5]:
def run_go_gsea(rank_df, g2g_map, seed, outdir='tmp', gmt_file='temp.gmt',
                min_size=5, max_size=500, permutation_n=10000):
    """
    Run pre-ranked GSEA for every GO term in ``g2g_map`` and return the annotated results.

    The mapping is first written to ``gmt_file`` with ``gomap_to_csv`` because gseapy is
    given the gene sets as a file path here rather than as an in-memory object.

    Parameters
    ----------
    rank_df : DataFrame
        Ranking passed to ``gp.prerank`` as ``rnk``. In this notebook it is a two-column
        frame ('gene_name', 'rank') for a single comparison.
    g2g_map : dict
        GO id -> set of member ids; its length is also used as ``graph_num``.
    seed : int
        Random seed forwarded to ``gp.prerank``.
    outdir : str
        Output directory forwarded to ``gp.prerank``.
    gmt_file : str
        Path of the temporary gene-set file. It is overwritten on every call.
    min_size, max_size : int
        Gene-set size limits forwarded to ``gp.prerank``.
    permutation_n : int
        Number of permutations forwarded to ``gp.prerank``.

    Returns
    -------
    The object returned by ``gp.prerank`` (used as a DataFrame indexed by GO id), with a
    'name' column holding the GO term name, sorted by 'nes' in descending order.
    """
    import gseapy as gp

    gomap_to_csv(g2g_map, gmt_file)

    res = gp.prerank(rnk=rank_df, gene_sets=gmt_file, outdir=outdir, min_size=min_size, max_size=max_size,
                     permutation_n=permutation_n, graph_num=len(g2g_map), seed=seed)

    # Attach the human-readable GO term name next to each term id.
    res['name'] = res.index.map(lambda term: obodag[term].name)

    return res.sort_values('nes', ascending=False)

def plot_gsea_result(row, rank, pheno_pos='Tcell', pheno_neg='RAG'):
    """
    Draw the GSEA enrichment plot for a single result row via ``gp.plot.gsea_plot``.

    Parameters
    ----------
    row : result row providing ``name``, ``hit_index``, ``nes``, ``pval``, ``fdr`` and
        ``rank_ES``.
    rank : ranking object forwarded as the first argument of ``gsea_plot``.
    pheno_pos, pheno_neg : str
        Labels for the positive and negative phenotype. The defaults keep the original
        'Tcell' / 'RAG' labels; pass other labels for other comparisons (e.g. Rag vs WT).

    NOTE(review): the result tables displayed in this notebook show no ``hit_index`` or
    ``rank_ES`` columns, so rows taken from them may not be usable here — confirm.
    """
    return gp.plot.gsea_plot(rank, row['name'], row.hit_index, row.nes, row.pval, row.fdr, row.rank_ES,
                             phenoPos=pheno_pos, phenoNeg=pheno_neg)

In [6]:
# Build the raw and the filtered GO -> loci map for each ontology and report how many
# terms survive filtering (one loop instead of three copy-pasted stanzas).
go_maps = {}
for ontology in ('MF', 'BP', 'CC'):
    unfiltered = make_go2Gene_map(grouped_loci, ontology)
    filtered = filter_go2gene_map(unfiltered)
    print('Unfiltered: {}\tFiltered: {}'.format(len(unfiltered), len(filtered)))
    go_maps[ontology] = (unfiltered, filtered)

# Keep the per-ontology names that the GSEA cells further down rely on.
mf_map, mf_map_f = go_maps['MF']
bp_map, bp_map_f = go_maps['BP']
cc_map, cc_map_f = go_maps['CC']

Unfiltered: 596	Filtered: 241
Unfiltered: 751	Filtered: 271
Unfiltered: 103	Filtered: 44


In [8]:
out_dir = "RT_control_gsea"
df = pd.read_csv(os.path.join(BASE,"RT_control_results_named_annot.csv"))
# Filter and flip the sign in one chain so every step yields a new frame; assigning a
# column on a filtered slice (as done previously) triggers pandas' SettingWithCopyWarning.
# NOTE(review): the sign flip presumably makes positive ranks point at the RT group
# (the next cell names the NES > 0 results "*_rt") — confirm against the contrast used.
df = (
    df.loc[lambda d: d.padj.abs() <= 0.2]   # keep rows with |padj| <= 0.2
      .loc[lambda d: ~d.human_mouse]        # drop rows flagged as human_mouse
      .assign(log2FoldChange=lambda d: -1 * d['log2FoldChange'])
)
# 'Unnamed: 0' is the label pandas gives the unnamed first CSV column; it is used as the
# gene identifier here. The ranking is sorted ascending and re-indexed from 0.
rank_df = (
    df[['Unnamed: 0', 'log2FoldChange']]
    .rename(columns={'Unnamed: 0': 'gene_name', 'log2FoldChange': 'rank'})
    .sort_values('rank')
    .reset_index(drop=True)
)

In [9]:
# Run pre-ranked GSEA once per ontology (MF, BP, CC) with the same seed and output directory.
# NOTE(review): the three runs share `out_dir` and run_go_gsea's temporary gene-set file,
# so later runs may overwrite files written by earlier ones — confirm this is intended.
mf_res, bp_res, cc_res = (
    run_go_gsea(rank_df, go_map, seed=1111, outdir=out_dir)
    for go_map in (mf_map_f, bp_map_f, cc_map_f)
)

# Split each result by direction at nominal p < 0.05:
#   *_rt  -> NES > 0, highest NES first
#   *_rag -> NES < 0, most negative NES first
mf_rt = mf_res[(mf_res['nes'] > 0) & (mf_res['pval'] < 0.05)].sort_values('nes', ascending=False)
mf_rag = mf_res[(mf_res['nes'] < 0) & (mf_res['pval'] < 0.05)].sort_values('nes', ascending=True)
bp_rt = bp_res[(bp_res['nes'] > 0) & (bp_res['pval'] < 0.05)].sort_values('nes', ascending=False)
bp_rag = bp_res[(bp_res['nes'] < 0) & (bp_res['pval'] < 0.05)].sort_values('nes', ascending=True)
cc_rt = cc_res[(cc_res['nes'] > 0) & (cc_res['pval'] < 0.05)].sort_values('nes', ascending=False)
cc_rag = cc_res[(cc_res['nes'] < 0) & (cc_res['pval'] < 0.05)].sort_values('nes', ascending=True)

2017-03-16 20:29:46,029 Parsing data files for GSEA.............................
  mask |= (ar1 == a)
2017-03-16 20:29:46,194 0054 gene_sets used for further statistical testing.....
2017-03-16 20:29:46,195 Start to run GSEA...Might take a while..................
2017-03-16 20:30:31,852 Start to generate gseapy reports, and produce figures...
2017-03-16 20:30:49,223 Congratulations...GSEAPY run successfully...............
2017-03-16 20:30:49,241 Parsing data files for GSEA.............................
2017-03-16 20:30:49,419 0063 gene_sets used for further statistical testing.....
2017-03-16 20:30:49,420 Start to run GSEA...Might take a while..................
2017-03-16 20:31:39,639 Start to generate gseapy reports, and produce figures...
2017-03-16 20:31:57,516 Congratulations...GSEAPY run successfully...............
2017-03-16 20:31:57,522 Parsing data files for GSEA.............................
2017-03-16 20:31:57,559 0024 gene_sets used for further statistical testing.....
2017-03

In [10]:
# RT_control run: molecular-function terms with NES > 0 and p < 0.05, highest NES first.
mf_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0003723,0.497,2.713,0.0,0.0,367,51,"[7655641, 64669875, 20323736, 33709212, 101851...",RNA binding
GO:0004618,0.644,2.616,0.0,5.094e-05,78,19,"[30875488, 17585926, 8107297, 14006785, 454308...",phosphoglycerate kinase activity
GO:0016774,0.582,2.472,0.0,0.0001019,111,22,"[30875488, 17585926, 8107297, 14006785, 454308...","phosphotransferase activity, carboxyl group as..."
GO:0001882,0.465,2.212,0.0002914,0.002623,398,31,"[5888442, 80382425, 10185112, 21138590, 692059...",nucleoside binding
GO:0016301,0.443,2.174,0.0002895,0.003199,480,34,"[30875488, 17585926, 8107297, 14006785, 454308...",kinase activity
GO:0019001,0.449,2.064,0.001937,0.00781,248,28,"[5888442, 80382425, 10185112, 21138590, 692059...",guanyl nucleotide binding
GO:0001883,0.449,2.062,0.001185,0.006782,248,28,"[5888442, 80382425, 10185112, 21138590, 692059...",purine nucleoside binding
GO:0003735,0.334,1.963,0.001743,0.01406,360,65,"[7655641, 64669875, 20323736, 33709212, 191078...",structural constituent of ribosome
GO:0022892,0.504,1.875,0.006516,0.02511,89,15,"[3885199, 1994037, 29978920, 33721585, 3821095...",substrate-specific transporter activity
GO:0000287,0.437,1.717,0.02134,0.06799,113,17,"[29629717, 9654148, 23089117, 25933508, 175893...",magnesium ion binding


In [11]:
# RT_control run: molecular-function terms with NES < 0 and p < 0.05, most negative first.
mf_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0004553,-0.48,-1.767,0.018,0.345,75,11,"[11310495, 11310502, 167639021, 61777171, 6337...","hydrolase activity, hydrolyzing O-glycosyl com..."
GO:0016798,-0.48,-1.765,0.019,0.175,76,11,"[11310495, 11310502, 167639021, 61777171, 6337...","hydrolase activity, acting on glycosyl bonds"
GO:0050662,-0.328,-1.734,0.017,0.138,235,27,"[29001612, 27290903, 26904631, 29983272, 20853...",coenzyme binding
GO:0009055,-0.541,-1.733,0.027,0.104,38,8,"[63305114, 63379850, 62942791, 49911241, 63626...",electron carrier activity
GO:0016903,-0.366,-1.589,0.048,0.173,282,16,"[36107728, 6997781, 43414939, 3740901, 1586034...","oxidoreductase activity, acting on the aldehyd..."


In [12]:
# RT_control run: biological-process terms with NES > 0 and p < 0.05, highest NES first.
bp_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0009132,0.408,2.117,0.0008412,0.033,304,41,"[30875488, 17585926, 8107297, 14006785, 454308...",nucleoside diphosphate metabolic process
GO:0046939,0.408,2.113,0.0008461,0.017,304,41,"[30875488, 17585926, 8107297, 14006785, 454308...",nucleotide phosphorylation
GO:0009135,0.408,2.111,0.0009901,0.012,302,41,"[30875488, 17585926, 8107297, 14006785, 454308...",purine nucleoside diphosphate metabolic process
GO:0072524,0.401,2.091,0.0007072,0.01,305,42,"[30875488, 17585926, 8107297, 14006785, 454308...",pyridine-containing compound metabolic process
GO:0006733,0.401,2.089,0.000705,0.008,305,42,"[30875488, 17585926, 8107297, 14006785, 454308...",oxidoreduction coenzyme metabolic process
GO:0044267,0.352,2.088,0.0,0.007,413,69,"[7655641, 64669875, 20323736, 33709212, 272121...",cellular protein metabolic process
GO:0009141,0.392,2.087,0.0005639,0.006,381,45,"[30875488, 17585926, 8107297, 14006785, 454308...",nucleoside triphosphate metabolic process
GO:0046034,0.392,2.086,0.001533,0.005,379,45,"[30875488, 17585926, 8107297, 14006785, 454308...",ATP metabolic process
GO:0009126,0.385,2.057,0.00152,0.007,409,46,"[30875488, 17585926, 8107297, 14006785, 454308...",purine nucleoside monophosphate metabolic process
GO:0009123,0.385,2.045,0.00126,0.007,412,46,"[30875488, 17585926, 8107297, 14006785, 454308...",nucleoside monophosphate metabolic process


In [13]:
# RT_control run: biological-process terms with NES < 0 and p < 0.05 (no term qualified in the saved output).
bp_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [14]:
# RT_control run: cellular-component terms with NES > 0 and p < 0.05, highest NES first.
cc_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0015934,0.66,2.114,0.0006653,0.012,63,10,"[7655641, 64669875, 20323736, 33709212, 828372...",large ribosomal subunit
GO:0044444,0.335,2.014,0.00171,0.015,422,70,"[7655641, 64669875, 20323736, 33709212, 296297...",cytoplasmic part
GO:0044446,0.491,1.988,0.002516,0.013,169,19,"[7655641, 64669875, 20323736, 33709212, 828372...",intracellular organelle part
GO:0044391,0.491,1.987,0.001997,0.01,159,19,"[7655641, 64669875, 20323736, 33709212, 828372...",ribosomal subunit
GO:1990904,0.334,1.952,0.002281,0.01,360,65,"[7655641, 64669875, 20323736, 33709212, 191078...",ribonucleoprotein complex
GO:0005622,0.377,1.932,0.003536,0.01,306,41,"[7655641, 64669875, 20323736, 33709212, 191078...",intracellular
GO:0043232,0.312,1.764,0.01199,0.031,320,57,"[7655641, 64669875, 20323736, 33709212, 191078...",intracellular non-membrane-bounded organelle
GO:0005840,0.312,1.762,0.01024,0.028,319,57,"[7655641, 64669875, 20323736, 33709212, 191078...",ribosome
GO:0043229,0.312,1.758,0.009684,0.025,327,57,"[7655641, 64669875, 20323736, 33709212, 191078...",intracellular organelle
GO:0044422,0.374,1.65,0.02639,0.045,402,25,"[7655641, 64669875, 20323736, 33709212, 828372...",organelle part


In [15]:
# RT_control run: cellular-component terms with NES < 0 and p < 0.05, most negative first.
cc_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0016020,-0.466,-1.946,0.007,0.043,75,15,"[21357752, 21687303, 167156307, 61699598, 1666...",membrane
GO:0043190,-0.527,-1.786,0.021,0.061,115,9,"[63132923, 47513944, 11992086, 56076521, 17618...",ATP-binding cassette (ABC) transporter complex
GO:0098797,-0.527,-1.77,0.018,0.045,117,9,"[63132923, 47513944, 11992086, 56076521, 17618...",plasma membrane protein complex
GO:0044459,-0.527,-1.77,0.024,0.034,119,9,"[63132923, 47513944, 11992086, 56076521, 17618...",plasma membrane part
GO:1904949,-0.527,-1.767,0.019,0.027,117,9,"[63132923, 47513944, 11992086, 56076521, 17618...",ATPase complex
GO:1990351,-0.527,-1.766,0.021,0.023,117,9,"[63132923, 47513944, 11992086, 56076521, 17618...",transporter complex


In [16]:
out_dir = "Rag_WT_gsea"
df = pd.read_csv(os.path.join(BASE,"Rag_WT_results_named_annot.csv"))
# Filter and flip the sign in one chain so every step yields a new frame; assigning a
# column on a filtered slice (as done previously) triggers pandas' SettingWithCopyWarning.
# NOTE(review): the sign flip presumably makes positive ranks point at the Rag group
# (the next cell names the NES > 0 results "*_rag") — confirm against the contrast used.
df = (
    df.loc[lambda d: d.padj.abs() <= 0.2]   # keep rows with |padj| <= 0.2
      .loc[lambda d: ~d.human_mouse]        # drop rows flagged as human_mouse
      .assign(log2FoldChange=lambda d: -1 * d['log2FoldChange'])
)
# 'Unnamed: 0' is the label pandas gives the unnamed first CSV column; it is used as the
# gene identifier here. The ranking is sorted ascending and re-indexed from 0.
rank_df = (
    df[['Unnamed: 0', 'log2FoldChange']]
    .rename(columns={'Unnamed: 0': 'gene_name', 'log2FoldChange': 'rank'})
    .sort_values('rank')
    .reset_index(drop=True)
)

In [17]:
# Run pre-ranked GSEA once per ontology (MF, BP, CC) with the same seed and output directory.
# NOTE(review): `mf_res`/`bp_res`/`cc_res` and the `*_rag` names are reused from the
# RT_control analysis; here `*_rag` holds the NES > 0 side, not the NES < 0 side.
mf_res, bp_res, cc_res = (
    run_go_gsea(rank_df, go_map, seed=1111, outdir=out_dir)
    for go_map in (mf_map_f, bp_map_f, cc_map_f)
)

# Split each result by direction at nominal p < 0.05:
#   *_rag -> NES > 0, highest NES first
#   *_wt  -> NES < 0, most negative NES first
mf_rag = mf_res[(mf_res['nes'] > 0) & (mf_res['pval'] < 0.05)].sort_values('nes', ascending=False)
mf_wt = mf_res[(mf_res['nes'] < 0) & (mf_res['pval'] < 0.05)].sort_values('nes', ascending=True)
bp_rag = bp_res[(bp_res['nes'] > 0) & (bp_res['pval'] < 0.05)].sort_values('nes', ascending=False)
bp_wt = bp_res[(bp_res['nes'] < 0) & (bp_res['pval'] < 0.05)].sort_values('nes', ascending=True)
cc_rag = cc_res[(cc_res['nes'] > 0) & (cc_res['pval'] < 0.05)].sort_values('nes', ascending=False)
cc_wt = cc_res[(cc_res['nes'] < 0) & (cc_res['pval'] < 0.05)].sort_values('nes', ascending=True)

2017-03-16 20:32:23,399 Parsing data files for GSEA.............................
  mask |= (ar1 == a)
2017-03-16 20:32:23,795 0074 gene_sets used for further statistical testing.....
2017-03-16 20:32:23,796 Start to run GSEA...Might take a while..................
2017-03-16 20:35:17,273 Start to generate gseapy reports, and produce figures...
2017-03-16 20:35:40,732 Congratulations...GSEAPY run successfully...............
2017-03-16 20:35:40,756 Parsing data files for GSEA.............................
2017-03-16 20:35:41,175 0103 gene_sets used for further statistical testing.....
2017-03-16 20:35:41,176 Start to run GSEA...Might take a while..................
2017-03-16 20:39:36,383 Start to generate gseapy reports, and produce figures...
2017-03-16 20:40:06,703 Congratulations...GSEAPY run successfully...............
2017-03-16 20:40:06,709 Parsing data files for GSEA.............................
2017-03-16 20:40:06,786 0026 gene_sets used for further statistical testing.....
2017-03

In [25]:
# Rag_WT run: molecular-function terms with NES > 0 and p < 0.05, highest NES first.
mf_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0016903,0.427,2.458,0.0,0.0001541,282,75,"[57377692, 57602627, 20987406, 10318713, 61599...","oxidoreductase activity, acting on the aldehyd..."
GO:0009055,0.73,2.457,0.0,7.706e-05,38,12,"[63098496, 64646230, 21834593, 6263025, 630796...",electron carrier activity
GO:0051540,0.479,2.329,0.0,0.000411,149,38,"[62751142, 63901915, 167744672, 62114668, 2842...",metal cluster binding
GO:0016820,0.46,2.183,0.0001898,0.002138,190,35,"[62344870, 168161151, 28240379, 40674602, 6254...","hydrolase activity, acting on acid anhydrides,..."
GO:0016868,0.613,2.061,0.001738,0.005379,79,12,"[167401290, 39279903, 62071220, 168126288, 415...","intramolecular transferase activity, phosphotr..."
GO:0016866,0.613,2.041,0.002309,0.005381,112,12,"[167401290, 39279903, 62071220, 168126288, 415...",intramolecular transferase activity
GO:0022804,0.426,2.033,0.001715,0.005075,205,36,"[62344870, 168161151, 28240379, 40674602, 6254...",active transmembrane transporter activity
GO:0016620,0.433,1.951,0.001539,0.009536,101,30,"[10318713, 6997781, 166781730, 44171740, 61717...","oxidoreductase activity, acting on the aldehyd..."
GO:0022857,0.384,1.849,0.004042,0.01968,212,37,"[62344870, 168161151, 28240379, 40674602, 6254...",transmembrane transporter activity
GO:0050660,0.543,1.817,0.01345,0.02266,43,12,"[7011122, 166616289, 62304018, 63105175, 11316...",flavin adenine dinucleotide binding


In [26]:
# Rag_WT run: molecular-function terms with NES < 0 and p < 0.05, most negative first.
mf_wt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0097747,-0.653,-4.453,0.0,0.0,345,143,"[61571198, 61950469, 47461425, 15059448, 82917...",RNA polymerase activity
GO:0016779,-0.614,-4.359,0.0,0.0,463,171,"[18523127, 28436439, 40551040, 59156012, 63039...",nucleotidyltransferase activity
GO:0003677,-0.622,-4.303,0.0,0.0,369,155,"[66926567, 68934118, 51907206, 56510025, 68645...",DNA binding
GO:0004634,-0.87,-3.487,0.0,0.0,43,19,"[62743325, 62658191, 22213675, 18120033, 66797...",phosphopyruvate hydratase activity
GO:0000287,-0.669,-3.414,0.0,0.0,113,42,"[39279903, 62071220, 168126288, 41534199, 8102...",magnesium ion binding
GO:0016835,-0.77,-3.278,0.0,0.0,85,23,"[62217311, 57701171, 62743325, 62658191, 20611...",carbon-oxygen lyase activity
GO:0016781,-0.431,-3.183,0.0,0.0,302,218,"[68757582, 48551975, 59573933, 36437635, 64455...","phosphotransferase activity, paired acceptors"
GO:0016301,-0.385,-2.821,0.0,0.0,480,214,"[21799656, 22349603, 62247377, 64925467, 20722...",kinase activity
GO:0016829,-0.486,-2.564,0.0,1.394e-05,279,46,"[15754209, 18137146, 168075980, 62217311, 3701...",lyase activity
GO:0004654,-0.702,-2.563,0.0,1.255e-05,45,15,"[61793630, 44355835, 49566268, 43390620, 48648...",polyribonucleotide nucleotidyltransferase acti...


In [27]:
# Rag_WT run: biological-process terms with NES > 0 and p < 0.05, highest NES first.
bp_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0040011,0.543,3.224,0.0,0.0,274,85,"[63837786, 165990924, 167953213, 168171488, 16...",locomotion
GO:0006928,0.534,3.148,0.0,0.0,270,81,"[167953213, 168171488, 167966912, 13586931, 62...",movement of cell or subcellular component
GO:0048870,0.534,3.142,0.0,0.0,269,81,"[167953213, 168171488, 167966912, 13586931, 62...",cell motility
GO:0042558,0.648,2.104,0.000587,0.007,40,11,"[31379006, 61964112, 62018486, 65135677, 63995...",pteridine-containing compound metabolic process
GO:0042559,0.64,2.002,0.002145,0.014,29,10,"[61964112, 62018486, 65135677, 63995561, 17571...",pteridine-containing compound biosynthetic pro...
GO:0006575,0.64,1.988,0.002322,0.013,30,10,"[61964112, 62018486, 65135677, 63995561, 17571...",cellular modified amino acid metabolic process
GO:1901605,0.438,1.807,0.00834,0.049,110,22,"[63889213, 165956827, 63092817, 166239197, 276...",alpha-amino acid metabolic process
GO:0051234,0.328,1.796,0.00329,0.047,331,61,"[167285315, 13587543, 167348034, 165992613, 16...",establishment of localization
GO:0051179,0.328,1.795,0.00355,0.042,332,61,"[167285315, 13587543, 167348034, 165992613, 16...",localization
GO:0022900,0.518,1.784,0.01424,0.041,72,13,"[62114668, 61599909, 61599853, 61601993, 61603...",electron transport chain


In [28]:
# Rag_WT run: biological-process terms with NES < 0 and p < 0.05, most negative first.
bp_wt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0032774,-0.653,-4.449,0.0,0.0,345,143,"[61571198, 61950469, 47461425, 15059448, 82917...",RNA biosynthetic process
GO:0072350,-0.796,-3.228,0.0,0.0,29,20,"[38181119, 62737033, 40713606, 38860663, 61303...",tricarboxylic acid metabolic process
GO:0009259,-0.498,-2.877,0.0,0.0,414,66,"[167029093, 167526299, 165866251, 62344870, 64...",ribonucleotide metabolic process
GO:0009123,-0.498,-2.875,0.0,0.0,412,66,"[167029093, 167526299, 165866251, 62344870, 64...",nucleoside monophosphate metabolic process
GO:0019693,-0.486,-2.818,0.0,0.0,416,67,"[167029093, 167526299, 63863560, 165866251, 62...",ribose phosphate metabolic process
GO:0009132,-0.498,-2.788,0.0,0.0,304,58,"[167029093, 167526299, 165866251, 64711323, 63...",nucleoside diphosphate metabolic process
GO:0046939,-0.498,-2.78,0.0,0.0,304,58,"[167029093, 167526299, 165866251, 64711323, 63...",nucleotide phosphorylation
GO:0009135,-0.498,-2.769,0.0,0.0,302,58,"[167029093, 167526299, 165866251, 64711323, 63...",purine nucleoside diphosphate metabolic process
GO:0046034,-0.478,-2.75,0.0,0.0,379,64,"[167029093, 167526299, 165866251, 62344870, 64...",ATP metabolic process
GO:0009141,-0.478,-2.745,0.0,0.0,381,64,"[167029093, 167526299, 165866251, 62344870, 64...",nucleoside triphosphate metabolic process


In [29]:
# Rag_WT run: cellular-component terms with NES > 0 and p < 0.05, highest NES first.
cc_rag

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0042995,0.534,3.138,0.0,0.0,269,81,"[167953213, 168171488, 167966912, 13586931, 62...",cell projection
GO:0044463,0.529,2.985,0.0,0.0,229,66,"[167953213, 168171488, 167966912, 13586931, 62...",cell projection part
GO:0044422,0.492,2.964,0.0,0.0,402,90,"[167953213, 168171488, 167966912, 13586931, 62...",organelle part
GO:0044459,0.531,2.39,0.0,2.728e-05,119,29,"[168161151, 28240379, 40674602, 62545344, 2125...",plasma membrane part
GO:1990351,0.531,2.387,0.0,2.182e-05,117,29,"[168161151, 28240379, 40674602, 62545344, 2125...",transporter complex
GO:1904949,0.531,2.386,0.0,1.819e-05,117,29,"[168161151, 28240379, 40674602, 62545344, 2125...",ATPase complex
GO:0098797,0.531,2.386,0.0,1.559e-05,117,29,"[168161151, 28240379, 40674602, 62545344, 2125...",plasma membrane protein complex
GO:0043190,0.531,2.381,0.0,1.364e-05,115,29,"[168161151, 28240379, 40674602, 62545344, 2125...",ATP-binding cassette (ABC) transporter complex
GO:0098796,0.46,2.183,0.0,0.0003395,171,35,"[62344870, 168161151, 28240379, 40674602, 6254...",membrane protein complex
GO:0043234,0.46,2.171,0.0001883,0.0003383,180,35,"[62344870, 168161151, 28240379, 40674602, 6254...",protein complex


In [30]:
# Rag_WT run: cellular-component terms with NES < 0 and p < 0.05, most negative first.
cc_wt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0000015,-0.87,-3.496,0.0,0.0,43,19,"[62743325, 62658191, 22213675, 18120033, 66797...",phosphopyruvate hydratase complex
GO:0044445,-0.87,-3.453,0.0,0.0,47,19,"[62743325, 62658191, 22213675, 18120033, 66797...",cytosolic part
GO:1902494,-0.49,-2.699,0.0,0.0,202,56,"[7522635, 22519241, 63652778, 168124324, 16816...",catalytic complex
GO:0044444,-0.43,-2.607,0.0,0.0,422,80,"[165850968, 11332141, 62054342, 67774188, 4009...",cytoplasmic part
GO:0005737,-0.471,-2.194,0.0004234,0.0004848,264,31,"[63092817, 166239197, 61703706, 62359585, 6270...",cytoplasm
GO:0043229,-0.34,-1.842,0.006066,0.009561,327,51,"[165850968, 11332141, 62054342, 67774188, 4009...",intracellular organelle
GO:0043232,-0.34,-1.838,0.003035,0.008311,320,51,"[165850968, 11332141, 62054342, 67774188, 4009...",intracellular non-membrane-bounded organelle
GO:0005840,-0.328,-1.759,0.007912,0.01312,319,50,"[165850968, 11332141, 62054342, 67774188, 4009...",ribosome
GO:1990904,-0.291,-1.596,0.01878,0.03439,360,55,"[165850968, 11332141, 62054342, 67774188, 4009...",ribonucleoprotein complex
