# UNUSED Enrichment for milestone genes

In [2]:
import os
import sys
from importlib import reload

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.insert(0,module_path)
    
import warnings

import pandas as pd
from IPython.display import display
pd.set_option('max_colwidth', 70)
pd.set_option('display.max_rows', 500)
import string
import numpy as np
import matplotlib.pyplot as plt

from orangecontrib.bioinformatics.geneset.__init__ import list_all

import jupyter_functions as jf
reload(jf)
import jupyter_functions as jf
from networks.functionsDENet import loadPickle, savePickle
import enrichment_library as enr
reload(enr)
import enrichment_library as enr
from stages_DE.stages_library import PHENOTYPES

  import pandas.util.testing as tm


In [1]:
# Root of the project data directory. The default is the location used when the
# notebook was written; set the TIME_TRAJECTORIES_DATA environment variable to
# run the notebook against a copy of the data stored elsewhere.
DATA_ROOT=os.environ.get('TIME_TRAJECTORIES_DATA',
                         '/home/karin/Documents/timeTrajectories/data').rstrip('/')
#Path for expression data (mergedGenes_RPKUM.tsv)
dataPath=DATA_ROOT+'/RPKUM/combined/'
# Directory with the tables read in the DESeq2 sections
path_de=DATA_ROOT+'/deTime/stage_vs_other/'
# Directory with the tables read in the ImpulseDE2 sections
path_impulse=DATA_ROOT+'/stages/DE_across_stages/'

## Gene sets

Uses only gene sets that contain at least 5 and at most 500 genes. All ontologies are used.

In [6]:
# Reference: genes whose row in the expression table is not all zeros
genes = pd.read_csv(dataPath + 'mergedGenes_RPKUM.tsv', sep='\t', index_col=0)
expressed_mask=(genes != 0).any(axis=1)
ALL_GENE_NAMES=set(genes.loc[expressed_mask].index)
NAME_EID=enr.name_genes_entrez(gene_names=ALL_GENE_NAMES, key_entrez=False)
ALL_GENE_NAMES_EID=enr.convert_EID(genes=ALL_GENE_NAMES, name_EID=NAME_EID)

# Get all gene sets
gene_sets=list(list_all(organism='44689'))
GENE_SETS_ONTOLOGY=enr.get_gene_sets(gene_set_names=gene_sets, go_slims=True,set_sizes=(5,500),
                                     reference=ALL_GENE_NAMES_EID)
# Find out max/min gene set sizes (counting only genes present in the reference) and N sets
sizes_in_ref=[len(gene_set.genes & ALL_GENE_NAMES_EID)
              for ontology_sets in GENE_SETS_ONTOLOGY.values()
              for gene_set in ontology_sets]
# Defaults reproduce the sentinel values reported when there are no gene sets at all
min_n=min(sizes_in_ref,default=np.inf)
max_n=max(sizes_in_ref,default=0)
print('Min size in ref',min_n,'Max size in ref',max_n)  
print('N sets',{ontology:len(ontology_sets) for ontology,ontology_sets in GENE_SETS_ONTOLOGY.items()})

Min size in ref 5 Max size in ref 495
N sets {('GO', 'cellular_component'): 24, ('KEGG', 'Pathways'): 95, ('GO', 'molecular_function'): 35, ('Custom', 'Baylor'): 15, ('GO', 'biological_process'): 52, ('Dictybase', 'Phenotypes'): 207}


In [6]:
def group_diff_enrichment(query_names,group:str,padj:float=0.25,min_overlap:int=None,
                          use_annotated_genes:bool=False,enrichment_map=False,map_edge_filter=0.1):
    """
    Run gene set enrichment for one group of query genes, print a short summary and
    display the table of enriched gene sets.

    Relies on the notebook-level NAME_EID, ALL_GENE_NAMES_EID and GENE_SETS_ONTOLOGY.

    Only gene sets whose overlap with the query is at least min_overlap are displayed.
    According to the original author's note, p values and padj are computed over all
    gene sets with overlap >= 1; that logic lives in enr.gene_set_enrichment.

    :param query_names: Gene names of the query; converted to EIDs with NAME_EID.
    :param group: Label of the group, used in the printed header and the figure title.
    :param padj: Passed to enr.gene_set_enrichment as padj_threshold.
    :param min_overlap: Passed to enr.gene_set_enrichment as min_overlap.
    :param use_annotated_genes: if True use for reference and query only genes that have at
        least one gene set annotation (i.e. occur in some gene set of GENE_SETS_ONTOLOGY).
    :param enrichment_map: If True, also draw an enrichment map (when there is at least one
        enriched gene set) and return the figure together with the table.
    :param map_edge_filter: Passed to enr.enrichment_map as min_overlap.
    :return: DataFrame with one row per enriched gene set, ordered by ascending padj, or
        None when no query gene was usable or nothing was enriched. If enrichment_map is
        True the return value is (result,(fig,ax)), where fig and ax are None when no map
        was drawn.
    """
    converted_query=enr.convert_EID(genes=query_names, name_EID=NAME_EID)
    print('***  '+group+' selected:',len(query_names),'with EID:',len(converted_query))

    background=ALL_GENE_NAMES_EID.copy()
    foreground=converted_query.copy()

    if use_annotated_genes:
        # Union of genes over every gene set of every ontology
        annotated=set()
        for collection in GENE_SETS_ONTOLOGY.values():
            for member in collection:
                annotated.update(member.genes)
        background=set(background) & annotated
        foreground=set(foreground) & annotated

        # Ratio is undefined for a query without any EID
        if len(converted_query)>0:
            query_ratio=round(len(foreground)/len(converted_query),2)
        else:
            query_ratio='NA'
        reference_ratio=round(len(background)/len(ALL_GENE_NAMES_EID),2)
        print('Ratio of genes annotated with a gene set in reference',
              reference_ratio,
             'and query',query_ratio)

    covered_query=set()
    result=None
    fig,ax=None,None
    if len(foreground) > 0:
        hits=enr.gene_set_enrichment(foreground, reference_EID=background,
                                     padj_threshold=padj,min_overlap=min_overlap,
                                     gene_sets_ontology=GENE_SETS_ONTOLOGY)
        if len(hits)>0:
            # Most significant gene sets first
            hits=sorted(hits, key=lambda hit: hit.padj)
            for hit in hits:
                covered_query.update(hit.gene_set.genes & foreground)
            rows=[{'Gene set':hit.gene_set.name,
                   'Ontology':hit.ontology[0]+': '+hit.ontology[1],
                   'FDR':format(hit.padj,'.2e'),
                   'N in query':hit.in_query,
                   'Set size':len(hit.gene_set.genes),
                   'N in ref.':hit.in_reference}
                  for hit in hits]
            result=pd.DataFrame(rows)
            if enrichment_map:
                fig,ax=plt.subplots(figsize=(15,15))
                # Warnings raised while drawing the map are captured and not shown
                with warnings.catch_warnings(record=True):
                    enr.enrichment_map(enriched=hits, ax=ax,query_size=len(foreground),
                                       fontsize=8,min_overlap=map_edge_filter)
                title=('Group '+group+' using '+str(len(foreground))+' out of '+str(len(query_names))+
                       ' genes for enrichment calculation.')
                fig.suptitle(title)
    print('Enrichment at FDR: '+str(padj)+' and min query - gene set overlap',str(min_overlap))
    print('N query genes in displayed gene sets:',len(covered_query),'out of', len(foreground),
          'query genes used for enrichment calculation.')
    display(result)
    print('\n')
    return (result,(fig,ax)) if enrichment_map else result

## Enrichment report
Enrichment map: Circles show gene sets (as displayed in the report). Circle size shows the overlap between the gene set and the query (genes of interest) as a ratio of the query genes used in the enrichment calculation (larger is more). Colour denotes the padj value (smaller is more yellow), with all values below 10^-10 being assigned the same shade of yellow. Lines show the overlap between gene sets as the ratio of genes from the smaller gene set that are present in the larger one, encoded as line thickness. Overlaps below 0.2 were excluded from plotting. Arrows show that all genes of a smaller gene set (arrow start) are contained in the larger gene set.

### DESeq2
Only positive log2 fold changes (lFC) are used to select only overexpressed genes.

#### DESeq2 all strains

##### DESeq2 all strains lFC >= 2 and padj <= 0.01

In [None]:
# Enrichment of overexpressed genes per stage (all strains): lFC >= 2 and padj <= 0.01
milestones=pd.read_table(path_de+'nobatchrep_combined.tsv',index_col=0,sep='\t')
stages_in_table=list(milestones['Stage'].unique())
for stage in PHENOTYPES:
    # Follow the PHENOTYPES order, skipping stages that do not occur in the table
    if stage not in stages_in_table:
        continue
    selection='log2FoldChange>=2 & padj <= 0.01 & Stage=="'+stage+'"'
    query_names=list(milestones.query(selection).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)
  

##### DESeq2 all strains lFC >= 1 and padj <= 0.05

In [None]:
# Enrichment of overexpressed genes per stage (all strains): lFC >= 1 and padj <= 0.05
milestones=pd.read_table(path_de+'nobatchrep_combined.tsv',index_col=0,sep='\t')
stages_in_table=list(milestones['Stage'].unique())
for stage in PHENOTYPES:
    # Follow the PHENOTYPES order, skipping stages that do not occur in the table
    if stage not in stages_in_table:
        continue
    selection='log2FoldChange>=1 & padj <= 0.05 & Stage=="'+stage+'"'
    query_names=list(milestones.query(selection).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

#### DESeq2 WT

##### DESeq2 WT lFC >= 2 and padj <= 0.01

In [None]:
# Enrichment of overexpressed genes per stage (WT): lFC >= 2 and padj <= 0.01
milestones=pd.read_table(path_de+'WT_batchrep_combined.tsv',index_col=0,sep='\t')
stages_in_table=list(milestones['Stage'].unique())
for stage in PHENOTYPES:
    # Follow the PHENOTYPES order, skipping stages that do not occur in the table
    if stage not in stages_in_table:
        continue
    selection='log2FoldChange>=2 & padj <= 0.01 & Stage=="'+stage+'"'
    query_names=list(milestones.query(selection).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

##### DESeq2 WT lFC >= 1 and padj <= 0.05

In [None]:
%matplotlib agg
# Enrichment of overexpressed genes per stage (WT): lFC >= 1 and padj <= 0.05.
# The enrichment maps are collected in figs so that they can be shown in a later cell.
milestones=pd.read_table(path_de+'WT_batchrep_combined.tsv',index_col=0,sep='\t')
figs=[]
stages_in_table=list(milestones['Stage'].unique())
for stage in PHENOTYPES:
    # Follow the PHENOTYPES order, skipping stages that do not occur in the table
    if stage not in stages_in_table:
        continue
    selection='log2FoldChange>=1 & padj <= 0.05 & Stage=="'+stage+'"'
    query_names=list(milestones.query(selection).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,
                                   use_annotated_genes=True,enrichment_map=True,map_edge_filter=0.2)
    # With enrichment_map=True the second element of the result is the (fig,ax) pair
    figs.append(enriched[1])

###### Enrichment map for DESeq2 WT lFC >= 1 and padj <= 0.05

In [None]:
# Show the enrichment maps collected in figs. A stage without any enriched gene
# set has no figure (fig is None); skip it instead of displaying "None".
for fig,ax in figs:
    if fig is not None:
        display(fig)

#### DESeq2 combined strain results (padj <= 0.05)

In [None]:
# Enrichment per stage for the combined strain results: combined_padj <= 0.05
milestones=pd.read_table(path_de+'strain_batchrep_lFC05_upper_combined.tsv',index_col=0,sep='\t')
stages_in_table=list(milestones['Stage'].unique())
for stage in PHENOTYPES:
    # Follow the PHENOTYPES order, skipping stages that do not occur in the table
    if stage not in stages_in_table:
        continue
    selection='combined_padj <= 0.05 & Stage=="'+stage+'"'
    query_names=list(milestones.query(selection).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

### ImpulseDE2
Milestone genes are selected as those that are at the up level in a stage and at the down level in all non-neighbouring stages (neighbouring stages are ignored to account for imperfect annotation of up/down). no_agg is not considered a neighbour of stream and vice versa.

#### ImpulseDE2 all strains

In [18]:
# ImpulseDE2 milestone genes (all strains): genes that are "up" in a stage and
# "down" in every non-neighbouring stage, followed by enrichment per stage.
milestones=pd.read_table(path_impulse+'DEacrossStages_summary_noyem.tsv',index_col=0,sep='\t')
# Stages present as columns of the table, in PHENOTYPES order
stages=np.array([phenotype for phenotype in PHENOTYPES if phenotype in list(milestones.columns)])
# Adjacent stage pairs that are nevertheless NOT treated as neighbours.
# The pair is matched by name rather than by position (0 and 1), so the exception
# stays correct even if one of the stages is missing from the table or the stage
# order changes.
NOT_NEIGHBOURS={frozenset(('no_agg','stream'))}
for stage_idx,stage in enumerate(stages):
    
    #Nonneighbouring stages: all stages except the stage itself and the stages
    #directly before/after it (unless the pair is listed in NOT_NEIGHBOURS)
    non_neighbours=[]
    for idx,other in enumerate(stages):
        if idx==stage_idx:
            continue
        is_adjacent=abs(idx-stage_idx)==1
        if is_adjacent and frozenset((str(stage),str(other))) not in NOT_NEIGHBOURS:
            continue
        non_neighbours.append(other)
    
    #Find genes up at stage and down at nonneighbouring stages
    query_condition=stage+'=="up"'
    for non_neigh in non_neighbours:
        query_condition=query_condition+' & '+non_neigh+' =="down"'
    
    query_names=list(milestones.query(query_condition).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 1199 with EID: 1193
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 328 out of 479 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,Ribosome,KEGG: Pathways,7e-38,67,105,97
1,ribosome,GO: cellular_component,3.31e-30,59,93,93
2,structural constituent of ribosome,GO: molecular_function,4.68e-30,56,85,85
3,structural molecule activity,GO: molecular_function,4.6199999999999997e-23,59,117,117
4,translation,GO: biological_process,1.67e-20,69,169,169
5,mitochondrion,GO: cellular_component,7.13e-08,60,229,229
6,oxidoreductase activity,GO: molecular_function,6.22e-05,49,210,207
7,Oxidative phosphorylation,KEGG: Pathways,6.62e-05,20,71,54
8,Biosynthesis of amino acids,KEGG: Pathways,0.00232,19,63,62
9,Biosynthesis of secondary metabolites,KEGG: Pathways,0.00375,55,289,281




***  stream selected: 956 with EID: 941
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 276 out of 377 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,aberrant streaming,Dictybase: Phenotypes,2.13e-08,27,72,72
1,aberrant cytokinesis,Dictybase: Phenotypes,2.13e-08,32,98,98
2,locomotion,GO: biological_process,3.06e-06,43,191,189
3,cell division,GO: biological_process,3.79e-06,31,115,115
4,signal transduction,GO: biological_process,4.33e-06,64,352,350
5,mitotic cell cycle,GO: biological_process,4.33e-06,28,100,100
6,Proteasome,KEGG: Pathways,4.33e-06,16,39,37
7,cytoskeleton organization,GO: biological_process,1.03e-05,42,197,196
8,plasma membrane,GO: cellular_component,5.68e-05,50,268,268
9,aberrant F-actin organization,Dictybase: Phenotypes,0.00011,14,37,37




***  lag selected: 1190 with EID: 1175
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 335 out of 461 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,signal transduction,GO: biological_process,1.91e-14,95,352,350
1,locomotion,GO: biological_process,9.65e-11,58,191,189
2,aberrant cytokinesis,Dictybase: Phenotypes,7.58e-09,36,98,98
3,aberrant streaming,Dictybase: Phenotypes,9.03e-07,27,72,72
4,cytoskeleton organization,GO: biological_process,2.15e-06,50,197,196
5,abolished aggregation,Dictybase: Phenotypes,6.79e-06,27,82,80
6,kinase activity,GO: molecular_function,6.79e-06,70,328,327
7,aberrant F-actin organization,Dictybase: Phenotypes,8.16e-06,17,37,37
8,enzyme binding,GO: molecular_function,1.82e-05,32,110,110
9,aberrant cell morphology,Dictybase: Phenotypes,3.96e-05,17,42,41




***  tag selected: 191 with EID: 191
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 61 out of 76 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,DNA replication,KEGG: Pathways,1.78e-25,20,34,33
1,chromosome,GO: cellular_component,1.36e-23,24,70,70
2,chromosome organization,GO: biological_process,1.88e-14,18,74,74
3,Mismatch repair,KEGG: Pathways,8.25e-12,10,19,19
4,chromosome segregation,GO: biological_process,2.03e-10,9,18,18
5,Nucleotide excision repair,KEGG: Pathways,6.61e-10,11,37,37
6,cell cycle,GO: biological_process,2.16e-09,19,167,167
7,DNA metabolic process,GO: biological_process,8.73e-09,15,105,105
8,mitotic nuclear division,GO: biological_process,4.98e-08,7,15,15
9,Base excision repair,KEGG: Pathways,3.16e-07,8,28,28




***  tip selected: 261 with EID: 260
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 73 out of 99 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,chromosome,GO: cellular_component,1.1700000000000001e-27,29,70,70
1,DNA replication,KEGG: Pathways,3.75e-23,20,34,33
2,chromosome organization,GO: biological_process,1.99e-13,19,74,74
3,cell cycle,GO: biological_process,3.45e-13,26,167,167
4,Mismatch repair,KEGG: Pathways,2.25e-12,11,19,19
5,Nucleotide excision repair,KEGG: Pathways,3.17e-11,13,37,37
6,chromosome segregation,GO: biological_process,1.94e-09,9,18,18
7,DNA metabolic process,GO: biological_process,6.61e-09,17,105,105
8,nuclear chromosome,GO: cellular_component,3.26e-08,8,17,17
9,cell division,GO: biological_process,1.79e-07,16,115,115




***  slug selected: 76 with EID: 76
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 21 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,cell cycle arrest,Dictybase: Phenotypes,0.0191,2,5,5




***  mhat selected: 8 with EID: 8
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 0 query genes used for enrichment calculation.


None



***  cul selected: 565 with EID: 559
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 32 out of 156 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,Golgi apparatus,GO: cellular_component,0.0859,11,85,85
1,SNARE interactions in vesicular transport,KEGG: Pathways,0.11,5,23,23
2,oxidoreductase activity,GO: molecular_function,0.11,18,210,207




***  FB selected: 555 with EID: 546
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 36 out of 123 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,anatomical structure formation involved in morphogenesis,GO: biological_process,5.29e-05,13,74,74
1,extracellular region,GO: cellular_component,0.00198,16,157,157
2,cell differentiation,GO: biological_process,0.00204,15,145,145
3,SNARE interactions in vesicular transport,KEGG: Pathways,0.00236,6,23,23
4,secondary metabolic process,GO: biological_process,0.00995,3,5,5
5,Ubiquinone and other terpenoid-quinone biosynthesis,KEGG: Pathways,0.0886,3,10,10
6,decreased spore viability,Dictybase: Phenotypes,0.24,6,60,59






#### ImpulseDE2 WT

In [19]:
# ImpulseDE2 milestone genes (WT): genes that are "up" in a stage and "down" in
# every non-neighbouring stage, followed by enrichment per stage.
milestones=pd.read_table(path_impulse+'DEacrossStages_summary_WT_noyem_batchrep.tsv',index_col=0,sep='\t')
# Stages present as columns of the table, in PHENOTYPES order
stages=np.array([phenotype for phenotype in PHENOTYPES if phenotype in list(milestones.columns)])
# Adjacent stage pairs that are nevertheless NOT treated as neighbours.
# The pair is matched by name rather than by position (0 and 1), so the exception
# stays correct even if one of the stages is missing from the table or the stage
# order changes.
NOT_NEIGHBOURS={frozenset(('no_agg','stream'))}
for stage_idx,stage in enumerate(stages):
    
    #Nonneighbouring stages: all stages except the stage itself and the stages
    #directly before/after it (unless the pair is listed in NOT_NEIGHBOURS)
    non_neighbours=[]
    for idx,other in enumerate(stages):
        if idx==stage_idx:
            continue
        is_adjacent=abs(idx-stage_idx)==1
        if is_adjacent and frozenset((str(stage),str(other))) not in NOT_NEIGHBOURS:
            continue
        non_neighbours.append(other)
    
    #Find genes up at stage and down at nonneighbouring stages
    query_condition=stage+'=="up"'
    for non_neigh in non_neighbours:
        query_condition=query_condition+' & '+non_neigh+' =="down"'
    
    query_names=list(milestones.query(query_condition).index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 2330 with EID: 2310
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 539 out of 923 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,Ribosome,KEGG: Pathways,2.53e-28,76,105,97
1,translation,GO: biological_process,4.76e-25,104,169,169
2,structural constituent of ribosome,GO: molecular_function,2.3699999999999998e-23,65,85,85
3,ribosome,GO: cellular_component,1.22e-22,68,93,93
4,mitochondrion,GO: cellular_component,1.17e-19,118,229,229
5,ribosome biogenesis,GO: biological_process,4.03e-17,47,61,61
6,nucleolus,GO: cellular_component,5.36e-16,51,72,72
7,structural molecule activity,GO: molecular_function,2.82e-15,69,117,117
8,Ribosome biogenesis in eukaryotes,KEGG: Pathways,3.89e-14,47,72,68
9,tRNA metabolic process,GO: biological_process,3.96e-12,45,69,69




***  stream selected: 409 with EID: 403
Ratio of genes annotated with a gene set in reference 0.32 and query 0.41
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 127 out of 166 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,Proteasome,KEGG: Pathways,4e-06,12,39,37
1,Protein processing in endoplasmic reticulum,KEGG: Pathways,0.000347,15,91,88
2,locomotion,GO: biological_process,0.00083,22,191,189
3,signal transduction,GO: biological_process,0.000986,32,352,350
4,aberrant streaming,Dictybase: Phenotypes,0.00182,12,72,72
5,catabolic process,GO: biological_process,0.00411,24,250,250
6,aberrant aggregation,Dictybase: Phenotypes,0.00598,10,61,61
7,abolished activation of adenylate cyclase,Dictybase: Phenotypes,0.00598,4,8,8
8,decreased aggregate size,Dictybase: Phenotypes,0.00724,7,32,32
9,decreased cell motility,Dictybase: Phenotypes,0.00724,11,76,76




***  lag selected: 429 with EID: 423
Ratio of genes annotated with a gene set in reference 0.32 and query 0.42
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 125 out of 176 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,Proteasome,KEGG: Pathways,7.99e-06,12,39,37
1,signal transduction,GO: biological_process,4.5e-05,37,352,350
2,locomotion,GO: biological_process,0.000671,23,191,189
3,aberrant aggregation,Dictybase: Phenotypes,0.00073,12,61,61
4,Protein processing in endoplasmic reticulum,KEGG: Pathways,0.0014,14,91,88
5,decreased aggregate size,Dictybase: Phenotypes,0.00239,8,32,32
6,aberrant streaming,Dictybase: Phenotypes,0.00239,12,72,72
7,abolished activation of adenylate cyclase,Dictybase: Phenotypes,0.00756,4,8,8
8,abolished streaming,Dictybase: Phenotypes,0.0121,7,33,33
9,abolished slug migration,Dictybase: Phenotypes,0.0121,8,43,43




***  tag selected: 273 with EID: 272
Ratio of genes annotated with a gene set in reference 0.32 and query 0.35
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 65 out of 95 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,chromosome,GO: cellular_component,1.44e-16,21,70,70
1,DNA replication,KEGG: Pathways,1.44e-16,16,34,33
2,DNA metabolic process,GO: biological_process,3.43e-14,22,105,105
3,Mismatch repair,KEGG: Pathways,7.58e-11,10,19,19
4,chromosome organization,GO: biological_process,1.5e-10,16,74,74
5,DNA binding,GO: molecular_function,2.58e-09,24,221,221
6,cell cycle,GO: biological_process,1.72e-08,20,167,167
7,chromosome segregation,GO: biological_process,3.62e-08,8,18,18
8,Base excision repair,KEGG: Pathways,9.4e-08,9,28,28
9,Nucleotide excision repair,KEGG: Pathways,1.27e-06,9,37,37




***  tip selected: 486 with EID: 485
Ratio of genes annotated with a gene set in reference 0.32 and query 0.34
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 119 out of 166 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,chromosome,GO: cellular_component,1e-31,37,70,70
1,DNA replication,KEGG: Pathways,5.03e-27,25,34,33
2,chromosome organization,GO: biological_process,1.89e-21,30,74,74
3,cell cycle,GO: biological_process,2.75e-20,41,167,167
4,DNA metabolic process,GO: biological_process,1.13e-16,30,105,105
5,Mismatch repair,KEGG: Pathways,1.13e-16,15,19,19
6,chromosome segregation,GO: biological_process,1.72e-15,14,18,18
7,mitotic nuclear division,GO: biological_process,1.1e-11,11,15,15
8,DNA binding,GO: molecular_function,3.39e-11,35,221,221
9,Base excision repair,KEGG: Pathways,2.51e-10,13,28,28




***  slug selected: 117 with EID: 116
Ratio of genes annotated with a gene set in reference 0.32 and query 0.22
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 25 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,chromosome organization,GO: biological_process,0.00687,5,74,74
1,nucleoplasm,GO: cellular_component,0.0985,4,91,91
2,cell cycle,GO: biological_process,0.176,4,167,167
3,cell death,GO: biological_process,0.176,2,25,25
4,DNA metabolic process,GO: biological_process,0.176,3,105,105
5,chromosome,GO: cellular_component,0.176,3,70,70
6,RNA polymerase,KEGG: Pathways,0.176,2,26,25
7,enzyme regulator activity,GO: molecular_function,0.176,3,105,105
8,cell division,GO: biological_process,0.185,3,115,115
9,transferase activity,GO: molecular_function,0.202,2,51,51




***  mhat selected: 28 with EID: 28
Ratio of genes annotated with a gene set in reference 0.32 and query 0.5
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 7 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,generation of precursor metabolites and energy,GO: biological_process,0.0736,3,77,77
1,carbohydrate metabolic process,GO: biological_process,0.0736,3,89,88
2,Starch and sucrose metabolism,KEGG: Pathways,0.0736,2,31,31
3,Amino sugar and nucleotide sugar metabolism,KEGG: Pathways,0.0736,2,27,27
4,catabolic process,GO: biological_process,0.101,4,250,250
5,small molecule metabolic process,GO: biological_process,0.147,4,354,353
6,oxidoreductase activity,GO: molecular_function,0.149,3,210,207
7,Carbon metabolism,KEGG: Pathways,0.188,2,104,102
8,Biosynthesis of secondary metabolites,KEGG: Pathways,0.219,3,289,281
9,cellular component assembly,GO: biological_process,0.244,2,202,201




***  cul selected: 1412 with EID: 1403
Ratio of genes annotated with a gene set in reference 0.32 and query 0.31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 112 out of 431 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,SNARE interactions in vesicular transport,KEGG: Pathways,0.00324,11,23,23
1,secondary metabolic process,GO: biological_process,0.0741,4,5,5
2,autophagy,GO: biological_process,0.0741,11,34,34
3,Endocytosis,KEGG: Pathways,0.0773,18,77,75
4,cell differentiation,GO: biological_process,0.117,28,145,145
5,Autophagy - other,KEGG: Pathways,0.164,8,25,24
6,decreased spore viability,Dictybase: Phenotypes,0.179,14,60,59
7,wild type,Dictybase: Phenotypes,0.239,52,341,338




***  FB selected: 564 with EID: 560
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 33 out of 141 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query,Set size,N in ref.
0,extracellular region,GO: cellular_component,0.0245,16,157,157
1,anatomical structure formation involved in morphogenesis,GO: biological_process,0.0256,10,74,74
2,Starch and sucrose metabolism,KEGG: Pathways,0.0444,6,31,31
3,SNARE interactions in vesicular transport,KEGG: Pathways,0.0547,5,23,23
4,cell differentiation,GO: biological_process,0.211,12,145,145




