# Enrichment of regulons from Regulons_by_strain

In [1]:
import os
import sys
from importlib import reload

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.insert(0,module_path)

import pandas as pd
from IPython.display import display

from orangecontrib.bioinformatics.geneset.__init__ import list_all

import jupyter_functions as jf
reload(jf)
import jupyter_functions as jf
from functionsDENet import loadPickle, savePickle
import stages_DE.enrichment_library as enr
reload(enr)
import stages_DE.enrichment_library as enr

In [2]:
#Path for mergedGenes_RPKUM.tsv
dataPath='/home/karin/Documents/timeTrajectories/data/RPKUM/combined/'
#Path for 
pathRegulons='/home/karin/Documents/timeTrajectories/data/regulons/by_strain/kN300_mean0std1_log/'

## Used gene sets
Use gene sets that do not have less than 5 or more than 500 genes. Use all ontologies.

In [3]:
# Get all gene sets
gene_sets=list(list_all(organism='44689'))
GENE_SETS_ONTOLOGY=enr.get_gene_sets(gene_set_names=gene_sets, go_slims=True,set_sizes=(5,500))

In [4]:
# For conversion of gene names to EID
genes = pd.read_csv(dataPath + 'mergedGenes_RPKUM.tsv', sep='\t', index_col=0)
all_gene_names= genes[(genes != 0).any(axis=1)].index
NAME_EID=enr.name_genes_entrez(gene_names=all_gene_names, key_entrez=False)
ALL_GENE_NAMES_EID=enr.convert_EID(genes=all_gene_names, name_EID=NAME_EID)

In [5]:
def group_diff_enrichment(data:pd.DataFrame,group:str,padj:float=0.25,min_overlap:int=None,
                          use_annotated_genes:bool=False):
    #Displays only gene sets that have overlap with query greater or equal to min_overlap 
    #For p value and padj calculation uses alll that have overlap >=1 } from gene_set_enrichment
    """
    :param use_annotated_genes: if True use for reference and query  only genes that have at 
    least one gene set annotation
    """
    selected=list(data[data['Cluster']==group].index)
    query_EID=enr.convert_EID(genes=selected, name_EID=NAME_EID)
    print('***  '+group+' selected:',len(selected),'with EID:',len(query_EID))
    
    reference_gene_eids=ALL_GENE_NAMES_EID.copy()
    query_eids=query_EID.copy()
    
    if use_annotated_genes:
        gene_sets_genes=set()
        for gene_set_name, gene_sets in GENE_SETS_ONTOLOGY.items():
            for gene_set in gene_sets:
                gene_sets_genes.update(gene_set.genes)
        reference_gene_eids=set(reference_gene_eids) & gene_sets_genes
        query_eids=set(query_eids) & gene_sets_genes
        print('Ratio of genes annotated with a gene set in reference',
              round(len(reference_gene_eids)/len(ALL_GENE_NAMES_EID),2),
             'and query',round(len(query_eids)/len(query_EID),2))
    
    result=None
    if len(query_eids) > 0:
        enrichment=enr.gene_set_enrichment(query_eids, reference_EID=reference_gene_eids, 
                                                padj_threshold=padj,min_overlap=min_overlap,
                                                gene_sets_ontology=GENE_SETS_ONTOLOGY)
        query_in_enriched=set()
        if len(enrichment)>0:
            enrichment_display=list()
            enrichment= sorted(enrichment, key=lambda data: data.padj)
            for enriched in enrichment:
                query_in_enriched.update(enriched.gene_set.genes & query_eids)
                enrichment_display.append({'Gene set':enriched.gene_set.name,'Ontology':enriched.ontology,
                                           'FDR':"{:.2e}".format(enriched.padj),'N in query':enriched.in_query})
            result=pd.DataFrame(enrichment_display)
    print('Enrichment at FDR: '+str(padj)+' and min query - gene set overlap',str(min_overlap))
    print('N query genes in displayed gene sets:',len(query_in_enriched),'out of', len(query_eids),
          'query genes used for enrichment calculation.')
    display(result)
    return result

## Enrichment reports

In [11]:
save_enrichment=False

### Enrichment for expression_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.pdf

In [6]:
regulons=pd.read_table(pathRegulons+'mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.tab',index_col=0)
for group in range(1,len(regulons['Cluster'].unique())+1):
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2)
    if save_enrichment and enriched is not None:
        enriched.to_csv(pathRegulons+'enrichment/minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30_enrichment_cluster'+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 91 with EID: 91
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 91 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",8.23e-11,7
1,external encapsulating structure,"(GO, cellular_component)",8.23e-11,7
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",4.29e-09,6
3,anatomical structure formation involved in mor...,"(GO, biological_process)",8.53e-08,9
4,cell wall organization or biogenesis,"(GO, biological_process)",1.17e-06,5
5,cell differentiation,"(GO, biological_process)",1.85e-06,10
6,anatomical structure development,"(GO, biological_process)",0.000258,13
7,abolished cellulose binding,"(Dictybase, Phenotypes)",0.000713,2
8,Starch and sucrose metabolism,"(KEGG, Pathways)",0.000881,4
9,abolished stalk cell differentiation,"(Dictybase, Phenotypes)",0.0106,2


***  C2 selected: 77 with EID: 77
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 77 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.00282,7
1,reproduction,"(GO, biological_process)",0.0679,4
2,Starch and sucrose metabolism,"(KEGG, Pathways)",0.101,2
3,anatomical structure development,"(GO, biological_process)",0.101,7
4,cell adhesion,"(GO, biological_process)",0.101,3
5,decreased sporulation,"(Dictybase, Phenotypes)",0.144,2
6,development arrests at mound stage,"(Dictybase, Phenotypes)",0.144,2
7,abolished culmination,"(Dictybase, Phenotypes)",0.166,2
8,delayed aggregation,"(Dictybase, Phenotypes)",0.2,2
9,anatomical structure formation involved in mor...,"(GO, biological_process)",0.2,2


***  C3 selected: 74 with EID: 74
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 24 out of 74 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",4.02e-06,15
1,cell adhesion,"(GO, biological_process)",0.00799,5
2,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.00856,3
3,extracellular region,"(GO, cellular_component)",0.0106,6
4,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0123,2
5,decreased fruiting body size,"(Dictybase, Phenotypes)",0.0136,4
6,decreased aggregate size,"(Dictybase, Phenotypes)",0.0136,3
7,increased slug migration,"(Dictybase, Phenotypes)",0.0136,2
8,response to stress,"(GO, biological_process)",0.0136,8
9,signal transduction,"(GO, biological_process)",0.0136,8


***  C4 selected: 74 with EID: 74
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 74 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.00016,8


***  C5 selected: 68 with EID: 68
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 65 out of 68 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",6.409999999999999e-126,60
1,ribosome,"(GO, cellular_component)",1.4700000000000001e-124,59
2,structural constituent of ribosome,"(GO, molecular_function)",8.17e-109,53
3,structural molecule activity,"(GO, molecular_function)",1.72e-98,53
4,translation,"(GO, biological_process)",5.92e-86,52
5,RNA binding,"(GO, molecular_function)",2.97e-15,16
6,rRNA binding,"(GO, molecular_function)",8.86e-13,7
7,cytosol,"(GO, cellular_component)",5.69e-05,8
8,ribosome biogenesis,"(GO, biological_process)",0.0168,3
9,delayed development,"(Dictybase, Phenotypes)",0.162,2


***  C6 selected: 66 with EID: 65
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 65 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",6.98e-10,9
1,Metabolic pathways,"(KEGG, Pathways)",2.55e-08,19
2,mitochondrion,"(GO, cellular_component)",8.07e-05,9
3,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.00039,4
4,Phagosome,"(KEGG, Pathways)",0.00328,4
5,Biosynthesis of antibiotics,"(KEGG, Pathways)",0.00492,6
6,cytoplasmic vesicle,"(GO, cellular_component)",0.00895,6
7,generation of precursor metabolites and energy,"(GO, biological_process)",0.00924,4
8,protein folding,"(GO, biological_process)",0.0103,3
9,aberrant cellular response to stress,"(Dictybase, Phenotypes)",0.0131,2


***  C7 selected: 64 with EID: 64
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 64 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00446,2
1,aberrant fruiting body morphology,"(Dictybase, Phenotypes)",0.195,2


***  C8 selected: 61 with EID: 59
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 59 query genes used for enrichment calculation.


None

***  C9 selected: 58 with EID: 58
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 58 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0813,4
1,cell adhesion,"(GO, biological_process)",0.0813,3
2,hydrolase activity,"(GO, molecular_function)",0.113,2
3,cell differentiation,"(GO, biological_process)",0.159,3
4,anatomical structure formation involved in mor...,"(GO, biological_process)",0.176,2


***  C10 selected: 57 with EID: 57
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 57 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000359,7
1,cell-cell signaling,"(GO, biological_process)",0.0372,2
2,decreased spore viability,"(Dictybase, Phenotypes)",0.187,2
3,peptidase activity,"(GO, molecular_function)",0.207,2


***  C11 selected: 55 with EID: 55
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 55 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",5.97e-08,10
1,Endocytosis,"(KEGG, Pathways)",0.000398,5
2,cytoskeleton organization,"(GO, biological_process)",0.00293,6
3,cellular component assembly,"(GO, biological_process)",0.00303,6
4,protein-containing complex assembly,"(GO, biological_process)",0.0034,5
5,response to stress,"(GO, biological_process)",0.00574,7
6,cytoskeleton,"(GO, cellular_component)",0.00778,5
7,cytoplasmic vesicle,"(GO, cellular_component)",0.0533,4
8,cytosol,"(GO, cellular_component)",0.0533,4
9,plasma membrane,"(GO, cellular_component)",0.106,4


***  C12 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Phagosome,"(KEGG, Pathways)",0.25,2
1,decreased spore viability,"(Dictybase, Phenotypes)",0.25,2


***  C13 selected: 46 with EID: 45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 45 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000497,6
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0282,3
2,cell differentiation,"(GO, biological_process)",0.121,3
3,oxidoreductase activity,"(GO, molecular_function)",0.152,3


***  C14 selected: 45 with EID: 45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 45 query genes used for enrichment calculation.


None

***  C15 selected: 41 with EID: 41
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 38 out of 41 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",2.8e-69,29
1,peptidase activity,"(GO, molecular_function)",7.5499999999999995e-34,21
2,catabolic process,"(GO, biological_process)",6.870000000000001e-32,25
3,response to stress,"(GO, biological_process)",0.0373,5
4,enzyme regulator activity,"(GO, molecular_function)",0.0373,3
5,ATPase activity,"(GO, molecular_function)",0.0827,3
6,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.135,2


***  C16 selected: 40 with EID: 40
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 40 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0148,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0161,2
2,translucent sorus,"(Dictybase, Phenotypes)",0.0161,2
3,decreased intracellular cAMP level,"(Dictybase, Phenotypes)",0.0161,2
4,anatomical structure development,"(GO, biological_process)",0.0314,6
5,cell death,"(GO, biological_process)",0.0314,2
6,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0314,3
7,kinase activity,"(GO, molecular_function)",0.0344,5
8,RNA degradation,"(KEGG, Pathways)",0.0488,2
9,cell differentiation,"(GO, biological_process)",0.0488,3


***  C17 selected: 36 with EID: 36
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 36 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",2.31e-14,13
1,chromosome segregation,"(GO, biological_process)",1.37e-10,6
2,mitotic nuclear division,"(GO, biological_process)",5.9e-09,5
3,chromosome organization,"(GO, biological_process)",1.74e-08,7
4,mitotic cell cycle,"(GO, biological_process)",9.72e-08,7
5,DNA replication,"(KEGG, Pathways)",1.92e-07,5
6,cell division,"(GO, biological_process)",1.92e-07,7
7,chromosome,"(GO, cellular_component)",2.17e-07,6
8,cytoskeleton,"(GO, cellular_component)",3.71e-06,7
9,cytoskeleton organization,"(GO, biological_process)",0.000901,5


***  C18 selected: 30 with EID: 30
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 30 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",3.61e-19,12
1,nucleolus,"(GO, cellular_component)",4.81e-17,11
2,ribosome biogenesis,"(GO, biological_process)",5.97e-16,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",5.35e-07,4
4,RNA binding,"(GO, molecular_function)",8.44e-06,6
5,protein-containing complex assembly,"(GO, biological_process)",0.001,4
6,rRNA binding,"(GO, molecular_function)",0.001,2
7,cellular component assembly,"(GO, biological_process)",0.003,4
8,helicase activity,"(GO, molecular_function)",0.012,2
9,methyltransferase activity,"(GO, molecular_function)",0.013,2


***  C19 selected: 23 with EID: 23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 23 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,kinase activity,"(GO, molecular_function)",0.11,3


***  C20 selected: 20 with EID: 20
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 20 query genes used for enrichment calculation.


None

***  C21 selected: 19 with EID: 19
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 19 query genes used for enrichment calculation.


None

#### Using only genes annotated with at least one gene set 
To find genes that have gene set annotations only gene sets used in enrichment are used (see above). Genes that are in at least one of these gene sets are used for enrichment calculation, both for reference and gene group of interest (query).

In [7]:
regulons=pd.read_table(pathRegulons+'mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.tab',index_col=0)
for group in range(1,len(regulons['Cluster'].unique())+1):
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2,use_annotated_genes=True)
    if  save_enrichment and enriched is not None:
        enriched.to_csv(pathRegulons+'enrichment/minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30_annotatedOnly_enrichment_cluster'+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 91 with EID: 91
Ratio of genes annotated with a gene set in reference 0.32 and query 0.45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 41 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",6.81e-10,7
1,external encapsulating structure,"(GO, cellular_component)",6.81e-10,7
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",2.74e-08,6
3,anatomical structure formation involved in mor...,"(GO, biological_process)",1.03e-06,9
4,cell wall organization or biogenesis,"(GO, biological_process)",5.66e-06,5
5,cell differentiation,"(GO, biological_process)",2.53e-05,10
6,abolished cellulose binding,"(Dictybase, Phenotypes)",0.00161,2
7,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00346,4
8,anatomical structure development,"(GO, biological_process)",0.0035,13
9,abolished stalk cell differentiation,"(Dictybase, Phenotypes)",0.0228,2


***  C2 selected: 77 with EID: 77
Ratio of genes annotated with a gene set in reference 0.32 and query 0.29
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 15 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.000748,7
1,reproduction,"(GO, biological_process)",0.0608,4
2,anatomical structure development,"(GO, biological_process)",0.0724,7
3,cell adhesion,"(GO, biological_process)",0.0883,3
4,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0908,2
5,decreased sporulation,"(Dictybase, Phenotypes)",0.12,2
6,development arrests at mound stage,"(Dictybase, Phenotypes)",0.12,2
7,abolished culmination,"(Dictybase, Phenotypes)",0.138,2
8,delayed aggregation,"(Dictybase, Phenotypes)",0.169,2
9,anatomical structure formation involved in mor...,"(GO, biological_process)",0.172,2


***  C3 selected: 74 with EID: 74
Ratio of genes annotated with a gene set in reference 0.32 and query 0.42
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 24 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",2.37e-05,15
1,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.018,3
2,cell adhesion,"(GO, biological_process)",0.018,5
3,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0259,2
4,extracellular region,"(GO, cellular_component)",0.0286,6
5,decreased aggregate size,"(Dictybase, Phenotypes)",0.0315,3
6,increased slug migration,"(Dictybase, Phenotypes)",0.0315,2
7,decreased fruiting body size,"(Dictybase, Phenotypes)",0.038,4
8,response to stress,"(GO, biological_process)",0.0549,8
9,signal transduction,"(GO, biological_process)",0.0549,8


***  C4 selected: 74 with EID: 74
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 15 out of 17 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",3.67e-06,8
1,wild type,"(Dictybase, Phenotypes)",0.142,5
2,decreased gene expression,"(Dictybase, Phenotypes)",0.208,2
3,decreased spore viability,"(Dictybase, Phenotypes)",0.208,2
4,DNA binding,"(GO, molecular_function)",0.215,3


***  C5 selected: 68 with EID: 68
Ratio of genes annotated with a gene set in reference 0.32 and query 0.97
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 62 out of 66 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",6.55e-98,60
1,ribosome,"(GO, cellular_component)",6.07e-97,59
2,structural constituent of ribosome,"(GO, molecular_function)",9.42e-84,53
3,structural molecule activity,"(GO, molecular_function)",1.8600000000000003e-73,53
4,translation,"(GO, biological_process)",2.02e-61,52
5,rRNA binding,"(GO, molecular_function)",2.37e-09,7
6,RNA binding,"(GO, molecular_function)",3.8e-08,16
7,cytosol,"(GO, cellular_component)",0.0748,8


***  C6 selected: 66 with EID: 65
Ratio of genes annotated with a gene set in reference 0.32 and query 0.58
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 38 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",9.41e-08,9
1,Metabolic pathways,"(KEGG, Pathways)",8.64e-05,19
2,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.00509,4
3,mitochondrion,"(GO, cellular_component)",0.00511,9
4,Phagosome,"(KEGG, Pathways)",0.0299,4
5,Biosynthesis of antibiotics,"(KEGG, Pathways)",0.0694,6
6,aberrant cellular response to stress,"(Dictybase, Phenotypes)",0.0694,2
7,generation of precursor metabolites and energy,"(GO, biological_process)",0.0694,4
8,protein folding,"(GO, biological_process)",0.0694,3
9,Ribosome,"(KEGG, Pathways)",0.111,4


***  C7 selected: 64 with EID: 64
Ratio of genes annotated with a gene set in reference 0.32 and query 0.16
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 10 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.000986,2
1,anatomical structure development,"(GO, biological_process)",0.0572,4
2,aberrant fruiting body morphology,"(Dictybase, Phenotypes)",0.059,2
3,extracellular region,"(GO, cellular_component)",0.0993,2


***  C8 selected: 61 with EID: 59
Ratio of genes annotated with a gene set in reference 0.32 and query 0.19
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 11 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",0.208,4
1,wild type,"(Dictybase, Phenotypes)",0.224,3
2,extracellular region,"(GO, cellular_component)",0.224,2


***  C9 selected: 58 with EID: 58
Ratio of genes annotated with a gene set in reference 0.32 and query 0.21
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 12 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0226,4
1,cell adhesion,"(GO, biological_process)",0.0226,3
2,cell differentiation,"(GO, biological_process)",0.044,3
3,hydrolase activity,"(GO, molecular_function)",0.044,2
4,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0885,2


***  C10 selected: 57 with EID: 57
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 11 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.89e-05,7
1,cell-cell signaling,"(GO, biological_process)",0.0211,2
2,decreased spore viability,"(Dictybase, Phenotypes)",0.12,2
3,peptidase activity,"(GO, molecular_function)",0.142,2
4,plasma membrane,"(GO, cellular_component)",0.17,3
5,anatomical structure development,"(GO, biological_process)",0.17,4


***  C11 selected: 55 with EID: 55
Ratio of genes annotated with a gene set in reference 0.32 and query 0.44
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 24 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",3.76e-07,10
1,Endocytosis,"(KEGG, Pathways)",0.00141,5
2,cellular component assembly,"(GO, biological_process)",0.0115,6
3,protein-containing complex assembly,"(GO, biological_process)",0.0115,5
4,cytoskeleton organization,"(GO, biological_process)",0.0115,6
5,response to stress,"(GO, biological_process)",0.0249,7
6,cytoskeleton,"(GO, cellular_component)",0.0255,5
7,cytoplasmic vesicle,"(GO, cellular_component)",0.132,4
8,cytosol,"(GO, cellular_component)",0.132,4
9,decreased cell motility,"(Dictybase, Phenotypes)",0.233,2


***  C12 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 11 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Phagosome,"(KEGG, Pathways)",0.16,2
1,decreased spore viability,"(Dictybase, Phenotypes)",0.16,2


***  C13 selected: 46 with EID: 45
Ratio of genes annotated with a gene set in reference 0.32 and query 0.33
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000326,6
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0285,3
2,cell differentiation,"(GO, biological_process)",0.124,3
3,oxidoreductase activity,"(GO, molecular_function)",0.16,3


***  C14 selected: 45 with EID: 45
Ratio of genes annotated with a gene set in reference 0.32 and query 0.13
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 6 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytosol,"(GO, cellular_component)",0.235,2
1,response to stress,"(GO, biological_process)",0.243,2


***  C15 selected: 41 with EID: 41
Ratio of genes annotated with a gene set in reference 0.32 and query 0.98
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 35 out of 40 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",2.2e-55,29
1,peptidase activity,"(GO, molecular_function)",8.26e-24,21
2,catabolic process,"(GO, biological_process)",4.25e-20,25


***  C16 selected: 40 with EID: 40
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0196,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0213,2
2,translucent sorus,"(Dictybase, Phenotypes)",0.0213,2
3,decreased intracellular cAMP level,"(Dictybase, Phenotypes)",0.0213,2
4,anatomical structure development,"(GO, biological_process)",0.0376,6
5,cell death,"(GO, biological_process)",0.0376,2
6,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0376,3
7,kinase activity,"(GO, molecular_function)",0.0508,5
8,RNA degradation,"(KEGG, Pathways)",0.0574,2
9,cell differentiation,"(GO, biological_process)",0.0574,3


***  C17 selected: 36 with EID: 36
Ratio of genes annotated with a gene set in reference 0.32 and query 0.53
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 19 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",8.25e-13,13
1,chromosome segregation,"(GO, biological_process)",1.81e-09,6
2,mitotic nuclear division,"(GO, biological_process)",5.5e-08,5
3,chromosome organization,"(GO, biological_process)",3.03e-07,7
4,mitotic cell cycle,"(GO, biological_process)",1.66e-06,7
5,DNA replication,"(KEGG, Pathways)",2.06e-06,5
6,chromosome,"(GO, cellular_component)",2.75e-06,6
7,cell division,"(GO, biological_process)",2.75e-06,7
8,cytoskeleton,"(GO, cellular_component)",5.99e-05,7
9,microtubule organizing center,"(GO, cellular_component)",0.00617,3


***  C18 selected: 30 with EID: 30
Ratio of genes annotated with a gene set in reference 0.32 and query 0.7
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 21 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",1.08e-15,12
1,nucleolus,"(GO, cellular_component)",8.61e-14,11
2,ribosome biogenesis,"(GO, biological_process)",6.18e-13,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",1.1e-05,4
4,RNA binding,"(GO, molecular_function)",0.000566,6
5,rRNA binding,"(GO, molecular_function)",0.00545,2
6,protein-containing complex assembly,"(GO, biological_process)",0.0142,4
7,cellular component assembly,"(GO, biological_process)",0.0441,4
8,helicase activity,"(GO, molecular_function)",0.0526,2
9,methyltransferase activity,"(GO, molecular_function)",0.0563,2


***  C19 selected: 23 with EID: 23
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 6 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,kinase activity,"(GO, molecular_function)",0.0471,3


***  C20 selected: 20 with EID: 20
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2


UnboundLocalError: local variable 'query_in_enriched' referenced before assignment

### Enrichment for mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab

In [8]:
regulons=pd.read_table(pathRegulons+'mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab',index_col=0)
for group in range(1,len(regulons['Cluster'].unique())+1):
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2)
    if  save_enrichment and enriched is not None:
        enriched.to_csv(pathRegulons+'enrichment/minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log_enrichment_cluster'+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 96 with EID: 96
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 30 out of 96 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",2.66e-05,16
1,cell differentiation,"(GO, biological_process)",0.00827,7
2,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0147,3
3,signal transduction,"(GO, biological_process)",0.0147,10
4,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0218,2
5,increased slug migration,"(Dictybase, Phenotypes)",0.0252,2
6,aberrant aggregation,"(Dictybase, Phenotypes)",0.0252,4
7,extracellular region,"(GO, cellular_component)",0.0252,6
8,decreased aggregate size,"(Dictybase, Phenotypes)",0.0265,3
9,decreased fruiting body size,"(Dictybase, Phenotypes)",0.034,4


***  C2 selected: 89 with EID: 88
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 88 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.00263,7
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.019,4
2,Starch and sucrose metabolism,"(KEGG, Pathways)",0.102,2
3,cell differentiation,"(GO, biological_process)",0.102,4


***  C3 selected: 87 with EID: 87
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 36 out of 87 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",7.49e-09,6
1,external encapsulating structure,"(GO, cellular_component)",7.49e-09,6
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",3.47e-07,5
3,cell wall organization or biogenesis,"(GO, biological_process)",9.7e-07,5
4,anatomical structure formation involved in mor...,"(GO, biological_process)",1.47e-05,7
5,anatomical structure development,"(GO, biological_process)",0.000151,13
6,extracellular region,"(GO, cellular_component)",0.000189,8
7,abolished cellulose binding,"(Dictybase, Phenotypes)",0.000541,2
8,cell differentiation,"(GO, biological_process)",0.000698,7
9,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00427,2


***  C4 selected: 85 with EID: 85
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 7 out of 85 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.00697,7


***  C5 selected: 73 with EID: 73
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 73 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell-cell signaling,"(GO, biological_process)",0.0361,2


***  C6 selected: 72 with EID: 72
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 72 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",6.28e-07,10


***  C7 selected: 70 with EID: 70
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 70 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.154,4
1,kinase activity,"(GO, molecular_function)",0.154,6
2,cell adhesion,"(GO, biological_process)",0.227,2


***  C8 selected: 65 with EID: 63
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 63 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",0.0174,2
1,external encapsulating structure,"(GO, cellular_component)",0.0174,2
2,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0381,3
3,hydrolase activity,"(GO, molecular_function)",0.0911,2
4,cell differentiation,"(GO, biological_process)",0.114,3
5,lipid binding,"(GO, molecular_function)",0.114,2
6,extracellular region,"(GO, cellular_component)",0.12,3


***  C9 selected: 57 with EID: 56
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 56 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell adhesion,"(GO, biological_process)",0.0123,4
1,decreased spore viability,"(Dictybase, Phenotypes)",0.18,2


***  C10 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 26 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",7.61e-08,7
1,mitochondrion,"(GO, cellular_component)",7.61e-08,11
2,Metabolic pathways,"(KEGG, Pathways)",9.01e-06,14
3,generation of precursor metabolites and energy,"(GO, biological_process)",1.52e-05,6
4,protein folding,"(GO, biological_process)",0.000212,4
5,unfolded protein binding,"(GO, molecular_function)",0.00176,3
6,oxidoreductase activity,"(GO, molecular_function)",0.00207,6
7,Phagosome,"(KEGG, Pathways)",0.0103,3
8,transmembrane transporter activity,"(GO, molecular_function)",0.0242,4
9,transmembrane transport,"(GO, biological_process)",0.0303,4


***  C11 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",1.13e-86,44
1,ribosome,"(GO, cellular_component)",4.68e-85,43
2,structural constituent of ribosome,"(GO, molecular_function)",8.59e-76,39
3,structural molecule activity,"(GO, molecular_function)",4.2499999999999997e-69,39
4,translation,"(GO, biological_process)",2.73e-62,39
5,RNA binding,"(GO, molecular_function)",4.33e-10,11
6,rRNA binding,"(GO, molecular_function)",6.82e-07,4


***  C12 selected: 49 with EID: 49
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 49 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.00188,6
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.00885,4
2,DNA-binding transcription factor activity,"(GO, molecular_function)",0.0129,3
3,reproduction,"(GO, biological_process)",0.0207,4
4,anatomical structure development,"(GO, biological_process)",0.0265,7
5,decreased gene expression,"(Dictybase, Phenotypes)",0.0374,3
6,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0478,2
7,DNA binding,"(GO, molecular_function)",0.06,4
8,decreased spore viability,"(Dictybase, Phenotypes)",0.0725,2
9,generation of precursor metabolites and energy,"(GO, biological_process)",0.0955,2


***  C13 selected: 49 with EID: 49
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 42 out of 49 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",1.18e-65,29
1,peptidase activity,"(GO, molecular_function)",1.24e-31,21
2,catabolic process,"(GO, biological_process)",4.33e-29,25
3,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.0525,3
4,enzyme regulator activity,"(GO, molecular_function)",0.0525,3
5,response to stress,"(GO, biological_process)",0.0713,5
6,endoplasmic reticulum,"(GO, cellular_component)",0.0768,3
7,ATPase activity,"(GO, molecular_function)",0.13,3
8,cytoplasmic vesicle,"(GO, cellular_component)",0.178,3


***  C14 selected: 43 with EID: 43
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 43 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",2.38e-10,11
1,cellular component assembly,"(GO, biological_process)",8.13e-06,8
2,protein-containing complex assembly,"(GO, biological_process)",8.13e-06,7
3,cytoskeleton organization,"(GO, biological_process)",8.13e-06,8
4,cytoskeleton,"(GO, cellular_component)",3.66e-05,7
5,Endocytosis,"(KEGG, Pathways)",5.73e-05,5
6,response to stress,"(GO, biological_process)",0.00154,7
7,cytosol,"(GO, cellular_component)",0.00469,5
8,plasma membrane,"(GO, cellular_component)",0.0136,5
9,decreased cell motility,"(Dictybase, Phenotypes)",0.0137,3


***  C15 selected: 38 with EID: 38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 38 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",5.3e-14,13
1,chromosome segregation,"(GO, biological_process)",1.94e-10,6
2,mitotic nuclear division,"(GO, biological_process)",7.84e-09,5
3,chromosome organization,"(GO, biological_process)",2.61e-08,7
4,mitotic cell cycle,"(GO, biological_process)",1.45e-07,7
5,DNA replication,"(KEGG, Pathways)",2.74e-07,5
6,cell division,"(GO, biological_process)",2.74e-07,7
7,chromosome,"(GO, cellular_component)",3.05e-07,6
8,cytoskeleton,"(GO, cellular_component)",5.47e-06,7
9,cytoskeleton organization,"(GO, biological_process)",0.00117,5


***  C16 selected: 35 with EID: 35
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 35 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0112,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0243,2
2,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0378,3
3,cell death,"(GO, biological_process)",0.0389,2
4,RNA degradation,"(KEGG, Pathways)",0.0437,2
5,aberrant aggregation,"(Dictybase, Phenotypes)",0.0437,2
6,decreased chemotaxis to cAMP,"(Dictybase, Phenotypes)",0.0437,2
7,anatomical structure development,"(GO, biological_process)",0.0437,5
8,cell differentiation,"(GO, biological_process)",0.0437,3
9,kinase activity,"(GO, molecular_function)",0.0437,4


***  C17 selected: 31 with EID: 31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 31 query genes used for enrichment calculation.


None

***  C18 selected: 31 with EID: 31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",7.7e-19,12
1,nucleolus,"(GO, cellular_component)",9.74e-17,11
2,ribosome biogenesis,"(GO, biological_process)",1.15e-15,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",8.06e-07,4
4,RNA binding,"(GO, molecular_function)",1.36e-05,6
5,protein-containing complex assembly,"(GO, biological_process)",0.00141,4
6,rRNA binding,"(GO, molecular_function)",0.00141,2
7,cellular component assembly,"(GO, biological_process)",0.00446,4
8,helicase activity,"(GO, molecular_function)",0.0168,2
9,methyltransferase activity,"(GO, molecular_function)",0.0182,2


***  C19 selected: 20 with EID: 20
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 20 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,ribosome,"(GO, cellular_component)",4.29e-33,17
1,Ribosome,"(KEGG, Pathways)",4.7e-33,17
2,structural constituent of ribosome,"(GO, molecular_function)",1.1199999999999999e-28,15
3,structural molecule activity,"(GO, molecular_function)",1.4600000000000002e-26,15
4,translation,"(GO, biological_process)",5.8e-22,14
5,rRNA binding,"(GO, molecular_function)",3.78e-06,3
6,RNA binding,"(GO, molecular_function)",1.82e-05,5
7,cytosol,"(GO, cellular_component)",3.81e-05,5
8,ribosome biogenesis,"(GO, biological_process)",0.000302,3
9,delayed development,"(Dictybase, Phenotypes)",0.0117,2


***  C20 selected: 1 with EID: 1
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 1 query genes used for enrichment calculation.


None

#### Using only genes annotated with at least one gene set

In [9]:
regulons=pd.read_table(pathRegulons+'mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab',index_col=0)
for group in range(1,len(regulons['Cluster'].unique())+1):
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2,use_annotated_genes=True)
    if save_enrichment and enriched is not None:
        enriched.to_csv(pathRegulons+'enrichment/mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab'+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 96 with EID: 96
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 30 out of 37 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",6.78e-05,16
1,cell differentiation,"(GO, biological_process)",0.0202,7
2,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0313,2
3,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0313,3
4,signal transduction,"(GO, biological_process)",0.0313,10
5,increased slug migration,"(Dictybase, Phenotypes)",0.0418,2
6,aberrant aggregation,"(Dictybase, Phenotypes)",0.0418,4
7,decreased aggregate size,"(Dictybase, Phenotypes)",0.0436,3
8,extracellular region,"(GO, cellular_component)",0.0436,6
9,decreased fruiting body size,"(Dictybase, Phenotypes)",0.0604,4


***  C2 selected: 89 with EID: 88
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",6.5e-05,7
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0029,4
2,cell differentiation,"(GO, biological_process)",0.0242,4
3,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0425,2


***  C3 selected: 87 with EID: 87
Ratio of genes annotated with a gene set in reference 0.32 and query 0.45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 39 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",4.58e-08,6
1,external encapsulating structure,"(GO, cellular_component)",4.58e-08,6
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",1.63e-06,5
3,cell wall organization or biogenesis,"(GO, biological_process)",4.53e-06,5
4,anatomical structure formation involved in mor...,"(GO, biological_process)",0.000105,7
5,abolished cellulose binding,"(Dictybase, Phenotypes)",0.00141,2
6,extracellular region,"(GO, cellular_component)",0.00152,8
7,anatomical structure development,"(GO, biological_process)",0.00185,13
8,cell differentiation,"(GO, biological_process)",0.00448,7
9,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00828,2


***  C4 selected: 85 with EID: 85
Ratio of genes annotated with a gene set in reference 0.32 and query 0.24
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 7 out of 20 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000488,7


***  C5 selected: 73 with EID: 73
Ratio of genes annotated with a gene set in reference 0.32 and query 0.11
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 5 out of 8 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell-cell signaling,"(GO, biological_process)",0.00391,2
1,anatomical structure development,"(GO, biological_process)",0.0503,4
2,extracellular region,"(GO, cellular_component)",0.134,2
3,wild type,"(Dictybase, Phenotypes)",0.233,2
4,signal transduction,"(GO, biological_process)",0.233,2


***  C6 selected: 72 with EID: 72
Ratio of genes annotated with a gene set in reference 0.32 and query 0.24
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 17 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",3.29e-09,10
1,wild type,"(Dictybase, Phenotypes)",0.154,5


***  C7 selected: 70 with EID: 70
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0617,4
1,kinase activity,"(GO, molecular_function)",0.0617,6
2,cell adhesion,"(GO, biological_process)",0.181,2


***  C8 selected: 65 with EID: 63
Ratio of genes annotated with a gene set in reference 0.32 and query 0.14
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 9 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",0.00297,2
1,external encapsulating structure,"(GO, cellular_component)",0.00297,2
2,anatomical structure formation involved in mor...,"(GO, biological_process)",0.00297,3
3,extracellular region,"(GO, cellular_component)",0.0134,3
4,cell differentiation,"(GO, biological_process)",0.0134,3
5,hydrolase activity,"(GO, molecular_function)",0.0134,2
6,lipid binding,"(GO, molecular_function)",0.0179,2
7,anatomical structure development,"(GO, biological_process)",0.0266,4
8,protein transport,"(GO, biological_process)",0.115,2


***  C9 selected: 57 with EID: 56
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 11 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell adhesion,"(GO, biological_process)",0.00125,4
1,decreased spore viability,"(Dictybase, Phenotypes)",0.112,2


***  C10 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.57
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 26 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",6.18e-06,7
1,mitochondrion,"(GO, cellular_component)",1.39e-05,11
2,generation of precursor metabolites and energy,"(GO, biological_process)",0.000471,6
3,Metabolic pathways,"(KEGG, Pathways)",0.0019,14
4,protein folding,"(GO, biological_process)",0.0019,4
5,unfolded protein binding,"(GO, molecular_function)",0.00933,3
6,oxidoreductase activity,"(GO, molecular_function)",0.0362,6
7,Phagosome,"(KEGG, Pathways)",0.0514,3
8,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.124,2
9,Propanoate metabolism,"(KEGG, Pathways)",0.126,2


***  C11 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.89
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 48 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",7.620000000000001e-70,44
1,ribosome,"(GO, cellular_component)",2.1799999999999997e-68,43
2,structural constituent of ribosome,"(GO, molecular_function)",4.2199999999999996e-60,39
3,structural molecule activity,"(GO, molecular_function)",2.02e-53,39
4,translation,"(GO, biological_process)",1.2299999999999999e-46,39
5,RNA binding,"(GO, molecular_function)",1.25e-05,11
6,rRNA binding,"(GO, molecular_function)",3.87e-05,4


***  C12 selected: 49 with EID: 49
Ratio of genes annotated with a gene set in reference 0.32 and query 0.31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.000742,6
1,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0058,4
2,DNA-binding transcription factor activity,"(GO, molecular_function)",0.01,3
3,anatomical structure development,"(GO, biological_process)",0.0111,7
4,reproduction,"(GO, biological_process)",0.0111,4
5,decreased gene expression,"(Dictybase, Phenotypes)",0.0298,3
6,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0425,2
7,DNA binding,"(GO, molecular_function)",0.0528,4
8,decreased spore viability,"(Dictybase, Phenotypes)",0.0697,2
9,cytoskeleton organization,"(GO, biological_process)",0.0864,3


***  C13 selected: 49 with EID: 49
Ratio of genes annotated with a gene set in reference 0.32 and query 0.94
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 35 out of 46 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",1.94e-52,29
1,peptidase activity,"(GO, molecular_function)",4.64e-22,21
2,catabolic process,"(GO, biological_process)",6.21e-18,25


***  C14 selected: 43 with EID: 43
Ratio of genes annotated with a gene set in reference 0.32 and query 0.51
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",8.28e-09,11
1,cellular component assembly,"(GO, biological_process)",0.000128,8
2,protein-containing complex assembly,"(GO, biological_process)",0.000128,7
3,cytoskeleton organization,"(GO, biological_process)",0.000128,8
4,Endocytosis,"(KEGG, Pathways)",0.000444,5
5,cytoskeleton,"(GO, cellular_component)",0.000444,7
6,response to stress,"(GO, biological_process)",0.0182,7
7,cytosol,"(GO, cellular_component)",0.0316,5
8,cell morphogenesis,"(GO, biological_process)",0.0462,2
9,decreased cell motility,"(Dictybase, Phenotypes)",0.0484,3


***  C15 selected: 38 with EID: 38
Ratio of genes annotated with a gene set in reference 0.32 and query 0.5
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 19 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",8.25e-13,13
1,chromosome segregation,"(GO, biological_process)",1.81e-09,6
2,mitotic nuclear division,"(GO, biological_process)",5.5e-08,5
3,chromosome organization,"(GO, biological_process)",3.03e-07,7
4,mitotic cell cycle,"(GO, biological_process)",1.66e-06,7
5,DNA replication,"(KEGG, Pathways)",2.06e-06,5
6,chromosome,"(GO, cellular_component)",2.75e-06,6
7,cell division,"(GO, biological_process)",2.75e-06,7
8,cytoskeleton,"(GO, cellular_component)",5.99e-05,7
9,microtubule organizing center,"(GO, cellular_component)",0.00617,3


***  C16 selected: 35 with EID: 35
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0168,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0366,2
2,cell death,"(GO, biological_process)",0.0488,2
3,anatomical structure formation involved in mor...,"(GO, biological_process)",0.0488,3
4,RNA degradation,"(KEGG, Pathways)",0.0598,2
5,aberrant aggregation,"(Dictybase, Phenotypes)",0.0598,2
6,decreased chemotaxis to cAMP,"(Dictybase, Phenotypes)",0.0598,2
7,anatomical structure development,"(GO, biological_process)",0.0598,5
8,cell differentiation,"(GO, biological_process)",0.0598,3
9,kinase activity,"(GO, molecular_function)",0.066,4


***  C17 selected: 31 with EID: 31
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 8 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,oxidoreductase activity,"(GO, molecular_function)",0.231,2


***  C18 selected: 31 with EID: 31
Ratio of genes annotated with a gene set in reference 0.32 and query 0.71
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",3.09e-15,12
1,nucleolus,"(GO, cellular_component)",2.23e-13,11
2,ribosome biogenesis,"(GO, biological_process)",1.47e-12,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",1.76e-05,4
4,RNA binding,"(GO, molecular_function)",0.000985,6
5,rRNA binding,"(GO, molecular_function)",0.00786,2
6,protein-containing complex assembly,"(GO, biological_process)",0.0221,4
7,cellular component assembly,"(GO, biological_process)",0.0678,4
8,helicase activity,"(GO, molecular_function)",0.0754,2
9,methyltransferase activity,"(GO, molecular_function)",0.0806,2


***  C19 selected: 20 with EID: 20
Ratio of genes annotated with a gene set in reference 0.32 and query 0.9
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,ribosome,"(GO, cellular_component)",1.92e-26,17
1,Ribosome,"(KEGG, Pathways)",2.1e-26,17
2,structural constituent of ribosome,"(GO, molecular_function)",1.65e-22,15
3,structural molecule activity,"(GO, molecular_function)",2.13e-20,15
4,translation,"(GO, biological_process)",3.87e-16,14
5,rRNA binding,"(GO, molecular_function)",8.21e-05,3
6,RNA binding,"(GO, molecular_function)",0.0023,5
7,cytosol,"(GO, cellular_component)",0.00453,5
8,ribosome biogenesis,"(GO, biological_process)",0.006,3
9,delayed development,"(Dictybase, Phenotypes)",0.083,2


***  C20 selected: 1 with EID: 1
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2


UnboundLocalError: local variable 'query_in_enriched' referenced before assignment