# Enrichment of regulons from Regulons_by_strain

In [1]:
import os
import sys
from importlib import reload

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.insert(0,module_path)

import pandas as pd
from IPython.display import display
pd.set_option('max_colwidth', 70)
pd.set_option('display.max_rows', 500)
import string

from orangecontrib.bioinformatics.geneset.__init__ import list_all

import jupyter_functions as jf
reload(jf)
import jupyter_functions as jf
from functionsDENet import loadPickle, savePickle
import stages_DE.enrichment_library as enr
reload(enr)
import stages_DE.enrichment_library as enr

In [2]:
# Common root of all data directories used in this notebook.
# NOTE(review): absolute, machine-specific location - adjust when running elsewhere.
dataRoot='/home/karin/Documents/timeTrajectories/data/'
# Directory with the expression data (mergedGenes_RPKUM.tsv); the trailing slash is required
# because file names are appended by plain string concatenation.
dataPath=dataRoot+'RPKUM/combined/'
# Directory with the regulon (cluster) tables; also ends with a slash for the same reason.
pathRegulons=dataRoot+'regulons/by_strain/kN300_mean0std1_log/'

## Used gene sets
Use only gene sets that contain at least 5 and at most 500 genes. Use all ontologies.

In [18]:
# Organism identifier passed to list_all
# (presumably the NCBI taxonomy ID of Dictyostelium discoideum - TODO confirm).
ORGANISM='44689'
# Gene set size limits passed to get_gene_sets (see the markdown description above).
SET_SIZE_LIMITS=(5,500)

# Names of all gene sets available for the organism
gene_sets=list(list_all(organism=ORGANISM))
# Gene sets used for enrichment; iterated as a mapping whose values are collections of gene sets
GENE_SETS_ONTOLOGY=enr.get_gene_sets(gene_set_names=gene_sets,
                                     go_slims=True,
                                     set_sizes=SET_SIZE_LIMITS)

In [13]:
# Expression table; its index (gene names) is the source of names converted to EIDs
genes=pd.read_csv(dataPath+'mergedGenes_RPKUM.tsv',sep='\t',index_col=0)
# Keep only genes (rows) that have at least one non-zero value
is_nonzero_gene=genes.ne(0).any(axis=1)
all_gene_names=genes.loc[is_nonzero_gene].index
# Gene name - EID mapping and the EIDs of all retained genes (used as enrichment reference)
NAME_EID=enr.name_genes_entrez(gene_names=all_gene_names,key_entrez=False)
ALL_GENE_NAMES_EID=enr.convert_EID(genes=all_gene_names,name_EID=NAME_EID)

In [14]:
def group_diff_enrichment(data:pd.DataFrame,group:str,padj:float=0.25,min_overlap:int=None,
                          use_annotated_genes:bool=False):
    """
    Calculate, print and display gene set enrichment for genes of a single cluster.

    Genes whose 'Cluster' value equals group are converted to EIDs with NAME_EID and passed as
    query to enr.gene_set_enrichment, together with the reference ALL_GENE_NAMES_EID and the
    gene sets from GENE_SETS_ONTOLOGY. The enriched gene sets are sorted by padj.

    Note (from the original author's comments): only gene sets that have overlap with query
    greater or equal to min_overlap are displayed, while p value and padj calculation in
    gene_set_enrichment uses all gene sets that have overlap >=1.

    :param data: Table with genes in index and a column 'Cluster' with cluster labels.
    :param group: Cluster label whose genes are used as query.
    :param padj: Passed to gene_set_enrichment as padj_threshold (reported as FDR).
    :param min_overlap: Passed to gene_set_enrichment as min_overlap.
    :param use_annotated_genes: if True use for reference and query only genes that have at
        least one gene set annotation (are present in at least one gene set of
        GENE_SETS_ONTOLOGY).
    :return: DataFrame with columns 'Gene set', 'Ontology', 'FDR' (formatted as string) and
        'N in query', or None if the query is empty or no gene set is returned as enriched.
    """
    selected=list(data[data['Cluster']==group].index)
    query_EID=enr.convert_EID(genes=selected, name_EID=NAME_EID)
    print('***  '+group+' selected:',len(selected),'with EID:',len(query_EID))

    reference_gene_eids=ALL_GENE_NAMES_EID.copy()
    query_eids=query_EID.copy()

    if use_annotated_genes:
        # Collect all genes present in any of the used gene sets
        gene_sets_genes=set()
        for gene_sets in GENE_SETS_ONTOLOGY.values():
            for gene_set in gene_sets:
                gene_sets_genes.update(gene_set.genes)
        reference_gene_eids=set(reference_gene_eids) & gene_sets_genes
        query_eids=set(query_eids) & gene_sets_genes
        # The query (or reference) may be empty, e.g. when the group is absent from data;
        # report nan instead of failing with ZeroDivisionError
        ratio_reference=(round(len(reference_gene_eids)/len(ALL_GENE_NAMES_EID),2)
                         if len(ALL_GENE_NAMES_EID)>0 else float('nan'))
        ratio_query=(round(len(query_eids)/len(query_EID),2)
                     if len(query_EID)>0 else float('nan'))
        print('Ratio of genes annotated with a gene set in reference',ratio_reference,
             'and query',ratio_query)

    query_in_enriched=set()
    result=None
    if len(query_eids) > 0:
        enrichment=enr.gene_set_enrichment(query_eids, reference_EID=reference_gene_eids,
                                                padj_threshold=padj,min_overlap=min_overlap,
                                                gene_sets_ontology=GENE_SETS_ONTOLOGY)
        if len(enrichment)>0:
            # Set of query EIDs for intersecting with gene set members
            query_eids_set=set(query_eids)
            enrichment_display=list()
            for enriched in sorted(enrichment, key=lambda item: item.padj):
                query_in_enriched.update(enriched.gene_set.genes & query_eids_set)
                enrichment_display.append({'Gene set':enriched.gene_set.name,'Ontology':enriched.ontology,
                                           'FDR':"{:.2e}".format(enriched.padj),'N in query':enriched.in_query})
            result=pd.DataFrame(enrichment_display)
    print('Enrichment at FDR: '+str(padj)+' and min query - gene set overlap',str(min_overlap))
    print('N query genes in displayed gene sets:',len(query_in_enriched),'out of', len(query_eids),
          'query genes used for enrichment calculation.')
    display(result)
    print('\n')
    return result

## Enrichment reports

In [16]:
# If True, the enrichment tables computed in the cells below are also written to tsv files
save_enrichment=False

### Enrichment for expression_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.pdf

In [15]:
# Table with genes in index and their cluster labels in column 'Cluster'
regulons_file='mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.tab'
regulons=pd.read_csv(pathRegulons+regulons_file,sep='\t',index_col=0)
# Cluster labels are expected to be C1, C2, ..., CN
n_clusters=len(regulons['Cluster'].unique())
# Common part of the file names used for saving the enrichment tables
enrichment_prefix=(pathRegulons+'enrichment/'+
                   'minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30_enrichment_cluster')
for group in range(1,n_clusters+1):
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2)
    # Nothing to save if saving is disabled or no enrichment table was produced
    if enriched is None or not save_enrichment:
        continue
    enriched.to_csv(enrichment_prefix+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 30 with EID: 30
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 30 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",3.61e-19,12
1,nucleolus,"(GO, cellular_component)",4.81e-17,11
2,ribosome biogenesis,"(GO, biological_process)",5.97e-16,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",3.39e-07,4
4,RNA binding,"(GO, molecular_function)",8.74e-06,6
5,protein-containing complex assembly,"(GO, biological_process)",0.000759,4
6,rRNA binding,"(GO, molecular_function)",0.001,2
7,cellular component assembly,"(GO, biological_process)",0.00269,4
8,helicase activity,"(GO, molecular_function)",0.012,2
9,methyltransferase activity,"(GO, molecular_function)",0.013,2




***  C2 selected: 66 with EID: 65
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 65 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",6.91e-10,9
1,Metabolic pathways,"(KEGG, Pathways)",3.34e-08,19
2,mitochondrion,"(GO, cellular_component)",8.58e-05,9
3,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.000386,4
4,Phagosome,"(KEGG, Pathways)",0.0028,4
5,generation of precursor metabolites and energy,"(GO, biological_process)",0.0106,4
6,cytoplasmic vesicle,"(GO, cellular_component)",0.0106,6
7,protein folding,"(GO, biological_process)",0.0106,3
8,aberrant cellular response to stress,"(Dictybase, Phenotypes)",0.0144,2
9,Ribosome,"(KEGG, Pathways)",0.015,4




***  C3 selected: 68 with EID: 68
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 62 out of 68 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",6.409999999999999e-126,60
1,ribosome,"(GO, cellular_component)",1.4700000000000001e-124,59
2,structural constituent of ribosome,"(GO, molecular_function)",8.17e-109,53
3,structural molecule activity,"(GO, molecular_function)",1.72e-98,53
4,translation,"(GO, biological_process)",1.79e-85,52
5,RNA binding,"(GO, molecular_function)",3.26e-15,16
6,rRNA binding,"(GO, molecular_function)",8.86e-13,7
7,cytosol,"(GO, cellular_component)",6.12e-05,8
8,ribosome biogenesis,"(GO, biological_process)",0.0168,3
9,delayed development,"(Dictybase, Phenotypes)",0.183,2




***  C4 selected: 55 with EID: 55
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 55 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",6.35e-08,10
1,Endocytosis,"(KEGG, Pathways)",0.000398,5
2,protein-containing complex assembly,"(GO, biological_process)",0.00203,5
3,cytoskeleton organization,"(GO, biological_process)",0.00203,6
4,cellular component assembly,"(GO, biological_process)",0.00203,6
5,plasma membrane,"(GO, cellular_component)",0.00203,7
6,response to stress,"(GO, biological_process)",0.00571,7
7,cytoskeleton,"(GO, cellular_component)",0.0068,5
8,cytoplasmic vesicle,"(GO, cellular_component)",0.0522,4
9,cytosol,"(GO, cellular_component)",0.0522,4




***  C5 selected: 41 with EID: 41
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 38 out of 41 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",2.8e-69,29
1,peptidase activity,"(GO, molecular_function)",7.5499999999999995e-34,21
2,catabolic process,"(GO, biological_process)",1.6e-31,25
3,response to stress,"(GO, biological_process)",0.0373,5
4,enzyme regulator activity,"(GO, molecular_function)",0.0373,3
5,ATPase activity,"(GO, molecular_function)",0.0815,3
6,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.135,2




***  C6 selected: 74 with EID: 74
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 24 out of 74 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",4.37e-06,15
1,cell adhesion,"(GO, biological_process)",0.00792,5
2,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.00848,3
3,extracellular region,"(GO, cellular_component)",0.0102,6
4,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0122,2
5,signal transduction,"(GO, biological_process)",0.0147,8
6,decreased aggregate size,"(Dictybase, Phenotypes)",0.0147,3
7,increased slug migration,"(Dictybase, Phenotypes)",0.0147,2
8,decreased fruiting body size,"(Dictybase, Phenotypes)",0.0147,4
9,response to stress,"(GO, biological_process)",0.0153,8




***  C7 selected: 40 with EID: 40
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 40 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0149,2
1,decreased intracellular cAMP level,"(Dictybase, Phenotypes)",0.0162,2
2,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0162,2
3,translucent sorus,"(Dictybase, Phenotypes)",0.0162,2
4,anatomical structure development,"(GO, biological_process)",0.0318,6
5,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0318,3
6,cell death,"(GO, biological_process)",0.0318,2
7,kinase activity,"(GO, molecular_function)",0.0347,5
8,cell differentiation,"(GO, biological_process)",0.0478,3
9,RNA degradation,"(KEGG, Pathways)",0.0478,2




***  C8 selected: 23 with EID: 23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 23 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,kinase activity,"(GO, molecular_function)",0.11,3




***  C9 selected: 20 with EID: 20
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 20 query genes used for enrichment calculation.


None



***  C10 selected: 36 with EID: 36
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 36 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",2.14e-14,13
1,chromosome segregation,"(GO, biological_process)",1.37e-10,6
2,mitotic nuclear division,"(GO, biological_process)",5.9e-09,5
3,mitotic cell cycle,"(GO, biological_process)",1.21e-07,7
4,cell division,"(GO, biological_process)",2.24e-07,7
5,DNA replication,"(KEGG, Pathways)",2.24e-07,5
6,chromosome,"(GO, cellular_component)",2.48e-07,6
7,chromosome organization,"(GO, biological_process)",3.04e-07,6
8,cytoskeleton,"(GO, cellular_component)",3.71e-06,7
9,cytoskeleton organization,"(GO, biological_process)",0.000923,5




***  C11 selected: 77 with EID: 77
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 77 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.00285,7
1,anatomical structure development,"(GO, biological_process)",0.113,7
2,cell adhesion,"(GO, biological_process)",0.113,3
3,Starch and sucrose metabolism,"(KEGG, Pathways)",0.113,2
4,decreased sporulation,"(Dictybase, Phenotypes)",0.151,2
5,development arrests at mound stage,"(Dictybase, Phenotypes)",0.151,2
6,abolished culmination,"(Dictybase, Phenotypes)",0.173,2
7,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.209,2
8,delayed aggregation,"(Dictybase, Phenotypes)",0.209,2




***  C12 selected: 57 with EID: 57
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 57 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000344,7
1,cell-cell signaling,"(GO, biological_process)",0.0567,2
2,decreased spore viability,"(Dictybase, Phenotypes)",0.187,2
3,peptidase activity,"(GO, molecular_function)",0.207,2




***  C13 selected: 64 with EID: 64
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 64 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00446,2
1,aberrant fruiting body morphology,"(Dictybase, Phenotypes)",0.195,2




***  C14 selected: 19 with EID: 19
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 19 query genes used for enrichment calculation.


None



***  C15 selected: 91 with EID: 91
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 91 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",8.23e-11,7
1,external encapsulating structure,"(GO, cellular_component)",8.23e-11,7
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",4.29e-09,6
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",8.53e-08,9
4,cell wall organization or biogenesis,"(GO, biological_process)",1.17e-06,5
5,cell differentiation,"(GO, biological_process)",1.85e-06,10
6,anatomical structure development,"(GO, biological_process)",0.000277,13
7,abolished cellulose binding,"(Dictybase, Phenotypes)",0.000713,2
8,Starch and sucrose metabolism,"(KEGG, Pathways)",0.000881,4
9,extracellular region,"(GO, cellular_component)",0.0106,6




***  C16 selected: 74 with EID: 74
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 74 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000149,8




***  C17 selected: 61 with EID: 59
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 59 query genes used for enrichment calculation.


None



***  C18 selected: 46 with EID: 45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 45 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000479,6
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0282,3
2,cell differentiation,"(GO, biological_process)",0.121,3
3,oxidoreductase activity,"(GO, molecular_function)",0.152,3




***  C19 selected: 58 with EID: 58
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 58 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell adhesion,"(GO, biological_process)",0.0813,3
1,extracellular region,"(GO, cellular_component)",0.0813,4
2,hydrolase activity,"(GO, molecular_function)",0.123,2
3,cell differentiation,"(GO, biological_process)",0.159,3
4,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.176,2




***  C20 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Phagosome,"(KEGG, Pathways)",0.242,2
1,decreased spore viability,"(Dictybase, Phenotypes)",0.242,2




***  C21 selected: 45 with EID: 45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 45 query genes used for enrichment calculation.


None





#### Using only genes annotated with at least one gene set 
A gene is considered annotated if it is present in at least one of the gene sets used for enrichment (see above). Only these annotated genes are used for enrichment calculation, both for the reference and for the gene group of interest (query).

In [19]:
# Table with genes in index and their cluster labels in column 'Cluster'
regulons_file='mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30.tab'
regulons=pd.read_csv(pathRegulons+regulons_file,sep='\t',index_col=0)
# Cluster labels are expected to be C1, C2, ..., CN
n_clusters=len(regulons['Cluster'].unique())
# Common part of the file names used for saving the enrichment tables
enrichment_prefix=(pathRegulons+'enrichment/'+
                   'minExpressed0.990.1Strains1Min1Max18_clustersLouvain0.4minmaxNologPCA30kN30_annotatedOnly_enrichment_cluster')
for group in range(1,n_clusters+1):
    # Restrict both reference and query to genes annotated with at least one gene set
    enriched=group_diff_enrichment(data=regulons,group='C'+str(group),min_overlap=2,
                                   use_annotated_genes=True)
    # Nothing to save if saving is disabled or no enrichment table was produced
    if enriched is None or not save_enrichment:
        continue
    enriched.to_csv(enrichment_prefix+str(group)+'.tsv', sep='\t',index=False)

***  C1 selected: 30 with EID: 30
Ratio of genes annotated with a gene set in reference 0.32 and query 0.7
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 21 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",1.07e-15,12
1,nucleolus,"(GO, cellular_component)",8.54e-14,11
2,ribosome biogenesis,"(GO, biological_process)",6.14e-13,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",7e-06,4
4,RNA binding,"(GO, molecular_function)",0.000582,6
5,rRNA binding,"(GO, molecular_function)",0.00545,2
6,protein-containing complex assembly,"(GO, biological_process)",0.0109,4
7,cellular component assembly,"(GO, biological_process)",0.0399,4
8,helicase activity,"(GO, molecular_function)",0.0526,2
9,methyltransferase activity,"(GO, molecular_function)",0.0562,2




***  C2 selected: 66 with EID: 65
Ratio of genes annotated with a gene set in reference 0.32 and query 0.58
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 38 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",9.24e-08,9
1,Metabolic pathways,"(KEGG, Pathways)",0.00011,19
2,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.00502,4
3,mitochondrion,"(GO, cellular_component)",0.00537,9
4,Phagosome,"(KEGG, Pathways)",0.0256,4
5,protein folding,"(GO, biological_process)",0.0655,3
6,aberrant cellular response to stress,"(Dictybase, Phenotypes)",0.0655,2
7,generation of precursor metabolites and energy,"(GO, biological_process)",0.0672,4
8,cytoplasmic vesicle,"(GO, cellular_component)",0.129,6
9,Ribosome,"(KEGG, Pathways)",0.129,4




***  C3 selected: 68 with EID: 68
Ratio of genes annotated with a gene set in reference 0.32 and query 0.97
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 62 out of 66 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",6.26e-98,60
1,ribosome,"(GO, cellular_component)",5.8e-97,59
2,structural constituent of ribosome,"(GO, molecular_function)",9.05e-84,53
3,structural molecule activity,"(GO, molecular_function)",1.7800000000000002e-73,53
4,translation,"(GO, biological_process)",5.8499999999999995e-61,52
5,rRNA binding,"(GO, molecular_function)",2.35e-09,7
6,RNA binding,"(GO, molecular_function)",4.1e-08,16
7,cytosol,"(GO, cellular_component)",0.0786,8




***  C4 selected: 55 with EID: 55
Ratio of genes annotated with a gene set in reference 0.32 and query 0.44
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 24 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",3.97e-07,10
1,Endocytosis,"(KEGG, Pathways)",0.00141,5
2,plasma membrane,"(GO, cellular_component)",0.0069,7
3,cytoskeleton organization,"(GO, biological_process)",0.0069,6
4,cellular component assembly,"(GO, biological_process)",0.0069,6
5,protein-containing complex assembly,"(GO, biological_process)",0.0069,5
6,cytoskeleton,"(GO, cellular_component)",0.0223,5
7,response to stress,"(GO, biological_process)",0.0223,7
8,cytosol,"(GO, cellular_component)",0.13,4
9,cytoplasmic vesicle,"(GO, cellular_component)",0.133,4




***  C5 selected: 41 with EID: 41
Ratio of genes annotated with a gene set in reference 0.32 and query 0.98
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 35 out of 40 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",2.15e-55,29
1,peptidase activity,"(GO, molecular_function)",8.13e-24,21
2,catabolic process,"(GO, biological_process)",9.51e-20,25




***  C6 selected: 74 with EID: 74
Ratio of genes annotated with a gene set in reference 0.32 and query 0.42
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 24 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",2.55e-05,15
1,cell adhesion,"(GO, biological_process)",0.0178,5
2,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0178,3
3,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0256,2
4,extracellular region,"(GO, cellular_component)",0.0273,6
5,decreased aggregate size,"(Dictybase, Phenotypes)",0.0312,3
6,increased slug migration,"(Dictybase, Phenotypes)",0.0312,2
7,decreased fruiting body size,"(Dictybase, Phenotypes)",0.0376,4
8,signal transduction,"(GO, biological_process)",0.0592,8
9,response to stress,"(GO, biological_process)",0.0612,8




***  C7 selected: 40 with EID: 40
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0198,2
1,translucent sorus,"(Dictybase, Phenotypes)",0.0215,2
2,decreased intracellular cAMP level,"(Dictybase, Phenotypes)",0.0215,2
3,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0215,2
4,anatomical structure development,"(GO, biological_process)",0.0379,6
5,cell death,"(GO, biological_process)",0.0379,2
6,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0379,3
7,kinase activity,"(GO, molecular_function)",0.0511,5
8,RNA degradation,"(KEGG, Pathways)",0.0562,2
9,cell differentiation,"(GO, biological_process)",0.0562,3




***  C8 selected: 23 with EID: 23
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 6 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,kinase activity,"(GO, molecular_function)",0.047,3




***  C9 selected: 20 with EID: 20
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 0 query genes used for enrichment calculation.


None



***  C10 selected: 36 with EID: 36
Ratio of genes annotated with a gene set in reference 0.32 and query 0.53
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 19 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",7.55e-13,13
1,chromosome segregation,"(GO, biological_process)",1.8e-09,6
2,mitotic nuclear division,"(GO, biological_process)",5.47e-08,5
3,mitotic cell cycle,"(GO, biological_process)",2.07e-06,7
4,DNA replication,"(KEGG, Pathways)",2.46e-06,5
5,chromosome,"(GO, cellular_component)",3.13e-06,6
6,cell division,"(GO, biological_process)",3.13e-06,7
7,chromosome organization,"(GO, biological_process)",3.83e-06,6
8,cytoskeleton,"(GO, cellular_component)",5.96e-05,7
9,microtubule organizing center,"(GO, cellular_component)",0.00616,3




***  C11 selected: 77 with EID: 77
Ratio of genes annotated with a gene set in reference 0.32 and query 0.29
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 15 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.000753,7
1,anatomical structure development,"(GO, biological_process)",0.0852,7
2,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0979,2
3,cell adhesion,"(GO, biological_process)",0.0979,3
4,decreased sporulation,"(Dictybase, Phenotypes)",0.126,2
5,development arrests at mound stage,"(Dictybase, Phenotypes)",0.126,2
6,abolished culmination,"(Dictybase, Phenotypes)",0.145,2
7,delayed aggregation,"(Dictybase, Phenotypes)",0.177,2
8,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.179,2
9,peptidase activity,"(GO, molecular_function)",0.222,2




***  C12 selected: 57 with EID: 57
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 11 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.8e-05,7
1,cell-cell signaling,"(GO, biological_process)",0.0322,2
2,decreased spore viability,"(Dictybase, Phenotypes)",0.12,2
3,peptidase activity,"(GO, molecular_function)",0.142,2
4,plasma membrane,"(GO, cellular_component)",0.171,3
5,anatomical structure development,"(GO, biological_process)",0.171,4




***  C13 selected: 64 with EID: 64
Ratio of genes annotated with a gene set in reference 0.32 and query 0.16
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 10 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.000984,2
1,anatomical structure development,"(GO, biological_process)",0.0571,4
2,aberrant fruiting body morphology,"(Dictybase, Phenotypes)",0.0589,2
3,extracellular region,"(GO, cellular_component)",0.0981,2




***  C14 selected: 19 with EID: 19
Ratio of genes annotated with a gene set in reference 0.32 and query 0.05
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 1 query genes used for enrichment calculation.


None



***  C15 selected: 91 with EID: 91
Ratio of genes annotated with a gene set in reference 0.32 and query 0.45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 41 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",6.78e-10,7
1,external encapsulating structure,"(GO, cellular_component)",6.78e-10,7
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",2.73e-08,6
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",1.03e-06,9
4,cell wall organization or biogenesis,"(GO, biological_process)",5.63e-06,5
5,cell differentiation,"(GO, biological_process)",2.52e-05,10
6,abolished cellulose binding,"(Dictybase, Phenotypes)",0.0016,2
7,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00345,4
8,anatomical structure development,"(GO, biological_process)",0.00372,13
9,abolished stalk cell differentiation,"(Dictybase, Phenotypes)",0.0228,2




***  C16 selected: 74 with EID: 74
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 15 out of 17 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",3.37e-06,8
1,wild type,"(Dictybase, Phenotypes)",0.138,5
2,decreased gene expression,"(Dictybase, Phenotypes)",0.202,2
3,decreased spore viability,"(Dictybase, Phenotypes)",0.202,2
4,DNA binding,"(GO, molecular_function)",0.211,3




***  C17 selected: 61 with EID: 59
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 12 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",0.234,4




***  C18 selected: 46 with EID: 45
Ratio of genes annotated with a gene set in reference 0.32 and query 0.33
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000313,6
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0284,3
2,cell differentiation,"(GO, biological_process)",0.123,3
3,oxidoreductase activity,"(GO, molecular_function)",0.159,3




***  C19 selected: 58 with EID: 58
Ratio of genes annotated with a gene set in reference 0.32 and query 0.21
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 12 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0226,4
1,cell adhesion,"(GO, biological_process)",0.0226,3
2,cell differentiation,"(GO, biological_process)",0.0439,3
3,hydrolase activity,"(GO, molecular_function)",0.0439,2
4,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0883,2




***  C20 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 11 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Phagosome,"(KEGG, Pathways)",0.155,2
1,decreased spore viability,"(Dictybase, Phenotypes)",0.155,2
2,extracellular region,"(GO, cellular_component)",0.244,2




***  C21 selected: 45 with EID: 45
Ratio of genes annotated with a gene set in reference 0.32 and query 0.13
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 3 out of 6 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytosol,"(GO, cellular_component)",0.224,2
1,response to stress,"(GO, biological_process)",0.224,2






### Enrichment for mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab

In [13]:
# Table with genes in index and their cluster labels in column 'Cluster'
regulons_file='mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab'
regulons=pd.read_csv(pathRegulons+regulons_file,sep='\t',index_col=0)
# Cluster labels are expected to be C1, C2, ..., CN
n_clusters=len(regulons['Cluster'].unique())
# Common part of the file names used for saving the enrichment tables
enrichment_prefix=(pathRegulons+'enrichment/'+
                   'minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log_enrichment_cluster')
for group in range(1,n_clusters+1):
    cluster='C'+str(group)
    # Report the cluster label together with the matching uppercase letter (C1 - A, C2 - B, ...)
    cluster_letter=string.ascii_uppercase[group-1]
    print('*** Cluster %s (%s)' %(cluster,cluster_letter))

    enriched=group_diff_enrichment(data=regulons,group=cluster,min_overlap=2)
    # Nothing to save if saving is disabled or no enrichment table was produced
    if enriched is None or not save_enrichment:
        continue
    enriched.to_csv(enrichment_prefix+str(group)+'.tsv', sep='\t',index=False)

*** Cluster C1 (A)
***  C1 selected: 31 with EID: 31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",7.7e-19,12
1,nucleolus,"(GO, cellular_component)",9.74e-17,11
2,ribosome biogenesis,"(GO, biological_process)",1.15e-15,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",5.11e-07,4
4,RNA binding,"(GO, molecular_function)",1.41e-05,6
5,protein-containing complex assembly,"(GO, biological_process)",0.00113,4
6,rRNA binding,"(GO, molecular_function)",0.00141,2
7,cellular component assembly,"(GO, biological_process)",0.004,4
8,helicase activity,"(GO, molecular_function)",0.0168,2
9,methyltransferase activity,"(GO, molecular_function)",0.0182,2




*** Cluster C2 (B)
***  C2 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 26 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",8.06e-08,7
1,mitochondrion,"(GO, cellular_component)",8.06e-08,11
2,Metabolic pathways,"(KEGG, Pathways)",1.08e-05,14
3,generation of precursor metabolites and energy,"(GO, biological_process)",1.18e-05,6
4,protein folding,"(GO, biological_process)",0.000187,4
5,unfolded protein binding,"(GO, molecular_function)",0.00172,3
6,oxidoreductase activity,"(GO, molecular_function)",0.00188,6
7,Phagosome,"(KEGG, Pathways)",0.00903,3
8,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.0382,2
9,Propanoate metabolism,"(KEGG, Pathways)",0.0382,2




*** Cluster C3 (C)
***  C3 selected: 54 with EID: 54
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 54 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",1.13e-86,44
1,ribosome,"(GO, cellular_component)",4.68e-85,43
2,structural constituent of ribosome,"(GO, molecular_function)",8.59e-76,39
3,structural molecule activity,"(GO, molecular_function)",4.2499999999999997e-69,39
4,translation,"(GO, biological_process)",6.01e-62,39
5,RNA binding,"(GO, molecular_function)",4.62e-10,11
6,rRNA binding,"(GO, molecular_function)",6.82e-07,4




*** Cluster C4 (D)
***  C4 selected: 43 with EID: 43
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 43 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",2.55e-10,11
1,protein-containing complex assembly,"(GO, biological_process)",5.66e-06,7
2,cytoskeleton organization,"(GO, biological_process)",5.66e-06,8
3,cellular component assembly,"(GO, biological_process)",5.66e-06,8
4,plasma membrane,"(GO, cellular_component)",3.29e-05,8
5,cytoskeleton,"(GO, cellular_component)",3.29e-05,7
6,Endocytosis,"(KEGG, Pathways)",4.91e-05,5
7,response to stress,"(GO, biological_process)",0.00158,7
8,cytosol,"(GO, cellular_component)",0.00436,5
9,decreased cell motility,"(Dictybase, Phenotypes)",0.0137,3




*** Cluster C5 (E)
***  C5 selected: 20 with EID: 20
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 20 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,ribosome,"(GO, cellular_component)",4.29e-33,17
1,Ribosome,"(KEGG, Pathways)",4.7e-33,17
2,structural constituent of ribosome,"(GO, molecular_function)",1.1199999999999999e-28,15
3,structural molecule activity,"(GO, molecular_function)",1.4600000000000002e-26,15
4,translation,"(GO, biological_process)",7.52e-22,14
5,rRNA binding,"(GO, molecular_function)",3.78e-06,3
6,RNA binding,"(GO, molecular_function)",1.87e-05,5
7,cytosol,"(GO, cellular_component)",4e-05,5
8,ribosome biogenesis,"(GO, biological_process)",0.000302,3
9,delayed development,"(Dictybase, Phenotypes)",0.0121,2




*** Cluster C6 (F)
***  C6 selected: 49 with EID: 49
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 42 out of 49 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",1.18e-65,29
1,peptidase activity,"(GO, molecular_function)",1.24e-31,21
2,catabolic process,"(GO, biological_process)",1e-28,25
3,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.054,3
4,enzyme regulator activity,"(GO, molecular_function)",0.055,3
5,response to stress,"(GO, biological_process)",0.0789,5
6,ATPase activity,"(GO, molecular_function)",0.139,3
7,cytoplasmic vesicle,"(GO, cellular_component)",0.194,3
8,endoplasmic reticulum,"(GO, cellular_component)",0.219,2




*** Cluster C7 (G)
***  C7 selected: 96 with EID: 96
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 30 out of 96 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",2.92e-05,16
1,cell differentiation,"(GO, biological_process)",0.00827,7
2,signal transduction,"(GO, biological_process)",0.0147,10
3,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0147,3
4,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0218,2
5,extracellular region,"(GO, cellular_component)",0.0247,6
6,aberrant aggregation,"(Dictybase, Phenotypes)",0.0247,4
7,increased slug migration,"(Dictybase, Phenotypes)",0.0252,2
8,decreased aggregate size,"(Dictybase, Phenotypes)",0.0265,3
9,decreased fruiting body size,"(Dictybase, Phenotypes)",0.034,4




*** Cluster C8 (H)
***  C8 selected: 35 with EID: 35
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 35 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.0113,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0246,2
2,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0382,3
3,cell death,"(GO, biological_process)",0.0393,2
4,anatomical structure development,"(GO, biological_process)",0.0428,5
5,cell differentiation,"(GO, biological_process)",0.0428,3
6,RNA degradation,"(KEGG, Pathways)",0.0428,2
7,kinase activity,"(GO, molecular_function)",0.0428,4
8,aberrant aggregation,"(Dictybase, Phenotypes)",0.0428,2
9,decreased chemotaxis to cAMP,"(Dictybase, Phenotypes)",0.0428,2




*** Cluster C9 (I)
***  C9 selected: 38 with EID: 38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 38 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",4.9e-14,13
1,chromosome segregation,"(GO, biological_process)",1.94e-10,6
2,mitotic nuclear division,"(GO, biological_process)",7.84e-09,5
3,mitotic cell cycle,"(GO, biological_process)",1.81e-07,7
4,cell division,"(GO, biological_process)",3.2e-07,7
5,DNA replication,"(KEGG, Pathways)",3.2e-07,5
6,chromosome,"(GO, cellular_component)",3.48e-07,6
7,chromosome organization,"(GO, biological_process)",4.26e-07,6
8,cytoskeleton,"(GO, cellular_component)",5.47e-06,7
9,cytoskeleton organization,"(GO, biological_process)",0.0012,5




*** Cluster C10 (J)
***  C10 selected: 70 with EID: 70
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 70 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.151,4
1,kinase activity,"(GO, molecular_function)",0.151,6
2,cell adhesion,"(GO, biological_process)",0.227,2




*** Cluster C11 (K)
***  C11 selected: 85 with EID: 85
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 7 out of 85 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0066,7




*** Cluster C12 (L)
***  C12 selected: 73 with EID: 73
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 73 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell-cell signaling,"(GO, biological_process)",0.055,2




*** Cluster C13 (M)
***  C13 selected: 49 with EID: 49
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 49 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.00188,6
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00885,4
2,DNA-binding transcription factor activity,"(GO, molecular_function)",0.0119,3
3,anatomical structure development,"(GO, biological_process)",0.0344,7
4,decreased gene expression,"(Dictybase, Phenotypes)",0.0411,3
5,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0522,2
6,DNA binding,"(GO, molecular_function)",0.0638,4
7,decreased spore viability,"(Dictybase, Phenotypes)",0.0752,2
8,generation of precursor metabolites and energy,"(GO, biological_process)",0.0947,2
9,reproduction,"(GO, biological_process)",0.0963,2




*** Cluster C14 (N)
***  C14 selected: 87 with EID: 87
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 36 out of 87 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",7.4e-09,6
1,external encapsulating structure,"(GO, cellular_component)",7.4e-09,6
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",3.43e-07,5
3,cell wall organization or biogenesis,"(GO, biological_process)",9.59e-07,5
4,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",1.45e-05,7
5,anatomical structure development,"(GO, biological_process)",0.000161,13
6,extracellular region,"(GO, cellular_component)",0.000178,8
7,abolished cellulose binding,"(Dictybase, Phenotypes)",0.000534,2
8,cell differentiation,"(GO, biological_process)",0.00069,7
9,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00422,2




*** Cluster C15 (O)
***  C15 selected: 31 with EID: 31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 31 query genes used for enrichment calculation.


None



*** Cluster C16 (P)
***  C16 selected: 72 with EID: 72
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 72 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",5.91e-07,10




*** Cluster C17 (Q)
***  C17 selected: 57 with EID: 56
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 56 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell adhesion,"(GO, biological_process)",0.0123,4
1,decreased spore viability,"(Dictybase, Phenotypes)",0.18,2




*** Cluster C18 (R)
***  C18 selected: 65 with EID: 63
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 63 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",0.0183,2
1,external encapsulating structure,"(GO, cellular_component)",0.0183,2
2,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0402,3
3,hydrolase activity,"(GO, molecular_function)",0.105,2
4,cell differentiation,"(GO, biological_process)",0.12,3
5,lipid binding,"(GO, molecular_function)",0.12,2
6,extracellular region,"(GO, cellular_component)",0.125,3




*** Cluster C19 (S)
***  C19 selected: 89 with EID: 88
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 88 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.00253,7
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.019,4
2,cell differentiation,"(GO, biological_process)",0.102,4
3,Starch and sucrose metabolism,"(KEGG, Pathways)",0.102,2




*** Cluster C20 (T)
***  C20 selected: 1 with EID: 1
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 1 query genes used for enrichment calculation.


None





#### Using only genes annotated with at least one gene set

In [14]:
# Enrichment of every AX4-Louvain regulon cluster, restricting the calculation to
# genes that have at least one gene set annotation (use_annotated_genes=True).
regulons = pd.read_table(
    pathRegulons + 'mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab',
    index_col=0)

# The loop assumes clusters are labelled C1..Cn with no gaps - TODO confirm against the table.
n_clusters = len(regulons['Cluster'].unique())
for group in range(1, n_clusters + 1):
    cluster = 'C' + str(group)
    # Letter alias (A, B, ...) shown next to the cluster id; indexing ascii_uppercase
    # means at most 26 clusters can be labelled this way.
    cluster_letter = string.ascii_uppercase[group - 1]
    print('*** Cluster %s (%s)' % (cluster, cluster_letter))

    # Report only gene sets that share at least 2 genes with the cluster.
    enriched = group_diff_enrichment(data=regulons, group=cluster, min_overlap=2,
                                     use_annotated_genes=True)

    # save_enrichment is expected to come from an earlier cell; the result may be
    # None (nothing to report), in which case there is nothing to write.
    if not save_enrichment or enriched is None:
        continue
    # NOTE(review): this produces names like '...log.tab1.tsv', unlike the
    # '..._enrichment_cluster<N>.tsv' pattern used when use_annotated_genes is off.
    # Looks like a copy-paste of the input file name - confirm before renaming,
    # since downstream readers may rely on the current path.
    enrichment_tsv = pathRegulons + 'enrichment/mergedGenes_minExpressed0.990.1Strains1Min1Max18_clustersAX4Louvain0.4m0s1log.tab' + str(group) + '.tsv'
    enriched.to_csv(enrichment_tsv, sep='\t', index=False)

*** Cluster C1 (A)
***  C1 selected: 31 with EID: 31
Ratio of genes annotated with a gene set in reference 0.32 and query 0.71
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",3.06e-15,12
1,nucleolus,"(GO, cellular_component)",2.21e-13,11
2,ribosome biogenesis,"(GO, biological_process)",1.46e-12,10
3,ribonucleoprotein complex assembly,"(GO, biological_process)",1.12e-05,4
4,RNA binding,"(GO, molecular_function)",0.00101,6
5,rRNA binding,"(GO, molecular_function)",0.00785,2
6,protein-containing complex assembly,"(GO, biological_process)",0.017,4
7,cellular component assembly,"(GO, biological_process)",0.0615,4
8,helicase activity,"(GO, molecular_function)",0.0753,2
9,methyltransferase activity,"(GO, molecular_function)",0.0804,2




*** Cluster C2 (B)
***  C2 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.57
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 26 out of 31 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Oxidative phosphorylation,"(KEGG, Pathways)",6.02e-06,7
1,mitochondrion,"(GO, cellular_component)",1.47e-05,11
2,generation of precursor metabolites and energy,"(GO, biological_process)",0.000368,6
3,protein folding,"(GO, biological_process)",0.00208,4
4,Metabolic pathways,"(KEGG, Pathways)",0.00208,14
5,unfolded protein binding,"(GO, molecular_function)",0.00912,3
6,oxidoreductase activity,"(GO, molecular_function)",0.0329,6
7,Phagosome,"(KEGG, Pathways)",0.0452,3
8,Citrate cycle (TCA cycle),"(KEGG, Pathways)",0.122,2
9,Propanoate metabolism,"(KEGG, Pathways)",0.123,2




*** Cluster C3 (C)
***  C3 selected: 54 with EID: 54
Ratio of genes annotated with a gene set in reference 0.32 and query 0.89
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 48 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",7.370000000000001e-70,44
1,ribosome,"(GO, cellular_component)",2.11e-68,43
2,structural constituent of ribosome,"(GO, molecular_function)",4.1e-60,39
3,structural molecule activity,"(GO, molecular_function)",1.96e-53,39
4,translation,"(GO, biological_process)",2.61e-46,39
5,RNA binding,"(GO, molecular_function)",1.31e-05,11
6,rRNA binding,"(GO, molecular_function)",3.86e-05,4




*** Cluster C4 (D)
***  C4 selected: 43 with EID: 43
Ratio of genes annotated with a gene set in reference 0.32 and query 0.51
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 20 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cytoskeletal protein binding,"(GO, molecular_function)",8.8e-09,11
1,protein-containing complex assembly,"(GO, biological_process)",0.000102,7
2,cytoskeleton organization,"(GO, biological_process)",0.000102,8
3,cellular component assembly,"(GO, biological_process)",0.000102,8
4,Endocytosis,"(KEGG, Pathways)",0.000442,5
5,cytoskeleton,"(GO, cellular_component)",0.000442,7
6,plasma membrane,"(GO, cellular_component)",0.000485,8
7,response to stress,"(GO, biological_process)",0.0183,7
8,cytosol,"(GO, cellular_component)",0.0292,5
9,cell morphogenesis,"(GO, biological_process)",0.0415,2




*** Cluster C5 (E)
***  C5 selected: 20 with EID: 20
Ratio of genes annotated with a gene set in reference 0.32 and query 0.9
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,ribosome,"(GO, cellular_component)",1.9e-26,17
1,Ribosome,"(KEGG, Pathways)",2.08e-26,17
2,structural constituent of ribosome,"(GO, molecular_function)",1.64e-22,15
3,structural molecule activity,"(GO, molecular_function)",2.1e-20,15
4,translation,"(GO, biological_process)",4.95e-16,14
5,rRNA binding,"(GO, molecular_function)",8.19e-05,3
6,RNA binding,"(GO, molecular_function)",0.00235,5
7,cytosol,"(GO, cellular_component)",0.00472,5
8,ribosome biogenesis,"(GO, biological_process)",0.00599,3
9,delayed development,"(Dictybase, Phenotypes)",0.0851,2




*** Cluster C6 (F)
***  C6 selected: 49 with EID: 49
Ratio of genes annotated with a gene set in reference 0.32 and query 0.94
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 35 out of 46 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",1.89e-52,29
1,peptidase activity,"(GO, molecular_function)",4.56e-22,21
2,catabolic process,"(GO, biological_process)",1.37e-17,25




*** Cluster C7 (G)
***  C7 selected: 96 with EID: 96
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 30 out of 37 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",7.38e-05,16
1,cell differentiation,"(GO, biological_process)",0.0201,7
2,signal transduction,"(GO, biological_process)",0.0313,10
3,Glycosaminoglycan degradation,"(KEGG, Pathways)",0.0313,2
4,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0313,3
5,aberrant aggregation,"(Dictybase, Phenotypes)",0.0417,4
6,increased slug migration,"(Dictybase, Phenotypes)",0.0417,2
7,extracellular region,"(GO, cellular_component)",0.0434,6
8,decreased aggregate size,"(Dictybase, Phenotypes)",0.0434,3
9,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0603,2




*** Cluster C8 (H)
***  C8 selected: 35 with EID: 35
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant CRAC localization to the plasma membrane,"(Dictybase, Phenotypes)",0.017,2
1,development arrests at tipped mound stage,"(Dictybase, Phenotypes)",0.0369,2
2,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0493,3
3,cell death,"(GO, biological_process)",0.0493,2
4,anatomical structure development,"(GO, biological_process)",0.0587,5
5,cell differentiation,"(GO, biological_process)",0.0587,3
6,RNA degradation,"(KEGG, Pathways)",0.0587,2
7,aberrant aggregation,"(Dictybase, Phenotypes)",0.0587,2
8,decreased chemotaxis to cAMP,"(Dictybase, Phenotypes)",0.0587,2
9,kinase activity,"(GO, molecular_function)",0.0647,4




*** Cluster C9 (I)
***  C9 selected: 38 with EID: 38
Ratio of genes annotated with a gene set in reference 0.32 and query 0.5
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 18 out of 19 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",7.55e-13,13
1,chromosome segregation,"(GO, biological_process)",1.8e-09,6
2,mitotic nuclear division,"(GO, biological_process)",5.47e-08,5
3,mitotic cell cycle,"(GO, biological_process)",2.07e-06,7
4,DNA replication,"(KEGG, Pathways)",2.46e-06,5
5,cell division,"(GO, biological_process)",3.13e-06,7
6,chromosome,"(GO, cellular_component)",3.13e-06,6
7,chromosome organization,"(GO, biological_process)",3.83e-06,6
8,cytoskeleton,"(GO, cellular_component)",5.96e-05,7
9,microtubule organizing center,"(GO, cellular_component)",0.00616,3




*** Cluster C10 (J)
***  C10 selected: 70 with EID: 70
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0602,4
1,kinase activity,"(GO, molecular_function)",0.0602,6
2,cell adhesion,"(GO, biological_process)",0.181,2




*** Cluster C11 (K)
***  C11 selected: 85 with EID: 85
Ratio of genes annotated with a gene set in reference 0.32 and query 0.24
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 7 out of 20 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000458,7




*** Cluster C12 (L)
***  C12 selected: 73 with EID: 73
Ratio of genes annotated with a gene set in reference 0.32 and query 0.11
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 5 out of 8 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell-cell signaling,"(GO, biological_process)",0.00599,2
1,anatomical structure development,"(GO, biological_process)",0.0514,4
2,extracellular region,"(GO, cellular_component)",0.132,2
3,signal transduction,"(GO, biological_process)",0.233,2
4,wild type,"(Dictybase, Phenotypes)",0.233,2




*** Cluster C13 (M)
***  C13 selected: 49 with EID: 49
Ratio of genes annotated with a gene set in reference 0.32 and query 0.31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.000739,6
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00578,4
2,DNA-binding transcription factor activity,"(GO, molecular_function)",0.00924,3
3,anatomical structure development,"(GO, biological_process)",0.0126,7
4,decreased gene expression,"(Dictybase, Phenotypes)",0.0328,3
5,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0463,2
6,DNA binding,"(GO, molecular_function)",0.058,4
7,decreased spore viability,"(Dictybase, Phenotypes)",0.0722,2
8,reproduction,"(GO, biological_process)",0.0849,2
9,generation of precursor metabolites and energy,"(GO, biological_process)",0.0849,2




*** Cluster C14 (N)
***  C14 selected: 87 with EID: 87
Ratio of genes annotated with a gene set in reference 0.32 and query 0.45
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 39 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell wall,"(GO, cellular_component)",4.51e-08,6
1,external encapsulating structure,"(GO, cellular_component)",4.51e-08,6
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",1.6e-06,5
3,cell wall organization or biogenesis,"(GO, biological_process)",4.46e-06,5
4,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.000103,7
5,abolished cellulose binding,"(Dictybase, Phenotypes)",0.00139,2
6,extracellular region,"(GO, cellular_component)",0.00143,8
7,anatomical structure development,"(GO, biological_process)",0.00195,13
8,cell differentiation,"(GO, biological_process)",0.0044,7
9,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.00817,2




*** Cluster C15 (O)
***  C15 selected: 31 with EID: 31
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 8 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,oxidoreductase activity,"(GO, molecular_function)",0.231,2




*** Cluster C16 (P)
***  C16 selected: 72 with EID: 72
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 12 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",6.66e-09,10
1,wild type,"(Dictybase, Phenotypes)",0.198,5




*** Cluster C17 (Q)
***  C17 selected: 57 with EID: 56
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 6 out of 11 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell adhesion,"(GO, biological_process)",0.00125,4
1,decreased spore viability,"(Dictybase, Phenotypes)",0.112,2




*** Cluster C18 (R)
***  C18 selected: 65 with EID: 63
Ratio of genes annotated with a gene set in reference 0.32 and query 0.14
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 8 out of 9 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00313,3
1,cell wall,"(GO, cellular_component)",0.00313,2
2,external encapsulating structure,"(GO, cellular_component)",0.00313,2
3,cell differentiation,"(GO, biological_process)",0.0139,3
4,extracellular region,"(GO, cellular_component)",0.0139,3
5,hydrolase activity,"(GO, molecular_function)",0.0139,2
6,lipid binding,"(GO, molecular_function)",0.0202,2
7,anatomical structure development,"(GO, biological_process)",0.0287,4
8,protein transport,"(GO, biological_process)",0.123,2




*** Cluster C19 (S)
***  C19 selected: 89 with EID: 88
Ratio of genes annotated with a gene set in reference 0.32 and query 0.2
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 10 out of 18 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",6.19e-05,7
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00289,4
2,cell differentiation,"(GO, biological_process)",0.0241,4
3,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0425,2




*** Cluster C20 (T)
***  C20 selected: 1 with EID: 1
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 0 query genes used for enrichment calculation.


None



