# Enrichment for milestone genes

In [52]:
import os
import sys
from importlib import reload

# Put the parent directory of the notebook at the front of the import path
# (presumably where the project-local packages imported below live - confirm).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.insert(0,module_path)

import pandas as pd
from IPython.display import display
# Display options for the result tables shown in this notebook:
# column text up to 70 characters wide and up to 500 rows.
pd.set_option('max_colwidth', 70)
pd.set_option('display.max_rows', 500)
import string
import numpy as np

from orangecontrib.bioinformatics.geneset.__init__ import list_all

# jupyter_functions and enrichment_library are reloaded right after import,
# so re-running this cell picks up edits made to them without a kernel restart.
import jupyter_functions as jf
reload(jf)
import jupyter_functions as jf
from networks.functionsDENet import loadPickle, savePickle
import enrichment_library as enr
reload(enr)
import enrichment_library as enr
from stages_DE.stages_library import PHENOTYPES

In [5]:
# Root directory of the project data. The default is the original location; set the
# TIME_TRAJECTORIES_DATA environment variable to run the notebook on another machine
# without editing this cell.
DATA_ROOT=os.environ.get('TIME_TRAJECTORIES_DATA','/home/karin/Documents/timeTrajectories/data/')
# All paths end with a path separator because file names are appended to them directly.
#Path for expression data (mergedGenes_RPKUM.tsv)
dataPath=os.path.join(DATA_ROOT,'RPKUM','combined','')
# Directory with the stage-specific DESeq2 result tables read in the enrichment report
path_de=os.path.join(DATA_ROOT,'deTime','stage_vs_other','')
# Directory named DE_across_stages (not used in the visible part of the notebook)
path_impulse=os.path.join(DATA_ROOT,'stages','DE_across_stages','')

## Gene sets

Uses gene sets that contain at least 5 and at most 500 genes. All ontologies are used.

In [25]:
# Collect the identifiers of every gene set listed for organism '44689'
# (presumably Dictyostelium discoideum, as the results reference Dictybase - confirm).
gene_sets=[*list_all(organism='44689')]
# Load the gene sets themselves, restricted by size to (5,500) and with go_slims enabled.
# The result is iterated below as ontology -> collection of gene sets (each with .genes).
GENE_SETS_ONTOLOGY=enr.get_gene_sets(
    gene_set_names=gene_sets,
    go_slims=True,
    set_sizes=(5,500),
)

In [26]:
# Expression table indexed by gene name; used here only to obtain the gene names.
genes = pd.read_csv(dataPath + 'mergedGenes_RPKUM.tsv', sep='\t', index_col=0)
# Keep the names of genes that have a non-zero value in at least one column.
all_gene_names = genes.index[genes.ne(0).any(axis=1)]
# Lookup used for the conversion of gene names to EID
NAME_EID = enr.name_genes_entrez(gene_names=all_gene_names, key_entrez=False)
# EIDs of all retained genes; serves as the enrichment reference
ALL_GENE_NAMES_EID = enr.convert_EID(genes=all_gene_names, name_EID=NAME_EID)

In [71]:
def group_diff_enrichment(query_names,group:str,padj:float=0.25,min_overlap:int=None,
                          use_annotated_genes:bool=False):
    """
    Run gene set enrichment for one group of query genes, print a summary and display the result.

    Relies on the notebook-level NAME_EID, ALL_GENE_NAMES_EID and GENE_SETS_ONTOLOGY.
    Displays only gene sets that have overlap with query greater or equal to min_overlap.
    Note kept from the original author: for p value and padj calculation
    enr.gene_set_enrichment uses all gene sets that have overlap >=1 with the query.

    :param query_names: Names of the query genes; converted to EIDs with NAME_EID.
    :param group: Label of the query group, used only in the printed header.
    :param padj: FDR threshold, passed to enr.gene_set_enrichment as padj_threshold.
    :param min_overlap: Minimal query - gene set overlap, passed to enr.gene_set_enrichment.
    :param use_annotated_genes: if True use for reference and query only genes that have at
        least one gene set annotation
    :return: DataFrame with columns 'Gene set', 'Ontology', 'FDR' (text formatted with
        '{:.2e}') and 'N in query', ordered by ascending padj. None if no query gene is left
        for the calculation or if enr.gene_set_enrichment returns no gene sets.
    """
    query_EID=enr.convert_EID(genes=query_names, name_EID=NAME_EID)
    print('***  '+group+' selected:',len(query_names),'with EID:',len(query_EID))

    # Work on copies of the reference and query EID collections
    reference_gene_eids=ALL_GENE_NAMES_EID.copy()
    query_eids=query_EID.copy()

    if use_annotated_genes:
        # Union of the genes of every gene set from every ontology
        gene_sets_genes=set()
        for gene_sets in GENE_SETS_ONTOLOGY.values():
            for gene_set in gene_sets:
                gene_sets_genes.update(gene_set.genes)
        reference_gene_eids=set(reference_gene_eids) & gene_sets_genes
        query_eids=set(query_eids) & gene_sets_genes

        # A ratio is reported as 'NA' when its denominator is empty (avoids division by zero)
        reference_annotated_ratio='NA'
        if len(ALL_GENE_NAMES_EID)>0:
            reference_annotated_ratio=round(len(reference_gene_eids)/len(ALL_GENE_NAMES_EID),2)
        query_annotated_ratio='NA'
        if len(query_EID)>0:
            query_annotated_ratio=round(len(query_eids)/len(query_EID),2)
        print('Ratio of genes annotated with a gene set in reference',
              reference_annotated_ratio,
             'and query',query_annotated_ratio)

    query_in_enriched=set()
    result=None
    if len(query_eids) > 0:
        enrichment=enr.gene_set_enrichment(query_eids, reference_EID=reference_gene_eids,
                                                padj_threshold=padj,min_overlap=min_overlap,
                                                gene_sets_ontology=GENE_SETS_ONTOLOGY)
        if len(enrichment)>0:
            # The overlap below is a set intersection; without use_annotated_genes the query
            # is the raw converted collection, so make sure the right operand is a set.
            query_eids_set=set(query_eids)
            enrichment_display=list()
            # Most significant gene sets first
            enrichment= sorted(enrichment, key=lambda data: data.padj)
            for enriched in enrichment:
                query_in_enriched.update(enriched.gene_set.genes & query_eids_set)
                enrichment_display.append({'Gene set':enriched.gene_set.name,'Ontology':enriched.ontology,
                                           'FDR':"{:.2e}".format(enriched.padj),'N in query':enriched.in_query})
            result=pd.DataFrame(enrichment_display)
    print('Enrichment at FDR: '+str(padj)+' and min query - gene set overlap',str(min_overlap))
    print('N query genes in displayed gene sets:',len(query_in_enriched),'out of', len(query_eids),
          'query genes used for enrichment calculation.')
    display(result)
    print('\n')
    return result

## Enrichment report

### DESeq2
Only genes with a positive log2 fold change (lFC) are selected, so that each query contains only overexpressed genes.

#### DESeq2 all strains

##### DESeq2 all strains lFC >= 2 and padj <= 0.01

In [40]:
# DESeq2 results for all strains; the index holds the gene names used as the query.
milestones=pd.read_table(path_de+'nobatchrep_combined.tsv',index_col=0,sep='\t')
# Stages present in the table; they are visited in the order given by PHENOTYPES.
stages_in_table=set(milestones['Stage'].unique())
for stage in [phenotype for phenotype in PHENOTYPES if phenotype in stages_in_table]:
    # Genes overexpressed in this stage: lFC >= 2 and padj <= 0.01
    is_selected=(
        (milestones['log2FoldChange']>=2)
        & (milestones['padj']<=0.01)
        & (milestones['Stage']==stage)
    )
    query_names=list(milestones.loc[is_selected].index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)
  

***  no_agg selected: 216 with EID: 215
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 64 out of 82 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.000433,14
1,oxidoreductase activity,"(GO, molecular_function)",0.00122,15
2,hydrolase activity,"(GO, molecular_function)",0.0138,6
3,homeostatic process,"(GO, biological_process)",0.0196,7
4,lysosome,"(GO, cellular_component)",0.0683,6
5,response to stress,"(GO, biological_process)",0.0683,16
6,cytoskeletal protein binding,"(GO, molecular_function)",0.105,9
7,Lysine biosynthesis,"(KEGG, Pathways)",0.109,2
8,peptidase activity,"(GO, molecular_function)",0.176,6
9,aberrant cellular response to stress,"(Dictybase, Phenotypes)",0.184,2




***  stream selected: 15 with EID: 15
Ratio of genes annotated with a gene set in reference 0.32 and query 0.13
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 2 query genes used for enrichment calculation.


None



***  lag selected: 17 with EID: 17
Ratio of genes annotated with a gene set in reference 0.32 and query 0.18
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 3 query genes used for enrichment calculation.


None



***  tag selected: 37 with EID: 37
Ratio of genes annotated with a gene set in reference 0.32 and query 0.11
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 2 out of 4 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,kinase activity,"(GO, molecular_function)",0.0861,2




***  tip selected: 48 with EID: 48
Ratio of genes annotated with a gene set in reference 0.32 and query 0.08
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 4 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.076,2
1,small molecule metabolic process,"(GO, biological_process)",0.107,2
2,Metabolic pathways,"(KEGG, Pathways)",0.226,2




***  slug selected: 411 with EID: 410
Ratio of genes annotated with a gene set in reference 0.32 and query 0.18
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 43 out of 73 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.15e-14,24
1,external encapsulating structure,"(GO, cellular_component)",0.00257,4
2,cell wall,"(GO, cellular_component)",0.00257,4
3,anatomical structure development,"(GO, biological_process)",0.00748,19
4,cell wall organization or biogenesis,"(GO, biological_process)",0.0532,3
5,aberrant cell-cell adhesion,"(Dictybase, Phenotypes)",0.0611,2
6,extracellular matrix,"(GO, cellular_component)",0.141,2
7,aberrant spore coat morphology,"(Dictybase, Phenotypes)",0.202,2
8,decreased spore viability,"(Dictybase, Phenotypes)",0.213,4
9,plasma membrane,"(GO, cellular_component)",0.213,10




***  mhat selected: 527 with EID: 526
Ratio of genes annotated with a gene set in reference 0.32 and query 0.27
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 89 out of 144 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.45e-10,28
1,aberrant spore coat morphology,"(Dictybase, Phenotypes)",1.24e-07,8
2,cell wall organization or biogenesis,"(GO, biological_process)",9.76e-07,8
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",2.07e-06,15
4,external encapsulating structure,"(GO, cellular_component)",3.85e-06,7
5,cell wall,"(GO, cellular_component)",3.85e-06,7
6,cell differentiation,"(GO, biological_process)",5.45e-06,20
7,anatomical structure development,"(GO, biological_process)",3.81e-05,36
8,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00226,7
9,extracellular matrix,"(GO, cellular_component)",0.00229,4




***  cul selected: 933 with EID: 926
Ratio of genes annotated with a gene set in reference 0.32 and query 0.21
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 96 out of 192 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",9.44e-13,35
1,cell differentiation,"(GO, biological_process)",7.98e-06,24
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",2.05e-05,7
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",2.05e-05,16
4,cell wall organization or biogenesis,"(GO, biological_process)",0.000118,7
5,precocious spore germination,"(Dictybase, Phenotypes)",0.00412,3
6,external encapsulating structure,"(GO, cellular_component)",0.00589,5
7,cell wall,"(GO, cellular_component)",0.00589,5
8,anatomical structure development,"(GO, biological_process)",0.00589,38
9,extracellular matrix,"(GO, cellular_component)",0.00712,4




***  yem selected: 1001 with EID: 995
Ratio of genes annotated with a gene set in reference 0.32 and query 0.32
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 149 out of 320 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,secondary metabolic process,"(GO, biological_process)",0.00116,5
1,cell differentiation,"(GO, biological_process)",0.00163,28
2,Metabolic pathways,"(KEGG, Pathways)",0.00685,80
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00685,17
4,Ubiquinone and other terpenoid-quinone biosynthesis,"(KEGG, Pathways)",0.0415,5
5,decreased gene expression,"(Dictybase, Phenotypes)",0.0615,16
6,Cysteine and methionine metabolism,"(KEGG, Pathways)",0.0615,8
7,Phenylalanine,"(KEGG, Pathways)",0.0776,3
8,Fatty acid biosynthesis,"(KEGG, Pathways)",0.0776,4
9,oxidoreductase activity,"(GO, molecular_function)",0.0776,29




***  FB selected: 709 with EID: 705
Ratio of genes annotated with a gene set in reference 0.32 and query 0.22
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 65 out of 155 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",5.7e-09,27
1,cell differentiation,"(GO, biological_process)",0.00412,17
2,oxidoreductase activity,"(GO, molecular_function)",0.00929,20
3,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0226,10
4,secondary metabolic process,"(GO, biological_process)",0.0226,3
5,cell adhesion,"(GO, biological_process)",0.163,9






##### DESeq2 all strains lFC >= 1 and padj <= 0.05

In [41]:
# DESeq2 results for all strains; the index holds the gene names used as the query.
milestones=pd.read_table(path_de+'nobatchrep_combined.tsv',index_col=0,sep='\t')
# Stages present in the table; they are visited in the order given by PHENOTYPES.
stages_in_table=set(milestones['Stage'].unique())
for stage in [phenotype for phenotype in PHENOTYPES if phenotype in stages_in_table]:
    # Genes overexpressed in this stage: lFC >= 1 and padj <= 0.05
    is_selected=(
        (milestones['log2FoldChange']>=1)
        & (milestones['padj']<=0.05)
        & (milestones['Stage']==stage)
    )
    query_names=list(milestones.loc[is_selected].index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 1652 with EID: 1643
Ratio of genes annotated with a gene set in reference 0.32 and query 0.37
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 389 out of 600 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",1.24e-64,90
1,ribosome,"(GO, cellular_component)",1e-59,85
2,structural constituent of ribosome,"(GO, molecular_function)",6.37e-57,79
3,structural molecule activity,"(GO, molecular_function)",8.62e-44,85
4,translation,"(GO, biological_process)",1.33e-38,99
5,mitochondrion,"(GO, cellular_component)",2.91e-09,73
6,Steroid biosynthesis,"(KEGG, Pathways)",2.64e-07,14
7,rRNA binding,"(GO, molecular_function)",2.37e-06,11
8,RNA binding,"(GO, molecular_function)",0.000216,48
9,Biosynthesis of secondary metabolites,"(KEGG, Pathways)",0.00101,68




***  stream selected: 170 with EID: 167
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 30 out of 41 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",4.03e-05,11
1,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.00272,4
2,cell adhesion,"(GO, biological_process)",0.0636,5
3,aberrant streaming,"(Dictybase, Phenotypes)",0.0868,4
4,aberrant aggregation,"(Dictybase, Phenotypes)",0.0868,4
5,abolished slug migration,"(Dictybase, Phenotypes)",0.0868,3
6,plasma membrane,"(GO, cellular_component)",0.0868,8
7,signal transduction,"(GO, biological_process)",0.0868,9
8,aberrant culmination,"(Dictybase, Phenotypes)",0.0984,4
9,precocious aggregation,"(Dictybase, Phenotypes)",0.108,2




***  lag selected: 193 with EID: 190
Ratio of genes annotated with a gene set in reference 0.32 and query 0.3
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 57 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.00159,11
1,plasma membrane,"(GO, cellular_component)",0.00603,13
2,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.00791,4
3,abolished slug migration,"(Dictybase, Phenotypes)",0.00992,5
4,signal transduction,"(GO, biological_process)",0.00992,14
5,aberrant aggregation,"(Dictybase, Phenotypes)",0.0419,5
6,multiple tips in culminant,"(Dictybase, Phenotypes)",0.0421,2
7,response to stress,"(GO, biological_process)",0.08,12
8,decreased culmination,"(Dictybase, Phenotypes)",0.107,2
9,abolished streaming,"(Dictybase, Phenotypes)",0.107,3




***  tag selected: 384 with EID: 382
Ratio of genes annotated with a gene set in reference 0.32 and query 0.18
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 44 out of 70 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome segregation,"(GO, biological_process)",0.0015,5
1,cell cycle,"(GO, biological_process)",0.00194,12
2,mitotic nuclear division,"(GO, biological_process)",0.0049,4
3,DNA replication,"(KEGG, Pathways)",0.0079,5
4,chromosome organization,"(GO, biological_process)",0.0079,7
5,increased cell-substrate adhesion,"(Dictybase, Phenotypes)",0.0304,4
6,chromosome,"(GO, cellular_component)",0.0304,6
7,mitotic cell cycle,"(GO, biological_process)",0.0304,7
8,cell division,"(GO, biological_process)",0.06,7
9,extracellular region,"(GO, cellular_component)",0.084,8




***  tip selected: 616 with EID: 613
Ratio of genes annotated with a gene set in reference 0.32 and query 0.26
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 93 out of 157 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome,"(GO, cellular_component)",1.37e-12,22
1,DNA replication,"(KEGG, Pathways)",1.37e-12,16
2,cell cycle,"(GO, biological_process)",1.25e-08,27
3,chromosome segregation,"(GO, biological_process)",3.99e-07,9
4,chromosome organization,"(GO, biological_process)",7.65e-07,16
5,mitotic nuclear division,"(GO, biological_process)",2.53e-05,7
6,cell division,"(GO, biological_process)",6.42e-05,17
7,nuclear chromosome,"(GO, cellular_component)",0.000792,6
8,mitotic cell cycle,"(GO, biological_process)",0.000792,14
9,Mismatch repair,"(KEGG, Pathways)",0.00142,6




***  slug selected: 1373 with EID: 1367
Ratio of genes annotated with a gene set in reference 0.32 and query 0.21
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 166 out of 292 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",5.18e-09,38
1,anatomical structure development,"(GO, biological_process)",2.12e-05,62
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",3.22e-05,8
3,external encapsulating structure,"(GO, cellular_component)",4.7e-05,8
4,cell wall,"(GO, cellular_component)",4.7e-05,8
5,cell differentiation,"(GO, biological_process)",0.000257,27
6,Starch and sucrose metabolism,"(KEGG, Pathways)",0.000298,11
7,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.000705,17
8,cell wall organization or biogenesis,"(GO, biological_process)",0.00149,7
9,signal transduction,"(GO, biological_process)",0.00722,44




***  mhat selected: 1871 with EID: 1864
Ratio of genes annotated with a gene set in reference 0.32 and query 0.3
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 337 out of 567 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure development,"(GO, biological_process)",9.79e-07,107
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",2.46e-05,29
2,aberrant spore coat morphology,"(Dictybase, Phenotypes)",2.77e-05,10
3,cell differentiation,"(GO, biological_process)",4.76e-05,44
4,extracellular region,"(GO, cellular_component)",0.000166,45
5,external encapsulating structure,"(GO, cellular_component)",0.000691,9
6,cell wall,"(GO, cellular_component)",0.000691,9
7,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00203,14
8,decreased spore viability,"(Dictybase, Phenotypes)",0.0062,20
9,signal transduction,"(GO, biological_process)",0.00928,74




***  cul selected: 2344 with EID: 2328
Ratio of genes annotated with a gene set in reference 0.32 and query 0.27
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 315 out of 629 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",1.33e-11,40
1,cell differentiation,"(GO, biological_process)",7.01e-11,59
2,extracellular region,"(GO, cellular_component)",8.78e-09,58
3,anatomical structure development,"(GO, biological_process)",2.45e-05,109
4,aberrant spore coat morphology,"(Dictybase, Phenotypes)",4.6e-05,10
5,external encapsulating structure,"(GO, cellular_component)",0.00146,9
6,cell wall,"(GO, cellular_component)",0.00146,9
7,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00146,15
8,decreased spore viability,"(Dictybase, Phenotypes)",0.00876,21
9,aberrant culmination,"(Dictybase, Phenotypes)",0.017,27




***  yem selected: 2231 with EID: 2218
Ratio of genes annotated with a gene set in reference 0.32 and query 0.34
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 56 out of 760 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,secondary metabolic process,"(GO, biological_process)",0.134,5
1,Glycerolipid metabolism,"(KEGG, Pathways)",0.145,10
2,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.145,26
3,DNA-binding transcription factor activity,"(GO, molecular_function)",0.145,16




***  FB selected: 1970 with EID: 1958
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 277 out of 494 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.97e-06,46
1,cell differentiation,"(GO, biological_process)",1.97e-06,44
2,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",2.88e-06,28
3,anatomical structure development,"(GO, biological_process)",0.000512,86
4,Starch and sucrose metabolism,"(KEGG, Pathways)",0.000581,14
5,aberrant culmination,"(Dictybase, Phenotypes)",0.0251,23
6,wild type,"(Dictybase, Phenotypes)",0.0482,62
7,spores inviable,"(Dictybase, Phenotypes)",0.0482,4
8,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.0482,9
9,secondary metabolic process,"(GO, biological_process)",0.0482,4






#### DESeq2 WT

##### DESeq2 WT lFC >= 2 and padj <= 0.01

In [42]:
# DESeq2 results for WT; the index holds the gene names used as the query.
milestones=pd.read_table(path_de+'WT_batchrep_combined.tsv',index_col=0,sep='\t')
# Stages present in the table; they are visited in the order given by PHENOTYPES.
stages_in_table=set(milestones['Stage'].unique())
for stage in [phenotype for phenotype in PHENOTYPES if phenotype in stages_in_table]:
    # Genes overexpressed in this stage: lFC >= 2 and padj <= 0.01
    is_selected=(
        (milestones['log2FoldChange']>=2)
        & (milestones['padj']<=0.01)
        & (milestones['Stage']==stage)
    )
    query_names=list(milestones.loc[is_selected].index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 878 with EID: 873
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 260 out of 337 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",2.61e-19,43
1,structural constituent of ribosome,"(GO, molecular_function)",2.12e-15,36
2,ribosome,"(GO, cellular_component)",4.53e-15,37
3,translation,"(GO, biological_process)",4.53e-15,51
4,structural molecule activity,"(GO, molecular_function)",1e-10,36
5,mitochondrion,"(GO, cellular_component)",1.79e-06,45
6,Steroid biosynthesis,"(KEGG, Pathways)",0.000193,9
7,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",0.00116,17
8,oxidoreductase activity,"(GO, molecular_function)",0.0014,35
9,Biosynthesis of secondary metabolites,"(KEGG, Pathways)",0.00205,43




***  stream selected: 87 with EID: 86
Ratio of genes annotated with a gene set in reference 0.32 and query 0.17
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,precocious aggregation,"(Dictybase, Phenotypes)",0.0438,2
1,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0438,2
2,cell adhesion,"(GO, biological_process)",0.0438,3
3,decreased sporulation,"(Dictybase, Phenotypes)",0.0823,2
4,plasma membrane,"(GO, cellular_component)",0.0823,4
5,decreased fruiting body size,"(Dictybase, Phenotypes)",0.106,2
6,decreased cell motility,"(Dictybase, Phenotypes)",0.106,2
7,extracellular region,"(GO, cellular_component)",0.221,2
8,GTPase activity,"(GO, molecular_function)",0.221,2




***  lag selected: 70 with EID: 69
Ratio of genes annotated with a gene set in reference 0.32 and query 0.22
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 15 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,abolished slug migration,"(Dictybase, Phenotypes)",0.0298,3
1,aberrant streaming,"(Dictybase, Phenotypes)",0.0341,3
2,aberrant culmination,"(Dictybase, Phenotypes)",0.0341,3
3,aberrant aggregation,"(Dictybase, Phenotypes)",0.0341,3
4,plasma membrane,"(GO, cellular_component)",0.0341,5
5,aberrant cAMP wave formation,"(Dictybase, Phenotypes)",0.0382,3
6,GTPase activity,"(GO, molecular_function)",0.0731,3
7,aberrant slug migration,"(Dictybase, Phenotypes)",0.078,2
8,reproduction,"(GO, biological_process)",0.104,2
9,cell adhesion,"(GO, biological_process)",0.109,2




***  tag selected: 24 with EID: 24
Ratio of genes annotated with a gene set in reference 0.32 and query 0.17
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 4 out of 4 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Biosynthesis of secondary metabolites,"(KEGG, Pathways)",0.0974,2
1,small molecule metabolic process,"(GO, biological_process)",0.0974,2
2,kinase activity,"(GO, molecular_function)",0.0974,2
3,Metabolic pathways,"(KEGG, Pathways)",0.197,2




***  tip selected: 60 with EID: 60
Ratio of genes annotated with a gene set in reference 0.32 and query 0.37
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 22 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle,"(GO, biological_process)",1.39e-08,11
1,chromosome,"(GO, cellular_component)",4.46e-08,8
2,chromosome organization,"(GO, biological_process)",4.69e-08,8
3,mitotic nuclear division,"(GO, biological_process)",1.24e-07,5
4,chromosome segregation,"(GO, biological_process)",2.66e-07,5
5,mitotic cell cycle,"(GO, biological_process)",2.66e-07,8
6,DNA replication,"(KEGG, Pathways)",5.25e-06,5
7,cell cycle arrest,"(Dictybase, Phenotypes)",0.00188,2
8,cell division,"(GO, biological_process)",0.00196,5
9,Pyrimidine metabolism,"(KEGG, Pathways)",0.00257,3




***  slug selected: 148 with EID: 148
Ratio of genes annotated with a gene set in reference 0.32 and query 0.14
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 11 out of 21 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",4.01e-05,8
1,external encapsulating structure,"(GO, cellular_component)",0.0388,2
2,cell wall,"(GO, cellular_component)",0.0388,2
3,Glutathione metabolism,"(KEGG, Pathways)",0.108,2




***  mhat selected: 545 with EID: 544
Ratio of genes annotated with a gene set in reference 0.32 and query 0.24
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 63 out of 132 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",1.72e-07,23
1,aberrant spore coat morphology,"(Dictybase, Phenotypes)",2.45e-06,7
2,external encapsulating structure,"(GO, cellular_component)",7.83e-05,6
3,cell wall,"(GO, cellular_component)",7.83e-05,6
4,cell wall organization or biogenesis,"(GO, biological_process)",0.000173,6
5,extracellular matrix,"(GO, cellular_component)",0.00221,4
6,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00368,10
7,anatomical structure development,"(GO, biological_process)",0.0102,28
8,decreased spore viability,"(Dictybase, Phenotypes)",0.0131,8
9,cell differentiation,"(GO, biological_process)",0.0172,13




***  cul selected: 944 with EID: 936
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 94 out of 211 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",7.27e-10,33
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00014,16
2,cell differentiation,"(GO, biological_process)",0.000146,23
3,aberrant spore coat morphology,"(Dictybase, Phenotypes)",0.000979,6
4,cell wall organization or biogenesis,"(GO, biological_process)",0.0037,6
5,precocious spore germination,"(Dictybase, Phenotypes)",0.00636,3
6,extracellular matrix,"(GO, cellular_component)",0.017,4
7,anatomical structure development,"(GO, biological_process)",0.0242,39
8,Starch and sucrose metabolism,"(KEGG, Pathways)",0.028,7
9,secondary metabolic process,"(GO, biological_process)",0.0352,3




***  FB selected: 307 with EID: 303
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 21 out of 69 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.099,9
1,oxidoreductase activity,"(GO, molecular_function)",0.099,11
2,Fructose and mannose metabolism,"(KEGG, Pathways)",0.146,3
3,Pentose and glucuronate interconversions,"(KEGG, Pathways)",0.146,2






##### DESeq2 WT lFC >= 1 and padj <= 0.05

In [43]:
# DESeq2 results for WT; the index holds the gene names used as the query.
milestones=pd.read_table(path_de+'WT_batchrep_combined.tsv',index_col=0,sep='\t')
# Stages present in the table; they are visited in the order given by PHENOTYPES.
stages_in_table=set(milestones['Stage'].unique())
for stage in [phenotype for phenotype in PHENOTYPES if phenotype in stages_in_table]:
    # Genes overexpressed in this stage: lFC >= 1 and padj <= 0.05
    is_selected=(
        (milestones['log2FoldChange']>=1)
        & (milestones['padj']<=0.05)
        & (milestones['Stage']==stage)
    )
    query_names=list(milestones.loc[is_selected].index)
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 2578 with EID: 2560
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 640 out of 968 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",4.3699999999999994e-52,94
1,ribosome,"(GO, cellular_component)",3.25e-46,88
2,structural constituent of ribosome,"(GO, molecular_function)",4.69e-45,82
3,translation,"(GO, biological_process)",1.11e-36,120
4,structural molecule activity,"(GO, molecular_function)",1.22e-30,89
5,mitochondrion,"(GO, cellular_component)",6.34e-22,125
6,ribosome biogenesis,"(GO, biological_process)",1.73e-19,50
7,nucleolus,"(GO, cellular_component)",1.65e-12,48
8,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",1.43e-11,45
9,RNA binding,"(GO, molecular_function)",5.13e-09,79




***  stream selected: 557 with EID: 553
Ratio of genes annotated with a gene set in reference 0.32 and query 0.35
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 88 out of 194 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",3.43e-40,34
1,peptidase activity,"(GO, molecular_function)",3.46e-09,23
2,catabolic process,"(GO, biological_process)",6.26e-07,35
3,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",1.01e-05,18
4,endoplasmic reticulum,"(GO, cellular_component)",3.53e-05,20
5,Various types of N-glycan biosynthesis,"(KEGG, Pathways)",0.0221,6
6,Protein export,"(KEGG, Pathways)",0.0518,5
7,aberrant streaming,"(Dictybase, Phenotypes)",0.0585,10
8,curly fingers,"(Dictybase, Phenotypes)",0.0585,2
9,decreased contractile vacuole discharge,"(Dictybase, Phenotypes)",0.0585,2




***  lag selected: 509 with EID: 505
Ratio of genes annotated with a gene set in reference 0.32 and query 0.36
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 136 out of 183 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",1.3900000000000001e-21,24
1,plasma membrane,"(GO, cellular_component)",0.00782,27
2,peptidase activity,"(GO, molecular_function)",0.00782,14
3,aberrant streaming,"(Dictybase, Phenotypes)",0.029,11
4,decreased aggregate size,"(Dictybase, Phenotypes)",0.0297,7
5,aberrant cell morphology,"(Dictybase, Phenotypes)",0.0473,7
6,aberrant cytokinesis,"(Dictybase, Phenotypes)",0.0473,12
7,curly fingers,"(Dictybase, Phenotypes)",0.0473,2
8,decreased contractile vacuole discharge,"(Dictybase, Phenotypes)",0.0473,2
9,development arrests at loose mound stage,"(Dictybase, Phenotypes)",0.0473,5




***  tag selected: 305 with EID: 305
Ratio of genes annotated with a gene set in reference 0.32 and query 0.36
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 93 out of 109 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,DNA replication,"(KEGG, Pathways)",1.2500000000000001e-29,24
1,chromosome,"(GO, cellular_component)",1.5e-26,29
2,cell cycle,"(GO, biological_process)",5.93e-19,33
3,chromosome organization,"(GO, biological_process)",2.37e-16,22
4,DNA metabolic process,"(GO, biological_process)",3.07e-16,25
5,Mismatch repair,"(KEGG, Pathways)",1.46e-15,13
6,chromosome segregation,"(GO, biological_process)",3.45e-14,12
7,Nucleotide excision repair,"(KEGG, Pathways)",9.47e-11,13
8,cell division,"(GO, biological_process)",2.48e-10,20
9,mitotic nuclear division,"(GO, biological_process)",4.06e-10,9




***  tip selected: 309 with EID: 308
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 103 out of 119 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome,"(GO, cellular_component)",8.09e-33,34
1,DNA replication,"(KEGG, Pathways)",5.65e-29,24
2,chromosome organization,"(GO, biological_process)",2.72e-23,28
3,cell cycle,"(GO, biological_process)",5.550000000000001e-21,36
4,DNA metabolic process,"(GO, biological_process)",9.91e-19,28
5,chromosome segregation,"(GO, biological_process)",1.36e-17,14
6,Mismatch repair,"(KEGG, Pathways)",3.62e-15,13
7,mitotic nuclear division,"(GO, biological_process)",2.23e-13,11
8,DNA binding,"(GO, molecular_function)",1.4e-12,31
9,cell division,"(GO, biological_process)",1.47e-12,23




***  slug selected: 757 with EID: 753
Ratio of genes annotated with a gene set in reference 0.32 and query 0.24
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 107 out of 182 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome segregation,"(GO, biological_process)",0.000132,8
1,chromosome,"(GO, cellular_component)",0.000675,13
2,external encapsulating structure,"(GO, cellular_component)",0.000675,6
3,cell wall,"(GO, cellular_component)",0.000675,6
4,DNA replication,"(KEGG, Pathways)",0.000675,9
5,chromosome organization,"(GO, biological_process)",0.00105,13
6,cell cycle,"(GO, biological_process)",0.00233,20
7,mitotic nuclear division,"(GO, biological_process)",0.0129,5
8,nuclear chromosome,"(GO, cellular_component)",0.0197,5
9,Glutathione metabolism,"(KEGG, Pathways)",0.0197,6




***  mhat selected: 1615 with EID: 1609
Ratio of genes annotated with a gene set in reference 0.32 and query 0.29
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 219 out of 468 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant spore coat morphology,"(Dictybase, Phenotypes)",0.000304,9
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.00382,22
2,cell differentiation,"(GO, biological_process)",0.00382,35
3,extracellular region,"(GO, cellular_component)",0.0058,36
4,anatomical structure development,"(GO, biological_process)",0.00652,78
5,decreased spore viability,"(Dictybase, Phenotypes)",0.00715,18
6,Starch and sucrose metabolism,"(KEGG, Pathways)",0.00724,12
7,Golgi apparatus,"(GO, cellular_component)",0.014,22
8,abolished cellulose biosynthesis,"(Dictybase, Phenotypes)",0.0389,4
9,cell wall organization or biogenesis,"(GO, biological_process)",0.0389,7




***  cul selected: 2289 with EID: 2274
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 373 out of 631 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",5.55e-08,35
1,cell differentiation,"(GO, biological_process)",5.55e-08,54
2,Starch and sucrose metabolism,"(KEGG, Pathways)",1.66e-05,18
3,anatomical structure development,"(GO, biological_process)",1.66e-05,110
4,aberrant spore coat morphology,"(Dictybase, Phenotypes)",5.21e-05,10
5,extracellular region,"(GO, cellular_component)",0.00122,46
6,DNA-binding transcription factor activity,"(GO, molecular_function)",0.0216,15
7,aberrant culmination,"(Dictybase, Phenotypes)",0.0247,27
8,increased prespore cell differentiation,"(Dictybase, Phenotypes)",0.0304,5
9,decreased prestalk 0 cell differentiation,"(Dictybase, Phenotypes)",0.0304,4




***  FB selected: 1096 with EID: 1085
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 148 out of 304 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell differentiation,"(GO, biological_process)",0.0334,25
1,anatomical structure development,"(GO, biological_process)",0.0404,54
2,DNA-binding transcription factor activity,"(GO, molecular_function)",0.043,10
3,aberrant gene expression,"(Dictybase, Phenotypes)",0.0803,7
4,decreased prestalk 0 cell differentiation,"(Dictybase, Phenotypes)",0.0803,3
5,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.0803,7
6,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.0803,14
7,DNA binding,"(GO, molecular_function)",0.0803,30
8,kinase activity,"(GO, molecular_function)",0.137,39
9,aberrant protein phosphorylation,"(Dictybase, Phenotypes)",0.148,2






### ImpulseDE2
Milestone genes are selected as those that are at the up level in a stage and at the down level in all non-neighbouring stages (neighbouring stages are not checked, to account for imperfect annotation of up/down). no_agg is not considered a neighbour of stream and vice versa.

#### ImpulseDE2 all strains

In [73]:
# ImpulseDE2 (all strains): enrichment of milestone genes of each stage.
# A milestone gene of a stage is "up" at that stage and "down" at every non-neighbouring stage.
milestones=pd.read_table(path_impulse+'DEacrossStages_summary_noyem.tsv',index_col=0,sep='\t')
# Stages that are present as columns of the summary table, kept in PHENOTYPES order
stages=np.array([phenotype for phenotype in PHENOTYPES if phenotype in milestones.columns])
for stage_idx,stage in enumerate(stages):
    
    #Nonneighbouring stages: by default one stage before and one stage after count as neighbours
    neigh_previous=1
    neigh_next=1
    #no_agg and stream are not treated as neighbours of each other. The exception is matched
    #by stage name (not by position), so it stays correct if one of the two stages is
    #missing from the table. Still assumes that stages follow the PHENOTYPES order.
    if stage=='stream' and stage_idx>0 and stages[stage_idx-1]=='no_agg':
        neigh_previous=0
    if stage=='no_agg' and stage_idx+1<len(stages) and stages[stage_idx+1]=='stream':
        neigh_next=0
    is_non_neighbour=[(idx<stage_idx-neigh_previous or idx>stage_idx+neigh_next)
                      for idx in range(len(stages))]
    non_neighbours=stages[is_non_neighbour]
    #print(stage,':',non_neighbours)
    
    #Find genes up at stage and down at all nonneighbouring stages.
    #Boolean masks are used instead of a concatenated query string so that column names
    #do not need to be valid Python identifiers.
    is_milestone=milestones[stage]=='up'
    if len(non_neighbours)>0:
        is_milestone=is_milestone & (milestones[list(non_neighbours)]=='down').all(axis=1)
    
    query_names=list(milestones.index[is_milestone])
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 1199 with EID: 1193
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 344 out of 479 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",7.4e-38,67
1,ribosome,"(GO, cellular_component)",3.54e-30,59
2,structural constituent of ribosome,"(GO, molecular_function)",5.0199999999999996e-30,56
3,structural molecule activity,"(GO, molecular_function)",4.9599999999999995e-23,59
4,translation,"(GO, biological_process)",1.78e-20,69
5,mitochondrion,"(GO, cellular_component)",7.78e-08,60
6,Metabolic pathways,"(KEGG, Pathways)",6.02e-05,119
7,oxidoreductase activity,"(GO, molecular_function)",6.02e-05,49
8,Oxidative phosphorylation,"(KEGG, Pathways)",6.62e-05,20
9,Biosynthesis of amino acids,"(KEGG, Pathways)",0.00236,19




***  stream selected: 956 with EID: 941
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 270 out of 377 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,aberrant streaming,"(Dictybase, Phenotypes)",2.91e-08,27
1,aberrant cytokinesis,"(Dictybase, Phenotypes)",2.91e-08,32
2,locomotion,"(GO, biological_process)",4.16e-06,43
3,cell division,"(GO, biological_process)",5.19e-06,31
4,Proteasome,"(KEGG, Pathways)",5.81e-06,16
5,signal transduction,"(GO, biological_process)",5.81e-06,64
6,mitotic cell cycle,"(GO, biological_process)",5.81e-06,28
7,cytoskeleton organization,"(GO, biological_process)",1.4e-05,42
8,plasma membrane,"(GO, cellular_component)",7.71e-05,50
9,aberrant F-actin organization,"(Dictybase, Phenotypes)",0.000153,14




***  lag selected: 1190 with EID: 1175
Ratio of genes annotated with a gene set in reference 0.32 and query 0.39
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 332 out of 461 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,signal transduction,"(GO, biological_process)",2.5e-14,95
1,locomotion,"(GO, biological_process)",1.3e-10,58
2,aberrant cytokinesis,"(Dictybase, Phenotypes)",1.04e-08,36
3,aberrant streaming,"(Dictybase, Phenotypes)",1.25e-06,27
4,cytoskeleton organization,"(GO, biological_process)",2.94e-06,50
5,kinase activity,"(GO, molecular_function)",9.42e-06,70
6,abolished aggregation,"(Dictybase, Phenotypes)",9.43e-06,27
7,aberrant F-actin organization,"(Dictybase, Phenotypes)",1.14e-05,17
8,enzyme binding,"(GO, molecular_function)",2.51e-05,32
9,aberrant cell morphology,"(Dictybase, Phenotypes)",5.55e-05,17




***  tag selected: 191 with EID: 191
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 62 out of 76 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,DNA replication,"(KEGG, Pathways)",1.87e-25,20
1,chromosome,"(GO, cellular_component)",1.42e-23,24
2,chromosome organization,"(GO, biological_process)",1.98e-14,18
3,Mismatch repair,"(KEGG, Pathways)",8.78e-12,10
4,chromosome segregation,"(GO, biological_process)",2.17e-10,9
5,Nucleotide excision repair,"(KEGG, Pathways)",7.03e-10,11
6,cell cycle,"(GO, biological_process)",2.28e-09,19
7,DNA metabolic process,"(GO, biological_process)",9.25e-09,15
8,mitotic nuclear division,"(GO, biological_process)",5.32e-08,7
9,Base excision repair,"(KEGG, Pathways)",3.37e-07,8




***  tip selected: 261 with EID: 260
Ratio of genes annotated with a gene set in reference 0.32 and query 0.38
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 73 out of 99 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome,"(GO, cellular_component)",1.23e-27,29
1,DNA replication,"(KEGG, Pathways)",3.97e-23,20
2,chromosome organization,"(GO, biological_process)",2.11e-13,19
3,cell cycle,"(GO, biological_process)",3.63e-13,26
4,Mismatch repair,"(KEGG, Pathways)",2.41e-12,11
5,Nucleotide excision repair,"(KEGG, Pathways)",3.38e-11,13
6,chromosome segregation,"(GO, biological_process)",2.08e-09,9
7,DNA metabolic process,"(GO, biological_process)",7.03e-09,17
8,nuclear chromosome,"(GO, cellular_component)",3.49e-08,8
9,cell division,"(GO, biological_process)",1.91e-07,16




***  slug selected: 76 with EID: 76
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 5 out of 21 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,cell cycle arrest,"(Dictybase, Phenotypes)",0.0199,2
1,Pyrimidine metabolism,"(KEGG, Pathways)",0.217,2
2,cell division,"(GO, biological_process)",0.217,3




***  mhat selected: 8 with EID: 8
Ratio of genes annotated with a gene set in reference 0.32 and query 0.0
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 0 out of 0 query genes used for enrichment calculation.


None



***  cul selected: 565 with EID: 559
Ratio of genes annotated with a gene set in reference 0.32 and query 0.28
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 32 out of 156 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Golgi apparatus,"(GO, cellular_component)",0.0922,11
1,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.118,5
2,oxidoreductase activity,"(GO, molecular_function)",0.118,18




***  FB selected: 555 with EID: 546
Ratio of genes annotated with a gene set in reference 0.32 and query 0.23
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 34 out of 123 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",5.78e-05,13
1,extracellular region,"(GO, cellular_component)",0.00216,16
2,cell differentiation,"(GO, biological_process)",0.00223,15
3,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.0026,6
4,secondary metabolic process,"(GO, biological_process)",0.011,3
5,Ubiquinone and other terpenoid-quinone biosynthesis,"(KEGG, Pathways)",0.098,3






#### ImpulseDE2 WT

In [76]:
# ImpulseDE2 (WT): enrichment of milestone genes of each stage.
# A milestone gene of a stage is "up" at that stage and "down" at every non-neighbouring stage.
milestones=pd.read_table(path_impulse+'DEacrossStages_summary_WT_noyem_batchrep.tsv',index_col=0,sep='\t')
# Stages that are present as columns of the summary table, kept in PHENOTYPES order
stages=np.array([phenotype for phenotype in PHENOTYPES if phenotype in milestones.columns])
for stage_idx,stage in enumerate(stages):
    
    #Nonneighbouring stages: by default one stage before and one stage after count as neighbours
    neigh_previous=1
    neigh_next=1
    #no_agg and stream are not treated as neighbours of each other. The exception is matched
    #by stage name (not by position), so it stays correct if one of the two stages is
    #missing from the table. Still assumes that stages follow the PHENOTYPES order.
    if stage=='stream' and stage_idx>0 and stages[stage_idx-1]=='no_agg':
        neigh_previous=0
    if stage=='no_agg' and stage_idx+1<len(stages) and stages[stage_idx+1]=='stream':
        neigh_next=0
    is_non_neighbour=[(idx<stage_idx-neigh_previous or idx>stage_idx+neigh_next)
                      for idx in range(len(stages))]
    non_neighbours=stages[is_non_neighbour]
    #print(stage,':',non_neighbours)
    
    #Find genes up at stage and down at all nonneighbouring stages.
    #Boolean masks are used instead of a concatenated query string so that column names
    #do not need to be valid Python identifiers.
    is_milestone=milestones[stage]=='up'
    if len(non_neighbours)>0:
        is_milestone=is_milestone & (milestones[list(non_neighbours)]=='down').all(axis=1)
    
    query_names=list(milestones.index[is_milestone])
    enriched=group_diff_enrichment(query_names=query_names,group=stage,min_overlap=2,use_annotated_genes=True)

***  no_agg selected: 2330 with EID: 2310
Ratio of genes annotated with a gene set in reference 0.32 and query 0.4
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 568 out of 925 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Ribosome,"(KEGG, Pathways)",3.31e-28,76
1,translation,"(GO, biological_process)",6.3299999999999995e-25,104
2,structural constituent of ribosome,"(GO, molecular_function)",3.06e-23,65
3,ribosome,"(GO, cellular_component)",1.58e-22,68
4,mitochondrion,"(GO, cellular_component)",1.55e-19,118
5,ribosome biogenesis,"(GO, biological_process)",5.12e-17,47
6,nucleolus,"(GO, cellular_component)",6.82e-16,51
7,structural molecule activity,"(GO, molecular_function)",3.63e-15,69
8,Ribosome biogenesis in eukaryotes,"(KEGG, Pathways)",4.93e-14,47
9,tRNA metabolic process,"(GO, biological_process)",5.01e-12,45




***  stream selected: 409 with EID: 403
Ratio of genes annotated with a gene set in reference 0.32 and query 0.41
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 131 out of 166 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",4.95e-06,12
1,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.000429,15
2,locomotion,"(GO, biological_process)",0.00102,22
3,signal transduction,"(GO, biological_process)",0.00121,32
4,aberrant streaming,"(Dictybase, Phenotypes)",0.00226,12
5,catabolic process,"(GO, biological_process)",0.00506,24
6,abolished activation of adenylate cyclase,"(Dictybase, Phenotypes)",0.00744,4
7,aberrant aggregation,"(Dictybase, Phenotypes)",0.00744,10
8,decreased aggregate size,"(Dictybase, Phenotypes)",0.00903,7
9,decreased cell motility,"(Dictybase, Phenotypes)",0.00903,11




***  lag selected: 429 with EID: 423
Ratio of genes annotated with a gene set in reference 0.32 and query 0.42
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 135 out of 176 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Proteasome,"(KEGG, Pathways)",9.91e-06,12
1,signal transduction,"(GO, biological_process)",5.48e-05,37
2,locomotion,"(GO, biological_process)",0.000826,23
3,aberrant aggregation,"(Dictybase, Phenotypes)",0.000907,12
4,Protein processing in endoplasmic reticulum,"(KEGG, Pathways)",0.00173,14
5,aberrant streaming,"(Dictybase, Phenotypes)",0.00297,12
6,decreased aggregate size,"(Dictybase, Phenotypes)",0.00297,8
7,abolished activation of adenylate cyclase,"(Dictybase, Phenotypes)",0.00946,4
8,abolished streaming,"(Dictybase, Phenotypes)",0.0151,7
9,decreased cell motility,"(Dictybase, Phenotypes)",0.0151,11




***  tag selected: 273 with EID: 272
Ratio of genes annotated with a gene set in reference 0.32 and query 0.35
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 71 out of 95 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome,"(GO, cellular_component)",1.55e-16,21
1,DNA replication,"(KEGG, Pathways)",1.55e-16,16
2,DNA metabolic process,"(GO, biological_process)",3.67e-14,22
3,Mismatch repair,"(KEGG, Pathways)",8.21e-11,10
4,chromosome organization,"(GO, biological_process)",1.62e-10,16
5,DNA binding,"(GO, molecular_function)",2.75e-09,24
6,cell cycle,"(GO, biological_process)",1.84e-08,20
7,chromosome segregation,"(GO, biological_process)",3.93e-08,8
8,Base excision repair,"(KEGG, Pathways)",1.02e-07,9
9,Nucleotide excision repair,"(KEGG, Pathways)",1.38e-06,9




***  tip selected: 486 with EID: 485
Ratio of genes annotated with a gene set in reference 0.32 and query 0.34
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 119 out of 166 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome,"(GO, cellular_component)",1.1100000000000001e-31,37
1,DNA replication,"(KEGG, Pathways)",5.660000000000001e-27,25
2,chromosome organization,"(GO, biological_process)",2.1200000000000003e-21,30
3,cell cycle,"(GO, biological_process)",3.05e-20,41
4,Mismatch repair,"(KEGG, Pathways)",1.26e-16,15
5,DNA metabolic process,"(GO, biological_process)",1.26e-16,30
6,chromosome segregation,"(GO, biological_process)",1.96e-15,14
7,mitotic nuclear division,"(GO, biological_process)",1.27e-11,11
8,DNA binding,"(GO, molecular_function)",3.79e-11,35
9,Base excision repair,"(KEGG, Pathways)",2.87e-10,13




***  slug selected: 117 with EID: 116
Ratio of genes annotated with a gene set in reference 0.32 and query 0.22
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 13 out of 25 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,chromosome organization,"(GO, biological_process)",0.00773,5
1,nucleoplasm,"(GO, cellular_component)",0.111,4
2,chromosome,"(GO, cellular_component)",0.115,3
3,RNA polymerase,"(KEGG, Pathways)",0.115,2
4,cell cycle,"(GO, biological_process)",0.115,4
5,DNA metabolic process,"(GO, biological_process)",0.115,3
6,cell death,"(GO, biological_process)",0.115,2
7,enzyme regulator activity,"(GO, molecular_function)",0.115,3
8,cell division,"(GO, biological_process)",0.123,3
9,transferase activity,"(GO, molecular_function)",0.139,2




***  mhat selected: 28 with EID: 28
Ratio of genes annotated with a gene set in reference 0.32 and query 0.5
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 9 out of 14 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0786,2
1,Amino sugar and nucleotide sugar metabolism,"(KEGG, Pathways)",0.0786,2
2,carbohydrate metabolic process,"(GO, biological_process)",0.0786,3
3,generation of precursor metabolites and energy,"(GO, biological_process)",0.0786,3
4,catabolic process,"(GO, biological_process)",0.0811,4
5,small molecule metabolic process,"(GO, biological_process)",0.126,4
6,oxidoreductase activity,"(GO, molecular_function)",0.13,3
7,Carbon metabolism,"(KEGG, Pathways)",0.168,2
8,Metabolic pathways,"(KEGG, Pathways)",0.193,5
9,Biosynthesis of secondary metabolites,"(KEGG, Pathways)",0.193,3




***  cul selected: 1412 with EID: 1403
Ratio of genes annotated with a gene set in reference 0.32 and query 0.31
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 76 out of 431 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.00417,11
1,secondary metabolic process,"(GO, biological_process)",0.0957,4
2,autophagy,"(GO, biological_process)",0.0957,11
3,Endocytosis,"(KEGG, Pathways)",0.0993,18
4,cell differentiation,"(GO, biological_process)",0.149,28
5,Autophagy - other,"(KEGG, Pathways)",0.212,8
6,decreased spore viability,"(Dictybase, Phenotypes)",0.231,14




***  FB selected: 564 with EID: 560
Ratio of genes annotated with a gene set in reference 0.32 and query 0.25
Enrichment at FDR: 0.25 and min query - gene set overlap 2
N query genes in displayed gene sets: 35 out of 141 query genes used for enrichment calculation.


Unnamed: 0,Gene set,Ontology,FDR,N in query
0,extracellular region,"(GO, cellular_component)",0.0287,16
1,anatomical structure formation involved in morphogenesis,"(GO, biological_process)",0.03,10
2,Starch and sucrose metabolism,"(KEGG, Pathways)",0.0523,6
3,decreased pseudopod retraction,"(Dictybase, Phenotypes)",0.0599,2
4,SNARE interactions in vesicular transport,"(KEGG, Pathways)",0.0599,5
5,cell differentiation,"(GO, biological_process)",0.206,12




