In [1]:
import numpy as np
import pandas as pd
from textwrap import wrap
from matplotlib import pyplot as plt
from sklearn import metrics
import matplotlib.ticker as mticker
import sys, os
from hsbmpy import plot_topic_size, get_max_available_L

In [2]:
# Dataset directory for this analysis. The process working directory is switched to it,
# so the relative paths used in later cells ("mainTable.csv", "gsea/...") resolve here.
directory="/home/jovyan/work/phd/datasets/gtex/10"
os.chdir(directory)
# Make the project-local modules importable. Guarded so that re-running this cell
# does not keep appending the same entry to sys.path.
# NOTE(review): the first cell imports hsbmpy before this path is appended; on a fresh
# kernel that import only succeeds if hsbmpy is importable some other way - confirm.
if '/home/jovyan/work/phd/' not in sys.path:
    sys.path.append('/home/jovyan/work/phd/')

In [3]:
# Value returned by the hsbmpy helper for this dataset directory; the per-level loops
# below use it as their inclusive upper bound (presumably the deepest available
# hierarchy level - confirm against hsbmpy).
L = get_max_available_L(directory)

In [4]:
# Load mainTable.csv from the current working directory (set to `directory` above),
# using the first CSV column as the row index.
df = pd.read_csv("mainTable.csv", index_col=[0])

# Topic size

In [None]:
# Draw the topic-size plot for every level from 0 to L inclusive.
# The loop variable is intentionally not named `l`: a later cell stores the level used
# for the enrichment analysis in the global `l`, and a loop over `l` here would
# overwrite it whenever this cell is re-run.
for level in range(L + 1):
    plot_topic_size(directory, level)

## Topic O

In [None]:
# Re-read the same mainTable.csv that was loaded into `df` earlier (first column as
# index, first row as header); presumably so this section can be run on its own.
df = pd.read_csv("mainTable.csv", index_col=0,header=0)

In [None]:
# Per-row summary statistics of the main table, one column each:
#   average - mean over the columns of `df`
#   var     - variance over the columns of `df`
#   O       - fraction of columns whose value is strictly positive
# "O" is computed by counting the positive entries themselves; taking the length of a
# list that merely wraps the selection would always give 1 and hence O == 1/len(row).
df_mv = pd.DataFrame({
    'average': df.mean(axis=1),
    'var': df.var(axis=1),
    'O': (df > 0).mean(axis=1),
})
df_mv.head()

In [None]:
# One figure per level (0..L inclusive): a candlestick chart built from the output of
# get_candles on the left, and on the right a histogram of the candle mid-points
# ((open + close) / 2) weighted by candles['size'].
# NOTE(review): get_candles and candlestick2_ohlc are not imported in any visible
# cell - confirm where they are expected to come from.
# The loop variable is not named `l` so that re-running this cell cannot overwrite the
# global `l` that a later cell uses to select the level for the enrichment analysis.
for level in range(L + 1):
    fig = plt.figure(figsize=(15, 8))
    ax = fig.subplots(1, 2)
    candles = get_candles(directory, level, df_mv, ax[0])
    candlestick2_ohlc(ax[0], candles['open'], candles['high'], candles['low'], candles['close'], width=0.6, colordown='b')
    midpoints = (np.array(candles['open']) + np.array(candles['close'])) / 2
    ax[1].hist(midpoints, weights=candles['size'], range=(-0.05, 1.05), bins=10, histtype='step')
    # Both dollar signs are required for the label to be rendered as math text.
    ax[1].set_xlabel("$O_i$", fontsize=18)
    # Save before showing, then release the figure so the loop does not accumulate
    # open figures.
    fig.savefig("%s/topic_Ocandles_level_%d.pdf" % (directory, level))
    plt.show()
    plt.close(fig)

# Gene Ontology

In [5]:
from geneontology import get_ontology_df, ensg_to_symbol
from tableanalyser import get_symbol
import gseapy as gs

In [None]:
import importlib
import geneontology
import tableanalyser

# Development helper: reload the two project modules so that edits made on disk are
# picked up without restarting the kernel, then re-bind the helper functions so the
# names used below point at the reloaded definitions.
geneontology = importlib.reload(geneontology)
tableanalyser = importlib.reload(tableanalyser)
from geneontology import get_ontology_df, ensg_to_symbol
from tableanalyser import get_symbol

In [6]:
# Select the algorithm and the level (one below L) whose topic table is analysed in
# the cells that follow, then load that table from the algorithm's sub-directory.
algorithm = "topsbm"
l = L - 1
topics_csv = f"{directory}/{algorithm}/{algorithm}_level_{l}_topics.csv"
df_topics = pd.read_csv(topics_csv)

In [7]:
# Download the custom gene table from genenames.org (tab-separated), indexed by its
# first column. The URL is split across adjacent literals purely for readability;
# Python joins them into the exact same string.
df_symbols = pd.read_csv(
    "https://www.genenames.org/cgi-bin/download/custom"
    "?col=gd_hgnc_id&col=gd_app_sym&col=gd_pub_ensembl_id"
    "&col=md_ensembl_id&col=md_eg_id"
    "&status=Approved&status=Entry%20Withdrawn"
    "&hgnc_dbtag=on&order_by=gd_app_sym_sort"
    "&format=text&submit=submit",
    index_col=[0],
    sep='\t',
)

In [8]:
def get_sea():
    """Yield get_symbol(entry) for every non-missing entry of `df_topics`.

    The table is flattened to one dimension and every entry whose string form is
    'nan' (i.e. a missing cell) is dropped before the lookup.
    """
    flat = df_topics.values.ravel()
    keep = [str(entry) != 'nan' for entry in flat]
    for gene in flat[keep]:
        yield get_symbol(gene)

In [9]:
# Background list as yielded by get_sea(): one entry per line, skipping entries that
# are not longer than one character.
with open("gback.txt", 'w') as f:
    f.writelines(symbol + '\n' for symbol in get_sea() if len(symbol) > 1)

# Same background taken directly from the non-missing cells of df_topics, with each
# identifier cut to its first 15 characters.
with open("gback_ensg.txt", 'w') as f:
    f.writelines(
        gene_id[:15] + '\n'
        for gene_id in df_topics.values.ravel()
        if str(gene_id) != 'nan' and len(gene_id) > 1
    )

In [10]:
# Display the library names reported by gseapy (see the list in the cell output).
gs.get_library_name()

['ARCHS4_Cell-lines',
 'ARCHS4_IDG_Coexp',
 'ARCHS4_Kinases_Coexp',
 'ARCHS4_TFs_Coexp',
 'ARCHS4_Tissues',
 'Achilles_fitness_decrease',
 'Achilles_fitness_increase',
 'Aging_Perturbations_from_GEO_down',
 'Aging_Perturbations_from_GEO_up',
 'Allen_Brain_Atlas_down',
 'Allen_Brain_Atlas_up',
 'BioCarta_2013',
 'BioCarta_2015',
 'BioCarta_2016',
 'BioPlanet_2019',
 'BioPlex_2017',
 'CCLE_Proteomics_2020',
 'CORUM',
 'Cancer_Cell_Line_Encyclopedia',
 'ChEA_2013',
 'ChEA_2015',
 'ChEA_2016',
 'Chromosome_Location',
 'Chromosome_Location_hg19',
 'ClinVar_2019',
 'DSigDB',
 'Data_Acquisition_Method_Most_Popular_Genes',
 'DepMap_WG_CRISPR_Screens_Broad_CellLines_2019',
 'DepMap_WG_CRISPR_Screens_Sanger_CellLines_2019',
 'DisGeNET',
 'Disease_Perturbations_from_GEO_down',
 'Disease_Perturbations_from_GEO_up',
 'Disease_Signatures_from_GEO_down_2014',
 'Disease_Signatures_from_GEO_up_2014',
 'DrugMatrix',
 'Drug_Perturbations_from_GEO_2014',
 'Drug_Perturbations_from_GEO_down',
 'Drug_Perturb

In [11]:
# Gene sets passed to get_ontology_df: five library names followed by the local
# MSigDB v7.1 collections c1..c7 (symbols .gmt files).
#https://www.gsea-msigdb.org/gsea/downloads.jsp
gene_sets = [
    'GO_Molecular_Function_2018',
    'GO_Biological_Process_2018',
    'GO_Cellular_Component_2018',
    'Human_Phenotype_Ontology',
    'WikiPathways_2019_Human',
] + [
    f'/home/jovyan/work/phd/MSigDB/c{collection}.all.v7.1.symbols.gmt'
    for collection in range(1, 8)
]

In [None]:
# Run (or load from cache) the enrichment analysis for every topic column of
# df_topics at level `l`, and print the resulting table.
#
# Settings forwarded unchanged to get_ontology_df.
threshhold = 5e-1
cutoff = 5e-1
# Background size: number of entries yielded by get_sea().
background = sum(1 for _ in get_sea())
# Results are cached as CSV files under ./gsea (relative to the working directory).
os.makedirs("gsea", exist_ok=True)
for itopic, topic in enumerate(df_topics.columns):
    cache_file = "gsea/gsea_level_%d_topic_%d.csv" % (l, itopic + 1)
    try:
        enriched_topic = pd.read_csv(cache_file, index_col=[0])
        print(topic)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Cache miss (no file yet, or an empty one): compute and store the result.
        try:
            gene_list = ensg_to_symbol(df_topics.loc[:, topic].dropna().values)
            print(topic)
            enriched_topic = get_ontology_df(gene_list, cutoff=cutoff, threshhold = threshhold, gene_sets = gene_sets, background=background)
            # Keep only the 20 rows with the smallest adjusted p-value.
            enriched_topic = enriched_topic.sort_values(by=['Adjusted P-value'], ascending=True)[:20]
            enriched_topic.to_csv(cache_file)
        except Exception:
            # Best effort: report the failure for this topic and move on to the next.
            # Catching Exception (not a bare except) keeps KeyboardInterrupt able to
            # stop the loop.
            print(*sys.exc_info())
            continue
    print(enriched_topic)

Topic 1
                                                   Term  Adjusted P-value  \
113           neutrophil mediated immunity (GO:0002446)      3.806490e-19   
112   neutrophil activation involved in immune respo...      4.565767e-19   
111               neutrophil degranulation (GO:0043312)      7.287136e-19   
790                 ficolin-1-rich granule (GO:0101002)      7.873850e-12   
791                       tertiary granule (GO:0070820)      6.855181e-09   
792        ficolin-1-rich granule membrane (GO:0101003)      9.533894e-08   
793                secretory granule lumen (GO:0034774)      5.425050e-05   
114                  inflammatory response (GO:0006954)      6.184760e-05   
794              tertiary granule membrane (GO:0070821)      1.466165e-04   
795           ficolin-1-rich granule lumen (GO:1904813)      1.941121e-04   
116              cellular defense response (GO:0006968)      2.853182e-04   
115          defense response to bacterium (GO:0042742)      3.33177



                                                   Term  Adjusted P-value  \
70                      muscle contraction (GO:0006936)      2.654274e-21   
72           actin-myosin filament sliding (GO:0033275)      5.166076e-14   
71                 muscle filament sliding (GO:0030049)      7.749114e-14   
467           Striated Muscle Contraction Pathway WP383      2.681280e-12   
1517                                           GNF2_TTN      9.955159e-12   
73                      myofibril assembly (GO:0030239)      4.600890e-11   
74             striated muscle contraction (GO:0006941)      3.379010e-10   
75                       heart contraction (GO:0060047)      2.269503e-09   
347          Muscle fiber inclusion bodies (HP:0100299)      2.010768e-08   
77       actomyosin structure organization (GO:0031032)      5.681283e-08   
76              cardiac muscle contraction (GO:0060048)      5.871863e-08   
1959                         GO_MUSCLE_FILAMENT_SLIDING      2.020521e-07   



                                          Term  Adjusted P-value  \
243  Prader-Willi and Angelman Syndrome WP3998          0.021605   

                    Gene_set  
243  WikiPathways_2019_Human  
Topic 8




                                                   Term  Adjusted P-value  \
2796                                          GNF2_MLF1      1.580500e-09   
96    cell wall macromolecule catabolic process (GO:...      5.808003e-08   
2784                                         GNF2_CCNA1      2.595495e-06   
97    defense response to Gram-positive bacterium (G...      1.191393e-03   
98              flagellated sperm motility (GO:0030317)      3.062850e-03   
1586                   WEBER_METHYLATED_LCP_IN_SPERM_UP      4.372024e-03   
1580                 WEBER_METHYLATED_ICP_IN_FIBROBLAST      4.372024e-03   
1584              WEBER_METHYLATED_LCP_IN_FIBROBLAST_UP      6.931979e-03   
99    defense response to Gram-negative bacterium (G...      1.424606e-02   
100   JAK-STAT cascade involved in growth hormone si...      1.648562e-02   
101                           fibrinolysis (GO:0042730)      1.816332e-02   
612                        Blood Clotting Cascade WP272      2.311573e-02   



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 10




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 11




                                                   Term  Adjusted P-value  \
988   GSE10240_CTRL_VS_IL22_STIM_PRIMARY_BRONCHIAL_E...          0.499046   
1219          GSE360_L_DONOVANI_VS_M_TUBERCULOSIS_DC_DN          0.499046   
1218                GSE360_L_DONOVANI_VS_L_MAJOR_MAC_DN          0.499046   
1217                 GSE360_L_DONOVANI_VS_L_MAJOR_DC_UP          0.499046   
1216       GSE360_L_DONOVANI_VS_B_MALAYI_LOW_DOSE_DC_DN          0.499046   
1215            GSE360_HIGH_VS_LOW_DOSE_B_MALAYI_MAC_DN          0.499046   
1214                     GSE360_DC_VS_MAC_L_DONOVANI_DN          0.499046   
1213                                GSE360_DC_VS_MAC_DN          0.499046   
1212              GSE360_DC_VS_MAC_B_MALAYI_LOW_DOSE_DN          0.499046   
1211                      GSE360_CTRL_VS_L_MAJOR_MAC_DN          0.499046   
1210                    GSE360_CTRL_VS_L_DONOVANI_DC_DN          0.499046   
1209            GSE360_CTRL_VS_B_MALAYI_LOW_DOSE_MAC_DN          0.499046   



                                                  Term  Adjusted P-value  \
37                                             chr2p12          0.082884   
245  GSE9960_GRAM_POS_VS_GRAM_NEG_AND_POS_SEPSIS_PB...          0.187079   
213     GSE21360_NAIVE_VS_TERTIARY_MEMORY_CD8_TCELL_DN          0.187079   
212  GSE19888_ADENOSINE_A3R_INH_VS_ACT_WITH_INHIBIT...          0.187079   
211            GSE19825_NAIVE_VS_DAY3_EFF_CD8_TCELL_UP          0.187079   
210                GSE19512_NAUTRAL_VS_INDUCED_TREG_UP          0.187079   
186                   GSE11924_TFH_VS_TH1_CD4_TCELL_DN          0.187079   
208  GSE19401_PAM2CSK4_VS_RETINOIC_ACID_STIM_FOLLIC...          0.187079   
207  GSE19401_PAM2CSK4_VS_RETINOIC_ACID_AND_PAM2CSK...          0.187079   
206            GSE19198_1H_VS_6H_IL21_TREATED_TCELL_UP          0.187079   
205  GSE18203_CTRL_VS_INTRATUMORAL_CPG_INJ_MC38_TUM...          0.187079   
204           GSE17721_POLYIC_VS_PAM3CSK4_0.5H_BMDC_UP          0.187079   
203         



      Term  Adjusted P-value               Gene_set
0  chr5q21           0.03962  CUSTOM140535622807952
Topic 14




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 15
Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 16




                                                 Term  Adjusted P-value  \
21          GSE11057_EFF_MEM_VS_CENT_MEM_CD4_TCELL_DN          0.495449   
22          GSE11864_UNTREATED_VS_CSF1_IFNG_IN_MAC_DN          0.495449   
23       GSE11864_UNTREATED_VS_CSF1_PAM3CYS_IN_MAC_DN          0.495449   
24  GSE21063_WT_VS_NFATC1_KO_16H_ANTI_IGM_STIM_BCE...          0.495449   
25  GSE30971_2H_VS_4H_LPS_STIM_MACROPHAGE_WBP7_HET_DN          0.495449   
26  GSE30971_CTRL_VS_LPS_STIM_MACROPHAGE_WBP7_HET_...          0.495449   
27  GSE30971_CTRL_VS_LPS_STIM_MACROPHAGE_WBP7_KO_2...          0.495449   
28  GSE30971_CTRL_VS_LPS_STIM_MACROPHAGE_WBP7_KO_4...          0.495449   
29  GSE34156_UNTREATED_VS_6H_TLR1_TLR2_LIGAND_TREA...          0.495449   
30  GSE37416_CTRL_VS_0H_F_TULARENSIS_LVS_NEUTROPHI...          0.495449   
31    GSE9988_ANTI_TREM1_VS_CTRL_TREATED_MONOCYTES_UP          0.495449   
32  GSE9988_ANTI_TREM1_VS_VEHICLE_TREATED_MONOCYTE...          0.495449   

                 Gene_se



                               Term  Adjusted P-value               Gene_set
209  RICKMAN_HEAD_AND_NECK_CANCER_E          0.001276  CUSTOM140535618018432
Topic 18
                                          Term  Adjusted P-value  \
0  interleukin-1 receptor binding (GO:0005149)           0.22586   

                     Gene_set  
0  GO_Molecular_Function_2018  
Topic 19




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 20




                                             Term  Adjusted P-value  \
446                       REACTOME_KERATINIZATION      3.686663e-62   
1383                            GO_KERATINIZATION      2.262118e-57   
1385              GO_KERATINOCYTE_DIFFERENTIATION      1.485640e-48   
1386                          GO_KERATIN_FILAMENT      1.070029e-45   
1319            GO_EPIDERMAL_CELL_DIFFERENTIATION      4.405385e-43   
1377        GO_INTERMEDIATE_FILAMENT_CYTOSKELETON      9.513905e-42   
1677                          GO_SKIN_DEVELOPMENT      2.848780e-39   
1320                     GO_EPIDERMIS_DEVELOPMENT      3.893461e-38   
1323           GO_EPITHELIAL_CELL_DIFFERENTIATION      3.996480e-19   
25      keratinocyte differentiation (GO:0030216)      1.201490e-12   
1521              GO_POLYMERIC_CYTOSKELETAL_FIBER      1.218149e-12   
26    epidermal cell differentiation (GO:0009913)      3.108105e-12   
27                  skin development (GO:0043588)      4.377544e-12   
28    



                                                  Term  Adjusted P-value  \
12   complement activation, classical pathway (GO:0...      3.416553e-74   
13   humoral immune response mediated by circulatin...      4.382452e-74   
14   regulation of protein activation cascade (GO:2...      3.681734e-53   
15    regulation of complement activation (GO:0030449)      4.108519e-53   
16   regulation of humoral immune response (GO:0002...      1.541908e-52   
17   regulation of immune effector process (GO:0002...      1.871344e-52   
18   regulation of acute inflammatory response (GO:...      2.001734e-51   
19       regulation of protein processing (GO:0070613)      1.843626e-50   
20   Fc-gamma receptor signaling pathway involved i...      8.029318e-50   
21    Fc-gamma receptor signaling pathway (GO:0038094)      9.846011e-50   
22   Fc receptor mediated stimulatory signaling pat...      1.216260e-49   
0      serine-type endopeptidase activity (GO:0004252)      9.077423e-43   
23          



                                                  Term  Adjusted P-value  \
11             thyroid hormone generation (GO:0006590)          0.000055   
50   Aplasia/Hypoplasia of the abdominal wall muscu...          0.004144   
353               GO_THYROID_HORMONE_METABOLIC_PROCESS          0.007937   
352                      GO_THYROID_HORMONE_GENERATION          0.007937   
52                 Coarse facial features (HP:0000280)          0.018047   
51                         Hypothyroidism (HP:0000821)          0.021804   
53                           Constipation (HP:0002019)          0.030819   
110                    REACTOME_THYROXINE_BIOSYNTHESIS          0.037516   
103      REACTOME_METABOLISM_OF_AMINE_DERIVED_HORMONES          0.062540   
54                                 Goiter (HP:0000853)          0.068197   
108   REACTOME_SODIUM_COUPLED_PHOSPHATE_COTRANSPORTERS          0.351183   
100  REACTOME_DISEASES_ASSOCIATED_WITH_SURFACTANT_M...          0.417231   
105         



                                                   Term  Adjusted P-value  \
349                       specific granule (GO:0042581)          0.001264   
350                 specific granule lumen (GO:0035580)          0.003863   
46            neutrophil mediated immunity (GO:0002446)          0.004905   
45    neutrophil activation involved in immune respo...          0.006878   
44                neutrophil degranulation (GO:0043312)          0.012852   
351                       tertiary granule (GO:0070820)          0.111240   
47    granzyme-mediated apoptotic signaling pathway ...          0.116462   
1748                                         MODULE_575          0.172525   
48         regulation of hormone secretion (GO:0046883)          0.406036   
50    negative regulation of lipid catabolic process...          0.459199   

                        Gene_set  
349   GO_Cellular_Component_2018  
350   GO_Cellular_Component_2018  
46    GO_Biological_Process_2018  
45    GO_Bio



                                                  Term  Adjusted P-value  \
558                      primary lysosome (GO:0005766)          0.198991   
559  very-low-density lipoprotein particle (GO:0034...          0.229774   

                       Gene_set  
558  GO_Cellular_Component_2018  
559  GO_Cellular_Component_2018  
Topic 25




                                                  Term  Adjusted P-value  \
38                                             chr9q32          0.179958   
37                                            chr20p13          0.213717   
36                                            chr17p11          0.232631   
217                    GSE27786_LSK_VS_ERYTHROBLAST_DN          0.241346   
218                    GSE27786_LSK_VS_LIN_NEG_CELL_DN          0.241346   
219                 GSE31082_DP_VS_CD8_SP_THYMOCYTE_DN          0.241346   
220  GSE33292_DN3_THYMOCYTE_VS_TCF1_KO_TCELL_LYMPHO...          0.241346   
221  GSE3720_UNSTIM_VS_LPS_STIM_VD1_GAMMADELTA_TCEL...          0.241346   
222  GSE3720_VD1_VS_VD2_GAMMADELTA_TCELL_WITH_PMA_S...          0.241346   
223       GSE39820_CTRL_VS_IL1B_IL6_IL23A_CD4_TCELL_DN          0.241346   
224   GSE39820_CTRL_VS_TGFBETA1_IL6_IL23A_CD4_TCELL_DN          0.241346   
225  GSE39820_IL1B_IL6_VS_IL1B_IL6_IL23A_TREATED_CD...          0.241346   
226       GS



                                                   Term  Adjusted P-value  \
425                secretory granule lumen (GO:0034774)          0.000071   
1513                                          GNF2_MATK          0.044727   
1517                                         GNF2_PTPN4          0.044727   
1507                                         GNF2_IL2RB          0.044727   
1496                                           GNF2_CD7          0.044727   
1530                                         GNF2_ZAP70          0.045416   
56    positive regulation of apoptotic process (GO:0...          0.064376   
55            neutrophil mediated immunity (GO:0002446)          0.065862   
59    interleukin-2-mediated signaling pathway (GO:0...          0.069841   
1520                                        GNF2_RAB7L1          0.076151   
61    interleukin-15-mediated signaling pathway (GO:...          0.076909   
58      cellular response to interleukin-2 (GO:0071352)          0.081482   



                                                  Term  Adjusted P-value  \
737                 Chemokine signaling pathway WP3929          0.000659   
99                  inflammatory response (GO:0006954)          0.000913   
1    phosphatidylinositol-3,4-bisphosphate binding ...          0.041601   
0             chemokine receptor activity (GO:0004950)          0.055074   
738             B Cell Receptor Signaling Pathway WP23          0.078778   
101   cytokine-mediated signaling pathway (GO:0019221)          0.088814   
100  cellular response to tumor necrosis factor (GO...          0.130630   
4              chemokine receptor binding (GO:0042379)          0.142546   
3                      chemokine activity (GO:0008009)          0.147882   
2    superoxide-generating NADPH oxidase activator ...          0.154917   
102  cellular response to cytokine stimulus (GO:007...          0.195454   
106  positive regulation of cytokine biosynthetic p...          0.216495   
107         



                                                  Term  Adjusted P-value  \
292                                          GNF2_CDH3          0.386014   
293                                      GNF2_SERPINB5          0.386014   
302                                         MODULE_514          0.386014   
515  GSE40274_CTRL_VS_FOXP3_AND_IRF4_TRANSDUCED_ACT...          0.424664   
514       GSE3994_WT_VS_PAC1_KO_ACTIVATED_MAST_CELL_UP          0.424664   
513  GSE37605_FOXP3_FUSION_GFP_VS_IRES_GFP_TREG_NOD_DN          0.424664   
512  GSE36891_UNSTIM_VS_POLYIC_TLR3_STIM_PERITONEAL...          0.424664   
511  GSE36891_POLYIC_TLR3_VS_PAM_TLR2_STIM_PERITONE...          0.424664   
510                   GSE360_L_MAJOR_VS_T_GONDII_DC_DN          0.424664   
509             GSE360_L_MAJOR_VS_M_TUBERCULOSIS_DC_DN          0.424664   
508         GSE360_L_MAJOR_VS_B_MALAYI_HIGH_DOSE_DC_DN          0.424664   
507                                GSE360_DC_VS_MAC_UP          0.424664   
506         



                                                   Term  Adjusted P-value  \
26         regulation of B cell activation (GO:0050864)      1.640474e-15   
0          immunoglobulin receptor binding (GO:0034987)      5.739320e-15   
28    humoral immune response mediated by circulatin...      1.695227e-14   
27    complement activation, classical pathway (GO:0...      2.081658e-14   
32       B cell receptor signaling pathway (GO:0050853)      6.485834e-14   
31                phagocytosis, engulfment (GO:0006911)      7.566806e-14   
30    positive regulation of lymphocyte activation (...      9.080167e-14   
29            plasma membrane invagination (GO:0099024)      1.016323e-13   
33    positive regulation of B cell activation (GO:0...      2.629431e-13   
34                            phagocytosis (GO:0006909)      9.711620e-13   
1518                          GO_IMMUNOGLOBULIN_COMPLEX      8.772568e-11   
35           defense response to bacterium (GO:0042742)      5.365300e-10   



    Term  Adjusted P-value               Gene_set
181   MT          0.024682  CUSTOM140535826321072
Topic 31




                                                  Term  Adjusted P-value  \
465                                     NOTCH_DN.V1_UP          0.143753   
306                                         MODULE_328          0.157460   
302                                         MODULE_267          0.157460   
301                                         MODULE_215          0.157460   
300                                         MODULE_214          0.167697   
298                                        GNF2_TM4SF2          0.167697   
305                                         MODULE_316          0.341355   
304                                         MODULE_274          0.341355   
107                           REACTOME_SYNTHESIS_OF_PE          0.400021   
53                 CHEMELLO_SOLEUS_VS_EDL_MYOFIBERS_DN          0.400021   
70                  KIM_MYCL1_AMPLIFICATION_TARGETS_UP          0.400021   
80                 MCCOLLUM_GELDANAMYCIN_RESISTANCE_UP          0.400021   
85          



                                                Term  Adjusted P-value  \
405                         microvillus (GO:0005902)          0.103543   
404                         Golgi lumen (GO:0005796)          0.192674   
406  intermediate filament cytoskeleton (GO:0045111)          0.318968   

                       Gene_set  
405  GO_Cellular_Component_2018  
404  GO_Cellular_Component_2018  
406  GO_Cellular_Component_2018  
Topic 33




                                            Term  Adjusted P-value  \
652                                   GNF2_MKI67      4.321722e-07   
646                                   GNF2_ESPL1      5.525894e-07   
640                                    GNF2_CDC2      1.962435e-06   
639                                   GNF2_CCNB2      1.962435e-06   
643                                   GNF2_CENPF      1.962435e-06   
641                                   GNF2_CDC20      1.962435e-06   
638                                   GNF2_CCNA2      2.148685e-06   
654                                    GNF2_PCNA      2.148685e-06   
261               KANG_DOXORUBICIN_RESISTANCE_UP      3.563245e-05   
663                                     GNF2_TTK      6.189996e-05   
659                                    GNF2_RRM2      6.189996e-05   
166                CROONQUIST_IL6_DEPRIVATION_DN      8.967364e-05   
436              SMID_BREAST_CANCER_LUMINAL_A_DN      8.967364e-05   
337          NAKAYAM



                                                  Term  Adjusted P-value  \
66                     muscle contraction (GO:0006936)      4.464643e-10   
67   positive regulation of skeletal muscle tissue ...      1.541065e-05   
68   positive regulation of skeletal muscle fiber d...      2.322455e-04   
69   regulation of skeletal muscle fiber developmen...      3.044132e-04   
70     skeletal muscle tissue development (GO:0007519)      9.863383e-04   
71   skeletal muscle cell differentiation (GO:0035914)      1.267082e-03   
72                     myofibril assembly (GO:0030239)      1.510794e-03   
73   positive regulation of myotube differentiation...      2.405971e-03   
364                             myofibril (GO:0030016)      2.885794e-03   
362         striated muscle thin filament (GO:0005865)      2.899139e-03   
74            striated muscle contraction (GO:0006941)      3.371273e-03   
363                     contractile fiber (GO:0043292)      3.852917e-03   
75       str



                                                 Term  Adjusted P-value  \
32  GSE24972_WT_VS_IRF8_KO_MARGINAL_ZONE_SPLEEN_BC...          0.066689   
34                  GSE27786_BCELL_VS_ERYTHROBLAST_DN          0.066689   
35              GSE27786_CD8_TCELL_VS_ERYTHROBLAST_DN          0.066689   
36                 GSE27786_NKCELL_VS_ERYTHROBLAST_DN          0.066689   
37                     GSE27786_NKCELL_VS_MONO_MAC_DN          0.066689   
38                GSE27786_NKTCELL_VS_ERYTHROBLAST_DN          0.066689   
39                      GSE2826_WT_VS_BTK_KO_BCELL_DN          0.066689   
40         GSE32423_IL7_VS_IL7_IL4_NAIVE_CD8_TCELL_UP          0.066689   
33  GSE2770_IL12_AND_TGFB_VS_IL4_TREATED_ACT_CD4_T...          0.066689   
41            GSE32901_NAIVE_VS_TH17_NEG_CD4_TCELL_UP          0.066689   
43  GSE34156_UNTREATED_VS_6H_NOD2_AND_TLR1_TLR2_LI...          0.066689   
44  GSE3720_UNSTIM_VS_LPS_STIM_VD1_GAMMADELTA_TCEL...          0.066689   
45    GSE37301_MULTIPOTEN



                                                  Term  Adjusted P-value  \
703                                      RELA_DN.V1_DN          0.290069   
698                                      LEF1_UP.V1_DN          0.290069   
865   GSE2770_IL12_VS_IL4_TREATED_ACT_CD4_TCELL_48H_UP          0.292008   
864   GSE2770_IL12_AND_TGFB_ACT_VS_ACT_CD4_TCELL_6H_DN          0.292008   
863  GSE27241_CTRL_VS_DIGOXIN_TREATED_CD4_TCELL_IN_...          0.292008   
862                    GSE2706_UNSTIM_VS_8H_R848_DC_DN          0.292008   
861                     GSE2706_UNSTIM_VS_8H_LPS_DC_DN          0.292008   
860            GSE2706_UNSTIM_VS_8H_LPS_AND_R848_DC_DN          0.292008   
859                    GSE2706_UNSTIM_VS_2H_R848_DC_DN          0.292008   
858                     GSE2706_UNSTIM_VS_2H_LPS_DC_DN          0.292008   
857            GSE2706_UNSTIM_VS_2H_LPS_AND_R848_DC_DN          0.292008   
856           GSE26928_NAIVE_VS_CXCR5_POS_CD4_TCELL_DN          0.292008   
855    GSE26



          Term  Adjusted P-value               Gene_set
144  MODULE_59          0.309071  CUSTOM140535618216496
145   MORF_IL9          0.472896  CUSTOM140535618216496
Topic 38




                                                  Term  Adjusted P-value  \
116                                         MODULE_571          0.244888   
113                                            CAR_HPX          0.273380   
114                                         CAR_IGFBP1          0.273380   
115                                         MODULE_164          0.273380   
196                                       P53_DN.V2_DN          0.485983   
195                                       NRL_DN.V1_DN          0.485983   
194                               KRAS.BREAST_UP.V1_DN          0.485983   
193                                  KRAS.600_UP.V1_DN          0.485983   
197                                       P53_DN.V2_UP          0.485983   
189                                      CTIP_DN.V1_DN          0.485983   
188                                       ATM_DN.V1_UP          0.485983   
187                                       ALK_DN.V1_DN          0.485983   
191         



                                                  Term  Adjusted P-value  \
156                                Goiter (HP:0000853)          0.000029   
157  Aplasia/Hypoplasia of the abdominal wall muscu...          0.006236   
158    Thyroid-stimulating hormone excess (HP:0002925)          0.007537   
160                Coarse facial features (HP:0000280)          0.032231   
159                        Hypothyroidism (HP:0000821)          0.032494   
162                      Hyperpituitarism (HP:0010514)          0.053151   
161                          Constipation (HP:0002019)          0.060782   
206                                           chr14q13          0.253709   
207                                           chr14q31          0.253709   

                     Gene_set  
156  Human_Phenotype_Ontology  
157  Human_Phenotype_Ontology  
158  Human_Phenotype_Ontology  
160  Human_Phenotype_Ontology  
159  Human_Phenotype_Ontology  
162  Human_Phenotype_Ontology  
161  Human_Phen



                                                 Term  Adjusted P-value  \
0                                             chr1p32          0.049016   
7           GSE13762_CTRL_VS_125_VITAMIND_DAY12_DC_DN          0.057019   
8   GSE40274_FOXP3_VS_FOXP3_AND_SATB1_TRANSDUCED_A...          0.057019   
9   GSE40274_FOXP3_VS_FOXP3_AND_XBP1_TRANSDUCED_AC...          0.057019   
10           GSE6674_UNSTIM_VS_ANTI_IGM_STIM_BCELL_DN          0.057019   
11  GSE9601_UNTREATED_VS_NFKB_INHIBITOR_TREATED_HC...          0.057019   
1                                          MIR3059_5P          0.122841   
2                                         MIR4524A_3P          0.122841   
3                                           MIR504_3P          0.122841   
5                                             MIR6079          0.122841   
6                                          MIR892C_5P          0.122841   
4                     MIR548AA_MIR548AP_3P_MIR548T_3P          0.182728   

                 Gene_se



                                              Term  Adjusted P-value  \
309                        REACTOME_KERATINIZATION      7.603704e-13   
932                              GO_KERATINIZATION      3.359906e-12   
934                GO_KERATINOCYTE_DIFFERENTIATION      7.579259e-11   
305   REACTOME_FORMATION_OF_THE_CORNIFIED_ENVELOPE      3.258584e-10   
879              GO_EPIDERMAL_CELL_DIFFERENTIATION      1.868545e-09   
1124                           GO_SKIN_DEVELOPMENT      4.403985e-09   
853                               GO_CORNIFICATION      1.184551e-07   
880                       GO_EPIDERMIS_DEVELOPMENT      2.180857e-07   
935                            GO_KERATIN_FILAMENT      1.838365e-06   
922          GO_INTERMEDIATE_FILAMENT_CYTOSKELETON      3.485990e-06   
19              peptide cross-linking (GO:0018149)      7.964574e-05   
18                   skin development (GO:0043588)      1.347127e-04   
883             GO_EPITHELIAL_CELL_DIFFERENTIATION      1.416806



                  Term  Adjusted P-value               Gene_set
759  PDGF_ERK_DN.V1_UP          0.409465  CUSTOM140535516602528
Topic 43




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 44




                                                  Term  Adjusted P-value  \
311  Oligodendrocyte Specification and differentiat...          0.092381   

                    Gene_set  
311  WikiPathways_2019_Human  
Topic 45




                                                  Term  Adjusted P-value  \
801                                 HINATA_NFKB_MATRIX          0.177937   
272                                        GNF2_CDKN1C          0.351246   
273                                        GNF2_IGFBP1          0.351246   
274                                         GNF2_KISS1          0.351246   
275                                         GNF2_MMP11          0.351246   
276                                         GNF2_TIMP2          0.351246   
279                                         MODULE_298          0.351246   
292                                          MODULE_68          0.351246   
837  GSE22935_UNSTIM_VS_12H_MBOVIS_BCG_STIM_MYD88_K...          0.383451   
856  GSE32034_UNTREATED_VS_ROSIGLIZATONE_TREATED_LY...          0.383451   
857  GSE3203_WT_VS_IFNAR1_KO_INFLUENZA_INFECTED_LN_...          0.383451   
858              GSE32423_MEMORY_VS_NAIVE_CD8_TCELL_UP          0.383451   
861         



                                     Term  Adjusted P-value  \
93   azurophil granule lumen (GO:0035578)          0.065206   
95            vacuolar lumen (GO:0005775)          0.119755   
94         azurophil granule (GO:0042582)          0.157897   
162                              chr14q11          0.217396   

                       Gene_set  
93   GO_Cellular_Component_2018  
95   GO_Cellular_Component_2018  
94   GO_Cellular_Component_2018  
162       CUSTOM140535668492560  
Topic 47




                                                  Term  Adjusted P-value  \
74                                            chr16p12          0.360319   
75                                            chr17q12          0.360319   
76                                             chr1p22          0.360319   
77                                             chr2p24          0.360319   
79                                             chr7q31          0.360319   
80                                             chr8p21          0.360319   
803   GSE29949_MICROGLIA_BRAIN_VS_CD8_NEG_DC_SPLEEN_UP          0.383451   
804       GSE3039_CD4_TCELL_VS_ALPHAALPHA_CD8_TCELL_DN          0.383451   
805              GSE32034_LY6C_HIGH_VS_LOW_MONOCYTE_UP          0.383451   
806           GSE32423_CTRL_VS_IL7_MEMORY_CD8_TCELL_DN          0.383451   
807              GSE32901_TH1_VS_TH17_NEG_CD4_TCELL_DN          0.383451   
808  GSE33162_HDAC3_KO_VS_HDAC3_KO_4H_LPS_STIM_MACR...          0.383451   
809  GSE3415



                                            Term  Adjusted P-value  \
160  Abnormality of abdominal situs (HP:0011620)          0.020942   
242             Dopaminergic Neurogenesis WP2855          0.035498   
241         Mesodermal Commitment Pathway WP2857          0.039151   
159        Abdominal situs inversus (HP:0003363)          0.041883   
240              Endoderm Differentiation WP2853          0.069227   
161                   Hydrocephalus (HP:0000238)          0.212563   
163       Tracheoesophageal fistula (HP:0002575)          0.389729   
162               Holoprosencephaly (HP:0001360)          0.472603   

                     Gene_set  
160  Human_Phenotype_Ontology  
242   WikiPathways_2019_Human  
241   WikiPathways_2019_Human  
159  Human_Phenotype_Ontology  
240   WikiPathways_2019_Human  
161  Human_Phenotype_Ontology  
163  Human_Phenotype_Ontology  
162  Human_Phenotype_Ontology  
Topic 49




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 50




                                   Term  Adjusted P-value  \
9   sarcomere organization (GO:0045214)          0.424243   
10      myofibril assembly (GO:0030239)          0.491290   

                      Gene_set  
9   GO_Biological_Process_2018  
10  GO_Biological_Process_2018  
Topic 51




                                                  Term  Adjusted P-value  \
17                                                  MT          0.170334   
21                                             chr2p12          0.170334   
382   GSE30971_2H_VS_4H_LPS_STIM_MACROPHAGE_WBP7_KO_UP          0.292008   
383            GSE32901_NAIVE_VS_TH17_NEG_CD4_TCELL_DN          0.292008   
384            GSE32901_NAIVE_VS_TH17_NEG_CD4_TCELL_UP          0.292008   
385                 GSE32901_NAIVE_VS_TH1_CD4_TCELL_DN          0.292008   
386    GSE32901_TH17_EMRICHED_VS_TH17_NEG_CD4_TCELL_UP          0.292008   
387              GSE32901_TH1_VS_TH17_NEG_CD4_TCELL_DN          0.292008   
388     GSE32986_UNSTIM_VS_CURDLAN_HIGHDOSE_STIM_DC_DN          0.292008   
389  GSE32986_UNSTIM_VS_GMCSF_AND_CURDLAN_LOWDOSE_S...          0.292008   
390  GSE33292_DN3_THYMOCYTE_VS_TCF1_KO_TCELL_LYMPHO...          0.292008   
391            GSE360_CTRL_VS_B_MALAYI_LOW_DOSE_MAC_UP          0.292008   
392         



                                                  Term  Adjusted P-value  \
0        aspartic-type peptidase activity (GO:0070001)      5.577320e-07   
1    aspartic-type endopeptidase activity (GO:0004190)      7.000448e-07   
76              multivesicular body lumen (GO:0097486)      5.317282e-06   
77                    late endosome lumen (GO:0031906)      6.375090e-06   
23        macromolecule catabolic process (GO:0009057)      4.782611e-05   
78                    multivesicular body (GO:0005771)      1.832305e-04   
24   organonitrogen compound catabolic process (GO:...      1.875668e-04   
25              protein catabolic process (GO:0030163)      5.602275e-04   
625                       GO_MULTIVESICULAR_BODY_LUMEN      9.626368e-04   
452                GO_ASPARTIC_TYPE_PEPTIDASE_ACTIVITY      9.626368e-04   
594                             GO_LATE_ENDOSOME_LUMEN      1.111624e-03   
524                                       GO_DIGESTION      3.146150e-03   
3    sodium:



                                             Term  Adjusted P-value  \
233                   Nail dystrophy (HP:0008404)          0.000672   
234                    Hypotrichosis (HP:0001006)          0.002554   
235                        Fine hair (HP:0002213)          0.030823   
236                     Erythroderma (HP:0001019)          0.104836   
237                     Brittle hair (HP:0002299)          0.105277   
239  Abnormality of hair growth rate (HP:0011363)          0.120868   
240         Palmoplantar keratoderma (HP:0000982)          0.129425   
238                Slow-growing hair (HP:0002217)          0.141013   
241                  Corneal erosion (HP:0200020)          0.200818   
242                     Hypohidrosis (HP:0000966)          0.367493   

                     Gene_set  
233  Human_Phenotype_Ontology  
234  Human_Phenotype_Ontology  
235  Human_Phenotype_Ontology  
236  Human_Phenotype_Ontology  
237  Human_Phenotype_Ontology  
239  Human_Phenotype_Ontol



                                                  Term  Adjusted P-value  \
112                                            chr1p12          0.157396   
91                        Amino Acid metabolism WP3925          0.212348   
90                       PPAR signaling pathway WP3942          0.230777   
92    Synthesis and Degradation of Ketone Bodies WP311          0.392973   
93   Glucocorticoid and Mineralcorticoid Metabolism...          0.471250   

                    Gene_set  
112    CUSTOM140535369504560  
91   WikiPathways_2019_Human  
90   WikiPathways_2019_Human  
92   WikiPathways_2019_Human  
93   WikiPathways_2019_Human  
Topic 55




                                                  Term  Adjusted P-value  \
118                            KORKOLA_CHORIOCARCINOMA          0.396367   
140          PYEON_CANCER_HEAD_AND_NECK_VS_CERVICAL_DN          0.396367   
142      REACTOME_ACYL_CHAIN_REMODELING_OF_DAG_AND_TAG          0.396367   
144                    REACTOME_ANTIMICROBIAL_PEPTIDES          0.396367   
145                            REACTOME_BETA_DEFENSINS          0.396367   
147                                 REACTOME_DEFENSINS          0.396367   
159  REACTOME_METAL_SEQUESTRATION_BY_ANTIMICROBIAL_...          0.396367   
160            REACTOME_MINERALOCORTICOID_BIOSYNTHESIS          0.396367   
163                     RICKMAN_HEAD_AND_NECK_CANCER_C          0.396367   
180                              WANG_THOC1_TARGETS_UP          0.396367   
99                      AUJLA_IL22_AND_IL17A_SIGNALING          0.439615   
141                       PYEON_HPV_POSITIVE_TUMORS_DN          0.439615   
143         



       Term  Adjusted P-value               Gene_set
33  chr4q33          0.402003  CUSTOM140535574249552
Topic 57




                            Term  Adjusted P-value               Gene_set
1212  GO_T_CELL_RECEPTOR_COMPLEX          0.001177  CUSTOM140535129409008
Topic 58




                            Term  Adjusted P-value               Gene_set
214                     chr14q11          0.000941  CUSTOM140535483311040
218                      chr7q34          0.024784  CUSTOM140535483311040
1194  GO_T_CELL_RECEPTOR_COMPLEX          0.144116  CUSTOM140535483310240
Topic 59




                                                 Term  Adjusted P-value  \
31                 epidermis development (GO:0008544)      4.681405e-14   
1056                                      GNF2_SPRR1B      1.768877e-12   
148                intermediate filament (GO:0005882)      2.318909e-07   
32                 peptide cross-linking (GO:0018149)      1.518408e-06   
149   intermediate filament cytoskeleton (GO:0045111)      1.705710e-06   
33          keratinocyte differentiation (GO:0030216)      4.169812e-06   
1123                                        MODULE_68      4.260336e-06   
1090                                       MODULE_298      4.260336e-06   
34        epidermal cell differentiation (GO:0009913)      6.841627e-06   
170      Abnormal blistering of the skin (HP:0008066)      1.247676e-05   
1066                                       MODULE_153      2.366645e-05   
35                      skin development (GO:0043588)      4.283187e-05   
1270                     



                                             Term  Adjusted P-value  \
14             epidermis development (GO:0008544)          0.002234   
695                             GO_KERATINIZATION          0.007443   
696               GO_KERATINOCYTE_DIFFERENTIATION          0.007443   
855                           GO_SKIN_DEVELOPMENT          0.007443   
660             GO_EPIDERMAL_CELL_DIFFERENTIATION          0.009106   
661                      GO_EPIDERMIS_DEVELOPMENT          0.009106   
693         GO_INTERMEDIATE_FILAMENT_CYTOSKELETON          0.009106   
229                       REACTOME_KERATINIZATION          0.012903   
697                           GO_KERATIN_FILAMENT          0.049515   
638                              GO_CORNIFICATION          0.082641   
142                 HOLLERN_SQUAMOUS_BREAST_TUMOR          0.147786   
210                       ONDER_CDH1_TARGETS_3_DN          0.208633   
226  REACTOME_FORMATION_OF_THE_CORNIFIED_ENVELOPE          0.208633   
208   



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 62




                                                  Term  Adjusted P-value  \
307                                          GNF2_CDH3          0.013723   
308                                      GNF2_SERPINB5          0.013723   
475                    SINGH_KRAS_DEPENDENCY_SIGNATURE          0.013919   
312                                         MODULE_154          0.043293   
315                                         MODULE_297          0.043293   
316                                         MODULE_357          0.043293   
319                                         MODULE_385          0.150448   
320                                         MODULE_488          0.150448   
311                                         MODULE_139          0.341207   
441                       GO_REGULATION_OF_GROWTH_RATE          0.357258   
420  GO_POSITIVE_REGULATION_OF_FIBROBLAST_GROWTH_FA...          0.357258   
410                 GO_POLYPHOSPHATE_METABOLIC_PROCESS          0.357258   
398         



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 64




                                                  Term  Adjusted P-value  \
65                          lamellar body (GO:0042599)          0.021197   
508                GO_ANTIMICROBIAL_PEPTIDE_PRODUCTION          0.053923   
730    GO_REGULATION_OF_ANTIMICROBIAL_HUMORAL_RESPONSE          0.053923   
677  GO_POSITIVE_REGULATION_OF_DEFENSE_RESPONSE_TO_...          0.106700   
18                  epidermis development (GO:0008544)          0.115105   
618                                   GO_LAMELLAR_BODY          0.219321   
686  GO_POSITIVE_REGULATION_OF_HUMORAL_IMMUNE_RESPONSE          0.219321   
743     GO_REGULATION_OF_DEFENSE_RESPONSE_TO_BACTERIUM          0.219321   
499                                    GO_AMELOGENESIS          0.316455   

                       Gene_set  
65   GO_Cellular_Component_2018  
508       CUSTOM140535130326112  
730       CUSTOM140535130326112  
677       CUSTOM140535130326112  
18   GO_Biological_Process_2018  
618       CUSTOM140535130326112  
6



                                                  Term  Adjusted P-value  \
255                                 CYCLIN_D1_UP.V1_DN          0.329317   
256                                       IL2_UP.V1_DN          0.329317   
257                                      MTOR_UP.V1_DN          0.329317   
258                                  PRC1_BMI_UP.V1_UP          0.329317   
275  GSE1460_INTRATHYMIC_T_PROGENITOR_VS_NAIVE_CD4_...          0.339302   
298  GSE28783_ANTI_MIR33_VS_UNTREATED_ATHEROSCLEROS...          0.339302   
297  GSE28737_BCL6_HET_VS_BCL6_KO_MARGINAL_ZONE_BCE...          0.339302   
296         GSE27291_0H_VS_6H_STIM_GAMMADELTA_TCELL_DN          0.339302   
295             GSE2585_AIRE_KO_VS_WT_CD80_LOW_MTEC_UP          0.339302   
294      GSE23321_CENTRAL_MEMORY_VS_NAIVE_CD8_TCELL_DN          0.339302   
293                  GSE22886_NAIVE_VS_MEMORY_TCELL_UP          0.339302   
292  GSE22443_NAIVE_VS_ACT_AND_IL2_TREATED_CD8_TCEL...          0.339302   
299     GSE2



                                                   Term  Adjusted P-value  \
105              cytoplasmic vesicle lumen (GO:0060205)          0.181785   
124                             Hypertrophy Model WP516          0.471098   
128   miRNAs involvement in the immune response in s...          0.483363   
129   Fas Ligand (FasL) pathway and Stress induction...          0.493567   
141                                            chr12q12          0.495655   
143                                            chr21q22          0.495655   
144                                             chr2q14          0.495655   
132                        IL-1 signaling pathway WP195          0.496537   
1031          GSE3982_BCELL_VS_CENT_MEMORY_CD4_TCELL_UP          0.499046   
1032            GSE3982_CENT_MEMORY_CD4_TCELL_VS_TH1_UP          0.499046   
1033               GSE3982_CTRL_VS_LPS_1H_NEUTROPHIL_DN          0.499046   
1037                               GSE3982_DC_VS_TH1_UP          0.499046   



                                               Term  Adjusted P-value  \
466                      GO_TOXIN_METABOLIC_PROCESS          0.222812   
371  GO_NEGATIVE_REGULATION_OF_FIBROBLAST_MIGRATION          0.222812   
358     GO_MATERNAL_PROCESS_INVOLVED_IN_PARTURITION          0.222812   
355   GO_LONG_CHAIN_FATTY_ACID_BIOSYNTHETIC_PROCESS          0.222812   
351                          GO_LIPID_HYDROXYLATION          0.222812   
348              GO_ISOPRENOID_BIOSYNTHETIC_PROCESS          0.222812   
342       GO_HYDROGEN_PEROXIDE_BIOSYNTHETIC_PROCESS          0.222812   
341                 GO_HYALURONAN_METABOLIC_PROCESS          0.222812   
340              GO_HYALURONAN_BIOSYNTHETIC_PROCESS          0.222812   
339                        GO_HSP90_PROTEIN_BINDING          0.222812   
338                        GO_HSP70_PROTEIN_BINDING          0.222812   
335                   GO_HEPATOCYTE_DIFFERENTIATION          0.222812   
330                  GO_FLAVONOID_METABOLIC_PROCESS



                                                  Term  Adjusted P-value  \
577                                         MODULE_357          0.031018   
573                                         MODULE_297          0.031018   
564                                         MODULE_154          0.031018   
556                                          GNF2_CDH3          0.031607   
557                                      GNF2_SERPINB5          0.031607   
56                   Acanthosis nigricans (HP:0000956)          0.072880   
55                      Sparse scalp hair (HP:0002209)          0.112335   
198                               JAEGER_METASTASIS_DN          0.320600   
348             WIEDERSCHAIN_TARGETS_OF_BMI1_AND_PCGF2          0.352884   
306                      SABATES_COLORECTAL_ADENOMA_UP          0.352884   
240                                    LY_AGING_OLD_UP          0.352884   
311  SATO_SILENCED_EPIGENETICALLY_IN_PANCREATIC_CANCER          0.352884   
235         



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 71




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 72




                                      Term  Adjusted P-value  \
193             Tamoxifen metabolism WP691          0.075063   
194      Vitamin D Receptor Pathway WP2877          0.129072   
195  Nuclear Receptors Meta-Pathway WP2882          0.432024   

                    Gene_set  
193  WikiPathways_2019_Human  
194  WikiPathways_2019_Human  
195  WikiPathways_2019_Human  
Topic 73




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 74




                                                 Term  Adjusted P-value  \
49                                          MODULE_59          0.024527   
48                                         MODULE_247          0.024527   
51                      GO_ALPHA_GLUCOSIDASE_ACTIVITY          0.031984   
65                        GO_POLYSACCHARIDE_DIGESTION          0.031984   
60                            GO_GLUCOSIDASE_ACTIVITY          0.049686   
16         REACTOME_DIGESTION_OF_DIETARY_CARBOHYDRATE          0.062995   
9                                 LIU_CDX2_TARGETS_UP          0.062995   
10              MYLLYKANGAS_AMPLIFICATION_HOT_SPOT_17          0.062995   
11              MYLLYKANGAS_AMPLIFICATION_HOT_SPOT_24          0.062995   
18       REACTOME_DISEASES_OF_CARBOHYDRATE_METABOLISM          0.062995   
14                                 REACTOME_DIGESTION          0.062995   
15                  REACTOME_DIGESTION_AND_ABSORPTION          0.062995   
5                        



                                             Term  Adjusted P-value  \
185                                       CAR_HPX          0.051782   
87         LOPEZ_MESOTHELIOMA_SURVIVAL_OVERALL_UP          0.154389   
52                     CAIRO_LIVER_DEVELOPMENT_DN          0.154389   
94                MIKKELSEN_IPS_LCP_WITH_H3K27ME3          0.154389   
67                 HOLLERN_PAPILLARY_BREAST_TUMOR          0.154389   
85                 LOPEZ_EPITHELIOID_MESOTHELIOMA          0.154389   
86             LOPEZ_MESOTELIOMA_SURVIVAL_TIME_DN          0.154389   
124                       RUAN_RESPONSE_TO_TNF_UP          0.154389   
123          RUAN_RESPONSE_TO_TNF_TROGLITAZONE_UP          0.154389   
40                       BIOCARTA_GHRELIN_PATHWAY          0.154389   
69                     HSIAO_LIVER_SPECIFIC_GENES          0.154389   
135                        TSUTSUMI_FBXW8_TARGETS          0.154389   
136                        VERNOCHET_ADIPOGENESIS          0.154389   
145   



                                                  Term  Adjusted P-value  \
564       GSE10239_MEMORY_VS_KLRG1INT_EFF_CD8_TCELL_UP          0.499046   
716              GSE360_DC_VS_MAC_B_MALAYI_LOW_DOSE_UP          0.499046   
717                     GSE360_DC_VS_MAC_L_DONOVANI_UP          0.499046   
718                 GSE360_DC_VS_MAC_M_TUBERCULOSIS_DN          0.499046   
719  GSE360_HIGH_DOSE_B_MALAYI_VS_M_TUBERCULOSIS_DC_DN          0.499046   
720             GSE360_HIGH_VS_LOW_DOSE_B_MALAYI_DC_UP          0.499046   
721            GSE360_HIGH_VS_LOW_DOSE_B_MALAYI_MAC_UP          0.499046   
722   GSE360_LOW_DOSE_B_MALAYI_VS_M_TUBERCULOSIS_DC_UP          0.499046   
723      GSE360_L_DONOVANI_VS_B_MALAYI_HIGH_DOSE_DC_UP          0.499046   
724     GSE360_L_DONOVANI_VS_B_MALAYI_HIGH_DOSE_MAC_UP          0.499046   
725       GSE360_L_DONOVANI_VS_B_MALAYI_LOW_DOSE_DC_UP          0.499046   
726      GSE360_L_DONOVANI_VS_B_MALAYI_LOW_DOSE_MAC_UP          0.499046   
727         



                                                   Term  Adjusted P-value  \
1117                                          GNF2_DNM1          0.020718   
1122                                        GNF2_TM4SF2          0.020718   
1119                                         GNF2_RAB3A          0.022621   
1164                                         MODULE_563          0.049426   
40    anterograde trans-synaptic signaling (GO:0098916)          0.167678   
41          chemical synaptic transmission (GO:0007268)          0.172893   
1118                                          GNF2_MAPT          0.236003   
1158                                          MODULE_49          0.236003   
187                     Synaptic Vesicle Pathway WP2267          0.352927   
1959                                     CAHOY_NEURONAL          0.450448   
1965                                   KRAS.50_UP.V1_UP          0.450448   
1969                               KRAS.KIDNEY_UP.V1_UP          0.462931   



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 79




                                              Term  Adjusted P-value  \
168                                     MODULE_153          0.092505   
170                                     MODULE_298          0.092505   
174                                      MODULE_68          0.092505   
165                                    GNF2_SPRR1B          0.092505   
41                                              MT          0.109709   
82      MARIADASON_RESPONSE_TO_CURCUMIN_SULINDAC_5          0.173917   
78           LINDGREN_BLADDER_CANCER_CLUSTER_2A_UP          0.173917   
72                          JIANG_TIP30_TARGETS_DN          0.173917   
71                            JAEGER_METASTASIS_DN          0.173917   
68               HUMMERICH_MALIGNANT_SKIN_TUMOR_UP          0.173917   
106                SMID_BREAST_CANCER_LUMINAL_A_DN          0.173917   
111                 VANTVEER_BREAST_CANCER_ESR1_DN          0.173917   
67                  HUMMERICH_BENIGN_SKIN_TUMOR_UP          0.17



                                              Term  Adjusted P-value  \
28  negative regulation of cell death (GO:0060548)           0.35423   

                      Gene_set  
28  GO_Biological_Process_2018  
Topic 81




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 82




                                           Term  Adjusted P-value  \
1281                                 GNF2_RAB3A          0.014453   
0     clathrin light chain binding (GO:0032051)          0.014728   
1               syntaxin-1 binding (GO:0017075)          0.051287   
1279                                  GNF2_DNM1          0.105271   
34          clathrin coat assembly (GO:0048268)          0.394350   
1283                                GNF2_TM4SF2          0.442827   
1322                                 MODULE_438          0.442827   
1307                                 MODULE_274          0.492252   

                        Gene_set  
1281       CUSTOM140535657640816  
0     GO_Molecular_Function_2018  
1     GO_Molecular_Function_2018  
1279       CUSTOM140535657640816  
34    GO_Biological_Process_2018  
1283       CUSTOM140535657640816  
1322       CUSTOM140535657640816  
1307       CUSTOM140535657640816  
Topic 83




                                                  Term  Adjusted P-value  \
36                            WILLIAMS_ESR1_TARGETS_UP          0.097845   
34            REACTOME_INTERLEUKIN_20_FAMILY_SIGNALING          0.097845   
30                NIKOLSKY_BREAST_CANCER_1Q32_AMPLICON          0.097845   
19                                            chr13q32          0.124722   
138  GSE21063_WT_VS_NFATC1_KO_16H_ANTI_IGM_STIM_BCE...          0.128951   
137  GSE19888_ADENOSINE_A3R_INH_VS_INH_PRETREAT_AND...          0.128951   
136  GSE17974_CTRL_VS_ACT_IL4_AND_ANTI_IL12_1H_CD4_...          0.128951   
139  GSE23505_UNTREATED_VS_4DAY_IL6_IL1_TREATED_CD4...          0.128951   
135   GSE1791_CTRL_VS_NEUROMEDINU_IN_T_CELL_LINE_3H_UP          0.128951   
133                GSE17721_LPS_VS_PAM3CSK4_4H_BMDC_UP          0.128951   
132              GSE17721_LPS_VS_PAM3CSK4_0.5H_BMDC_DN          0.128951   
131             GSE17721_LPS_VS_GARDIQUIMOD_2H_BMDC_UP          0.128951   
130         



                                                 Term  Adjusted P-value  \
1                                             chr1q41          0.108867   
2                                             chr3q22          0.108867   
4                             NABA_BASEMENT_MEMBRANES          0.145639   
5                                      NABA_COLLAGENS          0.145639   
8   REACTOME_ASSEMBLY_OF_COLLAGEN_FIBRILS_AND_OTHE...          0.145639   
9   REACTOME_COLLAGEN_BIOSYNTHESIS_AND_MODIFYING_E...          0.145639   
10              REACTOME_COLLAGEN_CHAIN_TRIMERIZATION          0.145639   
11                      REACTOME_COLLAGEN_DEGRADATION          0.145639   
12                        REACTOME_COLLAGEN_FORMATION          0.145639   
15                         REACTOME_ECM_PROTEOGLYCANS          0.145639   
21                         REACTOME_SIGNALING_BY_PDGF          0.145639   
17        REACTOME_INTEGRIN_CELL_SURFACE_INTERACTIONS          0.145639   
18                       



                                                  Term  Adjusted P-value  \
94                                            chr15q22          0.242613   
95                                             chr2q12          0.242613   
96                                             chr4q32          0.242613   
520                     GSE17721_LPS_VS_CPG_2H_BMDC_UP          0.292008   
547                   GSE30083_SP1_VS_SP2_THYMOCYTE_UP          0.292008   
546  GSE29164_CD8_TCELL_VS_CD8_TCELL_AND_IL12_TREAT...          0.292008   
545  GSE2770_UNTREATED_VS_IL4_TREATED_ACT_CD4_TCELL...          0.292008   
544     GSE27670_CTRL_VS_BLIMP1_TRANSDUCED_GC_BCELL_UP          0.292008   
543      GSE26928_EFF_MEMORY_VS_CXCR5_POS_CD4_TCELL_DN          0.292008   
542                   GSE2585_CD80_HIGH_VS_LOW_MTEC_UP          0.292008   
541  GSE25147_UNSTIM_VS_HELIOBACTER_PYLORI_LPS_STIM...          0.292008   
540  GSE25088_WT_VS_STAT6_KO_MACROPHAGE_ROSIGLITAZO...          0.292008   
539  GSE2463



                                                  Term  Adjusted P-value  \
117                        Hypothyroidism (HP:0000821)          0.001503   
118                                Goiter (HP:0000853)          0.002159   
174      Thyroxine (Thyroid Hormone) Production WP1981          0.002757   
119  Aplasia/Hypoplasia of the abdominal wall muscu...          0.003273   
120                Coarse facial features (HP:0000280)          0.031805   
121                      Umbilical hernia (HP:0001537)          0.040611   
122                          Constipation (HP:0002019)          0.048057   
32              thyroid gland development (GO:0030878)          0.089295   
863               GO_THYROID_HORMONE_METABOLIC_PROCESS          0.118137   
33           endocrine system development (GO:0035270)          0.320572   
123                              Dry skin (HP:0000958)          0.454819   

                       Gene_set  
117    Human_Phenotype_Ontology  
118    Human_Phenot



                                            Term  Adjusted P-value  \
431            Glial Cell Differentiation WP2276          0.032815   
2075                                   GNF2_DNM1          0.114218   
55    neuron projection development (GO:0031175)          0.424894   

                        Gene_set  
431      WikiPathways_2019_Human  
2075       CUSTOM140535773187040  
55    GO_Biological_Process_2018  
Topic 88




                           Term  Adjusted P-value               Gene_set
137  BIOCARTA_RHODOPSIN_PATHWAY          0.195239  CUSTOM140535374643600
Topic 89




                                               Term  Adjusted P-value  \
0  G-protein coupled receptor activity (GO:0004930)          0.023245   

                     Gene_set  
0  GO_Molecular_Function_2018  
Topic 90




Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 91




                                                  Term  Adjusted P-value  \
55                                             chr2p21          0.016274   
31              bicellular tight junction (GO:0005923)          0.034040   
97                         HOLLERN_EMT_BREAST_TUMOR_DN          0.080518   
120                   MCBRYAN_PUBERTAL_BREAST_3_4WK_UP          0.198848   
408                                  BCAT.100_UP.V1_DN          0.259903   
412                                       P53_DN.V1_UP          0.259903   
69                   CAFFAREL_RESPONSE_TO_THC_8HR_3_UP          0.263966   
121                   MCBRYAN_PUBERTAL_BREAST_4_5WK_UP          0.263966   
411                            GCNP_SHH_UP_EARLY.V1_UP          0.272818   
413                                  PRC1_BMI_UP.V1_UP          0.272818   
414                                     RPS14_DN.V1_DN          0.272818   
407                                       ATM_DN.V1_DN          0.272818   
409         



                                                  Term  Adjusted P-value  \
0                       cytokine activity (GO:0005125)          0.042655   
3              chemokine receptor binding (GO:0042379)          0.060247   
2                      chemokine activity (GO:0008009)          0.070748   
1          CCR chemokine receptor binding (GO:0048020)          0.072215   
191                                           chr16q21          0.166463   
14                   neutrophil migration (GO:1990266)          0.214093   
13                 granulocyte chemotaxis (GO:0071621)          0.232809   
12                  neutrophil chemotaxis (GO:0030593)          0.250158   
15           response to interferon-gamma (GO:0034341)          0.257561   
16              response to interleukin-1 (GO:0070555)          0.285895   
11   chemokine-mediated signaling pathway (GO:0070098)          0.300968   
144               azurophil granule lumen (GO:0035578)          0.314991   
10   positiv



            Term  Adjusted P-value               Gene_set
435    GNF2_ANK1          0.437307  CUSTOM140535451849744
436  GNF2_BNIP3L          0.437307  CUSTOM140535451849744
438  GNF2_MAP2K3          0.437307  CUSTOM140535451849744
439    GNF2_PCAF          0.437307  CUSTOM140535451849744
440  GNF2_RAD23A          0.437307  CUSTOM140535451849744
442    GNF2_SPTB          0.437307  CUSTOM140535451849744
443    GNF2_TAL1          0.437307  CUSTOM140535451849744
480   MODULE_539          0.437307  CUSTOM140535451849744
441   GNF2_SPTA1          0.469899  CUSTOM140535451849744
Topic 94




                       Term  Adjusted P-value               Gene_set
38                  chr2p12          0.080948  CUSTOM140535443920528
253  BMI1_DN_MEL18_DN.V1_DN          0.453509  CUSTOM140535574448720
254          MEL18_DN.V1_DN          0.453509  CUSTOM140535574448720
Topic 95




                                                  Term  Adjusted P-value  \
72                                             chr2p11          0.002018   
70                                            chr12q22          0.295934   
73                                             chr3q11          0.295934   
75                                             chr9q32          0.295934   
171                   RUAN_RESPONSE_TO_TROGLITAZONE_DN          0.392272   
164            REACTOME_SCAVENGING_OF_HEME_FROM_PLASMA          0.392272   
163     REACTOME_ROLE_OF_PHOSPHOLIPIDS_IN_PHAGOCYTOSIS          0.392272   
162  REACTOME_ROLE_OF_LAT2_NTAL_LAB_ON_CALCIUM_MOBI...          0.392272   
159                        REACTOME_PARASITE_INFECTION          0.392272   
155          REACTOME_INITIAL_TRIGGERING_OF_COMPLEMENT          0.392272   
149                           REACTOME_FCGR_ACTIVATION          0.392272   
148            REACTOME_FCGR3A_MEDIATED_IL10_SYNTHESIS          0.392272   
145         



                                                  Term  Adjusted P-value  \
12   humoral immune response mediated by circulatin...          0.000180   
11   complement activation, classical pathway (GO:0...          0.000332   
774                          GO_IMMUNOGLOBULIN_COMPLEX          0.000469   
17   regulation of immune effector process (GO:0002...          0.002100   
18   regulation of acute inflammatory response (GO:...          0.002330   
16   regulation of humoral immune response (GO:0002...          0.002365   
15    regulation of complement activation (GO:0030449)          0.002458   
19       regulation of protein processing (GO:0070613)          0.002590   
14   regulation of protein activation cascade (GO:2...          0.002961   
13      B cell receptor signaling pathway (GO:0050853)          0.003395   
21   antigen receptor-mediated signaling pathway (G...          0.032809   
20          regulation of immune response (GO:0050776)          0.032928   
163         



                                                  Term  Adjusted P-value  \
145         GO_STRUCTURAL_CONSTITUENT_OF_MYELIN_SHEATH          0.058559   
140  GO_PROTEIN_LOCALIZATION_TO_PARANODE_REGION_OF_...          0.058559   
137                    GO_PROTEIN_LOCALIZATION_TO_AXON          0.058559   
134  GO_POSITIVE_REGULATION_OF_EXTRINSIC_APOPTOTIC_...          0.058559   
122                      GO_MEMBRANE_RAFT_LOCALIZATION          0.058559   
121                      GO_MEMBRANE_RAFT_DISTRIBUTION          0.058559   
106                     GO_APICAL_PROTEIN_LOCALIZATION          0.058559   
127  GO_PEPTIDASE_ACTIVATOR_ACTIVITY_INVOLVED_IN_AP...          0.069481   
123                      GO_MEMBRANE_RAFT_ORGANIZATION          0.081182   
10                                             chr2q11          0.095011   
11                                             chr3q25          0.095011   
126                    GO_PEPTIDASE_ACTIVATOR_ACTIVITY          0.110816   
149         



        Term  Adjusted P-value               Gene_set
144  chr3q22          0.488496  CUSTOM140535817455968
147  chr6p12          0.488496  CUSTOM140535817455968
Topic 99




                                              Term  Adjusted P-value  \
142               HUPER_BREAST_BASAL_VS_LUMINAL_UP          0.010275   
334                                    GNF2_SPRR1B          0.022384   
369                                      MODULE_84          0.035258   
362                                      MODULE_45          0.035258   
341                                     MODULE_154          0.035258   
355                                     MODULE_357          0.035258   
352                                     MODULE_297          0.035258   
344                                     MODULE_164          0.035258   
161         LIN_SILENCED_BY_TUMOR_MICROENVIRONMENT          0.042346   
368                                       MODULE_6          0.096019   
336                                     MODULE_111          0.096019   
184                        ONDER_CDH1_TARGETS_2_DN          0.135213   
93                         CROMER_TUMORIGENESIS_DN          0.13



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 101




                                                  Term  Adjusted P-value  \
80              Tracheoesophageal fistula (HP:0002575)          0.000608   
81              Ulnar deviation of finger (HP:0009465)          0.019292   
82   Ulnar deviation of the hand or of fingers of t...          0.021889   
235                                            chr2q31          0.125719   
84               Patent ductus arteriosus (HP:0001643)          0.136187   
83                Camptodactyly of finger (HP:0100490)          0.159534   
145              Knee flexion contracture (HP:0006380)          0.168060   
144                           Broad thumb (HP:0011304)          0.170645   
141                  Mitral regurgitation (HP:0001653)          0.171764   
138                     Tracheal stenosis (HP:0002777)          0.172994   
143  Abnormalities of placenta or umbilical cord (H...          0.173312   
171  Aplasia/Hypoplasia of metatarsal bones (HP:000...          0.173423   
135         



                                                  Term  Adjusted P-value  \
760  GSE17974_CTRL_VS_ACT_IL4_AND_ANTI_IL12_0.5H_CD...          0.339302   
832          GSE2770_UNTREATED_VS_ACT_CD4_TCELL_48H_DN          0.339302   
831  GSE2770_IL12_VS_TGFB_AND_IL12_TREATED_ACT_CD4_...          0.339302   
830            GSE2770_IL12_ACT_VS_ACT_CD4_TCELL_2H_UP          0.339302   
829         GSE2706_R848_VS_R848_AND_LPS_2H_STIM_DC_DN          0.339302   
828           GSE2706_2H_VS_8H_R848_AND_LPS_STIM_DC_DN          0.339302   
827         GSE26928_NAIVE_VS_CENT_MEMORY_CD4_TCELL_DN          0.339302   
826   GSE26669_CD4_VS_CD8_TCELL_IN_MLR_COSTIM_BLOCK_DN          0.339302   
825            GSE26495_PD1HIGH_VS_PD1LOW_CD8_TCELL_UP          0.339302   
824          GSE2585_THYMIC_DC_VS_THYMIC_MACROPHAGE_DN          0.339302   
823             GSE2585_AIRE_KO_VS_WT_CD80_LOW_MTEC_UP          0.339302   
822        GSE25677_R848_VS_MPL_AND_R848_STIM_BCELL_UP          0.339302   
821        G



                                                   Term  Adjusted P-value  \
1410                                      E2F3_UP.V1_UP          0.082488   
0     Rho guanyl-nucleotide exchange factor activity...          0.146585   
1419                               KRAS.BREAST_UP.V1_UP          0.263797   
1423                                       WNT_UP.V1_DN          0.263797   
1575  GSE22342_CD11C_HIGH_VS_LOW_DECIDUAL_MACROPHAGE...          0.339302   
1576  GSE22601_DOUBLE_POSITIVE_VS_CD4_SINGLE_POSITIV...          0.339302   
1577  GSE22601_IMMATURE_CD4_SINGLE_POSITIVE_VS_DOUBL...          0.339302   
1578               GSE22886_CD4_TCELL_VS_BCELL_NAIVE_UP          0.339302   
1579                 GSE22886_CD8_VS_CD4_NAIVE_TCELL_DN          0.339302   
1580       GSE22886_DAY0_VS_DAY1_MONOCYTE_IN_CULTURE_DN          0.339302   
1587                       GSE22886_NEUTROPHIL_VS_DC_UP          0.339302   
1574  GSE22229_RENAL_TRANSPLANT_IMMUNOSUPP_THERAPY_V...          0.339302   



                               Term  Adjusted P-value               Gene_set
234  RICKMAN_HEAD_AND_NECK_CANCER_E          0.118683  CUSTOM140535385807200
Topic 105




                                                  Term  Adjusted P-value  \
389  GSE22443_NAIVE_VS_ACT_AND_IL2_TREATED_CD8_TCEL...          0.383451   
392  GSE26488_CTRL_VS_PEPTIDE_INJECTION_HDAC7_DELTA...          0.383451   
393    GSE26928_CENTR_MEMORY_VS_CXCR5_POS_CD4_TCELL_UP          0.383451   
394         GSE26928_EFF_MEM_VS_CENTR_MEM_CD4_TCELL_DN          0.383451   
395           GSE26928_NAIVE_VS_CXCR5_POS_CD4_TCELL_UP          0.383451   
396                  GSE2706_R848_VS_LPS_2H_STIM_DC_UP          0.383451   
397  GSE29164_DAY3_VS_DAY7_CD8_TCELL_TREATED_MELANO...          0.383451   
398  GSE3203_HEALTHY_VS_INFLUENZA_INFECTED_LN_BCELL_DN          0.383451   
399            GSE32423_IL7_VS_IL4_MEMORY_CD8_TCELL_DN          0.383451   
400      GSE32423_MEMORY_VS_NAIVE_CD8_TCELL_IL7_IL4_UP          0.383451   
401  GSE32533_MIR17_KO_VS_MIR17_OVEREXPRESS_ACT_CD4...          0.383451   
402              GSE32901_TH1_VS_TH17_NEG_CD4_TCELL_UP          0.383451   
403  GSE3316



             Term  Adjusted P-value               Gene_set
283    GNF2_CCNA1          0.451878  CUSTOM140535568210832
300     MODULE_59          0.451878  CUSTOM140535568210832
299    MODULE_562          0.451878  CUSTOM140535568210832
308    MORF_RAB3A          0.451878  CUSTOM140535568210832
296    MODULE_291          0.451878  CUSTOM140535568210832
295    MODULE_280          0.451878  CUSTOM140535568210832
301     MODULE_61          0.451878  CUSTOM140535568210832
294    MODULE_241          0.451878  CUSTOM140535568210832
292    MODULE_209          0.451878  CUSTOM140535568210832
291    MODULE_174          0.451878  CUSTOM140535568210832
290    MODULE_172          0.451878  CUSTOM140535568210832
288    MODULE_153          0.451878  CUSTOM140535568210832
285    MODULE_107          0.451878  CUSTOM140535568210832
284     GNF2_MLF1          0.451878  CUSTOM140535568210832
293    MODULE_210          0.451878  CUSTOM140535568210832
304    MORF_BMPR2          0.451878  CUSTOM1405355682108



                                                  Term  Adjusted P-value  \
295                               HINATA_NFKB_IMMU_INF          0.198206   
195                                         MORF_BMPR2          0.225000   
190                                         MODULE_385          0.225000   
189                                         MODULE_263          0.225000   
187                                         MODULE_108          0.225000   
186                                          GNF2_PTX3          0.225000   
185                                          GNF2_MMP1          0.225000   
199                                         MORF_RAB3A          0.262352   
193                                          MODULE_76          0.262352   
196                                           MORF_DCC          0.277196   
198                                       MORF_MAP3K14          0.277196   
197                                         MORF_EPHA7          0.281329   
194         



                                                   Term  Adjusted P-value  \
296                                  Adipogenesis WP236          0.002890   
297                       PPAR signaling pathway WP3942          0.007907   
268                          lipid droplet (GO:0005811)          0.013020   
298                                Thermogenesis WP4321          0.021920   
299                     Lipid Metabolism Pathway WP3965          0.040340   
392                          CADWELL_ATG16L1_TARGETS_UP          0.043896   
16          brown fat cell differentiation (GO:0050873)          0.045682   
747                     SMID_BREAST_CANCER_LUMINAL_A_UP          0.059061   
622                 NAKAYAMA_SOFT_TISSUE_TUMORS_PCA2_DN          0.059061   
759                              STEGER_ADIPOGENESIS_UP          0.059061   
15      regulation of biosynthetic process (GO:0009889)          0.065297   
301   AMP-activated Protein Kinase (AMPK) Signaling ...          0.151922   



                                                  Term  Adjusted P-value  \
315                                    ATF2_S_UP.V1_UP          0.087140   
32                                            chr13q11          0.091022   
34                                             chr7q33          0.091889   
33                                             chr3p25          0.178751   
297         GO_SODIUM_DICARBOXYLATE_SYMPORTER_ACTIVITY          0.184221   
302    GO_SUCCINATE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY          0.184221   
303                             GO_SUCCINATE_TRANSPORT          0.184221   
304      GO_SULFATE_TRANSMEMBRANE_TRANSPORTER_ACTIVITY          0.184221   
305                               GO_SULFATE_TRANSPORT          0.184221   
313  GO_TRICARBOXYLIC_ACID_TRANSMEMBRANE_TRANSPORTE...          0.184221   
314                    GO_TRICARBOXYLIC_ACID_TRANSPORT          0.184221   
227  GO_CALCIUM_TRANSMEMBRANE_TRANSPORTER_ACTIVITY_...          0.184221   
223  GO_C4_D



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 111




                                                  Term  Adjusted P-value  \
107                               KRAS.KIDNEY_UP.V1_UP          0.047349   
108                             KRAS.PROSTATE_UP.V1_DN          0.047349   
0                                              chr3p22          0.054018   
110  GSE12003_MIR223_KO_VS_WT_BM_PROGENITOR_8D_CULT...          0.066689   
127  GSE22611_NOD2_VS_MUTANT_NOD2_TRANSDUCED_HEK293...          0.066689   
126  GSE21546_WT_VS_SAP1A_KO_ANTI_CD3_STIM_DP_THYMO...          0.066689   
125                 GSE21379_WT_VS_SAP_KO_CD4_TCELL_DN          0.066689   
124               GSE21033_1H_VS_24H_POLYIC_STIM_DC_UP          0.066689   
123         GSE1925_CTRL_VS_3H_IFNG_STIM_MACROPHAGE_DN          0.066689   
122  GSE18281_CORTICAL_THYMOCYTE_VS_WHOLE_CORTEX_TH...          0.066689   
121                     GSE18148_CBFB_KO_VS_WT_TREG_UP          0.066689   
120         GSE17721_PAM3CSK4_VS_GADIQUIMOD_2H_BMDC_UP          0.066689   
119         



                                                  Term  Adjusted P-value  \
109          GSE4984_LPS_VS_VEHICLE_CTRL_TREATED_DC_DN          0.240261   
108        GSE40685_TREG_VS_FOXP3_KO_TREG_PRECURSOR_UP          0.240261   
17                                             chr3p14          0.301543   
14                                            chr11q22          0.301543   
31                                          MIR6737_3P          0.385122   
23                                           MIR204_3P          0.385122   
24                                        MIR219A_2_3P          0.385122   
26                                          MIR4646_5P          0.385122   
27                                          MIR4768_3P          0.385122   
28                                          MIR5008_3P          0.385122   
33                                  SRPK2_TARGET_GENES          0.385122   
32                                          MIR7157_3P          0.385122   
29          



                                                  Term  Adjusted P-value  \
370         GO_ANGIOTENSIN_ACTIVATED_SIGNALING_PATHWAY          0.036605   
629                         GO_RESPONSE_TO_ANGIOTENSIN          0.075551   
790  GSE43863_NAIVE_VS_MEMORY_TH1_CD4_TCELL_D150_LC...          0.118984   
814                 GSE9988_LPS_VS_LOW_LPS_MONOCYTE_DN          0.241346   
684        GSE13485_CTRL_VS_DAY1_YF17D_VACCINE_PBMC_DN          0.241346   
685        GSE13485_DAY1_VS_DAY3_YF17D_VACCINE_PBMC_UP          0.241346   
686        GSE13485_DAY1_VS_DAY7_YF17D_VACCINE_PBMC_UP          0.241346   
687       GSE13485_DAY3_VS_DAY21_YF17D_VACCINE_PBMC_DN          0.241346   
688  GSE13522_CTRL_VS_T_CRUZI_BRAZIL_STRAIN_INF_SKI...          0.241346   
689  GSE13522_CTRL_VS_T_CRUZI_Y_STRAIN_INF_SKIN_IFN...          0.241346   
690  GSE13946_CTRL_VS_DSS_COLITIS_GD_TCELL_FROM_COL...          0.241346   
691     GSE14000_UNSTIM_VS_4H_LPS_DC_TRANSLATED_RNA_DN          0.241346   
692         



                                                  Term  Adjusted P-value  \
528         GSE22282_HYPOXIA_VS_NORMOXIA_MYELOID_DC_UP          0.383451   
551  GSE30971_CTRL_VS_LPS_STIM_MACROPHAGE_WBP7_KO_4...          0.383451   
552  GSE32034_LY6C_HIGH_VS_LOW_ROSIGLIZATONE_TREATE...          0.383451   
553  GSE32034_UNTREATED_VS_ROSIGLIZATONE_TREATED_LY...          0.383451   
554  GSE32986_CURDLAN_HIGHDOSE_VS_GMCSF_AND_CURDLAN...          0.383451   
555  GSE32986_CURDLAN_LOWDOSE_VS_GMCSF_AND_CURDLAN_...          0.383451   
556           GSE3337_CTRL_VS_16H_IFNG_IN_CD8POS_DC_DN          0.383451   
557           GSE339_CD4POS_VS_CD8POS_DC_IN_CULTURE_DN          0.383451   
550  GSE30971_CTRL_VS_LPS_STIM_MACROPHAGE_WBP7_KO_2...          0.383451   
558          GSE339_EX_VIVO_VS_IN_CULTURE_CD8POS_DC_DN          0.383451   
560  GSE36392_TYPE_2_MYELOID_VS_EOSINOPHIL_IL25_TRE...          0.383451   
561  GSE36392_TYPE_2_MYELOID_VS_MAC_IL25_TREATED_LU...          0.383451   
562  GSE369_



                                                  Term  Adjusted P-value  \
47                                             chr3q24          0.032389   
742                                  PDGF_ERK_DN.V1_UP          0.358260   
741                                      IL21_UP.V1_UP          0.358260   
151                            YAN_ESCAPE_FROM_ANOIKIS          0.435991   
148                          WONG_ENDMETRIUM_CANCER_UP          0.435991   
144                   TESAR_ALK_TARGETS_HUMAN_ES_5D_UP          0.435991   
143                      TESAR_ALK_TARGETS_EPISC_4D_UP          0.435991   
142                      TESAR_ALK_TARGETS_EPISC_3D_UP          0.435991   
141           TESAR_ALK_AND_JAK_TARGETS_MOUSE_ES_D4_UP          0.435991   
136                     SHIN_B_CELL_LYMPHOMA_CLUSTER_1          0.435991   
128      REACTOME_SUPPRESSION_OF_PHAGOSOMAL_MATURATION          0.435991   
121  REACTOME_NITRIC_OXIDE_STIMULATES_GUANYLATE_CYC...          0.435991   
113       RA



       Term  Adjusted P-value               Gene_set
1   chr4q34          0.170698  CUSTOM140535804041488
0  chr15q24          0.174832  CUSTOM140535804041488
Topic 117




                                                  Term  Adjusted P-value  \
23                 specific granule lumen (GO:0035580)          0.012600   
22                 tertiary granule lumen (GO:1904724)          0.019799   
25                       tertiary granule (GO:0070820)          0.044229   
24                       specific granule (GO:0042581)          0.056136   
26                secretory granule lumen (GO:0034774)          0.131236   
355                          GO_TERTIARY_GRANULE_LUMEN          0.138820   
350                          GO_SPECIFIC_GRANULE_LUMEN          0.138820   
267                            GO_COBALT_ION_TRANSPORT          0.225510   
257                          GO_ALVEOLAR_LAMELLAR_BODY          0.225510   
354                                GO_TERTIARY_GRANULE          0.234184   
349                                GO_SPECIFIC_GRANULE          0.234184   
266                             GO_COBALAMIN_TRANSPORT          0.234184   
264         



                                                  Term  Adjusted P-value  \
119     GO_GLYCEROL_TRANSMEMBRANE_TRANSPORTER_ACTIVITY          0.378438   
154                   GO_RESPONSE_TO_WATER_DEPRIVATION          0.378438   
150                         GO_RESPONSE_TO_MERCURY_ION          0.378438   
116                     GO_COLLECTING_DUCT_DEVELOPMENT          0.378438   
164        GO_WATER_TRANSMEMBRANE_TRANSPORTER_ACTIVITY          0.378438   
120                              GO_GLYCEROL_TRANSPORT          0.378438   
125         GO_METANEPHRIC_COLLECTING_DUCT_DEVELOPMENT          0.378438   
132       GO_POLYOL_TRANSMEMBRANE_TRANSPORTER_ACTIVITY          0.378438   
133                                GO_POLYOL_TRANSPORT          0.378438   
144                           GO_RENAL_WATER_TRANSPORT          0.378438   
153                               GO_RESPONSE_TO_WATER          0.395254   
42            REACTOME_PASSIVE_TRANSPORT_BY_AQUAPORINS          0.403242   
45        SE



                                                  Term  Adjusted P-value  \
547                                RAPA_EARLY_UP.V1_UP          0.118272   
258                                         MODULE_326          0.163139   
251                                         MODULE_148          0.163139   
265                                         MODULE_462          0.163139   
264                                         MODULE_431          0.163139   
259                                         MODULE_327          0.181804   
172  REACTOME_DEFECTIVE_GALNT3_CAUSES_FAMILIAL_HYPE...          0.289174   
177                        REACTOME_ENDOGENOUS_STEROLS          0.289174   
183               REACTOME_GLUCOCORTICOID_BIOSYNTHESIS          0.289174   
185  REACTOME_INTERLEUKIN_4_AND_INTERLEUKIN_13_SIGN...          0.289174   
197  REACTOME_SYNTHESIS_OF_15_EICOSATETRAENOIC_ACID...          0.289174   
198  REACTOME_SYNTHESIS_OF_LEUKOTRIENES_LT_AND_EOXI...          0.289174   
199      REA



                                                  Term  Adjusted P-value  \
5                                             chr13q33          0.407165   
6                                             chr18q12          0.407165   
7                                             chr21q21          0.407165   
8                                              chr4q13          0.407165   
230                  GSE29949_MICROGLIA_VS_DC_BRAIN_DN          0.424664   
229  GSE29949_MICROGLIA_BRAIN_VS_MONOCYTE_BONE_MARR...          0.424664   
228   GSE29949_MICROGLIA_BRAIN_VS_CD8_POS_DC_SPLEEN_DN          0.424664   
227  GSE29949_CD8_NEG_DC_SPLEEN_VS_CD8_POS_DC_SPLEE...          0.424664   
226  GSE29618_PRE_VS_DAY7_POST_LAIV_FLU_VACCINE_MON...          0.424664   
225  GSE29618_PRE_VS_DAY7_POST_LAIV_FLU_VACCINE_BCE...          0.424664   
223          GSE29618_PRE_VS_DAY7_FLU_VACCINE_BCELL_UP          0.424664   
231                 GSE360_L_DONOVANI_VS_L_MAJOR_DC_DN          0.424664   
222      GSE



                                                  Term  Adjusted P-value  \
4                                              chr4q13          0.041616   
9               PEPPER_CHRONIC_LYMPHOCYTIC_LEUKEMIA_DN          0.126590   
11                    STAMBOLSKY_BOUND_BY_MUTATED_TP53          0.126590   
7       DAVICIONI_RHABDOMYOSARCOMA_PAX_FOXO1_FUSION_UP          0.143233   
10                      RICKMAN_HEAD_AND_NECK_CANCER_D          0.143233   
12                         XU_GH1_EXOGENOUS_TARGETS_UP          0.143233   
106          GO_STRUCTURAL_CONSTITUENT_OF_TOOTH_ENAMEL          0.244137   
103                                GO_SALIVA_SECRETION          0.244137   
92       GO_NEGATIVE_REGULATION_OF_BONE_MINERALIZATION          0.244137   
90   GO_EXTRACELLULAR_MATRIX_STRUCTURAL_CONSTITUENT...          0.244137   
91         GO_NEGATIVE_REGULATION_OF_BIOMINERALIZATION          0.256254   
105                             GO_SECRETION_BY_TISSUE          0.271003   
132  GSE4027



                                           Term  Adjusted P-value  \
47                specific granule (GO:0042581)          0.084650   
149                     ONDER_CDH1_TARGETS_3_DN          0.085878   
82                      CROMER_TUMORIGENESIS_UP          0.085878   
217                    WESTON_VEGFA_TARGETS_3HR          0.085878   
99             GHANDHI_BYSTANDER_IRRADIATION_UP          0.095891   
100               GHANDHI_DIRECT_IRRADIATION_UP          0.098233   
216                        WESTON_VEGFA_TARGETS          0.098233   
187               SABATES_COLORECTAL_ADENOMA_UP          0.154649   
226  ZHANG_RESPONSE_TO_IKK_INHIBITOR_AND_TNF_UP          0.161127   
220                         WU_HBX_TARGETS_1_UP          0.161127   
94                      FREDERICK_PRKCI_TARGETS          0.161127   
131                          LY_AGING_MIDDLE_UP          0.161127   
165  REACTOME_ELEVATION_OF_CYTOSOLIC_CA2_LEVELS          0.161127   
133                        MATTHEW



                                                  Term  Adjusted P-value  \
429                                       CRX_DN.V1_DN          0.203050   
284                   GO_CHRONIC_INFLAMMATORY_RESPONSE          0.290604   
286                GO_CXCR3_CHEMOKINE_RECEPTOR_BINDING          0.290604   
297               GO_DETECTION_OF_TEMPERATURE_STIMULUS          0.290604   
302                GO_FIBROBLAST_GROWTH_FACTOR_BINDING          0.290604   
303                       GO_GERMINAL_CENTER_FORMATION          0.290604   
316                             GO_INTEGRIN_ACTIVATION          0.290604   
317        GO_INTRAMEMBRANE_LIPID_TRANSPORTER_ACTIVITY          0.290604   
334  GO_NEGATIVE_REGULATION_OF_CELL_CHEMOTAXIS_TO_F...          0.290604   
337  GO_NEGATIVE_REGULATION_OF_ENDOTHELIAL_CELL_CHE...          0.290604   
276         GO_CELL_CELL_ADHESION_MEDIATED_BY_INTEGRIN          0.290604   
346                GO_PHOSPHOLIPID_SCRAMBLASE_ACTIVITY          0.290604   
355  GO_POSI



                                                  Term  Adjusted P-value  \
123                                                 MT          0.000135   
286                                 HJURP_TARGET_GENES          0.009262   
293                               LAMTOR5_TARGET_GENES          0.009262   
125                                            chr4q23          0.128569   
126                                            chr5q32          0.178675   
338                                  TFAM_TARGET_GENES          0.201109   
124                                           chr12q21          0.223977   
276                                 F2RL1_TARGET_GENES          0.286638   
614     GSE27670_CTRL_VS_BLIMP1_TRANSDUCED_GC_BCELL_UP          0.339302   
598              GSE22886_NAIVE_VS_IGM_MEMORY_BCELL_UP          0.339302   
597                 GSE22886_CD8_VS_CD4_NAIVE_TCELL_UP          0.339302   
596  GSE22443_NAIVE_VS_ACT_AND_IL2_TREATED_CD8_TCEL...          0.339302   
595       GS



               Term  Adjusted P-value               Gene_set
0          chr16p11          0.303351  CUSTOM140535388713120
7            DR3_Q4          0.370110  CUSTOM140535388510720
8            MYB_Q3          0.370110  CUSTOM140535388510720
9            MYB_Q6          0.370110  CUSTOM140535388510720
10          RP58_01          0.370110  CUSTOM140535388510720
11          STAT_Q6          0.370110  CUSTOM140535388510720
12  TAL1ALPHAE47_01          0.370110  CUSTOM140535388510720
13   TAL1BETAE47_01          0.370110  CUSTOM140535388510720
14  TAL1BETAITF2_01          0.370110  CUSTOM140535388510720
15          TCF4_Q5          0.370110  CUSTOM140535388510720
17   TGCAAAC_MIR452          0.370110  CUSTOM140535388510720
Topic 126




                                                Term  Adjusted P-value  \
0                                            chr4q35          0.066877   
1  GSE21670_UNTREATED_VS_TGFB_IL6_TREATED_CD4_TCE...          0.125836   

                Gene_set  
0  CUSTOM140535634469344  
1  CUSTOM140535138163680  
Topic 127




                                                  Term  Adjusted P-value  \
8                             REACTOME_ALPHA_DEFENSINS          0.208187   
6                FIGUEROA_AML_METHYLATION_CLUSTER_5_DN          0.259535   
7                FIGUEROA_AML_METHYLATION_CLUSTER_6_DN          0.259535   
10                                  REACTOME_DEFENSINS          0.259535   
0                                             chr12q12          0.358853   
1                                             chr14q11          0.358853   
2                                              chr4q35          0.358853   
3                                              chr7q34          0.367849   
4                                              chr8p23          0.367849   
9                      REACTOME_ANTIMICROBIAL_PEPTIDES          0.370391   
119        GSE13485_CTRL_VS_DAY1_YF17D_VACCINE_PBMC_DN          0.381906   
120               GSE18281_CORTEX_VS_MEDULLA_THYMUS_DN          0.381906   
121  GSE4022



                                                  Term  Adjusted P-value  \
295                                         MODULE_162          0.213750   
298                                         MODULE_218          0.213750   
299                                         MODULE_368          0.213750   
783  GSE23308_WT_VS_MINERALCORTICOID_REC_KO_MACROPH...          0.383451   
784      GSE23321_CENTRAL_MEMORY_VS_NAIVE_CD8_TCELL_UP          0.383451   
785  GSE23502_BM_VS_COLON_TUMOR_HDC_KO_MYELOID_DERI...          0.383451   
786  GSE23502_WT_VS_HDC_KO_MYELOID_DERIVED_SUPPRESS...          0.383451   
787  GSE24972_WT_VS_IRF8_KO_MARGINAL_ZONE_SPLEEN_BC...          0.383451   
788        GSE25677_R848_VS_MPL_AND_R848_STIM_BCELL_UP          0.383451   
789                   GSE2585_CD80_HIGH_VS_LOW_MTEC_UP          0.383451   
790  GSE26290_WT_VS_PDK1_KO_ANTI_CD3_AND_IL2_STIM_C...          0.383451   
791  GSE26488_CTRL_VS_PEPTIDE_INJECTION_HDAC7_DELTA...          0.383451   
792  GSE2648



                                                  Term  Adjusted P-value  \
43                   HUMMERICH_MALIGNANT_SKIN_TUMOR_DN          0.106364   
42                      HUMMERICH_BENIGN_SKIN_TUMOR_DN          0.106364   
37                       BROWNE_HCMV_INFECTION_12HR_DN          0.117747   
44                HUMMERICH_SKIN_CANCER_PROGRESSION_DN          0.117747   
40        EBAUER_MYOGENIC_TARGETS_OF_PAX3_FOXO1_FUSION          0.117747   
49                REACTOME_STRIATED_MUSCLE_CONTRACTION          0.117747   
47                             LEE_AGING_CEREBELLUM_UP          0.117747   
51                                SHEPARD_BMYB_TARGETS          0.117747   
46                              KEGG_VIRAL_MYOCARDITIS          0.117747   
103                      GO_MYOSIN_LIGHT_CHAIN_BINDING          0.119885   
102                               GO_MYOSIN_II_COMPLEX          0.119885   
101                                 GO_MYOSIN_FILAMENT          0.119885   
92          



Empty DataFrame
Columns: [Term, Adjusted P-value, Gene_set]
Index: []
Topic 131




                                                  Term  Adjusted P-value  \
45                 BOYAULT_LIVER_CANCER_SUBCLASS_G6_DN          0.220258   
100                   WANG_METASTASIS_OF_BREAST_CANCER          0.220258   
98                             WAGNER_APO2_SENSITIVITY          0.220258   
92       REACTOME_TERMINATION_OF_O_GLYCAN_BIOSYNTHESIS          0.220258   
58   KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_...          0.220258   
59                            LIU_IL13_MEMORY_MODEL_UP          0.220258   
91   REACTOME_REACTIONS_SPECIFIC_TO_THE_COMPLEX_N_G...          0.220258   
75           REACTOME_BLOOD_GROUP_SYSTEMS_BIOSYNTHESIS          0.220258   
77                            REACTOME_DECTIN_2_FAMILY          0.220258   
78   REACTOME_DEFECTIVE_C1GALT1C1_CAUSES_TN_POLYAGG...          0.220258   
79   REACTOME_DEFECTIVE_GALNT3_CAUSES_FAMILIAL_HYPE...          0.220258   
86   REACTOME_N_GLYCAN_ANTENNAE_ELONGATION_IN_THE_M...          0.220258   
330         



                                                  Term  Adjusted P-value  \
183                                 KRAS.LUNG_UP.V1_DN          0.077672   
136                                         MODULE_154          0.077936   
137                                         MODULE_297          0.077936   
138                                         MODULE_357          0.077936   
65            SCHUETZ_BREAST_CANCER_DUCTAL_INVASIVE_DN          0.133289   
180                      KRAS.600.LUNG.BREAST_UP.V1_DN          0.150811   
259      GSE9946_IMMATURE_VS_LISTERIA_INF_MATURE_DC_DN          0.187079   
208         GSE17721_PAM3CSK4_VS_GADIQUIMOD_2H_BMDC_UP          0.187079   
209             GSE17721_POLYIC_VS_PAM3CSK4_8H_BMDC_UP          0.187079   
210          GSE18791_CTRL_VS_NEWCASTLE_VIRUS_DC_1H_UP          0.187079   
211  GSE19888_CTRL_VS_T_CELL_MEMBRANES_ACT_MAST_CEL...          0.187079   
212  GSE19941_IL10_KO_VS_IL10_KO_AND_NFKBP50_KO_UNS...          0.187079   
213  GSE1994



In [None]:
# Scan the enrichment table of every topic at level `l` and collect
#   * topic_pvalues: -log10 of the smallest adjusted P-value of each non-empty table
#   * topic_gos:     the distinct 'Gene_set' labels found in the first 10 rows of each table
topic_pvalues = []
topic_gos = []
for itopic, topic in enumerate(df_topics.columns):
    # Enrichment files are numbered from 1, following the column order of df_topics.
    gsea_file = "%s/gsea/gsea_level_%d_topic_%d.csv" % (directory, l, itopic + 1)
    try:
        enriched_topic = pd.read_csv(gsea_file)
        if len(enriched_topic.index) > 0:
            # Smallest adjusted P-value of the table (NaN entries are skipped).
            p_val = enriched_topic['Adjusted P-value'].min()
            topic_pvalues.append(-np.log10(p_val))
            topic_gos.extend(enriched_topic['Gene_set'][:10].unique())
        print(topic)
    except Exception as err:
        # Best effort: a missing or malformed table must not abort the scan.
        # Catching Exception (instead of a bare `except:`) still lets
        # KeyboardInterrupt / SystemExit stop a long-running cell.
        print("error", type(err))

In [None]:
# Histogram, across topics, of the values stored in topic_pvalues
# (-log10 of each topic's smallest adjusted P-value), with the alpha = 0.05
# significance threshold drawn as a dashed vertical line.
fig = plt.figure()
c, _, _ = plt.hist(topic_pvalues, histtype='step', lw=2)
# Threshold marker. A two-point segment is used instead of np.arange(...) with a
# float step: the length of such an arange is not numerically stable, so it could
# disagree with the number of x-values and make plt.plot raise. The segment keeps
# the previous extent: from 0 up to 90% of (tallest bin + 5).
alpha_x = -np.log10(0.05)
alpha_top = 0.9 * (np.max(c) + 5)
plt.plot([alpha_x, alpha_x], [0, alpha_top], ls='--', lw=5, label="$\\alpha=0.05$")
plt.xlabel('-log(P-value)', fontsize=16)
plt.ylabel("number of topics")
plt.legend(fontsize=16)
fig.savefig("%s/pvaluescrosstopic(%d).png"%(directory,l))

In [None]:
# Horizontal bar chart counting how many times each distinct entry occurs in
# topic_gos; labels get underscores replaced by spaces and are wrapped at 20
# characters so they fit beside the bars.
gos, goscounts = np.unique(topic_gos, return_counts=True)
fig = plt.figure(figsize=(20,10))
bar_labels = []
for go_name in gos:
    readable = str(go_name).replace('_',' ')
    bar_labels.append("\n".join(wrap(readable, 20)))
plt.barh(bar_labels, goscounts)
plt.yticks(fontsize=15)
plt.show()
fig.savefig("%s/pvaluecategories(%d).pdf"%(directory,l))

# WGCNA vs hSBM

In [None]:
# Dependencies of the algorithm-comparison cells that follow:
# hypergeometric distribution, V-measure clustering score and seaborn plots.
from scipy.stats import hypergeom
from sklearn.metrics import v_measure_score
import seaborn as sns
# Switch seaborn to its 'paper' plotting context for the figures drawn below.
sns.set_context('paper')

In [None]:
# hSBM assignment: for every row of the word-distribution table, the label of
# the column holding the row's largest value.
hsbm_word_dist = pd.read_csv("topsbm/topsbm_level_2_word-dist.csv", index_col=0)
hsbm_list = hsbm_word_dist.apply(lambda row: row.idxmax(), axis=1)
# Keep only the first 15 characters of each row label
# (presumably strips an Ensembl version suffix -- TODO confirm).
hsbm_list.index = [gene_id[:15] for gene_id in hsbm_list.index]
hsbm_list

In [None]:
# WGCNA assignment: per row, the label of the column with the largest value.
wgcna_word_dist = pd.read_csv("wgcna/wgcna_level_0_word-dist.csv", index_col=0)
wgcna_list = wgcna_word_dist.apply(lambda row: row.idxmax(), axis=1)

In [None]:
# TM assignment: per row, the label of the column with the largest value.
tm_word_dist = pd.read_csv("tm/tm_level_0_word-dist.csv", index_col=0)
tm_list = tm_word_dist.apply(lambda row: row.idxmax(), axis=1)

In [None]:
# LDA assignment: per row, the label of the column with the largest value.
lda_word_dist = pd.read_csv("lda/lda_level_2_word-dist.csv", index_col=0)
lda_list = lda_word_dist.apply(lambda row: row.idxmax(), axis=1)

In [None]:
# Pick the two partitions to compare.
list_1, list_2 = hsbm_list, lda_list

# Restrict both to the genes they share: first drop from list_1 what list_2
# lacks, then drop from list_2 what the (already reduced) list_1 lacks.
shared_in_1 = list_1.index.isin(list_2.index)
list_1 = list_1[shared_in_1]
shared_in_2 = list_2.index.isin(list_1.index)
list_2 = list_2[shared_in_2]

In [None]:
# Ingredients of the hypergeometric overlap test between the two partitions.
# population_size: genes of list_1 that are also indexed in list_2
population_size = int(list_1.index.isin(list_2.index).sum())
# pop_successes[module]: number of genes list_2 assigns to `module`
pop_successes = {module: int((list_2 == module).sum()) for module in list_2.unique()}
# sample_sizes[topic]: number of genes list_1 assigns to `topic`
sample_sizes = {topic: int((list_1 == topic).sum()) for topic in list_1.unique()}
# num_successes[topic, module]: genes assigned to both `topic` and `module`.
# Built directly as an integer matrix instead of fillna(0) on an empty object
# frame, which depends on pandas' deprecated silent downcasting.
num_successes = pd.DataFrame(0, index=list_1.unique(), columns=list_2.unique(), dtype=int)
for g in list_2.index:
    if g in list_1.index:
        num_successes.at[list_1[g], list_2[g]] += 1

In [None]:
# df_cmap[topic, module] = -log10 of the hypergeometric p-value of observing at
# least the measured overlap between `topic` (list_1) and `module` (list_2).
# Created as a float matrix up front; every cell is overwritten in the loop.
df_cmap = pd.DataFrame(0.5, index=list_1.unique(), columns=list_2.unique(), dtype=float)
for module in df_cmap.columns:
    for topic in df_cmap.index:
        x = int(num_successes.at[topic, module])  # number of successes
        M = population_size  # pop size
        k = pop_successes[module]  # successes in pop
        N = sample_sizes[topic]  # sample size
        # P(X >= x) is sf(x-1). Working with logsf keeps the result finite
        # when the p-value itself would underflow to 0 (-log10(0) = inf).
        log_pval = hypergeom.logsf(x - 1, M, k, N)
        df_cmap.at[topic, module] = -float(log_pval) / np.log(10)

In [None]:
# Set every entry of df_cmap that is below 3 to zero.
below_cutoff = df_cmap.lt(3)
df_cmap[below_cutoff] = 0

In [None]:
# Colour palette with one colour per column of df_cmap
network_pal = sns.color_palette('husl',n_colors=len(df_cmap.columns))

# Map each column label to its colour; used to annotate the heatmap columns
network_lut = dict(zip(df_cmap.columns, network_pal))
network_col = df_cmap.columns.map(network_lut)

# Heatmap of df_cmap without any row/column clustering. clustermap creates its
# own figure, so no plt.figure() call is made (it would only leave a stray
# empty figure in the cell output).
cm = sns.clustermap(df_cmap, 
                    row_cluster=False, 
                    col_cluster=False, 
                    metric='euclidean', 
                    vmin=0, 
                    cmap='Blues_r', 
                    col_colors=network_col)

ax = cm.ax_heatmap
ax.tick_params(labelsize=15)
cax = cm.ax_cbar
cax.tick_params(labelsize=24)
cax.set_title("-Log(P-value)", fontsize=30)

cm.fig.suptitle('Algorithm comparison', fontsize=40)
cm.fig.savefig("topics_logp.pdf")

In [None]:
# v_measure_score pairs labels by position, not by index: put list_2 in
# list_1's gene order before scoring.
print("hsbm - lda %.3f"%v_measure_score(list_1, list_2.reindex_like(list_1)))

In [None]:
# Print, one per line, the index labels of list_1 whose value is "Topic 39".
# A plain loop avoids leaving a list of None values as the cell output.
for gene in list_1[list_1=="Topic 39"].index:
    print(gene)

In [None]:
# Pairwise V-measure between the partitions. v_measure_score compares labels by
# position, so in every pair one series is first re-indexed onto the other;
# the first comparison previously skipped this alignment.
print("hsbm - wgcna %.3f"%v_measure_score(hsbm_list, wgcna_list.reindex_like(hsbm_list)))
print("hsbm - tm %.3f"%v_measure_score(hsbm_list.reindex_like(tm_list), tm_list))
print("tm - wgcna %.3f"%v_measure_score(tm_list, wgcna_list.reindex_like(tm_list)))
print("hsbm - lda %.3f"%v_measure_score(hsbm_list, lda_list.reindex_like(hsbm_list)))