# Perform GSEA using GSEAPY  

Following the protocol defined here: https://gseapy.readthedocs.io/en/latest/gseapy_tutorial.html#use-gsea-command-or-gsea


In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina' # mac
import pandas as pd
import gseapy as gp
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Display the installed gseapy version so the results below can be tied to it.
gp.__version__

'0.9.9'

In [3]:
def run_GSEA_gene(celltype='alpha',
             rnkfile="../dat/figdata/fig2_prom_ttest_res_genelevel.csv",
             glist='../dat/glists/gsea_all.gmt',
             permutation_num=10000,
             seed=1000,
             weighted_score_type=0,
             min_size=1,
             max_size=5000,
             processes=8):
    """Run a preranked GSEA (``gp.prerank``) for a single cell type.

    The table in ``rnkfile`` is read with its second column as the index,
    restricted to rows whose ``celltype`` column equals ``celltype`` and whose
    ``isAmbious`` flag is false, and ordered by ascending ``mlog10P``. The
    resulting ``mlog10P`` series is the ranking handed to ``gp.prerank``.
    The size of the ranking and its first and last entries are printed as a
    quick sanity check before the analysis starts.

    Parameters
    ----------
    celltype : str
        Value matched against the ``celltype`` column of ``rnkfile``.
    rnkfile : str
        Path of the CSV file holding the ``celltype``, ``isAmbious`` and
        ``mlog10P`` columns.
    glist : str
        Forwarded to ``gp.prerank`` as ``gene_sets`` (the default is a .gmt
        file path).
    permutation_num : int
        Number of permutations; lower it to speed up a trial run.
    seed : int
        Random seed forwarded to ``gp.prerank``.
    weighted_score_type : int or float
        Forwarded to ``gp.prerank``. NOTE(review): 0 presumably selects the
        unweighted statistic (downstream file names say "noweight") - confirm
        against the gseapy documentation.
    min_size, max_size : int
        Gene-set size limits forwarded to ``gp.prerank``.
    processes : int
        Number of worker processes forwarded to ``gp.prerank``.

    All defaults reproduce the values that were previously hard-coded, so
    existing calls behave exactly as before.

    Returns
    -------
    The object returned by ``gp.prerank``; results are not written to disk
    (``outdir=None``) and no plots are produced (``no_plot=True``).

    Raises
    ------
    ValueError
        If no row of ``rnkfile`` matches ``celltype`` after filtering.
    """
    gene_table = pd.read_csv(rnkfile, index_col=1)
    keep = (gene_table["celltype"] == celltype) & (~gene_table["isAmbious"])
    rnk = gene_table.loc[keep].sort_values(by='mlog10P')["mlog10P"]

    # Fail early with a readable message instead of handing an empty ranking
    # to gseapy (e.g. after a typo in the cell type name).
    if rnk.empty:
        raise ValueError(
            "No non-ambiguous genes found for celltype %r in %s" % (celltype, rnkfile)
        )

    print(rnk.shape)
    print(rnk.head(1))
    print(rnk.tail(1))

    return gp.prerank(rnk=rnk,
                      gene_sets=glist,
                      permutation_num=permutation_num,
                      outdir=None,   # do not write output to disk
                      no_plot=True,  # skip plotting
                      weighted_score_type=weighted_score_type,
                      seed=seed,
                      min_size=min_size,
                      max_size=max_size,
                      processes=processes)


## alpha

In [4]:
# Run the preranked GSEA on the alpha-cell ranking, then show the result table ordered by its index (the term name).
gs_res_a=run_GSEA_gene('alpha')
gs_res_a.res2d.sort_index()

(13130,)
gene
FOSL1   -34.795613
Name: mlog10P, dtype: float64
gene
SPAG16    24.642316
Name: mlog10P, dtype: float64


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha-state 1 down,-0.155033,-7.36666,0.0,0.0,2423,1961,GCG;KCNA5;CERKL;LANCL1;TTC14;ARRDC3;EMB;PDZK1;...,SPRYD7;ZNF394;MAPK9;TCF7L2;COMMD3;GUCY1A3;ZNF3...
Alpha-state 1 up,-0.191691,-11.370169,0.0,0.0,4578,3696,TTR;SND1;FEV;NEUROD1;EPHX1;NDRG1;PTPN6;NUPR1L;...,RPL26L1;SMOC1;ABCD1;BCAP31;RNASET2;POLR2J;C9or...
Alpha-state 2 down,-0.193246,-7.969225,0.0,0.0,1781,1434,TTR;SND1;FEV;EPHX1;NUPR1L;CHGA;SH3TC1;ANXA6;RE...,RBM3;STOML2;LPCAT3;WNT4;EIF3F;ELOF1;ECHDC3;LSR...
Alpha-state 2 up,-0.123456,-5.729493,0.0,0.0,2262,1832,GCG;SLC30A8;HTR1F;PCDH17;TTC14;ARRDC3;SLC7A2;R...,TRNT1;GPATCH2;LSG1;SLC39A9;NDUFC2;GPATCH1;C5or...
Alpha-state 3 down,-0.246412,-6.92439,0.0,0.0,821,616,PCDH17;ARRDC3;TNS3;RPS26;TFF3;GSTM2;FABP5;CENP...,XPO6;TCERG1;POM121C;ACTN4;ARHGAP21;UHRF1BP1L;C...
Alpha-state 3 up,-0.113119,-4.496312,0.0,0.0,1533,1286,SPAG16;GCG;SND1;NEUROD1;SLC30A8;NDRG1;NUPR1L;S...,POMP;MRPS28;RPL26L1;SMOC1;POLR2J;APOA1BP;SGSM3...
Alpha-state 4 down,-0.198612,-12.644754,0.0,0.0,5779,4727,SND1;KCNA5;NEUROD1;EPHX1;NDRG1;HTR1F;ZNF736;PT...,ZNF280B;L2HGDH;GTF3A;TOR3A;GTF3C1;KIAA0556;DPP...
Alpha-state 4 up,-0.156485,-7.48548,0.0,0.0,2535,2021,SPAG16;TTR;C1orf168;SLC30A8;PCDH17;EMG1;MOB4;T...,PEF1;SP1;FAM58A;CNBP;TOP2B;RPL41;ZNF655;RNLS;C...
Alpha-state 5 down,-0.129438,-4.363101,0.0,0.0,1110,900,TTR;C1orf168;FEV;NUPR1L;CHGA;IDNK;SPINK4;TMEM8...,SEC11C;NDUFB1;U2AF1L4;PSENEN;RPS13;TXNL4A;RPL1...
Alpha-state 5 up,-0.176696,-7.406839,0.0,0.0,1842,1470,SND1;KCNA5;NEUROD1;HTR1F;ZNF736;RSAD2;HIST1H2B...,UQCRFS1;RAB1A;SC5D;TADA1;MAN2A1;PDCL3;ACVR1C;P...


## beta

In [5]:
# Run the preranked GSEA on the beta-cell ranking, then show the result table ordered by its index (the term name).
# Note: the beta result is stored in `gs_res` (no suffix), unlike `gs_res_a` / `gs_res_d`.
gs_res=run_GSEA_gene('beta')
gs_res.res2d.sort_index()

(12988,)
gene
SLC2A13   -40.521506
Name: mlog10P, dtype: float64
gene
INS-IGF2    36.40042
Name: mlog10P, dtype: float64


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha-state 1 down,-0.159409,-7.575267,0.0,0.0,2423,1949,GPD1L;CRADD;KCNA5;CCDC150;SLC38A4;PRKAB1;PLCL2...,EI24;DPH5;MTF2;SMARCA5;HMGN3;MTMR6;BLZF1;TARS;...
Alpha-state 1 up,-0.176153,-10.402792,0.0,0.0,4578,3639,INS;NPEPL1;G6PC2;DHRS2;IPO13;KCNK17;KLHDC4;MS4...,PGAP3;THYN1;HNF4A;MRPL1;C15orf48;CKAP4;EMC10;C...
Alpha-state 2 down,-0.197538,-8.136809,0.0,0.0,1781,1436,INS;KLHDC4;KCNK16;CHGA;ATP1A1;WWC1;REC8;PCSK1N...,EIF4EBP1;MAP1S;CYB5R3;CREB3L2;SRPR;FOXRED1;EMD...
Alpha-state 2 up,-0.133413,-6.143825,0.0,0.0,2262,1830,G6PC2;ETFDH;TAF2;SLC38A4;ATP8B1;HADH;BMP5;VPS4...,AKAP11;FAM98A;MTMR11;TTF1;C7orf55;TOMM5;LETM1;...
Alpha-state 3 down,-0.24621,-6.955557,0.0,0.0,821,614,PCSK1N;GSTM2;FABP5;RPL3;THAP5;CRYBA2;HIST1H1E;...,USP34;DNAL1;CNOT4;VCPIP1;LATS1;HLF;ZNF525;SLC8...
Alpha-state 3 up,-0.13873,-5.493009,0.0,0.0,1533,1296,NPEPL1;G6PC2;CRADD;MS4A8;ETFDH;NEUROD1;SLC38A4...,CSTB;MCOLN1;EIF6;WDR83OS;TMEM60;UBE2V2;LRPAP1;...
Alpha-state 4 down,-0.208748,-13.179117,0.0,0.0,5779,4705,INS;NPEPL1;G6PC2;DHRS2;IPO13;GPD1L;ETFDH;KCNA5...,COX6B1;LETM1;WASL;SLC38A2;ARL5A;ATP5SL;CEP44;F...
Alpha-state 4 up,-0.160203,-7.640794,0.0,0.0,2535,2002,CRADD;MS4A8;TAF2;WWC1;C1orf168;SLC37A3;RBP1;SP...,CSTF3;FAM175B;NOP16;SYNCRIP;ZNF506;PGRMC1;THEM...
Alpha-state 5 down,-0.141176,-4.702687,0.0,0.0,1110,886,INS;NPEPL1;KCNK17;KCNK16;FAM159B;CHGA;C1orf168...,FTH1;WTAP;CSTB;MCOLN1;EIF6;WDR83OS;LRPAP1;ECHS...
Alpha-state 5 up,-0.165056,-6.94559,0.0,0.0,1842,1483,IPO13;GPD1L;KCNA5;RSAD2;NEUROD1;ATP1A1;SSTR1;S...,IRF6;MAP2K4;TWSG1;CNOT8;IFT57;COX10;NOLC1;DHX1...


## delta

# Run the preranked GSEA on the delta-cell ranking, then show the result table ordered by its index (the term name).
# NOTE(review): this cell has no execution prompt or output in the saved notebook, and the delta
# export/plot lines further down are commented out - confirm whether delta is meant to be run.
gs_res_d=run_GSEA_gene('delta')
gs_res_d.res2d.sort_index()

### Save results

In [7]:
# Write the selected result columns of the beta and alpha runs to CSV, one file per run.
result_columns = ['es','nes','pval','fdr','geneset_size','matched_size','ledge_genes']
for gsea_result, csv_path in (
    (gs_res, '../dat/figdata/GSEA_beta_all_seed1000_p10000_noweight.csv'),
    (gs_res_a, '../dat/figdata/GSEA_alpha_all_seed1000_p10000_noweight.csv'),
):
    gsea_result.res2d[result_columns].to_csv(csv_path)
# Delta export is currently disabled (kept verbatim; note it omits 'ledge_genes'):
#gs_res_d.res2d[['es','nes','pval','fdr','geneset_size','matched_size']].to_csv('../dat/figdata/GSEA_delta_all_seed1000_p10000_noweight.csv')

from gseapy.plot import gseaplot, heatmap

# Save one enrichment plot per term, first for the beta run and then for the alpha run.
# Each PDF is named "<term><suffix>" and is written relative to the current working directory.
# NOTE(review): the alpha suffix carries no cell-type tag while the beta one has "_beta" -
# confirm this naming is intended before changing it.
for gsea_result, pdf_suffix in (
    (gs_res, '_beta_all_seed1000_p10000_noweight.pdf'),
    (gs_res_a, '_all_seed1000_p10000_noweight.pdf'),
):
    terms = gsea_result.res2d.index
    for term in terms:
        gseaplot(gsea_result.ranking, term=term, **gsea_result.results[term], ofname=term + pdf_suffix)
# Delta plots are currently disabled (kept verbatim):
#terms = gs_res_d.res2d.index
#for i in range(len(terms)):
#    gseaplot(gs_res_d.ranking, term=terms[i], **gs_res_d.results[terms[i]],ofname=terms[i]+'_delta_all_seed1000_p10000_noweight.pdf')