# Perform GSEA using GSEAPY  

Following the protocol defined here: https://gseapy.readthedocs.io/en/latest/gseapy_tutorial.html#use-gsea-command-or-gsea


In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina' # mac
import pandas as pd
import gseapy as gp
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Display the version of the imported gseapy package.
gp.__version__

'0.9.9'

In [17]:
def run_GSEA_gene(celltype='alpha',
                  rnkfile="../dat/figdata/fig2_prom_ttest_res_genelevel.csv",
                  glist='../dat/glists/gsea_all.gmt',
                  permutation_num=10000,
                  seed=1000,
                  min_size=1,
                  max_size=5000,
                  processes=8):
    """Run a pre-ranked GSEA (``gp.prerank``) for one cell type.

    The ranking is built from ``rnkfile``: rows are restricted to the given
    ``celltype`` with ``isAmbious`` false, sorted ascending on ``mlog10P``,
    and the sign of ``mlog10P`` is then flipped. The resulting series
    (indexed by the second CSV column) is what gets passed to ``gp.prerank``.

    Parameters
    ----------
    celltype : str
        Value to match in the ``celltype`` column of ``rnkfile``.
    rnkfile : str
        CSV file with at least the columns ``celltype``, ``isAmbious`` and
        ``mlog10P``; its second column is used as the index.
        ``isAmbious`` is assumed to be read as a boolean column -- TODO confirm.
    glist : str
        Gene sets forwarded to ``gp.prerank`` as ``gene_sets`` (a .gmt path
        by default).
    permutation_num, seed, min_size, max_size, processes
        Forwarded unchanged to ``gp.prerank``. The defaults reproduce the
        values that were previously hard-coded in this function.

    Returns
    -------
    The object returned by ``gp.prerank``. Other cells of this notebook read
    its ``res2d``, ``ranking`` and ``results`` attributes.

    Raises
    ------
    ValueError
        If no row of ``rnkfile`` matches ``celltype`` after filtering, so
        there is nothing to rank.
    """
    gene_stats = pd.read_csv(rnkfile, index_col=1)

    keep = (gene_stats["celltype"] == celltype) & (~gene_stats["isAmbious"])
    # Sort ascending on mlog10P, then negate: the series handed to prerank is
    # therefore in descending order of -mlog10P.
    rnk = -gene_stats.loc[keep].sort_values(by='mlog10P')["mlog10P"]

    if rnk.empty:
        raise ValueError(
            "No rows left for celltype=%r in %r after filtering; "
            "cannot build a ranking." % (celltype, rnkfile)
        )

    # Quick sanity check of the ranking: size, top entry, bottom entry.
    print(rnk.shape)
    print(rnk.head(1))
    print(rnk.tail(1))

    return gp.prerank(rnk=rnk,
                      gene_sets=glist,          # enrichr library names or gmt file
                      permutation_num=permutation_num,  # reduce to speed up a test run
                      outdir=None,              # do not write output to disk
                      no_plot=True,             # skip plotting
                      seed=seed,
                      min_size=min_size,
                      max_size=max_size,
                      processes=processes)


## alpha

In [18]:
# Pre-ranked GSEA for alpha cells; show the result table sorted by its index.
gs_res_a = run_GSEA_gene(celltype='alpha')
gs_res_a.res2d.sort_index()

(13130,)
gene
FOSL1    34.795613
Name: mlog10P, dtype: float64
gene
SPAG16   -24.642316
Name: mlog10P, dtype: float64


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha-state 1 down,0.448197,1.246213,0.0,0.04103,2423,1961,CREB5;GADD45B;TOR1AIP1;MGA;ATF3;PRPSAP1;CNOT7;...,CREB5;GADD45B;TOR1AIP1;MGA;ATF3;PRPSAP1;CNOT7;...
Alpha-state 1 up,0.466623,1.30256,0.0,0.033445,4578,3696,MARCKSL1;SPTAN1;HMGA1;HNRNPAB;EZH1;ABCB9;MAP7D...,MARCKSL1;SPTAN1;HMGA1;HNRNPAB;EZH1;ABCB9;MAP7D...
Alpha-state 2 down,0.47809,1.32716,0.0,0.032727,1781,1434,GADD45B;HMGA1;HNRNPAB;ABHD2;TNFRSF12A;NOP56;DE...,GADD45B;HMGA1;HNRNPAB;ABHD2;TNFRSF12A;NOP56;DE...
Alpha-state 2 up,0.423301,1.177005,0.0,0.061568,2262,1832,MGA;FOSL2;CLGN;POLR3E;ZNF652;PDS5A;ANKIB1;HP1B...,MGA;FOSL2;CLGN;POLR3E;ZNF652;PDS5A;ANKIB1;HP1B...
Alpha-state 3 down,0.545584,1.499354,0.0,0.014877,821,616,MARCKSL1;CREB5;HMGA1;TOR1AIP1;ATF3;ABHD2;YBX1;...,MARCKSL1;CREB5;HMGA1;TOR1AIP1;ATF3;ABHD2;YBX1;...
Alpha-state 3 up,0.375801,1.042652,0.1666,0.270467,1533,1286,PRPSAP1;GNPTG;PTPRN;GABARAP;WDR18;TRAPPC6A;P4H...,PRPSAP1;GNPTG;PTPRN;GABARAP;WDR18;TRAPPC6A;P4H...
Alpha-state 4 down,0.48065,1.342669,0.0,0.043459,5779,4727,HMGA1;MGA;HNRNPAB;ABHD2;PRPSAP1;VPS37A;MAP7D1;...,HMGA1;MGA;HNRNPAB;ABHD2;PRPSAP1;VPS37A;MAP7D1;...
Alpha-state 4 up,0.462909,1.287551,0.0,0.031964,2535,2021,FOSL1;MARCKSL1;CREB5;GADD45B;SPTAN1;ATF3;EZH1;...,FOSL1;MARCKSL1;CREB5;GADD45B;SPTAN1;ATF3;EZH1;...
Alpha-state 5 down,0.422793,1.169002,0.0001,0.064035,1110,900,TNFRSF12A;TRIM28;FOSB;GNPTG;TSR3;PTPRN;GABARAP...,TNFRSF12A;TRIM28;FOSB;GNPTG;TSR3;PTPRN;GABARAP...
Alpha-state 5 up,0.481419,1.336061,0.0,0.040637,1842,1470,MGA;CNOT7;VPS37A;PHLPP2;RUNDC1;PDS5A;HP1BP3;NF...,MGA;CNOT7;VPS37A;PHLPP2;RUNDC1;PDS5A;HP1BP3;NF...


## beta

In [19]:
# Pre-ranked GSEA for beta cells; show the result table sorted by its index.
gs_res = run_GSEA_gene(celltype='beta')
gs_res.res2d.sort_index()

(12988,)
gene
SLC2A13    40.521506
Name: mlog10P, dtype: float64
gene
INS-IGF2   -36.40042
Name: mlog10P, dtype: float64


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha-state 1 down,0.427283,1.459663,0.0,0.012297,2423,1949,CREB5;SEC23B;TOR1AIP1;POLR3A;ITGB1;SYDE2;SCG5;...,CREB5;SEC23B;TOR1AIP1;POLR3A;ITGB1;SYDE2;SCG5;...
Alpha-state 1 up,0.39328,1.350049,0.0,0.021285,4578,3639,WDR70;SESN2;ANO10;ATP6V1E2;DDIT3;MBD6;STK40;UB...,WDR70;SESN2;ANO10;ATP6V1E2;DDIT3;MBD6;STK40;UB...
Alpha-state 2 down,0.428845,1.461013,0.0,0.01306,1781,1436,ABHD2;SESN2;MBD6;SCG5;STK40;TNFRSF12A;MAML3;AC...,ABHD2;SESN2;MBD6;SCG5;STK40;TNFRSF12A;MAML3;AC...
Alpha-state 2 up,0.400605,1.36835,0.0,0.019831,2262,1830,SLC2A13;ANKRD28;FOSL2;WDR70;CALU;IGF1R;SYDE2;R...,SLC2A13;ANKRD28;FOSL2;WDR70;CALU;IGF1R;SYDE2;R...
Alpha-state 3 down,0.53582,1.806047,0.0,0.000441,821,614,ANKRD28;CREB5;TOR1AIP1;SAMD4A;ABHD2;IGF1R;FNIP...,ANKRD28;CREB5;TOR1AIP1;SAMD4A;ABHD2;IGF1R;FNIP...
Alpha-state 3 up,0.361131,1.229148,0.0002,0.051281,1533,1296,SEC23B;SESN2;SCG5;UBAC2;RRAGA;IP6K2;ARPC5;PCK2...,SEC23B;SESN2;SCG5;UBAC2;RRAGA;IP6K2;ARPC5;PCK2...
Alpha-state 4 down,0.426146,1.464317,0.0,0.013633,5779,4705,SEC23B;POLR3A;WDR70;ABHD2;SESN2;ANO10;NFAT5;DD...,SEC23B;POLR3A;WDR70;ABHD2;SESN2;ANO10;NFAT5;DD...
Alpha-state 4 up,0.444722,1.519411,0.0,0.008979,2535,2002,SLC2A13;ANKRD28;CREB5;DDX10;ITGB1;CALU;FNIP2;A...,SLC2A13;ANKRD28;CREB5;DDX10;ITGB1;CALU;FNIP2;A...
Alpha-state 5 down,0.366388,1.240182,0.0004,0.048721,1110,886,RHOQ;DDIT3;SCG5;TNFRSF12A;ERCC1;FAM134A;ETS1;P...,RHOQ;DDIT3;SCG5;TNFRSF12A;ERCC1;FAM134A;ETS1;P...
Alpha-state 5 up,0.446525,1.523107,0.0,0.009545,1842,1483,SEC23B;DDX10;POLR3A;ITGB1;CALU;ANO10;SYDE2;NFA...,SEC23B;DDX10;POLR3A;ITGB1;CALU;ANO10;SYDE2;NFA...


## delta

In [20]:
# Pre-ranked GSEA for delta cells; show the result table sorted by its index.
gs_res_d = run_GSEA_gene(celltype='delta')
gs_res_d.res2d.sort_index()

(12818,)
gene
CDS1    4.778295
Name: mlog10P, dtype: float64
gene
USP5   -2.942512
Name: mlog10P, dtype: float64


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha-state 1 down,0.258303,1.299326,0.0,0.091291,2423,2014,CDS1;GADD45B;HBP1;PLK2;MARCKS;UBC;PSIP1;C9orf1...,CDS1;GADD45B;HBP1;PLK2;MARCKS;UBC;PSIP1;C9orf1...
Alpha-state 1 up,0.191305,0.966744,0.7264,0.63264,4578,3796,MED20;SLC35B2;COMMD1;TYMP;PCIF1;GPATCH3;EPPK1;...,MED20;SLC35B2;COMMD1;TYMP;PCIF1;GPATCH3;EPPK1;...
Alpha-state 2 down,0.23456,1.174814,0.0205,0.149299,1781,1483,HSPA1B;GADD45B;SLC35B2;UBC;PCIF1;NUDC;CRY1;GDI...,HSPA1B;GADD45B;SLC35B2;UBC;PCIF1;NUDC;CRY1;GDI...
Alpha-state 2 up,0.257793,1.295272,0.0,0.087245,2262,1913,RB1CC1;ANKRD28;HBP1;PPP3CA;MARCKS;COMMD1;PSIP1...,RB1CC1;ANKRD28;HBP1;PPP3CA;MARCKS;COMMD1;PSIP1...
Alpha-state 3 down,0.342205,1.681232,0.0,0.017063,821,628,RB1CC1;HSPA1B;ANKRD28;MARCKS;UBC;ADNP2;ATF3;TR...,RB1CC1;HSPA1B;ANKRD28;MARCKS;UBC;ADNP2;ATF3;TR...
Alpha-state 3 up,0.205072,1.026909,0.388739,0.48152,1533,1332,PLK2;PPP3CA;SLC35B2;CCT4;CYSTM1;APOA1BP;AUH;WB...,PLK2;PPP3CA;SLC35B2;CCT4;CYSTM1;APOA1BP;AUH;WB...
Alpha-state 4 down,0.223898,1.133118,0.0038,0.199395,5779,4884,CDS1;RB1CC1;HBP1;PLK2;PPP3CA;MARCKS;SLC35B2;C9...,CDS1;RB1CC1;HBP1;PLK2;PPP3CA;MARCKS;SLC35B2;C9...
Alpha-state 4 up,0.28808,1.449142,0.0,0.04906,2535,2111,ZNF791;HSPA1B;GADD45B;ANKRD28;MED20;AZIN1;COMM...,ZNF791;HSPA1B;GADD45B;ANKRD28;MED20;AZIN1;COMM...
Alpha-state 5 down,0.200145,0.995626,0.516019,0.559133,1110,917,HSPA1B;PPP3CA;MARCKS;UBC;PCIF1;CYSTM1;FAM134A;...,HSPA1B;PPP3CA;MARCKS;UBC;PCIF1;CYSTM1;FAM134A;...
Alpha-state 5 up,0.266779,1.337295,0.0,0.081982,1842,1541,MED20;AZIN1;CCT4;DNAJC6;DDX5;C1orf63;TRAM1;LBH...,MED20;AZIN1;CCT4;DNAJC6;DDX5;C1orf63;TRAM1;LBH...


### Save results

In [21]:
from gseapy.plot import gseaplot, heatmap

# Columns written to the summary CSVs; the remaining res2d columns
# (genes, ledge_genes) are left out.
summary_cols = ['es','nes','pval','fdr','geneset_size','matched_size']

gs_res.res2d[summary_cols].to_csv('../dat/figdata/GSEA_beta_all_seed1000_p10000_rev.csv')
gs_res_a.res2d[summary_cols].to_csv('../dat/figdata/GSEA_alpha_all_seed1000_p10000_rev.csv')
gs_res_d.res2d[summary_cols].to_csv('../dat/figdata/GSEA_delta_all_seed1000_p10000_rev.csv')

# One enrichment plot per term and per cell type, written as
# "<term><suffix>" in the current working directory.
# FIX: the delta plots were previously drawn from gs_res_a (the alpha results)
# while being saved under the delta file names; they now use gs_res_d.
# NOTE(review): the alpha suffix has no "_all" unlike beta/delta and the CSV
# names; kept unchanged in case existing files or figures rely on it.
plot_jobs = [
    (gs_res,   '_beta_all_seed1000_p10000_rev.pdf'),
    (gs_res_a, '_alpha_seed1000_p10000_rev.pdf'),
    (gs_res_d, '_delta_all_seed1000_p10000_rev.pdf'),
]
for res, suffix in plot_jobs:
    terms = res.res2d.index
    for term in terms:
        gseaplot(res.ranking, term=term, **res.results[term], ofname=term + suffix)