# Perform GSEA using GSEAPY  

Following the protocol defined here: https://gseapy.readthedocs.io/en/latest/gseapy_tutorial.html#use-gsea-command-or-gsea


In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina' # mac
import pandas as pd
import gseapy as gp
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Display the installed gseapy version (the cell output records the one used for this run).
gp.__version__

'0.9.9'

In [3]:
def _finite_log2_odds(odds, rseed):
    '''
    Convert odds ratios to log2 scale and replace +/-inf with finite values.

    Each +inf is replaced by ``finite_max * (1 + u/100)`` and each -inf by
    ``finite_min * (1 + u/100)``, with ``u`` drawn from U[0, 1), so the
    replaced entries land just beyond the finite extremes and are not tied.

    Parameters
    ----------
    odds : array-like of numbers
        Odds ratios; 0 maps to -inf and inf maps to +inf before replacement.
    rseed : int
        Seed passed to ``np.random.seed`` (the global NumPy generator is
        re-seeded, so the replacement values are reproducible).

    Returns
    -------
    numpy.ndarray
        log2 odds with no infinite entries (NaNs, if any, are left as-is).

    Raises
    ------
    ValueError
        If infinite entries exist but there is no finite value to anchor them.
    '''
    # log2(0) == -inf is expected and repaired below, so silence the
    # divide-by-zero warning NumPy would otherwise emit.
    with np.errstate(divide='ignore'):
        log_odds = np.log2(np.asarray(odds, dtype=float))

    is_pos_inf = log_odds == np.inf
    is_neg_inf = log_odds == -np.inf

    # Seed unconditionally so the global generator state after the call does
    # not depend on whether any infinite value was present.
    np.random.seed(seed=rseed)
    if not (is_pos_inf.any() or is_neg_inf.any()):
        return log_odds

    finite = log_odds[np.isfinite(log_odds)]
    if finite.size == 0:
        raise ValueError('cannot replace infinite log2 odds: no finite value available')

    # The extremes are loop-invariant: compute them once instead of once per
    # infinite entry.
    upper = finite.max()
    lower = finite.min()

    # Draw order matters for reproducibility: all +inf entries first (in row
    # order), then all -inf entries.
    # NOTE(review): the multiplicative jitter only pushes values outward when
    # `upper` is positive and `lower` is negative - confirm this holds for
    # every input file.
    log_odds[is_pos_inf] = upper * (1 + np.random.uniform(size=int(is_pos_inf.sum())) / 100)
    log_odds[is_neg_inf] = lower * (1 + np.random.uniform(size=int(is_neg_inf.sum())) / 100)
    return log_odds


def run_GSEA_gene(celltype,nperm=1000,rseed=1000,
             rnkfile="../dat/figdata/fig2_prom_ttest_res.csv",
             glist='../dat/glists/gsea_final.gmt',col=0,
             rank_metrics='odds',**kwargs):
    '''
    Run a pre-ranked GSEA (``gp.prerank``) for the genes of one cell type.

    Parameters
    ----------
    celltype : str
        Value of the ``celltype`` column selecting the rows to rank.
    nperm : int
        Number of permutations (``permutation_num`` of ``gp.prerank``).
    rseed : int
        Seed used both for the infinite-odds replacement and for ``gp.prerank``.
    rnkfile : str
        CSV file containing the columns ``celltype``, ``gene`` and the column
        named by ``rank_metrics``.
    glist : str
        Gene sets handed to ``gp.prerank`` as ``gene_sets``.
    col : int
        Unused; kept so existing callers keep working.
    rank_metrics : str
        Column to rank genes by. When it is ``'odds'`` the values are
        log2-transformed and infinite results are replaced by finite values
        just beyond the finite extremes.
    **kwargs
        Extra keyword arguments for ``gp.prerank``; they take precedence over
        the defaults set here (e.g. ``processes``, ``min_size``).

    Returns
    -------
    The object returned by ``gp.prerank``.

    Raises
    ------
    ValueError
        If ``rnkfile`` has no rows for ``celltype``.
    '''
    stats = pd.read_csv(rnkfile)[['celltype', 'gene', rank_metrics]]
    rnk = (stats.loc[stats['celltype'] == celltype]
                .drop(columns='celltype')
                .sort_values(by=rank_metrics))
    if rnk.empty:
        raise ValueError('no rows with celltype == %r in %s' % (celltype, rnkfile))

    if rank_metrics == 'odds':
        rnk = rnk.reset_index(drop=True)
        rnk['odds'] = _finite_log2_odds(rnk['odds'], rseed)

    # Quick sanity output: size of the ranking and its two extremes.
    print(rnk.shape)
    print(rnk.head(1))
    print(rnk.tail(1))

    prerank_args = dict(
        rnk=rnk,
        gene_sets=glist,         # enrichr library names or gmt file
        permutation_num=nperm,   # reduce number to speed up test
        outdir=None,             # do not write output to disk
        no_plot=True,            # skip plotting
        seed=rseed,
        min_size=1,
        max_size=2100,
        processes=8,
    )
    # Caller-supplied options win over the defaults above instead of raising
    # a duplicate-keyword TypeError.
    prerank_args.update(kwargs)
    return gp.prerank(**prerank_args)

## Run

In [4]:
# Alpha cells: pre-ranked GSEA on the sub-vs-sub Fisher results file.
gs_res_a = run_GSEA_gene(
    celltype='alpha',
    nperm=6000,
    rnkfile="../figures/Fig2/subfigs/fig_2.prom.sub_vs_sub.fisher.csv",
)
# Show the per-gene-set results ordered by term name.
gs_res_a.res2d.sort_index()

  


(18771, 2)
   gene      odds
0  MT1G -5.646229
          gene      odds
18770  OSBPL1A  4.822249


2019-11-04 19:24:39,727 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.768935,2.964384,0.0,0.0,179,174,INPP4B;WWC1;ARMC4;LLGL2;LANCL1;AL136376.1;GSTZ...,INPP4B;WWC1;ARMC4;LLGL2;LANCL1;AL136376.1;GSTZ...
Alpha2,-0.554086,-2.938518,0.0,0.0,500,489,PLEKHA7;SSH1;FOXK2;KLC1;IQSEC1;SUN1;DRAM1;TECR...,SMCHD1;MSH6;GORASP2;MED13L;PSMD13;PPP1R14C;CRI...
Beta sub1_xin,0.539711,1.291934,0.186695,0.198818,13,13,FXYD2;FFAR4;RBP4;PPP1R1A;SCGB2A1;TAGLN2;SCGN;P...,FXYD2;FFAR4;RBP4;PPP1R1A;SCGB2A1;TAGLN2;SCGN
Beta sub2_xin,-0.169407,-0.558104,0.985788,0.979871,28,28,NPY;TFF3;RBP1;STMN2;SCG2;GNAS;ID1;PAM;SEC11C;R...,FOS;JUNB;MLLT11
Beta sub3_xin,0.283955,0.625557,0.880117,0.962937,13,9,CHGA;INS;ASB9;CPE;IGFBP7;TIMP1;LAMP1;CHGB;CKB,CHGA;INS
Beta sub4_xin,-0.385038,-1.988918,0.0,0.000302,390,381,GDF15;PSMF1;GCG;MMP7;TM4SF4;SERPINA1;PACSIN2;K...,TRAPPC4;PNO1;PTGES3;RGS2;ACP1;EIF5;BUD31;SAT1;...
Beta1,0.718763,2.761255,0.0,0.0,178,173,RAPGEF2;INPP4B;WWC1;PDE6C;SMPD4;GSTZ1;MS4A8;ZH...,RAPGEF2;INPP4B;WWC1;PDE6C;SMPD4;GSTZ1;MS4A8;ZH...
Beta2,-0.363383,-1.98396,0.0,0.000201,682,665,PKIB;PALLD;MAML3;DTNA;TNS3;CD44;FADS2;NEDD9;DL...,TBCB;POLR2I;CORO1C;COG5;MARCH1;NUDC;TNFAIP3;BR...
Mawla_Beta_1,0.504267,1.190048,0.280702,0.274114,14,12,IGSF1;G6PC2;EDN3;ASB9;PCSK1;CECR1;SORL1;KLHDC8...,IGSF1;G6PC2;EDN3
Mawla_Beta_2,-0.338437,-1.146021,0.246295,0.299879,38,32,CPA1;NPY;SERPINA1;CPB1;SYNGR4;PRG4;C10orf10;SO...,PTPRH;HSPA6;IL11;GADD45B;HS3ST2


In [7]:
# Beta cells: pre-ranked GSEA on the sub-vs-sub Fisher results file.
gs_res_b = run_GSEA_gene(
    celltype='beta',
    nperm=6000,
    rnkfile="../figures/Fig2/subfigs/fig_2.prom.sub_vs_sub.fisher.csv",
)
# Show the per-gene-set results ordered by term name.
gs_res_b.res2d.sort_index()

(18655, 2)
      gene      odds
0  PLA2G4E -4.440136
         gene      odds
18654  ZNF541  4.099641


2019-11-04 19:33:35,151 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.690707,1.853337,0.0,4.521e-05,179,172,FADS2;PSMF1;TPCN1;SEPT9;NCOA7;SND1;ARHGAP1;AL1...,FADS2;PSMF1;TPCN1;SEPT9;NCOA7;SND1;ARHGAP1;AL1...
Alpha2,-0.259525,,,1000000000.0,500,489,TOM1L2;TECR;PDE4A;UCK2;SDK1;FAM214A;NUP35;POR;...,MTFMT;LHFPL2;HACL1;CALU;PTGES2;VDAC1;GNAQ;ECE1...
Beta sub1_xin,0.410182,0.861082,0.674202,0.7844538,13,13,FXYD2;SCGB2A1;FFAR4;RBP4;SCGN;PPP1R1A;TUBA4A;P...,FXYD2;SCGB2A1;FFAR4;RBP4;SCGN;PPP1R1A
Beta sub2_xin,0.539887,1.265278,0.154294,0.1773136,28,27,ID1;TFF3;GNAS;RBP1;ID3;IAPP;STMN2;NPY;RPL6;FOS...,ID1;TFF3;GNAS;RBP1;ID3;IAPP;STMN2;NPY;RPL6;FOS...
Beta sub3_xin,0.789514,1.559552,0.010657,0.01009087,13,10,INS;ASB9;DLK1;CHGA;CKB;LAMP1;IGFBP7;TIMP1;CPE;...,INS;ASB9;DLK1
Beta sub4_xin,-0.26169,,,1000000000.0,390,380,PSMF1;RAB7A;KRT8;ATP6V0D1;MMP7;COPZ1;PFN1;DBP;...,RER1;NOP10;SAT1;EIF1;BTG1;NXT1;TSC22D1;PPIB;UP...
Beta1,0.779931,2.0961,0.0,0.0,178,174,CCR8;RHBDL2;CASR;INS-IGF2;INS;SLC39A11;RFX4;MG...,CCR8;RHBDL2;CASR;INS-IGF2;INS;SLC39A11;RFX4;MG...
Beta2,-0.533401,,,1000000000.0,682,667,PGM1;FAM160A1;FADS2;UBR3;RRAS2;ARAP3;PALLD;RAS...,ZNF789;AZIN1;FOXO1;RNF38;P4HA1;SCN4B;RHEB;ANKL...
Mawla_Beta_1,0.574737,1.198873,0.244944,0.2859985,14,13,ASB9;G6PC2;CECR1;PCSK1;IAPP;KLHDC8A;IGSF1;MAFA...,ASB9;G6PC2;CECR1;PCSK1;IAPP;KLHDC8A
Mawla_Beta_2,0.370886,0.881572,0.66568,1.0,38,31,CPA1;IL11;SYNGR4;SETD7;C10orf10;SOD2;NPY;CTRB2...,CPA1;IL11;SYNGR4;SETD7


In [15]:
# Beta-cell GSEA on run_GSEA_gene's default ranking file.
# Kept under its own name: reusing `gs_res_b` here would replace the result of
# the run on the Fisher results file, and the export cell at the end of the
# notebook would then write this run instead when executed top to bottom.
gs_res_b_default=run_GSEA_gene(celltype='beta',nperm=5000,rseed=3000)
gs_res_b_default.res2d.sort_index()

(21825, 2)
   gene     odds
0  PDHX -5.19691
        gene      odds
21824  TIGIT  5.021062


2019-10-14 12:45:01,021 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.772896,2.57181,0.0,0.0,179,177,INPP4B;TLE3;MTCH1;CEP350;GSTZ1;CTBP1;SEPT9;DLG...,INPP4B;TLE3;MTCH1;CEP350;GSTZ1;CTBP1;SEPT9;DLG...
Alpha2,-0.286097,-1.590795,0.0,0.041097,500,499,TOM1L2;CKAP5;FBXL13;UCK2;TECR;C12orf65;SDK1;FA...,EIF4A1;HOMER1;ECE1;GOT1;PLEKHH2;VDAC1;PRKCA;BD...
Beta sub1_xin,0.359062,0.825018,0.687208,0.796864,13,13,PRSS23;FXYD2;SCGB2A1;FFAR4;RBP4;PPP1R1A;SCGN;A...,PRSS23;FXYD2;SCGB2A1;FFAR4;RBP4
Beta sub2_xin,0.569174,1.503907,0.037497,0.038715,28,27,PCP4;PEMT;AP3B1;ID1;STMN2;RBP1;TFF3;GNAS;IAPP;...,PCP4;PEMT;AP3B1;ID1;STMN2;RBP1;TFF3;GNAS;IAPP;...
Beta sub3_xin,0.643159,1.398873,0.110706,0.059858,13,10,INS;ASB9;DLK1;CHGA;LAMP1;IGFBP7;CPE;CKB;TIMP1;...,INS;ASB9;DLK1;CHGA;LAMP1;IGFBP7
Beta sub4_xin,-0.340284,-1.80574,0.0,0.020323,390,385,PSMF1;WDR45B;HSPA9;ANXA2;ATP6V0D1;KRT8;ATP6V1H...,ARID5B;EIF2S2;ZFAND2A;ARPP19;XBP1;TMEM258;U2AF...
Beta1,0.851367,2.842941,0.0,0.0,178,178,RHBDL2;INPP4B;CCR8;CASR;ATF7IP;NPEPL1;INS;INS-...,RHBDL2;INPP4B;CCR8;CASR;ATF7IP;NPEPL1;INS;INS-...
Beta2,-0.483746,,,1.0,682,682,PRKCH;PGM1;CKAP5;RPS6KA5;ARAP3;ARID1B;CHKA;SUG...,TNPO1;MAP4;SLC25A25;B3GALNT2;USO1;WDSUB1;MSL2;...
Mawla_Beta_1,0.621505,1.454054,0.066302,0.047146,14,14,SORL1;ASB9;HADH;G6PC2;IGSF1;IAPP;CECR1;PCSK1;T...,SORL1;ASB9;HADH;G6PC2;IGSF1;IAPP;CECR1;PCSK1
Mawla_Beta_2,-0.332739,-1.166957,0.224678,0.24173,38,31,CPA1;SYNGR4;C10orf10;CTRB2;NPY;TMSB4X;SOD2;S10...,SAT1;SST;TIMP1;NAMPT;PTGS2;BIRC3;SPP1;GADD45B;...


In [8]:
# Delta cells: pre-ranked GSEA on the sub-vs-sub Fisher results file.
gs_res_d = run_GSEA_gene(
    celltype='delta',
    nperm=5000,
    rseed=3000,
    rnkfile="../figures/Fig2/subfigs/fig_2.prom.sub_vs_sub.fisher.csv",
)
# Show the per-gene-set results ordered by term name.
gs_res_d.res2d.sort_index()

(16219, 2)
     gene      odds
0  AMOTL1 -4.862405
        gene      odds
16218  ACACA  3.191321


2019-11-04 19:53:58,650 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.471804,2.427362,0.0,0.0,179,165,SCML4;SLC30A8;TMEM130;BANK1;TMEM72;HTR1F;PGAM2...,SCML4;SLC30A8;TMEM130;BANK1;TMEM72;HTR1F;PGAM2...
Alpha2,-0.421108,-1.854161,0.0,0.001265,500,483,PPP1R1C;NUP35;ST5;SLC16A3;POR;AFF1;SUN1;SCN8A;...,UIMC1;KPNA1;CCDC64;SMIM19;HIST1H2AG;CDC42EP4;P...
Beta sub1_xin,0.519077,1.523708,0.05948,0.054769,13,13,FXYD2;TAGLN2;SCGN;FFAR4;RBP4;ASCL2;SCGB2A1;PRS...,FXYD2;TAGLN2;SCGN;FFAR4
Beta sub2_xin,-0.487971,-1.443061,0.04797,0.071406,28,25,NENF;RBP1;GPX3;STMN2;PEMT;IGFBP5;ID1;SEC11C;PA...,PCP4;RASD1;ID3;RPS4X;MLLT11
Beta sub3_xin,-0.343159,-0.763234,0.796275,0.877497,13,8,CHGA;ASB9;CHGB;LAMP1;IGFBP7;CKB;CPE;TIMP1,TIMP1
Beta sub4_xin,-0.305729,-1.329276,0.008168,0.130585,390,376,IFRD1;HLA-E;KRT8;GDF15;SST;MMP7;LRRC59;CHCHD3;...,EIF6;ANXA7;HSPA5;PACSIN2;ARPP19;TIMM17A;DYNLT3...
Beta1,0.576665,2.949892,0.0,0.0,178,162,DYSF;RFX4;CASR;C14orf132;DGKI;B3GNT5;AGPAT3;RA...,DYSF;RFX4;CASR;C14orf132;DGKI;B3GNT5;AGPAT3;RA...
Beta2,-0.353978,-1.575573,0.0,0.031777,682,661,MAP3K7CL;MAP4;PPP1R1C;KCNIP4;MTHFD1L;FOXP1;ACA...,LIG4;TRIM44;GPHN;DAGLB;ATP5G1;COG8;NIP7;SDC2;W...
Mawla_Beta_1,0.318124,0.871579,0.65775,0.678727,14,11,PCSK1;KLHDC8A;EDN3;CECR1;ASB9;G6PC2;CDKN1C;IGS...,PCSK1;KLHDC8A;EDN3;CECR1
Mawla_Beta_2,-0.42523,-1.315141,0.112874,0.115029,38,30,HMOX1;SST;SYNGR4;C10orf10;SOD2;PRG4;CTRB2;CPB1...,CPA1;HSPA6;SAT1;TIMP1;S100A11;TRIB3;HS3ST2;GAD...


In [9]:
# Delta-cell GSEA on run_GSEA_gene's default ranking file.
# Kept under its own name: reusing `gs_res_d` here would replace the result of
# the run on the Fisher results file, and the export cell at the end of the
# notebook would then write this run instead when executed top to bottom.
gs_res_d_default=run_GSEA_gene(celltype='delta',nperm=5000,rseed=3000)
gs_res_d_default.res2d.sort_index()

(18547, 2)
    gene      odds
0  TEKT3 -5.978187
        gene      odds
18546  PPIL2  3.418173


2019-10-14 23:19:50,193 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.591037,1.543987,0.0,0.01208,179,174,LMO7;BACE1;TPM3;SH3KBP1;ABCC8;SH3TC1;HEG1;MYO3...,LMO7;BACE1;TPM3;SH3KBP1;ABCC8;SH3TC1;HEG1;MYO3...
Alpha2,0.245361,0.64991,1.0,1.0,500,495,FAM214A;SH3KBP1;HAGH;SDCBP2;STXBP1;MXRA7;UPF1;...,FAM214A;SH3KBP1;HAGH;SDCBP2;STXBP1;MXRA7;UPF1;...
Beta sub1_xin,-0.291108,-0.936816,0.531507,0.524362,13,13,SCGB2A1;PRSS23;TAGLN2;FFAR4;SCGN;FXYD2;TUBB2A;...,RBP4;ASCL2;PPP1R1A;TUBA4A;TMED6
Beta sub2_xin,0.443777,1.057415,0.423659,0.894425,28,25,GNAS;PEMT;TFF3;NPY;SEC11C;STMN2;RBP1;PAM;CDKN1...,GNAS;PEMT;TFF3;NPY;SEC11C;STMN2;RBP1;PAM;CDKN1...
Beta sub3_xin,0.658513,1.321366,0.085106,0.103502,13,8,IGFBP7;LAMP1;CHGB;ASB9;TIMP1;CPE;CHGA;CKB,IGFBP7;LAMP1
Beta sub4_xin,0.166042,0.437918,1.0,0.998988,390,382,BUD31;MMP7;TRA2B;HAX1;HLA-E;TALDO1;BAX;EMC2;YW...,BUD31;MMP7;TRA2B;HAX1;HLA-E;TALDO1;BAX;EMC2;YW...
Beta1,0.587551,1.533408,0.0,0.006701,178,173,BACE1;FUT9;SAMD11;RP11-723O4.6;SLC39A11;GPD1L;...,BACE1;FUT9;SAMD11;RP11-723O4.6;SLC39A11;GPD1L;...
Beta2,0.34299,0.911071,0.9544,1.0,682,678,MARS;ZBTB20;FOXP1;ATXN1;TOR1AIP1;ALDOA;MBIP;AR...,MARS;ZBTB20;FOXP1;ATXN1;TOR1AIP1;ALDOA;MBIP;AR...
Mawla_Beta_1,0.493192,1.047211,0.441973,0.781945,14,11,IGSF1;TSPAN1;EDN3;PCSK1;CDKN1C;MAFA;SORL1;ASB9...,IGSF1;TSPAN1;EDN3;PCSK1;CDKN1C
Mawla_Beta_2,0.308591,0.745495,0.883527,1.0,38,29,S100A11;SYNGR4;PTPRH;NPY;CPB1;CTRB2;SERPINA1;S...,S100A11;SYNGR4;PTPRH;NPY;CPB1;CTRB2


In [9]:
# Write the enrichment statistics of each cell type's GSEA result to CSV.
stat_cols = ['es', 'nes', 'pval', 'fdr']
exports = [
    (gs_res_b, '../figures/Fig2/subfigs/GSEA_beta_res_final.csv'),
    (gs_res_a, '../figures/Fig2/subfigs/GSEA_alpha_res_final.csv'),
    (gs_res_d, '../figures/Fig2/subfigs/GSEA_delta_res_final.csv'),
]
for gsea_result, out_path in exports:
    gsea_result.res2d[stat_cols].to_csv(out_path)