# Perform GSEA using GSEAPY  

Following the protocol defined here: https://gseapy.readthedocs.io/en/latest/gseapy_tutorial.html#use-gsea-command-or-gsea


In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina' # mac
import pandas as pd
import gseapy as gp
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Show the installed gseapy version so the results below can be tied to it.
gp.__version__

'0.9.9'

In [8]:
def run_GSEA_gene(celltype,nperm=1000,rseed=1000,
             rnkfile="../dat/figdata/fig2_prom_ttest_res.csv",
             glist='../dat/glists/gsea_final.gmt',col=0,
             rank_metrics='odds',**kwargs):
    '''
    Run a pre-ranked GSEA (``gp.prerank``) for a single cell type.

    Reads ``rnkfile``, keeps the rows whose ``celltype`` column equals
    ``celltype`` and orders the genes by the ``rank_metrics`` column.
    When ``rank_metrics == 'odds'`` the scores are log2-transformed and any
    infinite score is replaced by the most extreme finite score scaled by a
    random factor between 1.00 and 1.01, so that every gene has a finite
    score before it is handed to gseapy.

    Parameters
    ----------
    celltype : value matched against the ``celltype`` column of ``rnkfile``.
    nperm : forwarded to ``gp.prerank`` as ``permutation_num``.
    rseed : seed for the jitter applied to infinite scores; also forwarded
        to ``gp.prerank`` as ``seed``.
    rnkfile : CSV file with at least the columns ``celltype``, ``gene`` and
        the column named by ``rank_metrics``.
    glist : forwarded to ``gp.prerank`` as ``gene_sets``.
    col : unused; kept only so that existing calls keep working.
    rank_metrics : name of the column holding the ranking score.
    **kwargs : forwarded to ``gp.prerank``. They may override the defaults
        ``outdir=None``, ``no_plot=True``, ``min_size=1``, ``max_size=2100``
        and ``processes=8``.

    Returns
    -------
    The object returned by ``gp.prerank``.

    Raises
    ------
    ValueError
        If no row of ``rnkfile`` matches ``celltype``.
    '''
    table = pd.read_csv(rnkfile)[['celltype', 'gene', rank_metrics]]
    rnk = (table.loc[table['celltype'] == celltype]
                .drop(columns='celltype')
                .sort_values(by=rank_metrics))
    if rnk.empty:
        # Fail here with a clear message instead of passing an empty ranking on.
        raise ValueError("no rows with celltype == %r in %s" % (celltype, rnkfile))

    if rank_metrics == 'odds':
        rnk = rnk.reset_index(drop=True)
        # An odds value of 0 becomes -inf; that is expected and repaired
        # below, so the divide-by-zero warning is silenced.
        with np.errstate(divide='ignore'):
            rnk['odds'] = np.log2(rnk['odds'])

        scores = rnk['odds']
        finite = scores[np.isfinite(scores)]
        is_pos_inf = (scores == np.inf).to_numpy()
        is_neg_inf = (scores == -np.inf).to_numpy()

        # A private generator keeps the jitter reproducible for a given
        # ``rseed`` without reseeding NumPy's global random state.
        rng = np.random.RandomState(rseed)

        # NOTE: the outward push is multiplicative, so it assumes the largest
        # finite score is positive and the smallest finite score is negative.
        # The +inf entries are handled first, then the -inf entries.
        if is_pos_inf.any():
            jitter = rng.uniform(size=int(is_pos_inf.sum()))
            rnk.loc[is_pos_inf, 'odds'] = finite.max() * (1 + jitter / 100)
        if is_neg_inf.any():
            jitter = rng.uniform(size=int(is_neg_inf.sum()))
            rnk.loc[is_neg_inf, 'odds'] = finite.min() * (1 + jitter / 100)

    # Quick sanity output: size of the ranking and its two extremes.
    print(rnk.shape)
    print(rnk.head(1))
    print(rnk.tail(1))

    # Defaults that callers may override through **kwargs.
    options = dict(outdir=None,    # do not write output to disk
                   no_plot=True,   # skip plotting
                   min_size=1,
                   max_size=2100,
                   processes=8)
    options.update(kwargs)

    return gp.prerank(rnk=rnk,                # gene / score table
                      gene_sets=glist,        # enrichr library names or gmt file
                      permutation_num=nperm,  # reduce number to speed up test
                      seed=rseed,
                      **options)

## Run

In [6]:
# Alpha cells: 6000 permutations with the function's default seed.
gs_res_a = run_GSEA_gene('alpha', nperm=6000)
# Show the result table ordered by its index.
gs_res_a.res2d.sort_index()

  


(21715, 2)
     gene      odds
0  ZNF106 -5.977715
          gene      odds
21714  PLEKHS1  4.845848


2019-10-14 23:11:54,161 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.863048,3.211043,0.0,0.0,179,179,WWC1;DDC;INPP4B;SLC7A8;AL136376.1;OR4N2;SLC6A4...,WWC1;DDC;INPP4B;SLC7A8;AL136376.1;OR4N2;SLC6A4...
Alpha2,-0.512263,-2.73402,0.0,0.0,500,500,FOXK2;PPP4R1;SH3KBP1;SSH1;SUN1;IQSEC1;C7orf55-...,NAA50;SMARCD1;ANP32B;MXRA7;POLR3A;NCAM1;DENND2...
Beta sub1_xin,0.373673,0.89275,0.597269,0.685844,13,13,FXYD2;RBP4;PPP1R1A;SCGB2A1;FFAR4;SCGN;PRSS23;T...,FXYD2;RBP4;PPP1R1A;SCGB2A1;FFAR4;SCGN
Beta sub2_xin,0.324017,0.919542,0.580313,0.765849,28,28,PCP4;TFF3;PEMT;NPY;IAPP;RBP1;AP3B1;RPS4X;PAM;G...,PCP4;TFF3;PEMT;NPY;IAPP
Beta sub4_xin,-0.308192,-1.630665,0.0,0.026549,390,386,C1orf43;C12orf29;PSMF1;GDF15;ARG2;GCG;MMP7;SNX...,RPS19;SYF2;SRSF7;MANF;RAD23B;ARF4;RPS27L;DDIT4...
Beta1,0.787686,2.942638,0.0,0.0,178,176,WWC1;DDC;INPP4B;LDLRAD3;ARHGAP26;MGAT5;GSTZ1;M...,WWC1;DDC;INPP4B;LDLRAD3;ARHGAP26;MGAT5;GSTZ1;M...
Beta2,-0.300245,-1.637208,0.0,0.038125,682,681,MDC1;GLYR1;CD44;TNS3;MAML3;CEP350;SLCO3A1;ARG2...,CRTC2;SLC25A38;IFT172;FBXO30;SDC2;RAPH1;ACAP1;...
Mawla_Beta_1,0.494201,1.18583,0.278441,0.265913,14,13,IGSF1;G6PC2;IAPP;SORL1;CECR1;ASB9;EDN3;TSPAN1;...,IGSF1;G6PC2;IAPP;SORL1;CECR1
Mawla_Beta_2,-0.365183,-1.258804,0.153548,0.184395,38,32,CPA1;SERPINA1;NPY;SYNGR4;C10orf10;S100A11;PRG4...,ACTG1;HSPA6;PPY;GADD45B;IL11;HS3ST2
multi_correlated,0.378206,1.210986,0.196185,0.302347,57,54,WDR59;F3;BMP5;ATP1B1;SLC30A8;CMC1;CCDC57;CHGA;...,WDR59;F3;BMP5;ATP1B1;SLC30A8;CMC1;CCDC57;CHGA;...


In [15]:
# Beta cells: 5000 permutations, seed 3000.
gs_res_b = run_GSEA_gene('beta', nperm=5000, rseed=3000)
# Show the result table ordered by its index.
gs_res_b.res2d.sort_index()

(21825, 2)
   gene     odds
0  PDHX -5.19691
        gene      odds
21824  TIGIT  5.021062


2019-10-14 12:45:01,021 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.772896,2.57181,0.0,0.0,179,177,INPP4B;TLE3;MTCH1;CEP350;GSTZ1;CTBP1;SEPT9;DLG...,INPP4B;TLE3;MTCH1;CEP350;GSTZ1;CTBP1;SEPT9;DLG...
Alpha2,-0.286097,-1.590795,0.0,0.041097,500,499,TOM1L2;CKAP5;FBXL13;UCK2;TECR;C12orf65;SDK1;FA...,EIF4A1;HOMER1;ECE1;GOT1;PLEKHH2;VDAC1;PRKCA;BD...
Beta sub1_xin,0.359062,0.825018,0.687208,0.796864,13,13,PRSS23;FXYD2;SCGB2A1;FFAR4;RBP4;PPP1R1A;SCGN;A...,PRSS23;FXYD2;SCGB2A1;FFAR4;RBP4
Beta sub2_xin,0.569174,1.503907,0.037497,0.038715,28,27,PCP4;PEMT;AP3B1;ID1;STMN2;RBP1;TFF3;GNAS;IAPP;...,PCP4;PEMT;AP3B1;ID1;STMN2;RBP1;TFF3;GNAS;IAPP;...
Beta sub3_xin,0.643159,1.398873,0.110706,0.059858,13,10,INS;ASB9;DLK1;CHGA;LAMP1;IGFBP7;CPE;CKB;TIMP1;...,INS;ASB9;DLK1;CHGA;LAMP1;IGFBP7
Beta sub4_xin,-0.340284,-1.80574,0.0,0.020323,390,385,PSMF1;WDR45B;HSPA9;ANXA2;ATP6V0D1;KRT8;ATP6V1H...,ARID5B;EIF2S2;ZFAND2A;ARPP19;XBP1;TMEM258;U2AF...
Beta1,0.851367,2.842941,0.0,0.0,178,178,RHBDL2;INPP4B;CCR8;CASR;ATF7IP;NPEPL1;INS;INS-...,RHBDL2;INPP4B;CCR8;CASR;ATF7IP;NPEPL1;INS;INS-...
Beta2,-0.483746,,,1.0,682,682,PRKCH;PGM1;CKAP5;RPS6KA5;ARAP3;ARID1B;CHKA;SUG...,TNPO1;MAP4;SLC25A25;B3GALNT2;USO1;WDSUB1;MSL2;...
Mawla_Beta_1,0.621505,1.454054,0.066302,0.047146,14,14,SORL1;ASB9;HADH;G6PC2;IGSF1;IAPP;CECR1;PCSK1;T...,SORL1;ASB9;HADH;G6PC2;IGSF1;IAPP;CECR1;PCSK1
Mawla_Beta_2,-0.332739,-1.166957,0.224678,0.24173,38,31,CPA1;SYNGR4;C10orf10;CTRB2;NPY;TMSB4X;SOD2;S10...,SAT1;SST;TIMP1;NAMPT;PTGS2;BIRC3;SPP1;GADD45B;...


In [9]:
# Delta cells: 5000 permutations, seed 3000.
gs_res_d = run_GSEA_gene('delta', nperm=5000, rseed=3000)
# Show the result table ordered by its index.
gs_res_d.res2d.sort_index()

(18547, 2)
    gene      odds
0  TEKT3 -5.978187
        gene      odds
18546  PPIL2  3.418173


2019-10-14 23:19:50,193 Input gene rankings contains duplicated IDs, Only use the duplicated ID with highest value!


Unnamed: 0_level_0,es,nes,pval,fdr,geneset_size,matched_size,genes,ledge_genes
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alpha1,0.591037,1.543987,0.0,0.01208,179,174,LMO7;BACE1;TPM3;SH3KBP1;ABCC8;SH3TC1;HEG1;MYO3...,LMO7;BACE1;TPM3;SH3KBP1;ABCC8;SH3TC1;HEG1;MYO3...
Alpha2,0.245361,0.64991,1.0,1.0,500,495,FAM214A;SH3KBP1;HAGH;SDCBP2;STXBP1;MXRA7;UPF1;...,FAM214A;SH3KBP1;HAGH;SDCBP2;STXBP1;MXRA7;UPF1;...
Beta sub1_xin,-0.291108,-0.936816,0.531507,0.524362,13,13,SCGB2A1;PRSS23;TAGLN2;FFAR4;SCGN;FXYD2;TUBB2A;...,RBP4;ASCL2;PPP1R1A;TUBA4A;TMED6
Beta sub2_xin,0.443777,1.057415,0.423659,0.894425,28,25,GNAS;PEMT;TFF3;NPY;SEC11C;STMN2;RBP1;PAM;CDKN1...,GNAS;PEMT;TFF3;NPY;SEC11C;STMN2;RBP1;PAM;CDKN1...
Beta sub3_xin,0.658513,1.321366,0.085106,0.103502,13,8,IGFBP7;LAMP1;CHGB;ASB9;TIMP1;CPE;CHGA;CKB,IGFBP7;LAMP1
Beta sub4_xin,0.166042,0.437918,1.0,0.998988,390,382,BUD31;MMP7;TRA2B;HAX1;HLA-E;TALDO1;BAX;EMC2;YW...,BUD31;MMP7;TRA2B;HAX1;HLA-E;TALDO1;BAX;EMC2;YW...
Beta1,0.587551,1.533408,0.0,0.006701,178,173,BACE1;FUT9;SAMD11;RP11-723O4.6;SLC39A11;GPD1L;...,BACE1;FUT9;SAMD11;RP11-723O4.6;SLC39A11;GPD1L;...
Beta2,0.34299,0.911071,0.9544,1.0,682,678,MARS;ZBTB20;FOXP1;ATXN1;TOR1AIP1;ALDOA;MBIP;AR...,MARS;ZBTB20;FOXP1;ATXN1;TOR1AIP1;ALDOA;MBIP;AR...
Mawla_Beta_1,0.493192,1.047211,0.441973,0.781945,14,11,IGSF1;TSPAN1;EDN3;PCSK1;CDKN1C;MAFA;SORL1;ASB9...,IGSF1;TSPAN1;EDN3;PCSK1;CDKN1C
Mawla_Beta_2,0.308591,0.745495,0.883527,1.0,38,29,S100A11;SYNGR4;PTPRH;NPY;CPB1;CTRB2;SERPINA1;S...,S100A11;SYNGR4;PTPRH;NPY;CPB1;CTRB2


In [16]:
# Write the summary statistics of each run to its own CSV file.
for out_path, gsea_result in (
    ('../dat/figdata/GSEA_beta_res_final.csv', gs_res_b),
    ('../dat/figdata/GSEA_alpha_res_final.csv', gs_res_a),
    ('../dat/figdata/GSEA_delta_res_final.csv', gs_res_d),
):
    gsea_result.res2d[['es','nes','pval','fdr']].to_csv(out_path)