# ssGSEA analysis of t-stastic derived from differential expression analysis using Hallmarks

Script related to figure 3g

### Aim:
- Calculate NES of hallmark gene sets using t-statistic derived from differential expression analysis

In [1]:
sessionInfo()

R version 3.5.0 (2018-04-23)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib

locale:
[1] C/UTF-8/C/C/C/C

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_3.5.0  IRdisplay_0.6.1 pbdZMQ_0.3-3    tools_3.5.0    
 [5] htmltools_0.3.6 base64enc_0.1-3 crayon_1.3.4    Rcpp_1.0.1     
 [9] uuid_0.1-2      IRkernel_0.8.14 jsonlite_1.6    digest_0.6.18  
[13] repr_0.17       evaluate_0.13  

## Load packages

In [1]:
library(GSVA)
library(BBmisc)
library(nortest)

“package ‘GSVA’ was built under R version 3.5.1”
Attaching package: ‘BBmisc’

The following object is masked from ‘package:base’:

    isFALSE



In [2]:
setwd("~/git_repos/HGSOC_TME_Heterogeneity/Scripts/3/")

## Load differential expression data

In [None]:
mix <- "../../Data/1/TreatmentNaive_log2exp_loess_norm.txt"

In [3]:
data <- read.csv(mix,
                 sep='\t',
                 header=T,
                 row.names='Hugo_symbol')

head(data)

Unnamed: 0,logFC,AveExpr,t,P.Value,adj.P.Val,B
CYBB,-1.870505,7.517236,-7.383829,9.506638e-09,0.0001394434,9.284987
VAMP5,-1.638811,7.159211,-5.762822,1.374465e-06,0.0100803241,5.020438
APOC1,-2.131354,12.82106,-5.479986,3.302432e-06,0.0105828459,4.257513
IL7R,-3.111353,10.07207,-5.426074,3.902423e-06,0.0105828459,4.111945
NKG7,-1.178869,6.407097,-5.360814,4.77579e-06,0.0105828459,3.935731
CD53,-1.710032,9.386776,-5.338679,5.114235e-06,0.0105828459,3.875965


### Select t-statistic

In [4]:
data_t <- data['t']

head(data_t)

Unnamed: 0,t
CYBB,-7.383829
VAMP5,-5.762822
APOC1,-5.479986
IL7R,-5.426074
NKG7,-5.360814
CD53,-5.338679


In [5]:
data_mtx <- as.matrix(data_t)

## Load hallmark gene sets

In [6]:
Hallmark_geneSets <- read.csv('../../Data/1/HallmarksStromaImmune_GeneSets.txt',
                              sep='\t',
                              header=F,
                              row.names=1)

head(Hallmark_geneSets)

Unnamed: 0,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,⋯,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202
HALLMARK_TNFA_SIGNALING_VIA_NFKB,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB,JUNB,CXCL2,ATF3,NFKBIA,TNFAIP3,PTGS2,CXCL1,IER3,CD83,⋯,EIF1,BMP2,DUSP4,PDLIM5,ICOSLG,GFPT2,KLF2,TNC,SERPINB8,MXD1
HALLMARK_HYPOXIA,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_HYPOXIA,PGK1,PDK1,GBE1,PFKL,ALDOA,ENO2,PGM1,NDRG1,HK2,⋯,HDLBP,ILVBL,NCAN,TGM2,ETS1,HOXB9,SELENBP1,FOSL2,SULT2B1,TGFB3
HALLMARK_CHOLESTEROL_HOMEOSTASIS,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_CHOLESTEROL_HOMEOSTASIS,FDPS,CYP51A1,IDI1,FDFT1,DHCR7,SQLE,HMGCS1,NSDHL,LSS,⋯,,,,,,,,,,
HALLMARK_MITOTIC_SPINDLE,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_MITOTIC_SPINDLE,ARHGEF2,CLASP1,KIF11,KIF23,ALS2,ARF6,MYO9B,MYH9,TUBGCP3,⋯,RHOT2,SORBS2,CDC42EP1,VCL,CLIP1,STK38L,YWHAE,RAPGEF5,CEP72,CSNK1D
HALLMARK_WNT_BETA_CATENIN_SIGNALING,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_WNT_BETA_CATENIN_SIGNALING,MYC,CTNNB1,JAG2,NOTCH1,DLL1,AXIN2,PSEN2,FZD1,NOTCH4,⋯,,,,,,,,,,
HALLMARK_TGF_BETA_SIGNALING,http://www.broadinstitute.org/gsea/msigdb/cards/HALLMARK_TGF_BETA_SIGNALING,TGFBR1,SMAD7,TGFB1,SMURF2,SMURF1,BMPR2,SKIL,SKI,ACVR1,⋯,,,,,,,,,,


### Remove website column

In [7]:
Hallmark_geneSets <- Hallmark_geneSets[-1]

In [8]:
head(Hallmark_geneSets)

Unnamed: 0,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,⋯,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202
HALLMARK_TNFA_SIGNALING_VIA_NFKB,JUNB,CXCL2,ATF3,NFKBIA,TNFAIP3,PTGS2,CXCL1,IER3,CD83,CCL20,⋯,EIF1,BMP2,DUSP4,PDLIM5,ICOSLG,GFPT2,KLF2,TNC,SERPINB8,MXD1
HALLMARK_HYPOXIA,PGK1,PDK1,GBE1,PFKL,ALDOA,ENO2,PGM1,NDRG1,HK2,ALDOC,⋯,HDLBP,ILVBL,NCAN,TGM2,ETS1,HOXB9,SELENBP1,FOSL2,SULT2B1,TGFB3
HALLMARK_CHOLESTEROL_HOMEOSTASIS,FDPS,CYP51A1,IDI1,FDFT1,DHCR7,SQLE,HMGCS1,NSDHL,LSS,MVD,⋯,,,,,,,,,,
HALLMARK_MITOTIC_SPINDLE,ARHGEF2,CLASP1,KIF11,KIF23,ALS2,ARF6,MYO9B,MYH9,TUBGCP3,CKAP5,⋯,RHOT2,SORBS2,CDC42EP1,VCL,CLIP1,STK38L,YWHAE,RAPGEF5,CEP72,CSNK1D
HALLMARK_WNT_BETA_CATENIN_SIGNALING,MYC,CTNNB1,JAG2,NOTCH1,DLL1,AXIN2,PSEN2,FZD1,NOTCH4,LEF1,⋯,,,,,,,,,,
HALLMARK_TGF_BETA_SIGNALING,TGFBR1,SMAD7,TGFB1,SMURF2,SMURF1,BMPR2,SKIL,SKI,ACVR1,PMEPA1,⋯,,,,,,,,,,


In [9]:
Hallmark_geneSets <- convertRowsToList(Hallmark_geneSets)

## Run ssGSEA

In [10]:
hsi_NES <- gsva(data_mtx,
                Hallmark_geneSets,
                method='ssgsea',
                min.sz=0,
                max.sz=1000,
                ssgsea.norm=T)

“14668 genes with constant expression values throuhgout the samples.”

Estimating ssGSEA scores for 52 gene sets.
  |                                                                      |   0%Using parallel with 4 cores


In [1]:
head(hsi_NES)

ERROR: Error in head(hsi_NES): object 'hsi_NES' not found


In [None]:
hsi_NES <- data.frame("Term"=rownames(hsi_NES),
                      hsi_NES)

rownames(hsi_NES) <- NULL

head(hsi_NES)

In [None]:
colnames(hsi_NES)[2] <- "Hallmark_NES"

In [None]:
write.table(hsi_NES,
            file='../../Data/3/HallmarksStromaImmune_NES_after_DiffExp.txt',
            sep='\t',
            row.names=F,
            col.names=T)

# End script