In [1]:
suppressMessages(library(GSVA))
suppressMessages(library(GSVAdata))
suppressMessages(library(GSEABase))
library(DEGseq)
library(limma)

Loading required package: qvalue


Attaching package: ‘limma’


The following object is masked from ‘package:BiocGenerics’:

    plotMA




# load gmt data

In [2]:
# Path to the gene-set collection in GMT format, then parse it into a
# GeneSetCollection.
gmt_file <- "./c5.all.v7.3.symbols.gmt"
geneset <- getGmt(gmt_file)

In [3]:
# Show the first few gene sets as a quick check that the GMT file was parsed.
head(geneset)

GeneSetCollection
  names: GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE, GOBP_REPRODUCTION, ..., GOBP_RIBOSOMAL_LARGE_SUBUNIT_ASSEMBLY (6 total)
  unique identifiers: AKT3, PPARGC1A, ..., TRAF7 (1575 total)
  types in collection:
    geneIdType: NullIdentifier (1 total)
    collectionType: NullCollection (1 total)

# load data

In [4]:
# Read the merged pseudo-bulk table from disk.
df <- readRDS(file = "/stor/public/chenyx/HHCAd/PseudoBulk/merged.rds")

In [5]:
# preprocessing
# Rescale every column so that its values sum to 10000. The loop runs over
# however many columns `df` actually has instead of a hard-coded 300, so a
# table of a different width is neither partially normalised nor indexed out
# of bounds.
for (j in seq_len(ncol(df))) {
    df[, j] <- df[, j] / sum(df[, j]) * 10000
}
df <- round(df, 3)

# Drop rows whose (rounded) values sum to zero across all columns.
df <- df[rowSums(df) != 0, ]

In [6]:
# Report the table's dimensions (rows, columns) after filtering.
dim(df)

# GSVA

In [7]:
# Score every gene set in `geneset` against the normalised table, passing
# gene-set size limits of 10 (min.sz) and 500 (max.sz) and enabling progress
# messages.
es <- gsva(
    as.matrix(df),
    geneset,
    min.sz = 10,
    max.sz = 500,
    verbose = TRUE
)

Estimating GSVA scores for 10126 gene sets.
Estimating ECDFs with Gaussian kernels



In [8]:
# Persist the GSVA score matrix so later cells can reload it without
# recomputing.
saveRDS(es, file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix.rds")

# DE geneset

In [10]:
# Reload the saved GSVA score matrix.
es <- readRDS(file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix.rds")

In [11]:
# Region label per column: six consecutive runs of 50, in this order.
label <- rep(c("IAS", "IVS", "LV", "LA", "RV", "RA"), each = 50)

# IAS

In [20]:
# Collapse the region labels into two groups: IAS versus everything else.
label_u <- label
label_u[label_u != "IAS"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, so relying on it could swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("IAS", "others")))
# With IAS as the reference level, column 1 is the model intercept (the IAS
# group mean) and column 2 is the others-minus-IAS difference.
colnames(design) <- c("IAS", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [21]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `IAS` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# IAS-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [22]:
# Keep the gene sets whose `IAS` column is positive, then sort them from the
# largest `IAS` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["IAS"]] > 0, ]
result_u <- result_u[order(result_u[["IAS"]], decreasing = TRUE), ]

In [23]:
# Save the IAS table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/IAS.csv")

# IVS

In [24]:
# Collapse the region labels into two groups: IVS versus everything else.
label_u <- label
label_u[label_u != "IVS"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, so relying on it could swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("IVS", "others")))
# With IVS as the reference level, column 1 is the model intercept (the IVS
# group mean) and column 2 is the others-minus-IVS difference.
colnames(design) <- c("IVS", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [25]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `IVS` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# IVS-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [26]:
# Keep the gene sets whose `IVS` column is positive, then sort them from the
# largest `IVS` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["IVS"]] > 0, ]
result_u <- result_u[order(result_u[["IVS"]], decreasing = TRUE), ]

In [27]:
# Save the IVS table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/IVS.csv")

# LV

In [28]:
# Collapse the region labels into two groups: LV versus everything else.
label_u <- label
label_u[label_u != "LV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, so relying on it could swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("LV", "others")))
# With LV as the reference level, column 1 is the model intercept (the LV
# group mean) and column 2 is the others-minus-LV difference.
colnames(design) <- c("LV", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [29]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `LV` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# LV-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [30]:
# Keep the gene sets whose `LV` column is positive, then sort them from the
# largest `LV` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["LV"]] > 0, ]
result_u <- result_u[order(result_u[["LV"]], decreasing = TRUE), ]

In [31]:
# Save the LV table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/LV.csv")

# LA

In [32]:
# Collapse the region labels into two groups: LA versus everything else.
label_u <- label
label_u[label_u != "LA"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, so relying on it could swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("LA", "others")))
# With LA as the reference level, column 1 is the model intercept (the LA
# group mean) and column 2 is the others-minus-LA difference.
colnames(design) <- c("LA", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [33]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `LA` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# LA-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [34]:
# Keep the gene sets whose `LA` column is positive, then sort them from the
# largest `LA` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["LA"]] > 0, ]
result_u <- result_u[order(result_u[["LA"]], decreasing = TRUE), ]

In [35]:
# Save the LA table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/LA.csv")

# RA

In [36]:
# Collapse the region labels into two groups: RA versus everything else.
label_u <- label
label_u[label_u != "RA"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, and in locales that collate case-insensitively
# "others" can sort before "RA", which would swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("RA", "others")))
# With RA as the reference level, column 1 is the model intercept (the RA
# group mean) and column 2 is the others-minus-RA difference.
colnames(design) <- c("RA", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [37]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `RA` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# RA-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [38]:
# Keep the gene sets whose `RA` column is positive, then sort them from the
# largest `RA` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["RA"]] > 0, ]
result_u <- result_u[order(result_u[["RA"]], decreasing = TRUE), ]

In [39]:
# Save the RA table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/RA.csv")

# RV

In [40]:
# Collapse the region labels into two groups: RV versus everything else.
label_u <- label
label_u[label_u != "RV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the factor levels explicitly. The default level order comes from a
# locale-dependent sort, and in locales that collate case-insensitively
# "others" can sort before "RV", which would swap the two design columns
# relative to the names assigned below.
design <- model.matrix(~ factor(label_u, levels = c("RV", "others")))
# With RV as the reference level, column 1 is the model intercept (the RV
# group mean) and column 2 is the others-minus-RV difference.
colnames(design) <- c("RV", "others")
# Name the design rows after the columns of the matrix being fitted. This
# used to read `colnames(exp)`, which refers to the base function `exp` and
# yields NULL, so the row names were simply cleared.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [41]:
# total
# NOTE(review): no `coef` is supplied, so this table appears to be built from
# all design columns jointly (the next cell reads a per-column `RV` field
# from the result), including the first column, which is the model intercept
# under a different name. Confirm that is intended rather than a single
# RV-vs-others coefficient.
allGeneSets <- topTable(fit, number = Inf)
# differential
adjPvalueCutoff <- 0.001
DEgeneSets <- topTable(
    fit,
    number = Inf,
    p.value = adjPvalueCutoff,
    adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [42]:
# Keep the gene sets whose `RV` column is positive, then sort them from the
# largest `RV` value to the smallest.
result_u <- DEgeneSets[DEgeneSets[["RV"]] > 0, ]
result_u <- result_u[order(result_u[["RV"]], decreasing = TRUE), ]

In [43]:
# Save the RV table; gene-set names are written as the row-name column.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult/RV.csv")

# merge

In [44]:
# Reload the six per-region tables, using the first CSV column (the gene-set
# names) as row names.
result_IAS <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/IAS.csv", row.names = 1
)
result_IVS <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/IVS.csv", row.names = 1
)
result_LV <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/LV.csv", row.names = 1
)
result_LA <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/LA.csv", row.names = 1
)
result_RV <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/RV.csv", row.names = 1
)
result_RA <- read.csv(
    file = "/stor/public/chenyx/HHCAd/GSVAresult/RA.csv", row.names = 1
)

In [45]:
# Pool the leading gene sets of every region table into one de-duplicated
# vector, keeping first-seen order (IAS, IVS, LV, LA, RV, RA).
# `head(..., 10)` takes at most ten names per table. Plain `[1:10]` indexing
# pads a shorter table with NA names, which would then fail when used to
# index the score matrix.
selected_geneset <- Reduce(
    union,
    lapply(
        list(result_IAS, result_IVS, result_LV, result_LA, result_RV, result_RA),
        function(tbl) head(row.names(tbl), 10)
    )
)

In [46]:
# Number of distinct gene sets selected across all regions.
length(selected_geneset)

In [47]:
# Restrict the score matrix to the selected gene sets. `drop = FALSE` keeps
# the result a matrix even when only one gene set is selected, so later
# row/column indexing still works.
es_selected <- es[selected_geneset, , drop = FALSE]

In [48]:
# Build a gene-set x region table holding the mean score of each selected
# gene set within each block of 50 consecutive columns.
# NOTE(review): the block layout (6 runs of 50 columns, in the order named
# below) is hard-coded here; presumably it mirrors the sample labels defined
# earlier. Confirm that the column order of `es_selected` matches.
result_all <- as.data.frame(matrix(0, nrow = nrow(es_selected), ncol = 6))
# seq_len() makes the loops a no-op for an empty selection, where `1:0` would
# iterate over c(1, 0) and index out of bounds.
for (i in seq_len(nrow(es_selected))) {
    for (j in seq_len(6)) {
        result_all[i, j] <- mean(es_selected[i, ((j - 1) * 50 + 1):(j * 50)])
    }
}
row.names(result_all) <- row.names(es_selected)
colnames(result_all) <- c("IAS", "IVS", "LV", "LA", "RV", "RA")

In [49]:
# Save the combined per-region mean-score table.
write.csv(result_all, file = "/stor/public/chenyx/HHCAd/GSVAresult/all.csv")