In [1]:
suppressMessages(library(GSVA))
suppressMessages(library(GSVAdata))
suppressMessages(library(GSEABase))
library(DEGseq)
library(limma)

Loading required package: qvalue


Attaching package: ‘limma’


The following object is masked from ‘package:BiocGenerics’:

    plotMA




# load gmt data

In [2]:
# Path to the gene-set definitions in GMT format.
gmt_file <- "./c5.all.v7.3.symbols.gmt"
# Parse the GMT file into a gene-set collection used by the scoring step.
geneset <- getGmt(gmt_file)

In [3]:
# Show the first entries of the loaded gene-set collection as a sanity check.
head(geneset)

GeneSetCollection
  names: GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE, GOBP_REPRODUCTION, ..., GOBP_RIBOSOMAL_LARGE_SUBUNIT_ASSEMBLY (6 total)
  unique identifiers: AKT3, PPARGC1A, ..., TRAF7 (1575 total)
  types in collection:
    geneIdType: NullIdentifier (1 total)
    collectionType: NullCollection (1 total)

# load data

In [5]:
# Load the merged pseudo-bulk table that is normalised and scored below.
df <- readRDS(file = "/stor/public/chenyx/HHCAd/PseudoBulk_fetal/merged.rds")

In [8]:
# Preprocessing: rescale every column so its values sum to 10,000, round to
# three decimals, then drop rows that are zero in every column.
# The column count comes from `df` itself rather than a hard-coded 450, so the
# loop neither fails on a smaller input nor skips columns of a larger one.
for (j in seq_len(ncol(df))) {
  df[, j] <- df[, j] / sum(df[, j]) * 10000
}
df <- round(df, 3)

# drop = FALSE keeps the two-dimensional shape even if a single row or column
# survives the filter.
df <- df[rowSums(df) != 0, , drop = FALSE]

In [7]:
# Report the table dimensions (rows, columns).
dim(df)

# GSVA

In [9]:
# Score every gene set per column of `df`; only gene sets with between 10 and
# 500 members (min.sz / max.sz) are kept.
es <- gsva(
  as.matrix(df),
  geneset,
  min.sz = 10,
  max.sz = 500,
  verbose = TRUE
)

Estimating GSVA scores for 10039 gene sets.
Estimating ECDFs with Gaussian kernels



In [12]:
# Persist the GSVA score matrix so the differential analysis can restart here.
saveRDS(es, file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix_fetal.rds")

# DE geneset

In [13]:
# Reload the saved GSVA score matrix.
es <- readRDS(file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix_fetal.rds")

In [14]:
# Region label for each column: nine regions, 50 consecutive columns apiece.
label <- rep(
  c("LV", "IVS", "RV", "LA", "RA", "AO", "PA", "MV", "TV"),
  each = 50
)

# LV

In [15]:
# One-vs-rest grouping for LV: every other region becomes "others".
label_u <- label
label_u[label_u != "LV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order rather than relying on factor()'s default sort, which
# follows the session locale; the manual column names below assume that the
# region is the first (reference) level.
design <- model.matrix(~ factor(label_u, levels = c("LV", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (LV);
# column 2 is the "others" minus LV difference.
colnames(design) <- c("LV", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [16]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [17]:
# Keep the significant gene sets whose `LV` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["LV"]] > 0, ]
# ... ranked from the largest `LV` value downwards.
result_u <- result_u[order(result_u[["LV"]], decreasing = TRUE), ]

In [18]:
# Save the ranked LV table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal//LV.csv")

# IVS

In [19]:
# One-vs-rest grouping for IVS: every other region becomes "others".
label_u <- label
label_u[label_u != "IVS"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order rather than relying on factor()'s default sort, which
# follows the session locale; the manual column names below assume that the
# region is the first (reference) level.
design <- model.matrix(~ factor(label_u, levels = c("IVS", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (IVS);
# column 2 is the "others" minus IVS difference.
colnames(design) <- c("IVS", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [20]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [21]:
# Keep the significant gene sets whose `IVS` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["IVS"]] > 0, ]
# ... ranked from the largest `IVS` value downwards.
result_u <- result_u[order(result_u[["IVS"]], decreasing = TRUE), ]

In [22]:
# Save the ranked IVS table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/IVS.csv")

# RV

In [23]:
# One-vs-rest grouping for RV: every other region becomes "others".
label_u <- label
label_u[label_u != "RV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order explicitly. factor() sorts levels with the session
# locale, and in locales that collate case-insensitively (e.g. en_US.UTF-8)
# "others" sorts before "RV", which made the intercept the "others" mean while
# the column was still named "RV".
design <- model.matrix(~ factor(label_u, levels = c("RV", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (RV);
# column 2 is the "others" minus RV difference.
colnames(design) <- c("RV", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [24]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [25]:
# Keep the significant gene sets whose `RV` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["RV"]] > 0, ]
# ... ranked from the largest `RV` value downwards.
result_u <- result_u[order(result_u[["RV"]], decreasing = TRUE), ]

In [26]:
# Save the ranked RV table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/RV.csv")

# LA

In [27]:
# One-vs-rest grouping for LA: every other region becomes "others".
label_u <- label
label_u[label_u != "LA"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order rather than relying on factor()'s default sort, which
# follows the session locale; the manual column names below assume that the
# region is the first (reference) level.
design <- model.matrix(~ factor(label_u, levels = c("LA", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (LA);
# column 2 is the "others" minus LA difference.
colnames(design) <- c("LA", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [28]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [29]:
# Keep the significant gene sets whose `LA` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["LA"]] > 0, ]
# ... ranked from the largest `LA` value downwards.
result_u <- result_u[order(result_u[["LA"]], decreasing = TRUE), ]

In [30]:
# Save the ranked LA table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/LA.csv")

# RA

In [31]:
# One-vs-rest grouping for RA: every other region becomes "others".
label_u <- label
label_u[label_u != "RA"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order explicitly. factor() sorts levels with the session
# locale, and in locales that collate case-insensitively (e.g. en_US.UTF-8)
# "others" sorts before "RA", which made the intercept the "others" mean while
# the column was still named "RA".
design <- model.matrix(~ factor(label_u, levels = c("RA", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (RA);
# column 2 is the "others" minus RA difference.
colnames(design) <- c("RA", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [32]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [33]:
# Keep the significant gene sets whose `RA` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["RA"]] > 0, ]
# ... ranked from the largest `RA` value downwards.
result_u <- result_u[order(result_u[["RA"]], decreasing = TRUE), ]

In [34]:
# Save the ranked RA table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/RA.csv")

# AO

In [35]:
# One-vs-rest grouping for AO: every other region becomes "others".
label_u <- label
label_u[label_u != "AO"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order rather than relying on factor()'s default sort, which
# follows the session locale; the manual column names below assume that the
# region is the first (reference) level.
design <- model.matrix(~ factor(label_u, levels = c("AO", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (AO);
# column 2 is the "others" minus AO difference.
colnames(design) <- c("AO", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [36]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [37]:
# Keep the significant gene sets whose `AO` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["AO"]] > 0, ]
# ... ranked from the largest `AO` value downwards.
result_u <- result_u[order(result_u[["AO"]], decreasing = TRUE), ]

In [38]:
# Save the ranked AO table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/AO.csv")

# PA

In [39]:
# One-vs-rest grouping for PA: every other region becomes "others".
label_u <- label
label_u[label_u != "PA"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order explicitly. factor() sorts levels with the session
# locale, and in locales that collate case-insensitively (e.g. en_US.UTF-8)
# "others" sorts before "PA", which made the intercept the "others" mean while
# the column was still named "PA".
design <- model.matrix(~ factor(label_u, levels = c("PA", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (PA);
# column 2 is the "others" minus PA difference.
colnames(design) <- c("PA", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [40]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [41]:
# Keep the significant gene sets whose `PA` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["PA"]] > 0, ]
# ... ranked from the largest `PA` value downwards.
result_u <- result_u[order(result_u[["PA"]], decreasing = TRUE), ]

In [42]:
# Save the ranked PA table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/PA.csv")

# MV

In [43]:
# One-vs-rest grouping for MV: every other region becomes "others".
label_u <- label
label_u[label_u != "MV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order rather than relying on factor()'s default sort, which
# follows the session locale; the manual column names below assume that the
# region is the first (reference) level.
design <- model.matrix(~ factor(label_u, levels = c("MV", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (MV);
# column 2 is the "others" minus MV difference.
colnames(design) <- c("MV", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [44]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [45]:
# Keep the significant gene sets whose `MV` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["MV"]] > 0, ]
# ... ranked from the largest `MV` value downwards.
result_u <- result_u[order(result_u[["MV"]], decreasing = TRUE), ]

In [46]:
# Save the ranked MV table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/MV.csv")

# TV

In [47]:
# One-vs-rest grouping for TV: every other region becomes "others".
label_u <- label
label_u[label_u != "TV"] <- "others"
# analyze DE gene set on the GSVA result
# Pin the level order explicitly. factor() sorts levels with the session
# locale, and in locales that collate case-insensitively (e.g. en_US.UTF-8)
# "others" sorts before "TV", which made the intercept the "others" mean while
# the column was still named "TV".
design <- model.matrix(~ factor(label_u, levels = c("TV", "others")))
# Column 1 is the intercept, i.e. the mean of the reference level (TV);
# column 2 is the "others" minus TV difference.
colnames(design) <- c("TV", "others")
# Was colnames(exp): `exp` is base R's exponential function, so that call
# evaluated to NULL and wiped the row names. The samples are the columns of `es`.
row.names(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [48]:
# Cutoff applied to the BH-adjusted p-values.
adjPvalueCutoff <- 0.001
# total: every gene set in `fit`, without a significance filter
allGeneSets <- topTable(fit, number = Inf)
# differential: gene sets passing the adjusted p-value cutoff
# NOTE(review): no `coef` is given, so the test presumably spans all design
# columns jointly; confirm this is intended rather than a single contrast.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  adjust.method = "BH",
  p.value = adjPvalueCutoff
)
# Classify the tests in `fit` at the same cutoff.
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [49]:
# Keep the significant gene sets whose `TV` column is positive ...
result_u <- DEgeneSets[DEgeneSets[["TV"]] > 0, ]
# ... ranked from the largest `TV` value downwards.
result_u <- result_u[order(result_u[["TV"]], decreasing = TRUE), ]

In [50]:
# Save the ranked TV table.
write.csv(result_u, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/TV.csv")

# merge

In [52]:
# Reload the ranked per-region tables written above; the first CSV column
# holds the gene-set names and becomes the row names.
result_LV <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/LV.csv", row.names = 1)
result_IVS <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/IVS.csv", row.names = 1)
result_RV <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/RV.csv", row.names = 1)
result_LA <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/LA.csv", row.names = 1)
result_RA <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/RA.csv", row.names = 1)

result_AO <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/AO.csv", row.names = 1)
result_PA <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/PA.csv", row.names = 1)
result_MV <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/MV.csv", row.names = 1)
result_TV <- read.csv(file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/TV.csv", row.names = 1)

In [53]:
# Merge the first 10 gene-set names of every region table into one vector,
# keeping first-seen order (LV, IVS, RV, LA, RA, AO, PA, MV, TV).
# head() replaces [1:10] so a table with fewer than 10 rows contributes only
# the names it has instead of NA entries, which would later fail as row
# indices into `es`.
selected_geneset <- Reduce(
  union,
  lapply(
    list(
      result_LV, result_IVS, result_RV, result_LA, result_RA,
      result_AO, result_PA, result_MV, result_TV
    ),
    function(tbl) head(row.names(tbl), 10)
  )
)

In [54]:
# Number of distinct gene sets selected across all regions.
length(selected_geneset)

In [55]:
# Restrict the GSVA score matrix to the selected gene sets.
# drop = FALSE keeps the result a matrix even when a single gene set is
# selected, so later dim() / row.names() calls on it still work.
es_selected <- es[selected_geneset, , drop = FALSE]

In [57]:
# Average the scores of each selected gene set within every region.
# The columns of `es_selected` are taken as consecutive blocks of 50 per
# region in the order listed below — assumes the matrix columns follow that
# layout; TODO confirm against the sample order of the input.
region_names <- c("LV", "IVS", "RV", "LA", "RA", "AO", "PA", "MV", "TV")
samples_per_region <- 50
n_sets <- nrow(es_selected)
result_all <- as.data.frame(
  matrix(0, nrow = n_sets, ncol = length(region_names))
)
# seq_along() / seq_len() leave the loops empty when there is nothing to
# average, where 1:0 would iterate over the invalid indices 1 and 0.
for (j in seq_along(region_names)) {
  region_cols <- ((j - 1) * samples_per_region + 1):(j * samples_per_region)
  for (i in seq_len(n_sets)) {
    result_all[i, j] <- mean(es_selected[i, region_cols])
  }
}
row.names(result_all) <- row.names(es_selected)
colnames(result_all) <- region_names

In [58]:
# Save the gene-set-by-region table of mean scores.
write.csv(result_all, file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/all.csv")