In [1]:
suppressMessages(library(GSVA))
suppressMessages(library(GSVAdata))
suppressMessages(library(GSEABase))
library(DEGseq)
library(limma)

Loading required package: qvalue


Attaching package: ‘limma’


The following object is masked from ‘package:BiocGenerics’:

    plotMA




# load gmt data

In [2]:
# Path to the gene-set definitions in GMT format (relative to the working dir).
gmt_file <- "./c5.all.v7.3.symbols.gmt"
# Parse the GMT file into a gene-set collection.
geneset <- getGmt(gmt_file)

In [3]:
# Preview the first six gene sets of the collection.
head(geneset, n = 6L)

GeneSetCollection
  names: GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE, GOBP_REPRODUCTION, ..., GOBP_RIBOSOMAL_LARGE_SUBUNIT_ASSEMBLY (6 total)
  unique identifiers: AKT3, PPARGC1A, ..., TRAF7 (1575 total)
  types in collection:
    geneIdType: NullIdentifier (1 total)
    collectionType: NullCollection (1 total)

# load data

In [4]:
# Read the serialized input table that the rest of the notebook works on.
df <- readRDS(
  file = "/stor/public/chenyx/HHCAd/PseudoBulk_fetal/merged_region.rds"
)

In [5]:
# Preprocessing: scale every column to a total of 10,000, round to three
# decimals, then drop rows whose rounded values sum to zero.
# Iterate over the columns actually present instead of a hard-coded 1:200, so
# that no column is silently left unnormalised if the input width changes.
for (j in seq_len(ncol(df))) {
  df[, j] <- df[, j] / sum(df[, j]) * 10000
}
df <- round(df, 3)

# drop = FALSE keeps the two-dimensional shape even when a single row remains.
df <- df[rowSums(df) != 0, , drop = FALSE]

In [6]:
# Show the dimensions of df after scaling and removal of all-zero rows.
dim(df)

# GSVA

In [7]:
# Compute GSVA enrichment scores for the gene-set collection on the
# preprocessed table (coerced to a plain matrix). min.sz / max.sz are the
# gene-set size bounds handed to gsva().
# NOTE(review): the input here is scaled but not log-transformed; confirm the
# default kernel choice of gsva() is appropriate for this kind of data.
es <- gsva(as.matrix(df), geneset,
                    min.sz=10, max.sz=500, verbose=TRUE)

Estimating GSVA scores for 10050 gene sets.
Estimating ECDFs with Gaussian kernels



In [8]:
# Persist the GSVA score matrix so later cells can start from disk.
saveRDS(
  object = es,
  file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix_region_fetal.rds"
)

# Differentially enriched gene sets (limma on GSVA scores)

In [10]:
# Reload the stored GSVA score matrix.
es <- readRDS(
  file = "/stor/public/chenyx//HHCAd//GSVAresult/GSVA_matrix_region_fetal.rds"
)

In [11]:
# Region label per column: four consecutive runs of 50, in this order.
# NOTE(review): assumes the columns of es are ordered region by region - confirm.
label <- rep(c("Atrium", "Ventricle", "Vessel", "Valve"), each = 50)

# Atrium

In [12]:
# One-vs-rest comparison on the GSVA scores: Atrium against everything else.
label_u <- label
label_u[label_u != "Atrium"] <- "others"
# Pin the level order so the intercept is always the Atrium group; the default
# (sorted) order depends on the collation locale of the session.
design <- model.matrix(~ factor(label_u, levels = c("Atrium", "others")))
# Column 1 = intercept (mean score of the Atrium columns),
# column 2 = difference "others - Atrium".
colnames(design) <- c("Atrium", "others")
# Label the design rows with the column names of es. The previous
# colnames(exp) referred to base::exp() and therefore evaluated to NULL.
rownames(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [13]:
# Adjusted p-value threshold shared by the table filter and decideTests().
adjPvalueCutoff <- 0.001
# Every gene set, without any significance filter.
allGeneSets <- topTable(fit, number = Inf)
# Gene sets passing the BH-adjusted threshold.
# NOTE(review): no `coef` is given, so limma appears to test all design
# columns jointly (including the renamed intercept) - confirm this is intended.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  p.value = adjPvalueCutoff,
  adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [14]:
# Keep gene sets with a positive "Atrium" coefficient, largest first.
result_u <- DEgeneSets[DEgeneSets[["Atrium"]] > 0, ]
result_u <- result_u[order(result_u[["Atrium"]], decreasing = TRUE), ]

In [16]:
# Export the sorted Atrium table.
write.csv(
  x = result_u,
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal//Atrium.csv"
)

# Ventricle

In [17]:
# One-vs-rest comparison on the GSVA scores: Ventricle against everything else.
label_u <- label
label_u[label_u != "Ventricle"] <- "others"
# Pin the level order so the intercept is always the Ventricle group. With the
# default (sorted) levels, locales that ignore case put "others" first, and the
# column names assigned below would then be attached to the wrong coefficients.
design <- model.matrix(~ factor(label_u, levels = c("Ventricle", "others")))
# Column 1 = intercept (mean score of the Ventricle columns),
# column 2 = difference "others - Ventricle".
colnames(design) <- c("Ventricle", "others")
# Label the design rows with the column names of es. The previous
# colnames(exp) referred to base::exp() and therefore evaluated to NULL.
rownames(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [18]:
# Adjusted p-value threshold shared by the table filter and decideTests().
adjPvalueCutoff <- 0.001
# Every gene set, without any significance filter.
allGeneSets <- topTable(fit, number = Inf)
# Gene sets passing the BH-adjusted threshold.
# NOTE(review): no `coef` is given, so limma appears to test all design
# columns jointly (including the renamed intercept) - confirm this is intended.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  p.value = adjPvalueCutoff,
  adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [19]:
# Keep gene sets with a positive "Ventricle" coefficient, largest first.
result_u <- DEgeneSets[DEgeneSets[["Ventricle"]] > 0, ]
result_u <- result_u[order(result_u[["Ventricle"]], decreasing = TRUE), ]

In [20]:
# Export the sorted Ventricle table.
write.csv(
  x = result_u,
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Ventricle.csv"
)

# Vessel

In [21]:
# One-vs-rest comparison on the GSVA scores: Vessel against everything else.
label_u <- label
label_u[label_u != "Vessel"] <- "others"
# Pin the level order so the intercept is always the Vessel group. With the
# default (sorted) levels, locales that ignore case put "others" first, and the
# column names assigned below would then be attached to the wrong coefficients.
design <- model.matrix(~ factor(label_u, levels = c("Vessel", "others")))
# Column 1 = intercept (mean score of the Vessel columns),
# column 2 = difference "others - Vessel".
colnames(design) <- c("Vessel", "others")
# Label the design rows with the column names of es. The previous
# colnames(exp) referred to base::exp() and therefore evaluated to NULL.
rownames(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [22]:
# Adjusted p-value threshold shared by the table filter and decideTests().
adjPvalueCutoff <- 0.001
# Every gene set, without any significance filter.
allGeneSets <- topTable(fit, number = Inf)
# Gene sets passing the BH-adjusted threshold.
# NOTE(review): no `coef` is given, so limma appears to test all design
# columns jointly (including the renamed intercept) - confirm this is intended.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  p.value = adjPvalueCutoff,
  adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [23]:
# Keep gene sets with a positive "Vessel" coefficient, largest first.
result_u <- DEgeneSets[DEgeneSets[["Vessel"]] > 0, ]
result_u <- result_u[order(result_u[["Vessel"]], decreasing = TRUE), ]

In [24]:
# Export the sorted Vessel table.
write.csv(
  x = result_u,
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Vessel.csv"
)

# Valve

In [26]:
# One-vs-rest comparison on the GSVA scores: Valve against everything else.
label_u <- label
label_u[label_u != "Valve"] <- "others"
# Pin the level order so the intercept is always the Valve group. With the
# default (sorted) levels, locales that ignore case put "others" first, and the
# column names assigned below would then be attached to the wrong coefficients.
design <- model.matrix(~ factor(label_u, levels = c("Valve", "others")))
# Column 1 = intercept (mean score of the Valve columns),
# column 2 = difference "others - Valve".
colnames(design) <- c("Valve", "others")
# Label the design rows with the column names of es. The previous
# colnames(exp) referred to base::exp() and therefore evaluated to NULL.
rownames(design) <- colnames(es)
fit <- lmFit(es, design)
fit <- eBayes(fit)

In [27]:
# Adjusted p-value threshold shared by the table filter and decideTests().
adjPvalueCutoff <- 0.001
# Every gene set, without any significance filter.
allGeneSets <- topTable(fit, number = Inf)
# Gene sets passing the BH-adjusted threshold.
# NOTE(review): no `coef` is given, so limma appears to test all design
# columns jointly (including the renamed intercept) - confirm this is intended.
DEgeneSets <- topTable(
  fit,
  number = Inf,
  p.value = adjPvalueCutoff,
  adjust.method = "BH"
)
res <- decideTests(fit, p.value = adjPvalueCutoff)

In [28]:
# Keep gene sets with a positive "Valve" coefficient, largest first.
result_u <- DEgeneSets[DEgeneSets[["Valve"]] > 0, ]
result_u <- result_u[order(result_u[["Valve"]], decreasing = TRUE), ]

In [29]:
# Export the sorted Valve table.
write.csv(
  x = result_u,
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Valve.csv"
)

# merge

In [30]:
# Read the four per-region tables back in; the first CSV column holds the
# gene-set names and becomes the row names.
result_Atrium <- read.csv(
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Atrium.csv",
  row.names = 1
)
result_Ventricle <- read.csv(
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Ventricle.csv",
  row.names = 1
)
result_Vessel <- read.csv(
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Vessel.csv",
  row.names = 1
)
result_Valve <- read.csv(
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/Valve.csv",
  row.names = 1
)

In [31]:
# Take up to the first 10 gene sets of every region table and merge them into
# one de-duplicated vector (regions in the order Atrium, Ventricle, Vessel,
# Valve). head() is used instead of [1:10] so that a table with fewer than 10
# rows does not contribute NA entries, which would break the row lookup in es.
top_sets <- lapply(
  list(result_Atrium, result_Ventricle, result_Vessel, result_Valve),
  function(tbl) head(row.names(tbl), 10)
)
selected_geneset <- Reduce(union, top_sets)

In [32]:
# Number of distinct gene sets left after the union of the per-region picks.
length(selected_geneset)

In [33]:
# Restrict the GSVA matrix to the selected gene sets. drop = FALSE keeps a
# matrix even when only one gene set is selected, so later dim()/row.names()
# calls keep working.
es_selected <- es[selected_geneset, , drop = FALSE]

In [34]:
# Average the selected GSVA scores within each region. Columns are taken in
# four consecutive blocks of 50, in the order given by region_names.
# rowMeans() per block replaces the element-wise double loop and also copes
# with an empty selection (the previous 1:dim(...)[1] would iterate over 1, 0).
region_names <- c("Atrium", "Ventricle", "Vessel", "Valve")
samples_per_region <- 50L
region_means <- matrix(
  0,
  nrow = nrow(es_selected),
  ncol = length(region_names),
  dimnames = list(row.names(es_selected), region_names)
)
for (j in seq_along(region_names)) {
  block_cols <- seq.int((j - 1L) * samples_per_region + 1L,
                        j * samples_per_region)
  region_means[, j] <- rowMeans(es_selected[, block_cols, drop = FALSE])
}
result_all <- as.data.frame(region_means)

In [35]:
# Export the per-region mean scores of the selected gene sets.
write.csv(
  x = result_all,
  file = "/stor/public/chenyx/HHCAd/GSVAresult_fetal/all_region.csv"
)