In [2]:
# Notebook display defaults for inline figures (plot size and resolution).
options(
    repr.plot.width = 8,
    repr.plot.height = 6,
    repr.plot.res = 300
)

# Make sure the Snakemake output directory exists. Missing parent directories
# are created, and no warning is raised if the directory is already there.
dir.create(
    path = snakemake@output[[1]],
    recursive = TRUE,
    showWarnings = FALSE,
    mode = "0777"
)

In [3]:
suppressPackageStartupMessages({
    library(EDASeq)
    library(ggplot2)
    library(tximeta)
    library(tximport)
    library(DESeq2)
    library(pheatmap)
    library(RColorBrewer)
    library(magrittr) 
    library(tidyverse)
    library(HTSFilter)
    library(PCAtools)
    library(pheatmap)
    library(stringr)
    library(EnhancedVolcano)
    library(ggpubr)
    library(ggplotify)
    library(patchwork)
}
)

In [4]:
# Read the serialized input object and summarize it to gene level.
txi <- readRDS(snakemake@input$rds2)
gse <- summarizeToGene(txi)

# Exclude all samples of the "Hypoxia_48h_Reoxy_2h" group.
keep.samples <- gse$Sample_Group != "Hypoxia_48h_Reoxy_2h"
gse <- gse[, keep.samples]
samples <- colData(gse)

# Keep protein-coding genes only.
# Alternative that additionally drops "MT-" genes at this stage (disabled):
#keep.genes <- (gene_info$gene_biotype == "protein_coding") & str_starts(gene_info$gene_name, "MT-", negate=TRUE)
keep.genes <- rowData(gse)$gene_biotype == "protein_coding"
gse <- gse[keep.genes, ]
gene_info <- rowData(gse)

# Show a summary of the subsetted object.
gse



In [5]:
# Encode the grouping variable as a factor with "Normoxia_48h" as reference
# level, then store the updated sample table back on `gse`.
samples$Sample_Group <- relevel(
    factor(samples$Sample_Group),
    ref = "Normoxia_48h"
)
colData(gse) <- samples

**Filter expression data**

In [13]:
# Run HTSFilter on the count assay, using the sample groups as conditions;
# the threshold search grid is given by s.min / s.max / s.len.
filter <- HTSFilter(
    assay(gse, "counts"),
    conds = samples$Sample_Group,
    s.min = 5,
    s.max = 20,
    s.len = 100
)


In [14]:
# Histogram of the filtered counts on a log(count + 1) scale.
hist(
    log(filter$filteredData + 1),
    breaks = 25,
    col = "grey",
    main = "",
    xlab = "Log(counts+1)"
)

In [15]:
# Dimensions of the count matrix that passed the filter.
dim(filter[["filteredData"]])

In [16]:
# Restrict `gse` to the rows retained in the filtered count matrix.
gse <- gse[rownames(filter[["filteredData"]]), ]

In [17]:
# Build the DESeq2 data set with Sample_Group as the only covariate and run
# the DESeq2 pipeline on it.
dds <- DESeq2::DESeqDataSet(gse, design = ~1 + Sample_Group)
dds <- DESeq(dds)

# Regularized-log transform of the counts, computed blind to the design.
# Note that, despite its name, `vsn` holds rlog values.
vsn <- rlog(dds, blind = TRUE)

In [18]:
# PCA (PCAtools) on the rlog matrix, with the sample table as metadata;
# removeVar = 0.1 is passed as the variance-based variable removal setting.
pcamod <- pca(
    mat = assay(vsn),
    metadata = samples,
    removeVar = 0.1
)

In [19]:
# Scree plot of the PCA model; built first, then displayed.
pp <- screeplot(
    pcamod,
    axisLabSize = 18,
    titleLabSize = 22
)
pp

In [20]:
# PCA biplot coloured by sample group, with each group encircled by a filled
# outline and reference lines drawn through the origin.
p0 <- biplot(
    pcamod,
    colby = "Sample_Group",
    colLegendTitle = "",
    # encircle config
    encircle = TRUE,
    encircleFill = TRUE,
    hline = 0, vline = 0,
    legendPosition = "top", legendLabSize = 16, legendIconSize = 8.0
)
p0

# Pass the plot explicitly so the saved file does not depend on whichever
# ggplot object happens to be registered as last_plot().
ggsave(file.path(snakemake@output[[1]], "pca0.pdf"), plot = p0)

In [21]:
# PCA biplot without point labels, coloured by sample group. This version is
# reused later in the combined summary figure.
p.pca <- biplot(
    pcamod,
    lab = NULL,
    colby = "Sample_Group",
    hline = 0, vline = 0,
    gridlines.major = TRUE, gridlines.minor = FALSE,
    pointSize = 5,
    legendPosition = "top", legendLabSize = 10, legendIconSize = 6.0,
    drawConnectors = FALSE,
    title = "PCA"
)
p.pca

# Pass the plot explicitly so the saved file does not depend on whichever
# ggplot object happens to be registered as last_plot().
ggsave(file.path(snakemake@output[[1]], "pca1.pdf"), plot = p.pca)

In [22]:
# Distances between samples: the rlog matrix is transposed so that samples
# become the rows handed to dist().
sampleDists <- assay(vsn) %>%
    t() %>%
    dist()

# Matrix form of the distances, with rows labelled by sample ID and column
# labels removed.
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- samples$Sample_ID
colnames(sampleDistMatrix) <- NULL

# 255-step palette interpolated from the reversed 9-class "Blues" scheme.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

In [23]:
# Sample-to-sample distance heatmap.
# The clustering_distance_rows / clustering_distance_cols arguments belong to
# pheatmap(); base heatmap() does not use them for clustering, so the
# precomputed sample distances were not applied. Call pheatmap() so that
# rows and columns are clustered on `sampleDists`.
p.heat <- pheatmap(
    sampleDistMatrix,
    clustering_distance_rows = sampleDists,
    clustering_distance_cols = sampleDists,
    color = colors
)
# This figure is not written to disk. The disabled save below targets
# "pca1.pdf", which is already used for the PCA biplot.
#ggsave(file.path(snakemake@output[[1]], "pca1.pdf"))

In [24]:
# Extract the DESeq2 results table with independent filtering switched off,
# then display it.
res <- results(
    dds,
    independentFiltering = FALSE
)
res

In [25]:
# Combine the test statistics with the matching gene annotation rows.
A <- cbind(
    as.data.frame(res),
    gene_info[rownames(res), ]
)

# Drop genes without an adjusted p-value and keep `res` aligned with `A`.
A <- A[!is.na(A$padj), ]
res <- res[rownames(A), ]

In [26]:
# Remove genes whose symbol starts with "MT-".
# Genes without a symbol (NA gene_name) are kept: str_starts() yields NA for
# them, and an NA in a logical row index would insert all-NA rows or make the
# subsetting fail instead of simply keeping the gene.
keep.a <- is.na(A$gene_name) | !str_starts(A$gene_name, "MT-")
A <- A[keep.a, ]

# Keep the results object row-aligned with the annotated table.
res <- res[rownames(A), ]

In [27]:
# Volcano plot of log2 fold change against adjusted p-value. Points are
# labelled with gene symbols from `A`, which is row-aligned with `res`.
p.volcano <- EnhancedVolcano(
    res,
    lab = A$gene_name,
    subtitle = "",
    x = "log2FoldChange",
    y = "padj",
    xlab = bquote(~Log[2]~ "fold change"),
    # Significance and effect-size cut-offs used for colouring and labelling.
    pCutoff = 10e-20,
    FCcutoff = 2,
    pointSize = 2.0,
    labSize = 3.0,
    labCol = "black",
    labFace = "bold",
    boxedLabels = TRUE,
    colAlpha = 4/5,
    legendPosition = "right",
    legendLabSize = 8,
    legendIconSize = 2.0,
    drawConnectors = TRUE,
    widthConnectors = 1.0,
    col = c("black", "darkgreen", "darkgreen", "red3"),
    colConnectors = "black",
    gridlines.minor = FALSE,
    caption = ""
)
p.volcano

# Pass the plot explicitly so the saved file does not depend on whichever
# ggplot object happens to be registered as last_plot().
ggsave(file.path(snakemake@output[[1]], "volcano.pdf"), plot = p.volcano)

In [28]:
# Ensembl gene ID of the gene examined individually further down; show its
# row of the annotated results table.
m <- "ENSG00000162946"
A[m, ]

In [29]:
# Genes for the heatmap: absolute log2 fold change above 2 and adjusted
# p-value below 1e-10.
keep <- (abs(A$log2FoldChange) > 2) & (A$padj < 1E-10)
keep.names <- rownames(A)[keep]


In [30]:
# rlog values of the selected genes, with rows relabelled by gene symbol.
mat <- assay(vsn)[keep.names, ]
# Row centring is disabled here:
#mat <- mat - rowMeans(mat)
rownames(mat) <- A[keep.names, "gene_name"]

# Show how many genes and samples go into the heatmap.
dim(mat)

In [31]:
# Column annotation for the heatmap: a one-column data frame holding each
# sample's group (drop = FALSE keeps it a data frame).
group <- samples[, c("Sample_Group", "Sample_ID")]
df <- as.data.frame(group)
anno <- df[, "Sample_Group", drop = FALSE]

In [32]:
#levels(anno$Sample_Group) <- rev(levels(anno$Sample_Group))

In [33]:
# Row-scaled heatmap of the selected genes, converted to a ggplot object so it
# can be saved with ggsave() and combined with the other panels.
# `annotation_col` replaces the deprecated `annotation` argument of pheatmap.
p.heat2 <- as.ggplot(
    pheatmap(
        mat,
        scale = "row",
        annotation_col = anno,
        annotation_colors = list(
            Sample_Group = c(Normoxia_48h = "#F8766D", Hypoxia_48h = "#00BFC4")
        ),
        fontsize_row = 7,
        show_colnames = FALSE,
        cutree_cols = 2,
        treeheight_row = 0,
        main = "Heatmap, rlog expression values\n",
        annotation_legend = TRUE
    )
)

# p.heat2 is never printed, so pass it explicitly instead of relying on
# last_plot() to point at the right object.
ggsave(file.path(snakemake@output[[1]], "heatmap.pdf"), plot = p.heat2)

In [34]:
# Per-sample counts of gene `m` with the sample group attached, returned as
# data (returnData = TRUE) rather than drawn.
cc <- plotCounts(
    dds,
    gene = m,
    intgroup = "Sample_Group",
    returnData = TRUE
)

In [35]:
# Box plot with jittered points of the counts in `cc`, split by sample group.
p.box <- ggboxplot(
    cc,
    x = "Sample_Group",
    y = "count",
    color = "Sample_Group",
    width = 0.5,
    add = "jitter",
    xlab = "",
    ylab = "normalized count",
    title = "DISC1"
)
p.box <- ggpar(p.box, legend = "right")
p.box

# Pass the plot explicitly so the saved file does not depend on whichever
# ggplot object happens to be registered as last_plot().
ggsave(file.path(snakemake@output[[1]], "boxplot_disc1.pdf"), plot = p.box)

In [36]:
# Combined 2 x 2 figure: PCA and volcano plot on the top row, gene heatmap
# and box plot on the bottom row.
(p.pca + p.volcano) /
    (as.ggplot(p.heat2) + p.box)

In [37]:
# Preview the first rows of the annotated results table.
head(A, n = 6L)

In [38]:
# Path of the exported results table; `fn` is reused by the later export.
fn <- file.path(snakemake@output[[1]], "Hypoxia_vs_Normoxia.txt")

# Move the gene_id column to the front.
A <- relocate(A, "gene_id")

# Write the table with all columns as tab-separated text. The unused
# `keep.cols` assignment that used to sit here was removed; it is defined
# again right before the column subset where it is actually used.
# NOTE(review): this file is overwritten further down by a second
# write.table() to the same path after `A` is reduced to selected columns --
# confirm whether the full-column export is still wanted.
write.table(A, file = fn, quote = FALSE, sep = "\t", row.names = FALSE)

In [39]:
# Reduce the results table to the columns that are exported.
keep.cols <- c(
    "gene_id",
    "gene_name",
    "baseMean",
    "log2FoldChange",
    "padj"
)
A <- A[, keep.cols]

In [40]:
# Write the current table to `fn` as tab-separated text, without quoting and
# without row names.
write.table(
    A,
    file = fn,
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
)

In [41]:
# Display the results table as it currently stands.
A

In [42]:
# Display the row of the results table for the gene whose ID is stored in `m`.
A[m, ]