# I) Overview
- DESeq2 analysis of KLF6 E1 knockout in young HSPC
- Will use ashr for LFC shrinkage
- Will generate Wald statistic to rank genes for GSEA

In [None]:
# Output tables written later with relative file names land in this directory.
setwd("/RNAseq/DESeq2/")

In [None]:
# Fix the random number generator state for the steps that follow.
set.seed(seed = 123)

In [None]:
library(DESeq2)
library(tidyr)
library(made4)
library(ComplexHeatmap)
library(circlize)
library(ggplot2)
library(RColorBrewer)
library(pheatmap)
library(RColorBrewer)
library(pheatmap)
library(gridExtra)

In [4]:
# Show the R version, platform and attached package versions for this session.
sessionInfo()

R version 4.0.2 (2020-06-22)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Debian GNU/Linux 10 (buster)

Matrix products: default
BLAS/LAPACK: /opt/conda/envs/diffbind/lib/libopenblasp-r0.3.10.so

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

attached base packages:
 [1] grid      parallel  stats4    stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] gridExtra_2.3               pheatmap_1.0.12            
 [3] ggplot2_3.3.3               circlize_0.4.11            
 [5] ComplexHeatmap_2.6.2        made4_1.64.0               
 [7] scatterplot3d_0.3-41        gplots_3.1.1               
 [9] RColorBrewer_1.1-2          tidyr_1.1.2                
[11] DESeq2_1.30.

# II) Perform differential analysis


## A) Set directories, levels, and designs, and read in SampleInfo table

In [5]:
# Folder holding the per-sample HTSeq count files.
directory <- "/Counts/"

# Two-column, tab-separated lookup table: gene ID -> gene symbol.
# The file has no header row, so the column names are supplied here.
En_to_GeneSym <- read.table(
    "/genome//gencode.v19.chr_patch_hapl_scaff_GeneID2Symbol",
    sep = "\t",
    col.names = c("ID", "GeneSym")
)

In [6]:
# Read the per-sample metadata sheet (tab-separated, first row = column names).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change how the file is parsed.
SampleInfo <- read.table("/RNAseq/SampleInfo_EnhKO.txt", sep = "\t",
                         header = TRUE, stringsAsFactors = FALSE)

In [7]:
# Count file name for each sample: "<SampleID>.txt".
SampleInfo[["counts"]] <- paste(SampleInfo[["SampleID"]], "txt", sep = ".")

In [8]:
# One row per contrast to test: `comparison` is compared against `control`.
comparison_table <- data.frame(comparison = "EnhKO", control = "NTC")

In [9]:
# Level order for the `condition` factor; the first entry is the reference.
mylevels <- c("NTC", "EnhKO")

In [10]:
# Design formula kept as text; it is converted with as.formula() inside DESEQ_DEG.
mydesign <- "~ donorID + condition"

## B) Read in Counts and create table

In [11]:
# List the count files in `directory` whose names end in ".txt".
# The pattern is a regular expression, not a shell glob: the dot is escaped
# and the match is anchored at the end of the name, so files such as
# "x.txt.bak" or "atxt" are not picked up.
sampleFiles <- list.files(directory, pattern = "\\.txt$")

In [12]:
# Assemble the sample sheet. The first two columns are the sample name and
# its count file name; the remaining columns are per-sample covariates
# copied from SampleInfo.
sampleTable <- data.frame(
    sampleName = SampleInfo[, "SampleID"],
    fileName   = SampleInfo[, "counts"],
    condition  = SampleInfo[, "condition"],
    Sex        = SampleInfo[, "Sex"],
    Age        = SampleInfo[, "Age"],
    AgeGroup   = SampleInfo[, "AgeGroup"],
    Replicate  = SampleInfo[, "Replicate"],
    Batch      = SampleInfo[, "Batch"],
    donorID    = SampleInfo[, "donorid"],
    seqBatch   = SampleInfo[, "SeqBatch"]
)

In [13]:
# Treat both batch identifiers as categorical rather than numeric.
sampleTable[c("seqBatch", "Batch")] <- lapply(
    sampleTable[c("seqBatch", "Batch")],
    as.factor
)

In [14]:
# Display the assembled sample table for a visual check.
sampleTable

sampleName,fileName,condition,Sex,Age,AgeGroup,Replicate,Batch,donorID,seqBatch
<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<fct>,<chr>,<fct>
AR0021,AR0021.txt,NTC,female,26,young,E1,1,A3539,1
AR0022,AR0022.txt,EnhKO,female,26,young,E1,1,A3539,1
AR0023,AR0023.txt,NTC,female,31,young,E2,1,A3618,1
AR0024,AR0024.txt,EnhKO,female,31,young,E2,1,A3618,1
AR0025,AR0025.txt,NTC,female,31,young,E3,1,A3618,1
AR0026,AR0026.txt,EnhKO,female,31,young,E3,1,A3618,1


# III) Main function

In [15]:
# Run the DESeq2 workflow and write annotated result tables.
#
# Builds a DESeqDataSet from the HTSeq count files listed in `sampleTable`,
# fits the model, writes the rlog-transformed counts to "allRLD_counts.txt",
# and then, for every row of `comparison_table`, writes
#   "<control>_vs_<comparison>_sig_ann.txt"      genes with padj < 0.05 and
#                                                |shrunken log2FC| > 1
#                                                (only written if non-empty)
#   "<control>_vs_<comparison>_GSEA.rnk"         gene symbol + Wald statistic,
#                                                sorted by ascending statistic
#   "<control>_vs_<comparison>_all_res_ann.txt"  all shrunken results plus the
#                                                Wald statistic
# All files are written relative to the current working directory.
#
# Besides its arguments, the function reads these variables defined outside
# of it: `mydesign` (design formula as text), `directory` (folder with the
# count files), `mylevels` (level order for `condition`) and `En_to_GeneSym`
# (lookup table with columns "ID" and "GeneSym").
#
# Args:
#   sampleTable:      sample sheet passed to DESeqDataSetFromHTSeqCount().
#   comparison_table: data frame with columns `comparison` and `control`;
#                     one contrast is evaluated per row.
#
# Returns:
#   NULL, invisibly. The function is called for its printed summaries and
#   the files it writes.
DESEQ_DEG <- function(sampleTable, comparison_table){
    suppressMessages(library(DESeq2))
    stopifnot(all(c("comparison", "control") %in% colnames(comparison_table)))
    design_formula <- as.formula(mydesign)

    ### Make ddsHTSeq object
    dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
                                      directory = directory,
                                      design = design_formula)

    ### Re-level so that the first entry of mylevels is the reference level
    dds$condition <- factor(dds$condition, levels = mylevels)

    ### Perform differential analysis
    dds_Diff <- DESeq(dds)

    ### Get regularized log counts
    rld <- rlog(dds_Diff, blind = FALSE)
    rld_counts_ann <- as.data.frame(assay(rld))
    rld_counts_ann$Ensembl <- rownames(rld_counts_ann)
    # merge() keeps only IDs present in both tables, so genes without an
    # entry in En_to_GeneSym are dropped from the annotated output.
    rld_counts_ann <- merge(En_to_GeneSym, rld_counts_ann,
                            by.x = "ID", by.y = "Ensembl")
    write.table(rld_counts_ann, "allRLD_counts.txt",
                sep = "\t", quote = FALSE, row.names = FALSE)

    ### Get results for each contrast
    # seq_len() yields an empty sequence for a zero-row table, whereas
    # 1:nrow() would iterate over c(1, 0).
    for (i in seq_len(nrow(comparison_table))) {
        comparison <- as.character(comparison_table$comparison[i])
        control <- as.character(comparison_table$control[i])
        # Same contrast for the shrunken LFCs and the Wald statistic:
        # c(factor, numerator level, denominator level).
        contrast <- c("condition", comparison, control)
        # Output file prefix is "<control>_vs_<comparison>".
        name <- paste(control, comparison, sep = "_vs_")

        res <- lfcShrink(dds_Diff, contrast = contrast, type = "ashr")
        res_ann <- as.data.frame(res)
        print(dim(res_ann))

        ### Significant genes: padj < 0.05 and |shrunken log2FC| > 1
        sig <- as.data.frame(na.omit(res))
        sig <- sig[which(sig$padj < 0.05 & abs(sig$log2FoldChange) > 1), ]
        sig_ann <- data.frame(ID = rownames(sig), sig, stringsAsFactors = FALSE)
        sig_ann <- merge(En_to_GeneSym, sig_ann, by = "ID")
        sig_filename <- paste0(name, "_sig_ann.txt")
        if (nrow(sig_ann) > 0) {
            write.table(sig_ann, sig_filename,
                        sep = "\t", row.names = FALSE, quote = FALSE)
            to_print_list <- list("Comparison" = name,
                                  "NumSigGenes" = nrow(sig_ann))
            print(to_print_list)
        } else {
            no_deg <- list("Comparison" = name, "NumSigGenes" = "No_Sig_DEG")
            print(no_deg)
        }

        #### Perform wald, to use for GSEA
        wald <- results(dds_Diff, contrast = contrast, test = "Wald")
        wald_df <- as.data.frame(wald)
        # For each row of res_ann, look up the row of wald_df with the same
        # gene ID. match(x, table) returns positions in `table`, so res_ann
        # must be the first argument for the result to index wald_df.
        res_ann$wald <- wald_df[match(rownames(res_ann), rownames(wald_df)), "stat"]
        res_ann <- data.frame(ID = rownames(res_ann), res_ann,
                              stringsAsFactors = FALSE)
        res_ann <- merge(En_to_GeneSym, res_ann, by = "ID")

        GSEA <- na.omit(res_ann[, c("GeneSym", "wald")])
        GSEA <- GSEA[order(GSEA$wald), ]
        GSEA_filename <- paste0(name, "_GSEA.rnk")
        all_res_filename <- paste0(name, "_all_res_ann.txt")
        write.table(GSEA, GSEA_filename,
                    sep = "\t", row.names = FALSE, quote = FALSE)
        write.table(res_ann, all_res_filename,
                    sep = "\t", row.names = FALSE, quote = FALSE)
    }
    invisible(NULL)
}


In [None]:
# Run the full analysis for every contrast listed in comparison_table.
DESEQ_DEG(sampleTable = sampleTable, comparison_table = comparison_table)