Skip to content
No description, website, or topics provided.
R
Branch: master
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
CHIKV_paper_edgeR_analysis.R
LICENSE
README.md
fccounts.matrix.infected
samplesinfo.tsv
symbols_ensembl_id.tsv

README.md

Article: Systems Analysis of Subjects Acutely Infected with Chikungunya Virus

Differential Expression Analysis between CHIKV infected patients and healthy subjects

CSBL - Computational Systems Biology Laboratory (csbiology.com)


Download the ZIP file (green button on the top right)

Then, extract files into a specific folder (i.e. working directory).

Or clone everything into your computer using bash

# Define your work directory and replace the <change here> below
workDir="~/<change here>"
cd $workDir
# Clone directory from github
git clone git@github.com:csbl-usp/csbl_chikungunya_edgeR.git

Importing libraries, support functions and loading files (R commands)

# Cleaning all workspace 
rm(list = ls())

# Importing libraries
# Define your work directory 
workDir <- "~/<change here>"
if(!require("edgeR")){ # Version 3.24.3
  if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
  BiocManager::install("edgeR", version = "3.8")
}

datafile <- paste0(workDir, "/fccounts.matrix.infected")
if(file.exists(datafile)) {
  data = read.table(datafile, header=T, row.names=1, com='')
} else {
  stop(paste0("File: ", workDir, "/fccounts.matrix.infected don't exist!"))
}

samplesinfofile <- paste0(workDir, "/samplesinfo.tsv")
if(file.exists(samplesinfofile)) {
  samplesinfo <- read.table(samplesinfofile, header = T, sep = "\t")
} else {
  stop(paste0("File: ", samplesinfofile, " don't exist!"))
}

samplesinfo <- samplesinfo[ c( which( samplesinfo$Class == "CTRL" ),
                               which( samplesinfo$Class == "INF" ) ), ]

samplesinfo <- samplesinfo[ samplesinfo$Sample %in% colnames( data ), ]
data <- data[ , c( as.character( samplesinfo$Sample ) ) ]
samplesinfo <- samplesinfo[ samplesinfo$Sample %in% colnames( data ), ]

# Parameters to determine DEGs
adjPcut <- 0.05
logFCcut <- 1
disp <- 0.1

DEG_analysis <- function(data, samplesinfo, disp, method = c("edgeR"), verbose = F) {

  if(method == "edgeR") {
    
    if(verbose) message("Filtering data expression")
    colnames(samplesinfo)
    rnaseqMatrix <- data[ , c( as.character( samplesinfo$Sample ) ) ]
    rnaseqMatrix = round(rnaseqMatrix)
    rnaseqMatrix = rnaseqMatrix[rowSums(cpm(rnaseqMatrix) > 1) >= 2,]
    repControl <- dim(samplesinfo[ which( samplesinfo$Class == "CTRL" ), ])[1]
    repInfected <- dim(samplesinfo[ which( samplesinfo$Class == "INF" ), ])[1]
    
    if(verbose) message("Creating group factor")
    conditions = factor(c(rep("CTRL", repControl), rep("INF", repInfected)))
    
    if(verbose) message("Creating DGEList object and normalize expression")
    exp_study = DGEList(counts=rnaseqMatrix, group=conditions)
    exp_study = calcNormFactors(exp_study)
    
    if(verbose) message("Analyzing DEG with dispersion")
    et = exactTest(exp_study, pair=c("INF", "CTRL"), dispersion=0.1)
    tTags = topTags(et,n=NULL)
    result_table = tTags$table
    result_table = data.frame(sampleA="INF", sampleB="CTRL", result_table)
    result_table$logFC = -1 * result_table$logFC
    
    return(result_table)
  }
}

Group comparison (Infected patients versus Healthy subjects)

table <- DEG_analysis(data = data, samplesinfo = samplesinfo, disp = 0.1, method = "edgeR", verbose = F)

symbols_unique <- read.table(file = paste0(workDir, "/symbols_ensembl_id.tsv"), header = T, sep = "\t")
table$Symbol <- rownames(table)
table <- merge(table, symbols_unique, by = "Symbol", all.x = T)

table_degs      <- table[table$FDR < adjPcut,]
table_degs_up   <- table_degs[table_degs$logFC > logFCcut,]
table_degs_down <- table_degs[table_degs$logFC < -logFCcut,]

write.table(x = data.frame(Symbol = table$geneSymbol.1, table[, c(4:7)]), file = paste0(workDir, "/edgeR_all_FDR_", adjPcut, "_log2FC_", logFCcut, "_infected_VS_control_all.tsv"), quote = F, sep = "\t", row.names = F)
write.table(x = data.frame(Symbol = table_degs_up$geneSymbol.1, table_degs_up[, c(4:7)]), file = paste0(workDir, "/edgeR_UP_FDR_", adjPcut, "_log2FC_", logFCcut, "_infected_VS_control_all.tsv"), quote = F, sep = "\t", row.names = F)
write.table(x = data.frame(Symbol = table_degs_down$geneSymbol.1, table_degs_down[, c(4:7)]), file = paste0(workDir, "/edgeR_DOWN_FDR_", adjPcut, "_log2FC_", logFCcut, "_infected_VS_control_all.tsv"), quote = F, sep = "\t", row.names = F)

message(paste0("Criteria DEG - AdjP: ", adjPcut, " and Log2FC: ", logFCcut,
               "\nNumber of Up-regulated genes: ", dim(table_degs_up)[1],
               "\nNumber of Down-regulated genes: ", dim(table_degs_down)[1]))
You can’t perform that action at this time.