<a href="https://colab.research.google.com/github/almedida/thesis/blob/main/limma_ttest_10k.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install BiocManager (the Bioconductor installer) only when it is missing
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# Install the Bioconductor packages limma and qvalue, then attach them
BiocManager::install(c("limma", "qvalue"))
library(limma)
library(qvalue)

# Install pacman only when it is missing, then attach it.
# library() attaches a single package per call; the former second positional
# argument (`devtools`) was matched to `help` and never loaded anything.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)

# p_load() installs (if needed) and attaches the remaining CRAN packages
p_load("tidyverse", "matrixTests", "gtools", "VennDiagram")



In [None]:
# Gene expression analysis for liver with two groups (Tech and CBA)

# Load the liver expression table into the data frame liverExpr
liverExpr <- read.table("liver.csv", header = TRUE, sep = ",")

# Group label per sample: 6 x "Tech" followed by 6 x "CBA"
# (presumably matches the column order of liver.csv -- TODO confirm).
# "Tech" is listed first in `levels`, which makes it the reference level.
liverList <- factor(
  x = c(rep("Tech", 6), rep("CBA", 6)),
  levels = c("Tech", "CBA")
)

# Design matrix with an intercept column and a "liverListCBA" column
# (the formula keeps the intercept; there is no `0 +` term).
design <- model.matrix(~liverList)




In [None]:
# Differential expression pipeline for liver:
# linear model fit -> empirical Bayes step -> per-gene test decisions
fit <- eBayes(lmFit(liverExpr, design))
results <- decideTests(fit)

# Save the summary table of the test decisions to disk
write.csv(summary(results), file = "summaryLiver2.csv")


In [None]:
# View(fit, coef = "liverListCBA")
# Export the topTable ranking for the "liverListCBA" coefficient.
# `number` is spelled out in full rather than relying on partial matching of
# `n`; 42575 is the row cap (presumably the number of rows in liver.csv --
# TODO confirm).
write.csv(
  topTable(fit, coef = "liverListCBA", number = 42575),
  "topLiver2.csv"
)

In [None]:

# Mean-difference plot for the "liverListCBA" coefficient, with points
# marked according to the decideTests() outcome stored in `results`
plotMD(
  fit,
  coef = "liverListCBA",
  status = results
)


In [None]:
##########################################
# Venn diagrams comparing kidney and liver using the intersection method
# on the adjusted p-values from the limma test.
# The counts below were computed in Excel and are entered by hand.
#
# draw.pairwise.venn() draws onto the current grid page, so a fresh page is
# started before each diagram; otherwise the three diagrams are painted on
# top of each other.

# adjusted p-values <= 0.01
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 21245, area2 = 7520, cross.area = 3723,
  category = c("Kidney", "Liver"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.05
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 27350, area2 = 12989, cross.area = 8144,
  category = c("Kidney", "Liver"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.1
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 30168, area2 = 16461, cross.area = 11382,
  category = c("Kidney", "Liver"), fill = c("Red", "Green")
)

In [None]:
##################################################
# Same liver grouping, but with CBA as the first (reference) level
liverList2 <- factor(
  rep(c("Tech", "CBA"), each = 6),
  levels = c("CBA", "Tech")   # CBA first, so CBA is the reference level
)

# Design matrix with an intercept column and a "liverList2Tech" column
design2 <- model.matrix(~ liverList2)

In [None]:
# Differential expression pipeline for the CBA-first factor ordering

# Fit against design2 (CBA as reference level). Fitting against `design`
# here would simply repeat the Tech-first analysis.
fit2 <- lmFit(liverExpr, design2)
fit2 <- eBayes(fit2)
results2 <- decideTests(fit2)
summary(results2)
dim(results2)

# With CBA as the reference level, the non-intercept column of design2 is
# named "liverList2Tech", so that is the coefficient to inspect.
topTable(fit2, coef = "liverList2Tech", number = 30)
plotMD(fit2, coef = "liverList2Tech", status = results2)

In [None]:
##################################################################################
# Either level ordering gives the same results.

# Gene expression analysis for kidney with two groups (Tech and CBA)

# Load the kidney expression table into the data frame kidneyExpr
kidneyExpr <- read.table("kidney.csv", header = TRUE, sep = ",")
dim(kidneyExpr)

# Group label per sample: 5 x "Tech" followed by 5 x "CBA"
# (presumably matches the column order of kidney.csv -- TODO confirm).
# "Tech" is listed first in `levels`, which makes it the reference level.
kidneyList <- factor(
  x = c(rep("Tech", 5), rep("CBA", 5)),
  levels = c("Tech", "CBA")
)

head(kidneyList)

# Design matrix with an intercept column and a "kidneyListCBA" column
# (the formula keeps the intercept; there is no `0 +` term).
designKidney <- model.matrix(~kidneyList)

In [None]:
##########################################
# Venn diagrams using the intersection method on the adjusted p-values:
# limma moderated t-test vs BH-adjusted t-test, for each experiment.
# The counts below were computed in Excel and are entered by hand.
#
# A fresh grid page is started before each diagram so that the diagrams are
# not painted on top of each other.

# experiment 1 - kidney
# adjusted p-values <= 0.01
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 21245, area2 = 19092, cross.area = 18560,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.05
# (labels corrected: both sets are kidney results, split by test method)
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 27350, area2 = 26162, cross.area = 25658,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.1
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 30168, area2 = 29334, cross.area = 28879,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)


In [None]:
# experiment 2 - liver
# Venn diagrams of the limma moderated t-test vs the BH-adjusted t-test.
# A fresh grid page is started before each diagram so that the diagrams are
# not painted on top of each other.

# adjusted p-values <= 0.01
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 7520, area2 = 6159, cross.area = 5739,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.05
# (labels corrected: both sets are liver results, split by test method)
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 12989, area2 = 12114, cross.area = 11385,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)

# adjusted p-values <= 0.1
grid::grid.newpage()
draw.pairwise.venn(
  area1 = 16461, area2 = 15862, cross.area = 14977,
  category = c("Moderated t-test", "BH t-test"), fill = c("Red", "Green")
)

In [None]:
# Differential expression pipeline for kidney:
# linear model fit -> empirical Bayes step -> per-gene test decisions
fitKidney <- lmFit(kidneyExpr, designKidney)
fitKidney <- eBayes(fitKidney)
resultsKidney <- decideTests(fitKidney)
write.csv(summary(resultsKidney), "summaryKidney.csv")

# Export the topTable ranking for the "kidneyListCBA" coefficient.
# Up to 42575 rows are written (not just a top-30 list); `number` is spelled
# out in full rather than relying on partial matching of `n`.
write.csv(
  topTable(fitKidney, coef = "kidneyListCBA", number = 42575),
  "topKidney2.csv"
)

# Mean-difference plot with points marked by the decideTests() outcome
plotMD(fitKidney, coef = "kidneyListCBA", status = resultsKidney)

