-
Notifications
You must be signed in to change notification settings - Fork 0
/
UseCase3.Rmd
131 lines (116 loc) · 5.5 KB
/
UseCase3.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
---
title: "MeinteR: Use cases"
output:
pdf_document: default
html_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# Use Case 3: Associating genomic DMS signatures with gene expression
```{r evaluation3, eval=FALSE}
###############################################
# Use Case 3
# Title - Association of genomic index with gene expression in bladder cancer
###############################################
library("yarrr")
library(scales)
library(ggplot2)
#-----FUNCTIONS----------------------
evaluation3 <- function(DEX, aggr, pirate.title, logfc.low=logfc.low, logfc.high=logfc.high) {
options( scipen=999)
res.df <- vector()
tt <- t.test(DEX$logFC, aggr$logFC)
res.df <- c(res.df, tt$p.value, mean(abs(aggr$logFC)),mean(abs(DEX$logFC)))
# normalized density plots
df1 <- data.frame(aggr$logFC, group="DMS∩DEx")
colnames(df1)<- c("data", "Dataset")
df2 <- data.frame(DEX$logFC, group="DEx")
colnames(df2)<- c("data", "Dataset")
df <- rbind(df1,df2)
print(
ggplot(df, aes(data, fill=Dataset)) +
geom_density(aes(y=2*(..density..)/sum(..density..)), alpha=0.8,
position="identity", lwd=0.8) +
scale_y_continuous(labels=percent_format(accuracy = .1)) +
scale_fill_manual(values=c("#9999CC", "#66CC99"))+
xlab("logfc") +
ylab("Density") +
theme(
axis.text.x = element_text(colour="grey20",size=20,angle=0,hjust=.5,vjust=.5,face="plain"),
axis.text.y = element_text(colour="grey20",size=20,angle=0,hjust=1,vjust=0,face="plain"),
axis.title.x = element_text(colour="grey20",size=20,angle=0,hjust=.5,vjust=0,face="plain"),
axis.title.y = element_text(colour="grey20",size=20,angle=90,hjust=.5,vjust=.5,face="plain"),
legend.title = element_text(colour="grey20",size=20,angle=0,hjust=.5,vjust=.5,face="plain"),
legend.text = element_text(colour="grey20",size=20,angle=0,hjust=.5,vjust=.5,face="plain"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
legend.position = c(.95, .95),
legend.justification = c("right", "top"),
legend.box.just = "left",
legend.margin = margin(5, 5, 5, 5)
)
)
logfc.0 <- subset(aggr, abs(aggr$logFC) < logfc.low)
logfc.1 <- subset(aggr, abs(aggr$logFC) >= logfc.high)
res.df <- c(res.df, nrow(logfc.0),nrow(logfc.1))
p <- t.test(logfc.0$g.index,logfc.1$g.index)
res.df <- c(res.df, mean(logfc.0$g.index),mean(logfc.1$g.index), p$p.value)
aggr$idx.bin <- aggr$g.index < 1
aggr$idx.bin[aggr$idx.bin == FALSE] <- "high"
aggr$idx.bin[aggr$idx.bin == TRUE] <- "low"
test <- paste0("p-value= ", round(p$p.value,3))
pirateplot(formula = abs(logFC) ~ idx.bin,
point.o = .2,
pal = "pony",
data = aggr,
cex.lab = 2.5,
cex.axis = 2.5,
cex.names = 2.5,
xlab = bquote('logfc'),
ylab = " ")
legend_title <- "|logfc|"
ggplot(aggr, aes(abs(logFC), g.index, color=abs(logFC))) +
geom_point(shape = 16, size = 4, show.legend = TRUE) +
theme_minimal()+
scale_color_gradient2(legend_title, low = "#73D055FF", mid= "#FDE725FF", high = "#440154FF") +
labs(x = "|logfc|", y = "Genomic index") +
theme(axis.text.x = element_text(hjust = 1, size=13, color="black"))+
theme(axis.text.y = element_text(hjust = 1, size=13, color="black"))+
theme(axis.title.y = element_text(size = rel(1.8), angle = 90)) +
stat_smooth(method="lm", se=TRUE, color="#39568CFF")+
theme(axis.title.x = element_text(size = rel(1.8), angle = 00))
aggr$score.bin <- aggr$score < 0 #if TRUE then DMS-
p <- t.test(aggr$logFC[aggr$score.bin == FALSE],aggr$logFC[aggr$score.bin == TRUE])
res.df <- c(res.df, p$p.value, mean(aggr$logFC[aggr$score.bin == FALSE]), mean(aggr$logFC[aggr$score.bin == TRUE]))
aggr$score.bin[aggr$score.bin == FALSE] <- "DMS+"
aggr$score.bin[aggr$score.bin == TRUE] <- "DMS-"
test <- paste0("p-value= ", round(p$p.value,3))
pirateplot(formula = logFC ~ score.bin,
point.o = .1,
pal = "xmen",
data = aggr,
cex.lab = 1.5,
cex.axis = 1.5,
cex.names = 1.5,
xlab="",
ylab = " ")
mtext(bquote('logfc'),side=2, font=20, cex=2, line =2)
res.df = data.frame(round(res.df,4))
abline(h=0, col="black", lwd=3, lty=2)
row.names(res.df) = c("p.beta", "abs(meanlogfc(merged)", "abs(meanlogfc(dex)", "lowlogfc", "highlogfc", "mean(logfc.low$g.index)","mean(logfc.high$g.index)", "p logfc vs g.index", "t.test(hypo$logFC, hyper$logFC)", "mean(hypo$logfc)", "mean(hyper$logfc")
}
TCGA.DEX.BLCA <- read.csv("blca_dex.csv") #set path appropriately (vignette folder)
TCGA.DEX.BLCA <- subset(TCGA.DEX.BLCA,!(TCGA.DEX.BLCA$logFC > quantile(TCGA.DEX.BLCA$logFC, probs=c(.005, .995))[2] | TCGA.DEX.BLCA$logFC < quantile(TCGA.DEX.BLCA$logFC, probs=c(.005, .995))[1]) )
mtx.blca <- read.csv(file="BLCA_signature.csv", header=TRUE, stringsAsFactors = FALSE) #set path appropriately (vignette folder)
mtx.blca <- subset(mtx.blca, abs(mtx.blca$score)>= 0.4)
aggr.blca <- merge(mtx.blca, TCGA.DEX.BLCA, by.x="name", by.y="gene")
aggr.blca <- subset(aggr.blca, select = -c(X.x))
aggr.blca <- unique(aggr.blca)
DEX <- TCGA.DEX.BLCA
aggr <- aggr.blca
pirate.title <-"TCGA-BLCA"
logfc.low = 0.1
logfc.high = 1
evaluation3(DEX=DEX, aggr=aggr, pirate.title=pirate.title, logfc.low = logfc.low, logfc.high=logfc.high)
```