-
Notifications
You must be signed in to change notification settings - Fork 0
/
tutorial.Rmd
133 lines (113 loc) · 3.71 KB
/
tutorial.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
---
title: "Spline regression with DESeq2"
author: "Frederik Ziebell"
output:
html_document:
df_print: paged
highlight: tango
---
# Preparations
```{r}
suppressPackageStartupMessages({
library("recount3")
library("DESeq2")
library("splines")
library("cowplot")
library("tidyverse")
})
theme_set(theme_cowplot())
```
# Create data set
```{r message=FALSE, warning=FALSE}
# # download data using recount3
# proj_info <- available_projects() %>%
# filter(project=="SRP144178")
# se <- create_rse(proj_info)
se <- readRDS("SRP144178.rds")
# extract donor and timepoint info
colData(se) <- colData(se) %>%
as_tibble(rownames="rowname") %>%
mutate(donor=str_extract(sra.sample_title,"^[:digit:]+_[:digit:]+")) %>%
mutate(time=str_extract(sra.sample_title,"[:digit:]+hr$") %>% str_replace("hr","") %>% as.double()) %>%
select(rowname, donor, time) %>%
column_to_rownames("rowname") %>%
DataFrame()
# subset to two donors
se <- se[,se$donor %in% c("1741_006","1741_009")]
# make and filter DESeqDataSet
dds <- DESeqDataSet(se, design=~donor)
keep <- rowSums(assay(dds)>=10)>=5
dds <- dds[keep,]
```
# Inspect data set
```{r}
dds
colData(dds)
```
# Spline fitting
```{r message=FALSE, warning=FALSE}
# create basis of natural cubic splines with interior knots at 8h and 16h
spline_basis <- ns(dds$time, knots=c(8,16))
colnames(spline_basis) <- str_c("fun",colnames(spline_basis))
colData(dds) <- cbind(colData(dds), spline_basis)
# add basis functions to design
design(dds) <- ~ donor + donor:fun1 + donor:fun2 + donor:fun3
# differential testing
dds <- DESeq(dds, reduced=~donor+fun1+fun2+fun3, test="LRT", parallel=T)
```
# Top genes with differential time couse
```{r}
res <- results(dds, tidy=T) %>%
select(gene_id=row, baseMean, stat, pvalue, padj) %>%
filter(padj<.01) %>%
arrange(pvalue)
head(res)
```
# Plot top hit
```{r}
gene_id <- res$gene_id[1]
gene_name <- rowData(dds)$gene_name[rownames(dds)==gene_id]
# estimated model coefficients
coefs <- mcols(dds)[gene_id,str_subset(colnames(mcols(dds)),"^Intercept|^donor|^fun")] %>%
as_tibble() %>%
pivot_longer(everything()) %>%
deframe()
# timepoints at form which the spline basis was formed
timepts <- seq(attr(spline_basis,"Boundary.knots")[1], attr(spline_basis,"Boundary.knots")[2], length.out=100)
# create splines corresponding to original spline basis
# but with more points at which the splines are evaluated
spline_basis_detailed <- ns(
x = timepts,
knots = attr(spline_basis,"knots"),
intercept = attr(spline_basis,"intercept"),
Boundary.knots = attr(spline_basis,"Boundary.knots"))
# timecourse of the gene
gene_timecourse <- counts(dds, normalized=T)[gene_id,] %>%
enframe(name="sample_id", value="norm_count") %>%
dplyr::mutate(log_norm_count=log2(1+norm_count)) %>%
dplyr::left_join(colData(dds) %>% as_tibble(rownames="sample_id"), by="sample_id") %>%
arrange(donor, time)
# make fitted spline for per donor
spline_donor1741_006 <- tibble(
time = timepts,
count = coefs["Intercept"] + drop(spline_basis_detailed %*% coefs[str_c("donor1741_006.fun",1:ncol(spline_basis_detailed))]),
donor="1741_006"
) %>%
dplyr::mutate(log_norm_count=log2(1+2^count))
spline_donor1741_009 <- tibble(
time = timepts,
count = coefs["Intercept"] + coefs["donor_1741_009_vs_1741_006"] + drop(spline_basis_detailed %*% coefs[str_c("donor1741_009.fun",1:ncol(spline_basis_detailed))]),
donor="1741_009"
) %>%
dplyr::mutate(log_norm_count=log2(1+2^count))
gene_timecourse %>%
ggplot(aes(time, log_norm_count, color=donor)) +
geom_point(alpha=.6) +
geom_line(data=spline_donor1741_006) +
geom_line(data=spline_donor1741_009) +
ggsci::scale_color_jco() +
labs(x="time [hr]", y="log2(norm. count)", title=gene_name)
```
```{r}
sessionInfo()
```