-
Notifications
You must be signed in to change notification settings - Fork 1
/
gene_heatmap.Rmd
100 lines (77 loc) · 2.03 KB
/
gene_heatmap.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
---
title: "Gene expression heatmap using ARCHS4 data"
date: "`r Sys.Date()`"
output:
workflowr::wflow_html:
toc: false
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(pheatmap)
library(plotly)
```
Prepare data using base R.
```{r my_df}
lapply(
list.files("data/archs4/cancer", pattern = ".csv$", full.names = TRUE),
function(x){
cbind(gene = sub("\\.\\w+$", "", basename(x)), read.csv(x))
}
) |>
do.call("rbind", args = _) -> my_df
# Split `id` column.
do.call("rbind", strsplit(x = my_df$id, split = "\\.")) |>
as.data.frame() -> id_split
colnames(id_split) <- c('root', 'system', 'organ', 'tissue')
# Rename tissues.
cap_first <- function(x){
s <- strsplit(x, "")[[1]][1]
return(sub(s, toupper(s), x))
}
id_split$tissue <- tolower(id_split$tissue)
id_split$tissue <- sapply(id_split$tissue, cap_first)
my_df <- cbind(my_df, id_split)
# Order `my_df` by system.
my_df <- my_df[order(my_df$gene, my_df$system), ]
my_df$tissue <- factor(my_df$tissue, levels = unique(my_df$tissue))
head(my_df)
```
Back to wide format.
```{r pivot_wider}
my_df |>
dplyr::select(gene, median, tissue) |>
tidyr::pivot_wider(names_from = tissue, values_from = median) -> my_df_wide
```
Convert to matrix and plot.
```{r pheatmap, fig.width=12, fig.height=6}
my_mat <- as.matrix(my_df_wide[, -1])
row.names(my_mat) <- my_df_wide$gene
pheatmap(my_mat)
```
Create sample annotation.
```{r sample_anno}
my_order <- colnames(my_mat)
my_df |>
dplyr::select(system, tissue) |>
dplyr::distinct() |>
dplyr::arrange(match(tissue, my_order)) |>
dplyr::select(-tissue) -> sample_anno
row.names(sample_anno) <- my_order
head(sample_anno)
```
Heatmap with system annotation.
```{r pheatmap_with_anno, fig.width=12, fig.height=6}
pheatmap(my_mat, annotation_col = sample_anno)
```
Interactive heatmap.
```{r plotly_heatmap, fig.height=6, fig.width=12}
plot_ly(
x=colnames(my_mat),
y=rownames(my_mat),
z = my_mat,
colors = colorRamp(c("green", "red")),
type = "heatmap"
)
```