-
Notifications
You must be signed in to change notification settings - Fork 12
/
dendro_plot.R
146 lines (135 loc) · 5.69 KB
/
dendro_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#' @title Dendrograms for multiple samples/groups clustering.
#' @description Dendrograms for multiple samples/groups clustering.
#' @author wei dong
#'
#' @details The first column of \code{data} (gene identifiers) is dropped, genes
#' without any value greater than 0 are removed (missing values are ignored
#' for this filter), and the remaining matrix is transposed so that samples
#' are the objects being clustered. At least two sample columns are required.
#'
#' @return Plot: dendrogram for multiple samples clustering.
#' @param data Dataframe: All genes in all samples expression dataframe of RNA-Seq (1st-col: Genes, 2nd-col~: Samples).
#' @param dist_method Character: distance measure method. Default: "euclidean", options: "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski".
#' @param hc_method Character: hierarchical clustering method. Default: "ward.D2", options: "ward.D", "ward.D2", "single", "complete","average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC).
#' @param tree_type Character: plot tree type. Default: "rectangle", options: "rectangle", "circular", "phylogenic".
#' @param k_num Numeric: the number of groups for cutting the tree. Default: 5.
#' @param palette Character: color palette used for the group. Default: "npg", options: "npg", "aaas", "lancet", "jco", "ucscgb", "uchicago", "simpsons" and "rickandmorty".
#' @param color_labels_by_k Logical: labels colored by group. Default: TRUE, options: TRUE or FALSE.
#' @param horiz Logical: horizontal dendrogram. Default: FALSE, options: TRUE or FALSE.
#' @param label_size Numeric: tree label size. Default: 1.00, min: 0.
#' @param line_width Numeric: branches and rectangle line width. Default: 1.00, min: 0.
#' @param rect Logical: add a rectangle around groups. Default: TRUE, options: TRUE or FALSE.
#' @param rect_fill Logical: fill the rectangle. Default: TRUE, options: TRUE or FALSE.
#' @param xlab Character: title of the xlab. Default: "Samples".
#' @param ylab Character: title of the ylab. Default: "Height".
#' @param ggTheme Character: ggplot2 theme. Default: "theme_light", options: "theme_default", "theme_bw", "theme_gray", "theme_light", "theme_linedraw", "theme_dark", "theme_minimal", "theme_classic", "theme_void", "theme_test".
#'
#' @import ggplot2
#' @importFrom stats dist hclust as.dendrogram
#' @importFrom factoextra fviz_dend
#' @export
#'
#' @examples
#' # 1. Library TOmicsVis package
#' library(TOmicsVis)
#'
#' # 2. Use example dataset gene_expression
#' data(gene_expression)
#' head(gene_expression)
#'
#' # 3. Default parameters
#' dendro_plot(gene_expression)
#'
#' # 4. Set palette = "aaas"
#' dendro_plot(gene_expression, palette = "aaas")
#'
#' # 5. Set tree_type = "circular"
#' dendro_plot(gene_expression, tree_type = "circular")
#'
dendro_plot <- function(data,
                        dist_method = "euclidean",
                        hc_method = "ward.D2",
                        tree_type = "rectangle",
                        k_num = 5,
                        palette = "npg",
                        color_labels_by_k = TRUE,
                        horiz = FALSE,
                        label_size = 1.00,
                        line_width = 1.00,
                        rect = TRUE,
                        rect_fill = TRUE,
                        xlab = "Samples",
                        ylab = "Height",
                        ggTheme = "theme_light"
                       ){
  # -> 1. Prepare the samples x genes table
  data <- as.data.frame(data)
  # Drop the gene identifier column; drop = FALSE keeps a data frame even when
  # a single sample column is left, so the check below can report it clearly.
  data <- data[, -1, drop = FALSE]
  if (ncol(data) < 2L) {
    stop(
      "`data` must contain a gene column followed by at least two sample ",
      "columns.",
      call. = FALSE
    )
  }
  # Keep genes with at least one value > 0. na.rm = TRUE matters: without it a
  # row holding an NA produces an NA index, which would replace that gene by an
  # all-NA row instead of keeping or dropping it.
  expressed <- rowSums(data > 0, na.rm = TRUE) > 0
  data <- data[expressed, , drop = FALSE]
  # Transpose so that each row is one sample (the objects to cluster).
  data <- as.data.frame(t(data))
  # <- 1. Prepare the samples x genes table

  # -> 2. Resolve the ggplot2 theme
  # Done before clustering so an unsupported name fails early with a clear
  # message rather than a late "object 'gg_theme' not found" error.
  if (!is.character(ggTheme) || length(ggTheme) != 1L || is.na(ggTheme)) {
    stop("`ggTheme` must be a single character string.", call. = FALSE)
  }
  supported_themes <- c(
    "theme_default", "theme_bw", "theme_gray", "theme_light",
    "theme_linedraw", "theme_dark", "theme_minimal", "theme_classic",
    "theme_void", "theme_test"
  )
  # switch() only evaluates the matching branch; the unnamed last argument is
  # the fall-through for names that are not supported.
  gg_theme <- switch(ggTheme,
    theme_default = theme(),
    theme_bw = theme_bw(),
    theme_gray = theme_gray(),
    theme_light = theme_light(),
    theme_linedraw = theme_linedraw(),
    theme_dark = theme_dark(),
    theme_minimal = theme_minimal(),
    theme_classic = theme_classic(),
    theme_void = theme_void(),
    theme_test = theme_test(),
    stop(
      "Unknown `ggTheme`: \"", ggTheme, "\". Supported values: ",
      paste(supported_themes, collapse = ", "), ".",
      call. = FALSE
    )
  )
  # <- 2. Resolve the ggplot2 theme

  # -> 3. Cluster the samples
  # Distance matrix between samples (see ?stats::dist for valid methods).
  dists <- dist(data, method = dist_method)
  # Hierarchical clustering (see ?stats::hclust for valid methods).
  hc <- hclust(dists, method = hc_method)
  # Create dendrogram object
  dend <- as.dendrogram(hc)
  # <- 3. Cluster the samples

  # -> 4. Plot
  # Warnings raised while building the plot are suppressed, as before.
  # `palette` drives both the branch/label colors and the rectangle borders.
  p <- suppressWarnings(
    factoextra::fviz_dend(dend,
      k = k_num,
      color_labels_by_k = color_labels_by_k,
      show_labels = TRUE,
      repel = TRUE,
      type = tree_type,
      rect = rect,
      rect_border = palette,
      rect_fill = rect_fill,
      horiz = horiz,
      cex = label_size,
      lwd = line_width,
      main = NULL,
      xlab = xlab,
      ylab = ylab,
      k_colors = palette
    ) +
      gg_theme
  )
  # <- 4. Plot

  p
}