Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
if (getRversion() >= "2.15.1") {
utils::globalVariables(c(
"value", "variable", "Clonotype", "Count", "Sample", "Clone",
"Value", "Value.mean", "Value.min", "Value.max", "quantile",
"Kmer", "Group", "Size", "Val", "Grouping.var", "Xrep", "Yrep",
"Clone.num", "Q", "Clonotypes", "Sample_subj", "Seq.count",
"Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
"Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
"AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
"name", "label", "."
))
}
##### Utility functions #####
.rem_legend <- function(.p) {
.p + theme(legend.position = "none")
}
.rem_xlab <- function(.p) {
.p + theme(axis.title.x = element_blank())
}
.rem_ylab <- function(.p) {
.p + theme(axis.title.y = element_blank())
}
.colourblind_vector <- function() {
c("#FF4B20", "#FFB433", "#C6FDEC", "#7AC5FF", "#0348A6")
}
.colourblind_discrete <- function(.n, .colour = FALSE) {
cs <- .colourblind_vector()
if (.colour) {
scale_colour_manual(values = colorRampPalette(cs)(.n))
} else {
scale_fill_manual(values = colorRampPalette(cs)(.n))
}
}
.tweak_fill <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_fill_hue())
}
scale_fill_brewer(palette = palette_name)
}
.tweak_col <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_colour_hue())
}
scale_color_brewer(palette = palette_name)
}
theme_cleveland2 <- function(rotate = TRUE) {
if (rotate) {
theme(
panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(
colour = "grey70",
linetype = "dashed"
)
)
} else {
theme(
panel.grid.major.x = element_line(
colour = "grey70",
linetype = "dashed"
), panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()
)
}
}
##### The one and only - the ultimate vis() function #####
#' One function to visualise them all
#'
#' @concept vis
#'
#' @name vis
#'
#' @import ggplot2
#' @importFrom factoextra fviz_cluster fviz_dend fviz_pca_ind
#' @importFrom grDevices colorRampPalette
#'
#' @description Output from every function in immunarch can be visualised with a
#' single function - \code{vis}. The \code{vis} automatically detects
#' the type of the data and draws a proper visualisation. For example, output
#' from the \code{repOverlap} function will be identified as repertoire overlap values
#' and respective visualisation will be chosen without any additional arguments.
#' See "Details" for the list of available visualisations.
#'
#' @param .data Pass the output from any immunarch analysis tool to \code{vis()}.
#' @param ... Any other arguments, see the "Details" section for specific visualisation functions.
#'
#' @details
#' List of available visualisations for different kinds of data.
#'
#' Basic analysis:
#'
#' - Exploratory analysis results (from \link{repExplore}) - see \link{vis.immunr_exp_vol};
#'
#' - Clonality statistics (from \link{repClonality}) - see \link{vis.immunr_homeo}.
#'
#' Overlaps and public clonotypes:
#'
#' - Overlaps (from \link{repOverlap}) using heatmaps, circos plots, polar area plots - see \link{vis.immunr_ov_matrix};
#'
#' - Overlap clustering (from \link{repOverlapAnalysis}) - see \link{vis.immunr_hclust};
#'
#' - Repertoire incremental overlaps (from \link{repOverlap}) - see \link{vis.immunr_inc_overlap};
#'
#' - Public repertoire abundance (from \link{pubRep}) - vis \link{vis.immunr_public_repertoire}.
#'
#' Gene usage:
#'
#' - Gene usage statistics (from \link{geneUsage}) using bar plots, box plots - see \link{vis.immunr_gene_usage};
#'
#' - Gene usage distances (from \link{geneUsageAnalysis}) using heatmaps, circos plots, polar area plots - see \link{vis.immunr_ov_matrix};
#'
#' - Gene usage clustering (from \link{geneUsageAnalysis}) - see \link{vis.immunr_hclust}.
#'
#' Diversity estimation:
#'
#' - Diversity estimations (from \link{repDiversity}) - see \link{vis.immunr_chao1}.
#'
#' Advanced analysis:
#'
#' - Repertoire dynamics (from \link{trackClonotypes}) - see \link{vis.immunr_dynamics};
#'
#' - Sequence logo plots of amino acid distributions (from \link{kmer_profile}) - see \link{vis_seqlogo};
#'
#' - Kmers distributions (from \link{getKmers}) - see \link{vis.immunr_kmer_table};
#'
#' - Mutation networks (from mutationNetwork) - Work In Progress on vis.immunr_mutation_network;
#'
#' - CDR3 amino acid properties, e.g., biophysical (from cdrProp) - Work In Progress on vis.immunr_cdr_prop.
#'
#' Additionaly, we provide a wrapper functions for visualisations of common data types:
#'
#' - Any data frames or matrices using heatmaps - see \link{vis_heatmap} and \link{vis_heatmap2};
#'
#' - Any data frames or matrices using circos plots - see \link{vis_circos}.
#'
#' @return
#' A ggplot2, pheatmap or circlize object.
#'
#' @seealso \link{fixVis} for precise manipulation of plots.
#'
#' @examples
#' # Load the test data
#' data(immdata)
#'
#' # Compute and visualise:
#' ov <- repOverlap(immdata$data)
#' vis(ov)
#'
#' gu <- geneUsage(immdata$data)
#' vis(gu)
#'
#' dv <- repDiversity(immdata$data)
#' vis(dv)
#' @export vis
vis <- function(.data, ...) {
UseMethod("vis")
}
##### Overlap & heatmaps, circos plots and polar area plots #####
#' Repertoire overlap and gene usage visualisations
#'
#' @concept overlap
#'
#' @name vis.immunr_ov_matrix
#'
#' @aliases vis.immunr_ov_matrix vis.immunr_gu_matrix
#'
#' @description Visualise matrices with overlap values or gene usage distances among samples.
#' For details see links below.
#'
#' @param .data Output from \link{repOverlap} or \link{geneUsageAnalysis}.
#'
#' @param .plot A string specifying the plot type:
#'
#' - "heatmap" for heatmaps using \link{vis_heatmap};
#'
#' - "heatmap2" for heatmaps using \link{vis_heatmap2};
#'
#' - "circos" for circos plots using \link{vis_circos};
#'
#' @param ... Other arguments are passed through to the underlying plotting function:
#'
#' - "heatmap" - passes arguments to \link{vis_heatmap};
#'
#' - "heatmap2" - passes arguments to \link{vis_heatmap2} and \link{heatmap} from the "pheatmap" package;
#'
#' - "circos" - passes arguments to \link{vis_circos} and \link{chordDiagram} from the "circlize" package;
#'
#' @return
#' A ggplot2, pheatmap or circlize object.
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' vis(ov)
#' vis(ov, "heatmap")
#' vis(ov, "heatmap2")
#' vis(ov, "circos")
#' @export
vis.immunr_ov_matrix <- function(.data, .plot = c("heatmap", "heatmap2", "circos"), ...) {
args <- list(...)
if (!(".title" %in% names(args))) {
args$.title <- "Repertoire overlap"
}
.plot <- .plot[1]
heatfun <- vis_heatmap
if (.plot == "heatmap2") {
heatfun <- vis_heatmap2
} else if (.plot == "circos") {
heatfun <- vis_circos
}
do.call(heatfun, c(list(.data = .data), args))
}
#' @export
vis.immunr_gu_matrix <- function(.data, .plot = c("heatmap", "heatmap2", "circos"), ...) {
args <- list(...)
if (!(".title" %in% names(args))) {
args$.title <- "Gene usage"
}
.plot <- .plot[1]
heatfun <- vis_heatmap
if (.plot == "heatmap2") {
heatfun <- vis_heatmap2
} else if (.plot == "circos") {
heatfun <- vis_circos
}
do.call(heatfun, c(list(.data = .data), args))
}
#' Visualisation of matrices and data frames using ggplo2-based heatmaps
#'
#' @concept vis
#'
#' @name vis_heatmap
#'
#' @aliases vis_heatmap
#'
#' @description Fast and easy visualisations of matrices or data frames
#' with functions based on the ggplot2 package.
#'
#' @param .data Input object: a matrix or a data frame.
#'
#' If matrix: column names and row names (if presented) will be used as names for labs.
#'
#' If data frame: the first column will be used for row names and removed from the data.
#' Other columns will be used for values in the heatmap.
#'
#' @param .text If TRUE then plot values in the heatmap cells. If FALSE do not plot values,
#' just plot coloured cells instead.
#'
#' @param .scientific If TRUE then use the scientific notation for numbers (e.g., "2.0e+2").
#'
#' @param .signif.digits Number of significant digits to display on plot.
#'
#' @param .text.size Size of text in the cells of heatmap.
#'
#' @param .axis.text.size Size of text on the axis labels.
#'
#' @param .labs A character vector of length two with names for x-axis and y-axis, respectively.
#'
#' @param .title The The text for the plot's title.
#'
#' @param .leg.title The The text for the plots's legend. Provide NULL to remove the legend's title completely.
#'
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#'
#' @param .na.value Replace NA values with this value. By default they remain NA.
#'
#' @param .transpose Logical. If TRUE then switch rows and columns.
#'
#' @param ... Other passed arguments.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{vis}, \link{repOverlap}.
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' vis_heatmap(ov)
#' gu <- geneUsage(immdata$data, "hs.trbj")
#' vis_heatmap(gu)
#' @export
vis_heatmap <- function(.data, .text = TRUE, .scientific = FALSE, .signif.digits = 2,
.text.size = 4, .axis.text.size = NULL,
.labs = c("Sample", "Sample"), .title = "Overlap",
.leg.title = "Overlap values", .legend = TRUE,
.na.value = NA, .transpose = FALSE, ...) {
if (has_class(.data, "data.frame")) {
names <- .data[[1]]
.data <- as.matrix(.data[, -1])
row.names(.data) <- names
if (.transpose) {
.data <- t(.data)
}
} else if (is.null(dim(.data))) {
.data <- as.matrix(.data)
if (.transpose) {
.data <- t(.data)
}
}
if (is.null(colnames(.data))) {
colnames(.data) <- paste0("C", seq_len(ncol(.data)))
}
if (is.null(row.names(.data))) {
row.names(.data) <- paste0("C", seq_len(nrow(.data)))
}
.data[is.na(.data)] <- .na.value
tmp <- as.data.frame(.data)
tmp$name <- row.names(.data)
m <- reshape2::melt(tmp, id.var = c("name"))
m[, 1] <- factor(m[, 1], levels = rev(rownames(.data)))
m[, 2] <- factor(m[, 2], levels = colnames(.data))
format <- if (.scientific) "g" else "fg"
m$label <- formatC(m$value, format = format, digits = .signif.digits)
p <- ggplot(m, aes(x = variable, y = name, fill = value))
p <- p + geom_tile(aes(fill = value), colour = "white")
if (.text) {
p <- p + geom_text(aes(label = label), size = .text.size)
}
p <- p + scale_fill_distiller(palette = "RdBu")
p <- p + ggtitle(.title) +
guides(fill = guide_colourbar(title = .leg.title)) +
xlab(.labs[1]) + ylab(.labs[2]) + coord_fixed() +
theme_linedraw() + theme(
axis.text.x =
element_text(angle = 90, vjust = .5, size = .axis.text.size)
) + theme(
axis.text.y =
element_text(size = .axis.text.size)
) + scale_x_discrete(expand = c(0, 0)) + scale_y_discrete(expand = c(0, 0))
if (!.legend) {
p <- .rem_legend(p)
}
if (is.null(.labs)) {
p <- .rem_xlab(.rem_ylab(p))
}
p
}
#' Visualisation of matrices using pheatmap-based heatmaps
#'
#' @concept vis
#'
#' @importFrom pheatmap pheatmap
#'
#' @name vis_heatmap2
#'
#' @description Visualise matrices with the functions based on the \link[pheatmap]{pheatmap}
#' package with minimum amount of arguments.
#'
#' @param .data Input matrix. Column names and row names (if presented) will be used as names for labs.
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' @param .title The text for the plot's title (same as the "main" argument in \link[pheatmap]{pheatmap}).
#'
#' @param .color A vector specifying the colors (same as the "color" argument in \link[pheatmap]{pheatmap}).
#' Pass NA to use the default pheatmap colors.
#'
#' @param ... Other arguments for the \link[pheatmap]{pheatmap} function.
#'
#' @return
#' A pheatmap object.
#'
#' @seealso \link{vis}, \link{repOverlap}
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' vis_heatmap2(ov)
#' @export
vis_heatmap2 <- function(.data, .meta = NA, .by = NA, .title = NA, .color = colorRampPalette(c("#67001f", "#d6604d", "#f7f7f7", "#4393c3", "#053061"))(1024), ...) {
args <- list(...)
if (!is.na(.by)[1]) {
if (!is.na(.meta)[1]) {
args[["annotation_col"]] <- .meta %>%
tibble::column_to_rownames(var = "Sample") %>%
dplyr::select(tidyselect::any_of(.by))
}
}
args[["mat"]] <- .data
args[["main"]] <- .title
if (!is.na(.color)[1]) {
args[["color"]] <- .color
}
do.call(pheatmap, args)
}
#' Visualisation of matrices using circos plots
#'
#' @concept vis
#'
#' @importFrom circlize chordDiagram
#'
#' @name vis_circos
#'
#' @description Visualise matrices with the \link{chordDiagram} function
#' from the circlize package.
#'
#' @param .data Input matrix.
#'
#' @param .title The The text for the title of the plot.
#'
#' @param ... Other arguments passed to \link{chordDiagram} from the 'circlize' package.
#'
#' @return
#' A circlize object.
#'
#' @seealso \link{vis}, \link{repOverlap}.
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' vis(ov, .plot = "circos")
#' @export
vis_circos <- function(.data, .title = NULL, ...) {
if (has_class(.data, "tibble")) {
.data <- as.data.frame(.data)
}
chordDiagram(.data, ...)
}
# vis_radar <- function(.data, .by = NA, .meta = NA,
# .ncol = NA, .which = NA,
# .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .expand = c(.25, 0),
# .title = NA, .subtitle = NULL,
# .legend = FALSE, .leg.title = NULL, ...) {
# stop("vis_radar() is under complety re-working. Please use other functions such as vis_heatmap.
# Sorry for the inconvenience! Please contact us if you need vis_radar() ASAP.")
#
# .data <- melt(.data)
# colnames(.data) <- c("Source", "Target", "Value")
# .data$Value[is.na(.data$Value)] <- 0
#
# group_res <- process_metadata_arguments(.data, .by, .meta, .data.sample.col = "Source")
# group_res2 <- process_metadata_arguments(.data, .by, .meta, .data.sample.col = "Target")
# .data$Source.gr <- group_res$group_column
# .data$Target.gr <- group_res2$group_column
#
# .data_proc <- .data %>%
# dplyr::group_by(Source.gr, Target.gr) %>%
# dplyr::summarise(
# Value = mean(Value, na.rm = TRUE),
# Value.min = quantile(Value, .errorbars[1], na.rm = TRUE),
# Value.max = quantile(Value, .errorbars[2], na.rm = TRUE)
# )
# .data_proc <- .data_proc[!is.na(.data_proc$Source.gr), ]
#
# step <- length(unique(.data_proc$Source.gr))
#
# if (!is.na(.which[1])) {
# .data_proc <- .data_proc[.data_proc$Source.gr %in% .which, ]
# }
#
# if (is.na(.leg.title)) {
# if (group_res$is_grouped) {
# .leg.title <- "Group"
# } else {
# .leg.title <- "Sample"
# }
# }
#
# if (!group_res$is_grouped) {
# .errorbars.off <- TRUE
# }
#
# if (is.na(.ncol)) {
# .ncol <- round(length(unique(.data_proc$Source.gr))**.5)
# }
#
# .data_proc$Value[.data_proc$Source.gr == .data_proc$Target.gr] <- 0
# .data_proc$Value.min[.data_proc$Source.gr == .data_proc$Target.gr] <- NA
# .data_proc$Value.max[.data_proc$Source.gr == .data_proc$Target.gr] <- NA
#
# ps <- lapply(seq(1, nrow(.data_proc), step), function(l) {
# p <- ggplot(.data_proc[l:(l + step - 1), ], aes(x = Target.gr, y = Value, fill = Target.gr)) +
# geom_bar(colour = "black", stat = "identity")
#
# if (!.errorbars.off) {
# p <- p +
# geom_errorbar(aes(ymin = Value.min, ymax = Value.max), width = 0.2, position = position_dodge(.9))
# }
#
# p <- p +
# coord_polar() +
# scale_y_continuous(expand = .expand) +
# theme_linedraw() +
# .colourblind_discrete(step)
#
# if (!.legend) {
# p <- p + guides(fill = "none")
# } else {
# p <- p + guides(fill = guide_legend(title = .leg.title))
# }
#
# p <- p +
# labs(title = .data_proc$Source.gr[l], subtitle = .subtitle)
#
# p
# })
# do.call(grid.arrange, c(ps, ncol = .ncol, top = .title))
# }
##### Complex overlaps - top overlap & public repertoires #####
#' Visualise incremental overlaps
#'
#' @concept overlap
#'
#' @importFrom dplyr rename
#'
#' @name vis.immunr_inc_overlap
#'
#' @param .data Output from the \link{repOverlap} function that uses "top" methods.
#'
#' @param .target Index of a repertoire to plot. Omitted if .grid is TRUE.
#'
#' @param .grid Logical. If TRUE then plot all similarities in a grid.
#'
#' @param .ncol Numeric. Number of columns in the resulting grid.
#'
#' @param ... Not used here.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{repOverlap}
#'
#' @examples
#' data(immdata)
#' tmp <- repOverlap(immdata$data[1:4], "inc+overlap", .verbose.inc = FALSE, .verbose = FALSE)
#' vis(tmp, .target = 1)
#' vis(tmp, .grid = TRUE)
#' @export
vis.immunr_inc_overlap <- function(.data, .target = 1, .grid = FALSE, .ncol = 2, ...) {
data_is_bootstrapped <- !is.null(attr(.data, "bootstrap"))
if (.grid) {
if (data_is_bootstrapped) {
sample_names <- colnames(.data[[1]][[1]])
} else {
sample_names <- colnames(.data[[1]])
}
p_list <- lapply(seq_along(sample_names), function(i_name) {
p <- vis(.data, .target = i_name) + ggtitle(sample_names[i_name]) + theme_pubr(legend = "right") + theme_cleveland2()
p
})
return(do.call(wrap_plots, c(p_list, list(ncol = .ncol))))
} else {
if (data_is_bootstrapped) {
sample_names <- colnames(.data[[1]][[1]])
.data <- lapply(.data, function(mat_list) {
lapply(mat_list, function(mat) {
replace(mat, lower.tri(mat, TRUE), NA)
})
})
.data <- reshape2::melt(.data, na.rm = TRUE)
names(.data) <- c("Sample_subj", "Sample", "Overlap", "N", "Seq.count")
filtered_data <- .data[.data$Sample_subj == sample_names[.target], ]
filtered_data$Seq.count <- as.numeric(filtered_data$Seq.count)
filtered_data <- filtered_data %>%
dplyr::group_by(Sample_subj, Sample, Seq.count, Overlap) %>%
dplyr::summarise(
Value = mean(Overlap, na.rm = TRUE),
Value.min = quantile(Overlap, 0.025, na.rm = TRUE),
Value.max = quantile(Overlap, 0.975, na.rm = TRUE)
)
} else {
sample_names <- colnames(.data[[1]])
.data <- lapply(.data, function(mat) replace(mat, lower.tri(mat, TRUE), NA))
.data <- reshape2::melt(.data, na.rm = TRUE)
names(.data) <- c("Sample_subj", "Sample", "Overlap", "Seq.count")
filtered_data <- .data[.data$Sample_subj == sample_names[.target], ]
filtered_data$Seq.count <- as.numeric(filtered_data$Seq.count)
filtered_data <- filtered_data %>%
dplyr::rename(Value = Overlap)
}
p <- ggplot(aes(x = Seq.count, y = Value, col = Sample, group = Sample), data = filtered_data) +
geom_line()
if (data_is_bootstrapped) {
p <- p +
geom_ribbon(aes(group = Sample, ymin = Value.min, ymax = Value.max, fill = Sample), alpha = 0.15)
}
p <- p +
theme_pubr(legend = "right") + theme_cleveland2() +
ggtitle("Similarity of repertoires") +
xlab("Sequence count") + ylab("Similarity")
return(p)
}
}
#' Public repertoire visualisation
#'
#' @concept pubrep
#'
#' @name vis.immunr_public_repertoire
#'
#' @param .data Public repertoire, an output from \link{pubRep}.
#' @param .plot A string specifying the plot type:
#'
#' - "freq" for visualisation of the distribution of occurrences of clonotypes
#' and their frequencies using \link{vis_public_frequencies}.
#'
#' - "clonotypes" for visualisation of public clonotype frequenciy correlations between pairs of
#' samples using \link{vis_public_clonotypes}
#' @param ... Further arguments passed \link{vis_public_frequencies} or \link{vis_public_clonotypes},
#' depending on the ".plot" argument.
#'
#' @return
#' A ggplot2 object.
#'
#' @examples
#' data(immdata)
#' immdata$data <- lapply(immdata$data, head, 300)
#' pr <- pubRep(immdata$data, .verbose = FALSE)
#' vis(pr, "freq")
#' vis(pr, "freq", .type = "none")
#'
#' vis(pr, "clonotypes", 1, 2)
#' @export
vis.immunr_public_repertoire <- function(.data, .plot = c("freq", "clonotypes"), ...) {
.plot <- .plot[1]
if (.plot == "freq") {
vis_public_frequencies(.data, ...)
} else if (.plot == "clonotypes") {
vis_public_clonotypes(.data, ...)
} else {
stop('Error: unknown argument for .plot, please provide one of the following: "freq", "clonotypes" or "???"')
}
}
#' Visualise sharing of clonotypes among samples
#'
#' @concept pubrep
#'
#' @importFrom UpSetR upset fromExpression
#'
#' @name vis.immunr_public_statistics
#'
#' @description Visualise public clonotype frequencies.
#'
#' @param .data Public repertoire - an output from the \link{pubRep} function.
#'
#' @param ... Other arguments passsed directly to \link{upset}.
#'
#' @return
#' A ggplot2 object.
#'
#' @examples
#' data(immdata)
#' immdata$data <- lapply(immdata$data, head, 2000)
#' pr <- pubRep(immdata$data, .verbose = FALSE)
#' pubRepStatistics(pr) %>% vis()
#' @export
vis.immunr_public_statistics <- function(.data, ...) {
upsetr_data <- as.list(.data$Count)
names(upsetr_data) <- .data$Group
upset(fromExpression(upsetr_data), ...)
}
#' Public repertoire visualisation
#'
#' @concept pubrep
#'
#' @name vis_public_frequencies
#'
#' @description Visualise public clonotype frequencies.
#'
#' @param .data Public repertoire - an output from the \link{pubRep} function.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .type Character. Either "boxplot" for plotting distributions of frequencies,
#' "none" for plotting everything, or "mean" for plotting average values only.
#'
#' @return
#' A ggplot2 object.
#'
#' @examples
#' data(immdata)
#' immdata$data <- lapply(immdata$data, head, 500)
#' pr <- pubRep(immdata$data, .verbose = FALSE)
#' vis(pr, "freq", .type = "boxplot")
#' vis(pr, "freq", .type = "none")
#' vis(pr, "freq", .type = "mean")
#' vis(pr, "freq", .by = "Status", .meta = immdata$meta)
vis_public_frequencies <- function(.data, .by = NA, .meta = NA,
.type = c("boxplot", "none", "mean")) {
.type <- .type[1]
# ToDo: make it work with V genes
melted_pr <- reshape2::melt(.data, id.vars = colnames(.data)[1:(match("Samples", colnames(.data)))])
colnames(melted_pr)[1] <- "Sequence"
colnames(melted_pr)[ncol(melted_pr) - 1] <- "Sample"
group_res <- process_metadata_arguments(melted_pr, .by, .meta)
group_column <- group_res$name
melted_pr$Group <- group_res$group_column
if (.type == "boxplot") {
p <- ggplot() +
geom_boxplot(aes(y = value, x = as.factor(Samples), col = Group), data = melted_pr) +
scale_y_log10() +
ggtitle("Distribution of public clonotype frequencies") +
xlab("Number of shared samples") +
ylab("Public clonotype frequencies")
} else if (.type == "mean") {
melted_pr <- melted_pr %>%
group_by(Sequence, Samples) %>%
summarise(Value = mean(value, na.rm = TRUE)) %>%
as.data.table()
p <- ggplot() +
geom_jitter(aes(x = Value, y = Samples), data = melted_pr) +
scale_x_log10() +
ggtitle("Mean frequencies of public clonotypes") +
ylab("Number of shared samples") +
xlab("Public clonotype frequencies")
} else {
# ToDo: add the .verbose argument for this
message("Warning! Visualising ", sum(!is.na(melted_pr$value)), ' points. Too many points may take a while to visualise depending on your hardware. We highly recommend you to use other types: "mean" or "boxplot"')
p <- ggplot() +
geom_jitter(aes(x = value, y = Samples, col = Group), data = melted_pr) +
scale_x_log10() +
ggtitle("Distribution of public clonotype frequencies") +
ylab("Number of shared samples") +
xlab("Public clonotype frequencies")
}
# tmp = as_tibble(data.frame(N_samples = .data$Samples,
# Value = rowMeans(public_matrix(.data), na.rm = TRUE)))
# ggplot(aes(x=Value, y=N_samples, color=N_samples), data=tmp) + geom_jitter() +
# scale_x_log10() +
# xlab("Public clonotype frequency") + ylab("Number of samples") +
# ggtitle("Public clonotype frequencies") +
# theme_bw()
p + theme_pubr(legend = "right") + theme_cleveland2()
}
#' Visualisation of public clonotypes
#'
#' @concept pubrep
#'
#' @name vis_public_clonotypes
#'
#' @importFrom grid rectGrob gpar
#' @importFrom stats lm
#' @importFrom patchwork wrap_plots plot_annotation
#'
#' @description Visualise correlation of public clonotype frequencies in pairs of repertoires.
#'
#' @param .data Public repertoire data - an output from the \link{pubRep} function.
#'
#' @param .x.rep Either indices of samples or character vector of sample names
#' for the x-axis. Must be of the same length as ".y.rep".
#'
#' @param .y.rep Either indices of samples or character vector of sample names
#' for the y-axis. Must be of the same length as ".x.rep".
#'
#' @param .title The text for the title of the plot.
#'
#' @param .ncol An integer number of columns to print in the grid of pairs of repertoires.
#'
#' @param .point.size.modif An integer value that is a modifier of the point size.
#' The larger the number, the larger the points.
#'
#' @param .cut.axes If TRUE then axes limits become shorter.
#'
#' @param .density If TRUE then displays density plot for distributions of clonotypes
#' for each sample. If FALSE then removes density plot from the visualisation.
#'
#' @param .lm If TRUE then fit a linear model and displays an R adjusted coefficient
#' that shows how similar samples are in terms of shared clonotypes.
#'
#' @param .radj.size An integer value, that defines the size of the The text
#' for the R adjusted coefficient.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{pubRep}, \link{vis.immunr_public_repertoire}
#'
#' @examples
#' data(immdata)
#' pr <- pubRep(immdata$data, .verbose = FALSE)
#' vis(pr, "clonotypes", 1, 2)
vis_public_clonotypes <- function(.data, .x.rep = NA, .y.rep = NA,
.title = NA, .ncol = 3,
.point.size.modif = 1, .cut.axes = TRUE,
.density = TRUE, .lm = TRUE, .radj.size = 3.5) {
.shared.rep <- .data
mat <- public_matrix(.shared.rep)
if (is.na(.x.rep) && is.na(.y.rep)) {
ps <- list()
for (i in 1:ncol(mat)) {
for (j in 1:ncol(mat)) {
ps <- c(ps, list(vis_public_clonotypes(.shared.rep, i, j, "",
.point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm, .radj.size = .radj.size
)))
}
}
do.call(wrap_plots, c(ps, ncol = .ncol)) + plot_annotation(title = .title)
} else if (is.na(.x.rep)) {
ps <- lapply(1:ncol(mat), function(i) {
vis_public_clonotypes(.shared.rep, i, .y.rep, "",
.point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm
)
})
do.call(wrap_plots, c(ps, ncol = .ncol)) + plot_annotation(title = .title)
} else if (is.na(.y.rep)) {
ps <- lapply(1:ncol(mat), function(j) {
vis_public_clonotypes(.shared.rep, .x.rep, j, "",
.point.size.modif = .point.size.modif,
.cut.axes = .cut.axes, .density = .density, .lm = .lm
)
})
do.call(wrap_plots, c(ps, ncol = .ncol)) + plot_annotation(title = .title)
} else {
if (!is.character(.x.rep)) {
.x.rep <- colnames(mat)[.x.rep]
}
if (!is.character(.y.rep)) {
.y.rep <- colnames(mat)[.y.rep]
}
if (.x.rep == .y.rep) {
return(rectGrob(gp = gpar(col = "white")))
}
df <- data.frame(cbind(mat[, .x.rep], mat[, .y.rep]))
df[, 1] <- df[, 1] / sum(df[, 1], na.rm = TRUE)
df[, 2] <- df[, 2] / sum(df[, 2], na.rm = TRUE)
df_full <- df
df <- df[!is.na(df[, 1]) & !is.na(df[, 2]), ]
freq <- log10(sqrt(as.numeric(df[, 1]) * df[, 2])) / 2
names(df) <- c("Xrep", "Yrep")
names(df_full) <- c("Xrep", "Yrep")
pnt.cols <- log(df[, 1] / df[, 2])
suppressWarnings(pnt.cols[pnt.cols > 0] <- pnt.cols[pnt.cols > 0] / max(pnt.cols[pnt.cols > 0]))
suppressWarnings(pnt.cols[pnt.cols < 0] <- -pnt.cols[pnt.cols < 0] / min(pnt.cols[pnt.cols < 0]))
if (.cut.axes) {
mat.lims <- c(min(as.matrix(df_full), na.rm = TRUE), max(as.matrix(df_full), na.rm = TRUE))
} else {
mat.lims <- c(min(as.matrix(df_full), na.rm = TRUE), 1)
}
empty <- ggplot() +
geom_point(aes(1, 1), colour = "white") +
theme(
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank()
)
min_df <- min(floor(log10(min(df_full[, 1], na.rm = TRUE))), floor(log10(min(df_full[, 2], na.rm = TRUE))))
max_df <- max(trunc(log10(max(df_full[, 1], na.rm = TRUE))), trunc(log10(max(df_full[, 2], na.rm = TRUE))))
breaks_values <- 10^seq(min_df, 1)
breaks_labels <- format(log10(breaks_values), scientific = FALSE)
grey_col <- "#CCCCCC"
points <- ggplot() +
geom_point(aes(x = Xrep, y = Yrep, size = freq, fill = pnt.cols), data = df, shape = 21) +
scale_radius(range = c(.point.size.modif, .point.size.modif * 6)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
theme_linedraw() +
scale_fill_distiller(palette = "RdBu") +
scale_x_log10(breaks = breaks_values, labels = breaks_labels, lim = mat.lims, expand = c(.015, .015)) +
scale_y_log10(breaks = breaks_values, labels = breaks_labels, lim = mat.lims, expand = c(.015, .015)) +
theme(legend.position = "none") +
xlab(.x.rep) +
ylab(.y.rep)
if (!is.na(.title)) {
points <- points + ggtitle(.title)
}
if (.lm) {
adj.R.sq <- summary(lm(Yrep ~ Xrep, df))$adj.
points <- points +
geom_smooth(aes(x = Xrep, y = Yrep), method = "lm", data = df, fullrange = TRUE, colour = "grey20", size = .5) +
geom_text(aes(
x = max(df_full, na.rm = TRUE) / 4,
y = min(df_full, na.rm = TRUE),
label = paste0("R^2(adj.) = ", as.character(round(adj.R.sq, 2)))
), size = .radj.size)
# ggtitle(paste0("R^2(adj.) = ", as.character(round(adj.R.sq, 2))))
}
if (.density) {
df2 <- data.frame(Clonotype = df_full[!is.na(df_full[, 1]) & is.na(df_full[, 2]), 1], Type = "unique", stringsAsFactors = FALSE)
df2 <- rbind(df2, data.frame(Clonotype = df_full[!is.na(df_full[, 1]) & !is.na(df_full[, 2]), 1], Type = "public", stringsAsFactors = FALSE))
top_plot <- ggplot() +
geom_density(aes(x = Clonotype, fill = Type), colour = "grey25", data = df2, alpha = .3) +
scale_x_log10(breaks = 10^(seq(min_df, 0)), lim = mat.lims, expand = c(.12, .015)) +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
) +
scale_fill_manual(values = colorRampPalette(c(.colourblind_vector()[5], grey_col))(2))
df2 <- data.frame(Clonotype = df_full[!is.na(df_full[, 2]) & is.na(df_full[, 1]), 2], Type = "unique", stringsAsFactors = FALSE)
df2 <- rbind(df2, data.frame(Clonotype = df_full[!is.na(df_full[, 2]) & !is.na(df_full[, 1]), 2], Type = "public", stringsAsFactors = FALSE))
right_plot <- ggplot() +
geom_density(aes(x = Clonotype, fill = Type), colour = "grey25", data = df2, alpha = .3) +
scale_x_log10(breaks = 10^(seq(min_df, 0)), lim = mat.lims, expand = c(.12, .015)) +
coord_flip() +
theme_bw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
) +
scale_fill_manual(values = colorRampPalette(c(.colourblind_vector()[1], grey_col))(2))
wrap_plots(top_plot, empty, points, right_plot, ncol = 2, nrow = 2, widths = c(4, 1), heights = c(1, 4))
} else {
points
}
}
}
##### Gene usage & histogram plot, boxplot #####
#' Histograms and boxplots (general case / gene usage)
#'
#' @concept gene_usage
#'
#' @name vis.immunr_gene_usage
#'
#' @description Visualise distributions of genes using heatmaps or other plots.
#'
#' @param .data Output from the \link{geneUsage} function.
#'
#' @param .plot String specifying the plot type:
#'
#' - "hist" for histograms using \link{vis_hist};
#'
#' - "heatmap" for heatmaps using \link{vis_heatmap};
#'
#' - "heatmap2" for heatmaps using \link{vis_heatmap2};
#'
#' - "circos" for circos plots using \link{vis_circos}.
#'
#' @param ... Other arguments passed to corresponding functions depending on the plot type:
#'
#' - "hist" - passes arguments to \link{vis_hist};
#'
#' - "box" - passes arguments to \link{vis_box};
#'
#' - "heatmap" - passes arguments to \link{vis_heatmap};
#'
#' - "heatmap2" - passes arguments to \link{vis_heatmap2} and \link{heatmap} from the "pheatmap" package;
#'
#' - "circos" - passes arguments to \link{vis_circos} and \link{chordDiagram} from the "circlize" package.
#'
#' @return
#' A ggplot2 object, pheatmap or circlize object.
#'
#' @examples
#' data(immdata)
#'
#' gu <- geneUsage(immdata$data[[1]])
#' vis(gu)
#'
#' gu <- geneUsage(immdata$data)
#' vis(gu, .by = "Status", .meta = immdata$meta)
#' vis(gu, "box", .by = "Status", .meta = immdata$meta)
#' @seealso \link{geneUsage}
#'
#' @export
vis.immunr_gene_usage <- function(.data, .plot = c("hist", "box", "heatmap", "heatmap2", "circos"), ...) {
.plot <- .plot[1]
if (.plot == "hist") {
vis_hist(.data, ...)
} else if (.plot == "box") {
vis_box(.data,
.melt = TRUE, .grouping.var = "Gene",
.labs = c("Gene", "Usage"), .title = "Gene usage",
.subtitle = NULL, ...
)
} else if (.plot == "heatmap") {
row.names(.data) <- .data[[1]]
.data <- t(as.matrix(.data[2:ncol(.data)]))
vis_heatmap(.data, ...)
} else if (.plot == "heatmap2") {
row.names(.data) <- .data[[1]]
.data <- t(as.matrix(.data[2:ncol(.data)]))
vis_heatmap2(.data, ...)
} else if (.plot == "circos") {
row.names(.data) <- .data[[1]]
.data <- t(as.matrix(.data[2:ncol(.data)]))
vis_circos(.data, ...)
} else {
stop("Error: Unknown value of the .plot parameter. Please provide one of the following: 'hist', 'box', 'heatmap', 'heatmap2', 'circos'.")
}
}
#' Visualisation of distributions using histograms
#'
#' @concept vis
#'
#' @name vis_hist
#'
#' @description Visualisation of distributions using ggplot2-based histograms.
#'
#' @param .data Input matrix or data frame.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .title The text for the title of the plot.
#'
#' @param .ncol A number of columns to display. Provide NA (by default) if you want the function
#' to automatically detect the optimal number of columns.
#'
#' @param .points A logical value defining whether points will be visualised or not.
#'
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#'
#' @param .coord.flip If TRUE then swap x- and y-axes.
#'
#' @param .grid If TRUE then plot separate visualisations for each sample.
#'
#' @param .labs A character vector of length two with names for x-axis and y-axis, respectively.
#'
#' @param .melt If TRUE then apply \link{melt} to the ".data" before plotting.
#' In this case ".data" is supposed to be a data frame with the first character column reserved
#' for names of genes and other numeric columns reserved to counts or frequencies of genes.
#' Each numeric column should be associated with a specific repertoire sample.
#'
#' @param .legend If TRUE then plots the legend. If FALSE removes the legend from the plot.
#' If NA automatically detects the best way to display legend.
#'
#' @param .add.layer Addditional ggplot2 layers, that added to each plot in the output plot or grid of plots.
#'
#' @param ... Is not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = FALSE} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = FALSE}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{vis.immunr_gene_usage}, \link{geneUsage}
#'
#' @examples
#' data(immdata)
#' imm_gu <- geneUsage(immdata$data[[1]])
#' vis(imm_gu,
#' .plot = "hist", .add.layer =
#' theme(axis.text.x = element_text(angle = 75, vjust = 1))
#' )
#' imm_gu <- geneUsage(immdata$data[1:4])
#' vis(imm_gu,
#' .plot = "hist", .grid = TRUE, .add.layer =
#' theme(axis.text.x = element_text(angle = 75, vjust = 1))
#' )
#' @export
vis_hist <- function(.data, .by = NA, .meta = NA, .title = "Gene usage", .ncol = NA,
.points = TRUE, .test = TRUE, .coord.flip = FALSE,
.grid = FALSE, .labs = c("Gene", NA),
.melt = TRUE, .legend = NA, .add.layer = NULL, ...) {
res <- .data
if (.melt) {
res <- reshape2::melt(res)
res <- res[1:nrow(res), ]
if (ncol(.data) == 2) {
res[[2]] <- "Data"
}
colnames(res) <- c("Gene", "Sample", "Freq")
}
if (is.na(.labs[2])) {
.labs[2] <- "Frequency"
if (sum(res$Freq > 1, na.rm = TRUE) > 0) {
.labs[2] <- "Count"
}
}
if (length(unique(res$Sample)) == 1) {
p <- ggplot() +
geom_bar(aes(x = Gene, y = Freq, fill = Freq), data = res, stat = "identity", colour = "black")
expand_vec <- c(.02, 0)
p <- p + theme_pubr() + rotate_x_text() + theme_cleveland2() +
labs(x = .labs[1], y = .labs[2], title = .title, fill = .labs[2]) +
scale_fill_distiller(palette = "RdBu")
if (.coord.flip) {
p <- p + coord_flip() + scale_y_continuous(expand = c(.005, 0))
} else {
p <- p + scale_y_continuous(expand = c(.02, 0))
}
if (!is.na(.legend)) {
if (!.legend) {
p <- p + guides(fill = "none")
}
} else {
p <- p + guides(fill = "none")
}
return(p + .add.layer)
} else {
if (.grid) {
if (is.na(.legend)) {
.legend <- FALSE
}
if (is.na(.ncol)) {
.ncol <- round(length(unique(res$Sample))^.5)
}
if (!is.na(.by[1])) {
group_res <- process_metadata_arguments(res, .by, .meta, "Sample")
group_column <- group_res$name
res$Group <- group_res$group_column
res <- split(res, res$Group)
ps <- list()
for (i in seq_along(res)) {
res[[i]]$Value <- res[[i]]$Freq
ps[[i]] <- vis_bar(
.data = res[[i]], .by = .by, .meta = .meta,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = 3.5, .errorbar.width = 0.45,
.defgroupby = "Sample", .grouping.var = "Gene",
.labs = .labs, .title = names(res)[i], .subtitle = NULL,
.legend = FALSE, .leg.title = NA
) +
.add.layer
}
p <- do.call(wrap_plots, c(ps, ncol = .ncol))
p <- p + plot_annotation(title = .title)
return(p)
} else {
res <- split(res, res$Sample)
ps <- list()
for (i in seq_along(res)) {
ps[[i]] <- vis_hist(res[[i]],
.title = names(res)[i], .ncol = NA, .coord.flip = .coord.flip,
.grid = FALSE, .labs = c("Gene", NA),
.melt = FALSE, .legend = .legend, .add.layer = .add.layer, ...
)
}
p <- do.call(wrap_plots, c(ps, ncol = .ncol))
p <- p + plot_annotation(title = .title)
return(p)
}
} else {
res$Value <- res$Freq
p <- vis_bar(
.data = res, .by = .by, .meta = .meta,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = 3.5, .errorbar.width = 0.45,
.defgroupby = "Sample", .grouping.var = "Gene",
.labs = .labs,
.title = .title, .subtitle = NULL,
.legend = TRUE, .leg.title = NA
)
return(p + .add.layer)
}
}
}
#' Flexible box-plots for visualisation of distributions
#'
#' @concept vis
#'
#' @name vis_box
#'
#' @description Visualisation of distributions using ggplot2-based boxplots.
#'
#' @param .data Input matrix or data frame.
#'
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .title The text for the title of the plot.
#' @param .labs Character vector of length two with names for x-axis and y-axis, respectively.
#' @param .melt If TRUE then apply \link{melt} to the ".data" before plotting.
#' In this case ".data" is supposed to be a data frame with the first character column reserved
#' for names of genes and other numeric columns reserved to counts or frequencies of genes.
#' Each numeric column should be associated with a specific repertoire sample.
#'
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param .defgroupby A name for the column with sample names.
#' @param .grouping.var A name for the column to group by.
#' @param .subtitle The The text for the plot's subtitle.
#' @param .leg.title The The text for the plots's legend. Provide NULL to remove the legend's title completely.
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#' @param .legend.pos Positions of the legend: either "top", "bottom", "left" or "right".
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{vis.immunr_gene_usage}, \link{geneUsage}
#'
#' @examples
#' vis_box(data.frame(Sample = sample(c("A", "B", "C"), 100, TRUE), Value = rnorm(100)), .melt = FALSE)
#' @export
vis_box <- function(.data, .by = NA, .meta = NA, .melt = TRUE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5, .defgroupby = "Sample", .grouping.var = "Group",
.labs = c("X", "Y"), .title = "Boxplot (.title argument)",
.subtitle = "Subtitle (.subtitle argument)",
.legend = NA, .leg.title = "Legend (.leg.title argument)", .legend.pos = "right") {
if (.melt) {
res <- reshape2::melt(.data)
res <- res[1:nrow(res), ]
if (ncol(.data) == 2) {
res[[2]] <- "Data"
}
colnames(res) <- c(.grouping.var, "Sample", "Value")
.data <- res
}
group_res <- process_metadata_arguments(.data, .by, .meta, .defgroupby)
group_column <- group_res$name
.data$Group <- group_res$group_column
if (!(.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var <- .data[[.grouping.var]]
.data_proc <- .data %>%
dplyr::group_by(Grouping.var, Group)
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] <- "Group"
} else {
.labs[1] <- "Sample"
}
}
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title <- "Group"
} else {
.leg.title <- "Sample"
}
}
if (group_res$is_grouped) {
# p = ggplot(aes(x = Grouping.var, y = Value, fill = Group, color=Group, group = Group), data = .data) +
# geom_boxplot(position = "dodge", col = "black")
p <- ggplot(aes(x = Grouping.var, y = Value, fill = Group), data = .data) +
geom_boxplot(position = "dodge", col = "black")
if (is.na(.legend)) {
if (.grouping.var == "Group") {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
} else if (.legend == FALSE) {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
if (.points) {
p <- p +
geom_point(color = "black", position = position_jitterdodge(0.05), size = 1)
}
if (.test) {
if (.grouping.var == "Group") {
comparisons <- list()
for (i in 1:(nrow(.data_proc) - 1)) {
for (j in (i + 1):nrow(.data_proc)) {
# if ((.data$Grouping.var[i] == .data$Grouping.var[i]) && (.data$Group[i] != .data$Group[j])) {
comparisons <- c(comparisons, list(c(i, j)))
# }
}
}
p_df <- compare_means(Value ~ Group, .data, comparisons = comparisons, p.adjust.method = "holm")
y_max <- max(.data$Value)
p.value.y.coord <- rep(y_max, nrow(p_df))
step.increase <- (1:nrow(p_df)) * (y_max / 10)
p.value.y.coord <- p.value.y.coord + step.increase
p_df <- p_df %>%
mutate(
y.coord = p.value.y.coord,
p.adj = format.pval(p.adj, digits = 1)
)
p <- p + geom_signif(
data = p_df,
aes(xmin = group1, xmax = group2, annotations = p.adj, y_position = y.coord),
manual = TRUE, tip_length = 0.03, size = .5, inherit.aes = FALSE
)
} else {
# Seems fine...
# p_df = compare_means(Value ~ Group, group.by = "Grouping.var", method = "kruskal.test", .data, p.adjust.method = "holm")
# print(p_df)
p <- p +
stat_compare_means(aes(label = ..p.adj..),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
}
}
p <- p + .tweak_fill(length(unique(.data$Group)))
} else {
p <- ggplot() +
geom_boxplot(aes(x = Grouping.var, y = Value, fill = Sample), position = "dodge", data = .data, col = "black")
if (is.na(.legend)) {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
p <- p + .colourblind_discrete(length(unique(.data$Sample)))
}
p + theme_pubr(legend = .legend.pos) +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = group_column) +
rotate_x_text() + theme_cleveland2()
}
##### Clustering #####
#' Visualisation of hierarchical clustering
#'
#' @concept post_analysis
#'
#' @description
#' Visualisation of the results of hierarchical clustering.
#' For other clustering visualisations see \link{vis.immunr_kmeans}.
#'
#' @aliases vis.immunr_hclust
#'
#' @param .data Clustering results from \link{repOverlapAnalysis} or \link{geneUsageAnalysis}.
#' @param .rect Passed to \link{fviz_dend} - whether to add a rectangle around groups.
#' @param .plot A character vector of length one or two specifying which plots to visualise.
#' If "clust" then plot only the clustering. If "best" then plot the number of optimal clusters.
#' If both then plot both.
#' @param ... Not used here.
#'
#' @return
#' Ggplot2 objects inside the patchwork container.
#'
#' @seealso \link{vis}, \link{repOverlapAnalysis}, \link{geneUsageAnalysis}
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' repOverlapAnalysis(ov, "mds+hclust") %>% vis()
#' @export
vis.immunr_hclust <- function(.data, .rect = FALSE, .plot = c("clust", "best"), ...) {
p1 <- NULL
if ("clust" %in% .plot) {
p1 <- fviz_dend(.data[[1]], main = "Hierarchical clustering", rect = .rect)
}
p2 <- NULL
if ("best" %in% .plot) {
p2 <- .data[[2]]
}
if (is.null(p1) | is.null(p2)) {
if (is.null(p1)) {
p2
} else {
p1
}
} else {
p <- p1 + p2
p
}
}
#' Visualisation of K-means and DBSCAN clustering
#'
#' @concept post_analysis
#'
#' @description
#' Visualisation of the results of K-means and DBSCAN clustering.
#' For hierarhical clustering visualisations see \link{vis.immunr_hclust}.
#'
#' @aliases vis.immunr_kmeans vis.immunr_dbscan
#'
#' @param .data Clustering results from \link{repOverlapAnalysis} or \link{geneUsageAnalysis}.
#' @param .point If TRUE then plot sample points. Passed to \link{fviz_cluster}.
#' @param .text If TRUE then plot text labels. Passed to \link{fviz_cluster}.
#' @param .ellipse If TRUE then plot ellipses around all samples. Passed to "ellipse" from \link{fviz_cluster}.
#' @param .point.size Size of points, passed to "pointsize" from \link{fviz_cluster}.
#' @param .text.size Size of text labels, passed to labelsize from \link{fviz_cluster}.
#' @param .plot A character vector of length one or two specifying which plots to visualise.
#' If "clust" then plot only the clustering. If "best" then plot the number of optimal clusters.
#' If both then plot both.
#' @param ... Not used here.
#'
#' @return
#' Ggplot2 objects inside the pathwork container.
#'
#' @seealso \link{vis}, \link{repOverlapAnalysis}, \link{geneUsageAnalysis}
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' repOverlapAnalysis(ov, "mds+kmeans") %>% vis()
#' @export
vis.immunr_kmeans <- function(.data, .point = TRUE, .text = TRUE, .ellipse = TRUE,
.point.size = 2, .text.size = 10, .plot = c("clust", "best"),
...) {
p1 <- NULL
if ("clust" %in% .plot) {
p1 <- fviz_cluster(.data[[1]],
data = .data[[3]], main = "K-means clustering", geom = c("point", "text")[c(.point, .text)],
show.legend.text = FALSE, show.clust.cent = FALSE, repel = TRUE, ellipse = .ellipse, shape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = TRUE
) +
guides(fill = "none", shape = "none") +
theme_linedraw()
}
p2 <- NULL
if ("best" %in% .plot) {
p2 <- .data[[2]]
}
if (is.null(p1) | is.null(p2)) {
if (is.null(p1)) {
p2
} else {
p1
}
# p = ifelse(is.null(p1), p2, p1) # doesnt work for some reason
} else {
p1 + p2
}
}
#' @export
vis.immunr_dbscan <- function(.data, .point = TRUE, .text = TRUE, .ellipse = TRUE,
.point.size = 2, .text.size = 10, .plot = c("clust", "best"), ...) {
fviz_cluster(.data[[1]],
data = .data[[2]], main = "DBSCAN clustering", geom = c("point", "text")[c(.point, .text)],
show.legend.text = FALSE, show.clust.cent = FALSE, repel = TRUE, ellipse = .ellipse, shape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = TRUE
) +
guides(fill = "none", shape = "none") +
theme_linedraw()
}
##### Dimension reduction #####
#' PCA / MDS / tSNE visualisation (mainly overlap / gene usage)
#'
#' @concept post_analysis
#'
#' @importFrom ggpubr ggscatter
#'
#' @aliases vis.immunr_mds vis.immunr_pca vis.immunr_tsne
#'
#' @param .data Output from analysis functions such as \link{geneUsageAnalysis} or
#' \link{immunr_pca}, \link{immunr_mds} or \link{immunr_tsne}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .point Logical. If TRUE then plot points corresponding to objects.
#' @param .text Logical. If TRUE then plot sample names.
#' @param .ellipse Logical. If TRUE then plot ellipses around clusters of grouped samples.
#' @param .point.size Numeric. A size of points to plot.
#' @param .text.size Numeric. A size of sample names' labels.
#' @param ... Not used here.
#'
#' @return
#' A ggplot2 object.
#'
#' @details
#' Other visualisation methods:
#'
#' - PCA - \link{vis.immunr_pca}
#'
#' - MDS - \link{vis.immunr_mds}
#'
#' - tSNE - \link{vis.immunr_tsne}
#'
#' @examples
#' data(immdata)
#' ov <- repOverlap(immdata$data)
#' repOverlapAnalysis(ov, "mds") %>% vis()
#' @export
vis.immunr_mds <- function(.data, .by = NA, .meta = NA,
.point = TRUE, .text = TRUE, .ellipse = TRUE,
.point.size = 2, .text.size = 4, ...) {
if (!.point & !.text) {
stop("Error: Please provide at least one of the arguments: .point and .text")
}
group_res <- process_metadata_arguments(data.frame(Sample = row.names(.data$x), stringsAsFactors = FALSE), .by, .meta)
group_column <- group_res$name
if (!group_res$is_grouped) {
.ellipse <- FALSE
}
fviz_pca_ind(.data,
habillage = group_res$group_column, geom = c("point", "text")[c(.point, .text)],
repel = TRUE, addEllipses = .ellipse, mean.point = FALSE, pointshape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = TRUE, show.legend.text = FALSE
) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(x = "Dim1", y = "Dim2", title = "Multidimensional scaling", col = group_column)
}
#' @export
vis.immunr_pca <- function(.data, .by = NA, .meta = NA,
.point = TRUE, .text = TRUE, .ellipse = TRUE,
.point.size = 2, .text.size = 4, ...) {
if (!.point & !.text) {
stop("Error: Please provide at least one of the arguments: .point and .text")
}
group_res <- process_metadata_arguments(data.frame(Sample = row.names(.data$x), stringsAsFactors = FALSE), .by, .meta)
group_column <- group_res$name
if (!group_res$is_grouped) {
.ellipse <- FALSE
}
fviz_pca_ind(.data,
habillage = group_res$group_column, geom = c("point", "text")[c(.point, .text)],
repel = TRUE, addEllipses = .ellipse, mean.point = FALSE, pointshape = 16,
pointsize = .point.size, labelsize = .text.size, label.rectangle = TRUE, show.legend.text = FALSE
) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(title = "Principal component analysis", col = group_column)
}
#' @export
vis.immunr_tsne <- function(.data, .by = NA, .meta = NA,
.point = TRUE, .text = TRUE, .ellipse = TRUE,
.point.size = 2, .text.size = 4, ...) {
.data <- data.frame(.data)
colnames(.data) <- c("Dim1", "Dim2")
.data$Sample <- row.names(.data)
group_res <- process_metadata_arguments(.data, .by, .meta)
group_column <- group_res$name
.data$Group <- group_res$group_column
if (!group_res$is_grouped) {
.ellipse <- FALSE
}
ggscatter(
data = .data, x = "Dim1", y = "Dim2", color = "Group", ellipse = .ellipse, size = .point.size,
point = .point, label = ifelse(.text, "Sample", NULL), repel = TRUE, label.rectangle = TRUE, show.legend.text = FALSE
) +
theme_linedraw() +
guides(fill = "none", shape = "none") +
labs(title = "t-Distributed Stochastic Neighbor Embedding", col = group_column)
}
#
# ToDo: ideally, geneUsageAnalysis returns raw objects, and vis.immunr_mds transforms it into the fviz-compatible objects
#
##### Clonality analysis #####
vis_bar_stacked <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = NA,
.grouping.var = NA,
.labs = c(NA, "Y"),
.title = "Barplot (.title argument)", .subtitle = "Subtitle (.subtitle argument)",
.legend = NA, .leg.title = NA) {
group_res <- process_metadata_arguments(.data, .by, .meta)
.data$Group <- group_res$group_column
if (!(.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var <- .data[[.grouping.var]]
position_bar <- "dodge"
if (is.na(.stack)) {
if (group_res$is_grouped) {
if (.errorbars.off) {
position_bar <- "stack"
} else {
position_bar <- "dodge"
}
} else {
position_bar <- "stack"
}
} else if (.stack) {
if (group_res$is_grouped) {
if (.errorbars.off) {
position_bar <- "stack"
} else {
warning("Warning: Provide .errorbars.off = TRUE in order to display stacked barplots.")
position_bar <- "dodge"
}
} else {
position_bar <- "stack"
}
}
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] <- "Group"
} else {
.labs[1] <- "Sample"
}
}
if (group_res$is_grouped) {
perc <- .data %>%
dplyr::group_by(Grouping.var, Group) %>%
dplyr::summarise(
Value.mean = mean(Value, na.rm = TRUE),
Value.min = quantile(Value, .errorbars[1], na.rm = TRUE),
Value.max = quantile(Value, .errorbars[2], na.rm = TRUE)
)
p <- ggplot() +
geom_bar(aes(x = Group, y = Value.mean, fill = Grouping.var), data = perc, colour = "black", stat = "identity", position = position_bar)
if (!.errorbars.off) {
p <- p + geom_errorbar(aes(x = Group, fill = Grouping.var, ymin = Value.min, ymax = Value.max),
data = perc, colour = "black", width = .2, position = position_dodge(.9)
)
}
} else {
p <- ggplot() +
geom_bar(aes(x = Sample, y = Value, fill = Grouping.var), data = .data, colour = "black", stat = "identity", position = position_bar)
}
p + theme_pubr(legend = "right") +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = .leg.title) +
.colourblind_discrete(length(unique(.data$Grouping.var)))
}
#' Visualise results of the clonality analysis
#'
#' @concept clonality
#'
#' @name vis.immunr_clonal_prop
#'
#' @aliases vis.immunr_clonal_prop vis.immunr_homeo vis.immunr_top_prop vis.immunr_tail_prop
#'
#' @description An utility function to visualise the output from \code{\link{repClonality}}.
#'
#' @importFrom reshape2 melt
#' @importFrom scales percent
#'
#' @param .data Output from \code{\link{repClonality}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#'
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = FALSE} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = FALSE}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{repClonality} \link{vis}
#'
#' @examples
#' data(immdata)
#' clp <- repClonality(immdata$data, "clonal.prop")
#' vis(clp)
#'
#' hom <- repClonality(immdata$data, "homeo")
#' # Remove p values and points from the plot
#' vis(hom, .by = "Status", .meta = immdata$meta, .test = FALSE, .points = FALSE)
#' @export
vis.immunr_clonal_prop <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
# ToDo: this and other repClonality and repDiversity functions doesn't work on a single repertoire. Fix it
perc_value <- round(.data[1, 2][1])
.data <- data.frame(Sample = row.names(.data), Value = .data[, 1])
p <- vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Clonal proportion"),
.title = "Clonal proportion", .subtitle = paste0("Number of clonotypes occupying the ", perc_value, "% of repertoires"),
.legend = NA, .leg.title = NA
)
p
}
#' @export
vis.immunr_homeo <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = NA, .test = TRUE, .points = TRUE, ...) {
melted <- reshape2::melt(.data)
colnames(melted) <- c("Sample", "Clone.group", "Value")
if (is.na(.by[1]) && is.na(.stack)) {
.stack <- TRUE
}
p <- vis_bar(melted,
.by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Clone.group", .points = .points,
.labs = c(NA, "Relative abundance, perc"),
.title = "Relative abundance", .subtitle = "Summary proportion of clonotypes with specific frequencies",
.legend = NA, .leg.title = "Clonotype group", .test = .test
) +
scale_y_continuous(labels = scales::percent)
p
}
#' @export
vis.immunr_top_prop <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = NA, .points = TRUE, .test = TRUE, ...) {
tmp <- .data
if (is.null(dim(tmp))) {
tmp <- t(as.matrix(tmp))
.data <- t(as.matrix(.data))
}
for (i in 2:ncol(.data)) {
tmp[, i] <- .data[, i] - .data[, i - 1]
}
res <- tmp
.head <- as.numeric(colnames(.data))
colnames(res) <- paste0("[", c(1, .head[-length(.head)] + 1), ":", .head, ")")
res <- as.data.frame(res)
res$Sample <- row.names(res)
res <- reshape2::melt(res)
colnames(res) <- c("Sample", "Clone.index", "Value")
if (is.na(.by[1]) && is.na(.stack)) {
.stack <- TRUE
}
p <- vis_bar(res,
.by = .by, .meta = .meta, .points = .points, .test = .test,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Clone.index",
.labs = c(NA, "Occupied repertoire space"),
.title = "Top clonal proportion", .subtitle = "Summary proportion of clonotypes with specific indices",
.legend = NA, .leg.title = "Clonotype indices"
) +
scale_y_continuous(labels = scales::percent)
p
}
#' @export
vis.immunr_rare_prop <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = NA, .points = TRUE, .test = TRUE, ...) {
tmp <- .data
if (is.null(dim(tmp))) {
tmp <- t(as.matrix(tmp))
.data <- t(as.matrix(.data))
}
colnames_new <- c()
prev_value <- "1"
start_index <- 1
if (colnames(.data)[1] == "1") {
colnames_new <- "1"
prev_value <- "2"
start_index <- 2
}
for (i in start_index:ncol(.data)) {
colnames_new <- c(colnames_new, paste0(prev_value, " - ", colnames(.data)[i]))
if (colnames(.data)[i] != "MAX") {
prev_value <- as.character(as.integer(colnames(.data)[i]) + 1)
}
}
colnames(tmp) <- colnames_new
for (i in 2:ncol(.data)) {
tmp[, i] <- .data[, i] - .data[, i - 1]
}
res <- tmp
res <- as.data.frame(res)
res$Sample <- row.names(res)
res <- reshape2::melt(res)
colnames(res) <- c("Sample", "Counts", "Value")
if (is.na(.by[1]) && is.na(.stack)) {
.stack <- TRUE
}
p <- vis_bar(res,
.by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = .stack,
.grouping.var = "Counts", .points = .points, .test = .test,
.labs = c(NA, "Occupied repertoire space"),
.title = "Rare clonal proportion", .subtitle = "Summary proportion of clonotypes with specific counts",
.legend = NA, .leg.title = "Clonotype counts"
) +
scale_y_continuous(labels = scales::percent)
p
}
##### Diversity estimation & dodged bar plots #####
#' Bar plots
#'
#' @concept vis
#'
#' @importFrom ggpubr compare_means geom_signif stat_compare_means theme_pubr rotate_x_text
#'
#' @name vis_bar
#'
#' @param .data Data to visualise.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .errorbar.width Numeric. Width for error bars.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param .stack If TRUE and .errorbars.off is TRUE then plot stacked bar plots for each Group or Sample
#' @param .defgroupby A name for the column with sample names.
#' @param .grouping.var A name for the column to group by.
#' @param .subtitle The text for the plot's subtitle.
#' @param .leg.title The text for the plots's legend. Provide NULL to remove the legend's title completely.
#' @param .legend If TRUE then displays a legend, otherwise removes legend from the plot.
#' @param .labs A character vector of length two specifying names for x-axis and y-axis.
#' @param .title The text for the plot's title.
#' @param .legend.pos Positions of the legend: either "top", "bottom", "left" or "right".
#' @param .rotate_x How much the x tick text should be rotated? In angles.
#'
#' @return
#' A ggplot2 object.
#'
#' @examples
#' vis_bar(data.frame(Sample = c("A", "B", "C"), Value = c(1, 2, 3)))
#' @export
vis_bar <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .stack = FALSE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5, .errorbar.width = 0.2, .defgroupby = "Sample", .grouping.var = "Group",
.labs = c("X", "Y"), .title = "Barplot (.title argument)",
.subtitle = "Subtitle (.subtitle argument)",
.legend = NA, .leg.title = "Legend (.leg.title argument)", .legend.pos = "right",
.rotate_x = 90) {
group_res <- process_metadata_arguments(.data, .by, .meta, .defgroupby)
group_column <- group_res$name
.data$Group <- group_res$group_column
if (!(.grouping.var %in% names(.data))) {
stop("Error: no column '", .grouping.var, "' in the input data frame.")
}
.data$Grouping.var <- .data[[.grouping.var]]
.data_proc <- .data %>%
dplyr::group_by(Grouping.var, Group) %>%
dplyr::summarise(
Value.mean = mean(Value, na.rm = TRUE),
Value.min = quantile(Value, .errorbars[1], na.rm = TRUE),
Value.max = quantile(Value, .errorbars[2], na.rm = TRUE)
)
if (is.na(.labs[1])) {
if (group_res$is_grouped) {
.labs[1] <- "Group"
} else {
.labs[1] <- "Sample"
}
}
if (is.na(.leg.title)) {
.leg.title <- group_column
# if (group_res$is_grouped) {
# .leg.title = "Group"
# } else {
# .leg.title = "Sample"
# }
} else if (group_res$is_grouped) {
.leg.title <- group_column
}
position_name <- "dodge"
if (group_res$is_grouped) {
if (is.na(.stack)) {
.stack <- FALSE
}
} else {
if (is.na(.stack)) {
.stack <- FALSE
} else if (.stack) {
.errorbars.off <- TRUE
.points <- FALSE
.test <- FALSE
}
}
if (group_res$is_grouped) {
if (.stack) {
p <- ggplot() +
geom_col(aes(x = Group, y = Value.mean, fill = Grouping.var),
position = "stack", data = .data_proc, col = "black"
)
} else {
p <- ggplot(aes(x = Grouping.var, y = Value, color = Group, fill = Group, group = Group), data = .data) +
geom_col(aes(x = Grouping.var, y = Value.mean),
position = position_name, data = .data_proc, col = "black"
)
}
if (is.na(.legend)) {
if (.grouping.var == "Group") {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
} else if (.legend == FALSE) {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
if (!.errorbars.off) {
p <- p +
geom_errorbar(aes(x = Grouping.var, y = Value.mean, ymin = Value.min, ymax = Value.max, color = Group),
color = "black", data = .data_proc, width = .errorbar.width, position = position_dodge(.9)
)
}
if (.points) {
p <- p +
geom_point(color = "black", position = position_jitterdodge(0.05), size = 1)
}
if (.test) {
if (.grouping.var == "Group") {
comparisons <- list()
for (i in 1:(nrow(.data_proc) - 1)) {
for (j in (i + 1):nrow(.data_proc)) {
# if ((.data$Grouping.var[i] == .data$Grouping.var[i]) && (.data$Group[i] != .data$Group[j])) {
comparisons <- c(comparisons, list(c(i, j)))
# }
}
}
p_df <- compare_means(Value ~ Group, .data, comparisons = comparisons, p.adjust.method = "holm")
y_max <- max(.data$Value)
p.value.y.coord <- rep(y_max, nrow(p_df))
step.increase <- (1:nrow(p_df)) * (y_max / 10)
p.value.y.coord <- p.value.y.coord + step.increase
p_df <- p_df %>%
mutate(
y.coord = p.value.y.coord,
p.adj = format.pval(p.adj, digits = 1)
)
p <- p + geom_signif(
data = p_df,
aes(xmin = group1, xmax = group2, annotations = p.adj, y_position = y.coord),
manual = TRUE, tip_length = 0.03, size = .5, inherit.aes = FALSE
)
} else {
# Seems fine...
# p_df = compare_means(Value ~ Group, group.by = "Grouping.var", method = "kruskal.test", .data, p.adjust.method = "holm")
# print(p_df)
p <- p +
stat_compare_means(aes(label = ..p.adj..),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
}
}
p <- p + .tweak_fill(length(unique(.data$Group)))
} else {
if (.stack) {
p <- ggplot() +
geom_bar(aes(x = Sample, y = Value, fill = Grouping.var), data = .data, colour = "black", stat = "identity", position = "stack")
if (!is.na(.legend)) {
if (!.legend) {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
}
p <- p + .colourblind_discrete(length(unique(.data$Grouping.var)))
} else {
p <- ggplot() +
geom_bar(aes(x = Grouping.var, y = Value, fill = Sample), position = position_name, stat = "identity", data = .data, col = "black")
if (is.na(.legend)) {
# p = p + guides(fill=FALSE)
.legend.pos <- "none"
}
p <- p + .colourblind_discrete(length(unique(.data$Sample)))
}
}
p <- p + theme_pubr(legend = .legend.pos) +
labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, fill = .leg.title) +
rotate_x_text() + theme_cleveland2()
p
}
#' Visualise diversity.
#'
#' @concept diversity
#'
#' @aliases vis.immunr_chao1 vis.immunr_dxx vis.immunr_rarefaction vis.immunr_div vis.immunr_ginisimp vis.immunr_invsimp vis.immunr_hill
#' @description An utility function to visualise the output from \code{\link{repDiversity}}.
#'
#' @importFrom reshape2 melt
#'
#' @param .data Output from \code{\link{repDiversity}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = FALSE} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = FALSE}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{repDiversity} \link{vis}
#'
#' @examples
#' data(immdata)
#' dv <- repDiversity(immdata$data, "chao1")
#' vis(dv)
#' @export
vis.immunr_chao1 <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE, .points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
.data <- data.frame(Sample = row.names(.data), Value = .data[, 1])
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Chao1"),
.title = "Chao1", .subtitle = "Sample diversity estimation using Chao1",
.legend = NA, .leg.title = NA
)
}
#' @export
vis.immunr_hill <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .add.points = TRUE, .point.size = 1.5, .add.point.size = 1, .line.size = 0.75, .leg.title = NA, ...) {
group_res <- process_metadata_arguments(.data, .by, .meta)
.data$Group <- group_res$group_column
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title <- "Group"
} else {
.leg.title <- "Sample"
}
}
grouped_data <- .data %>%
dplyr::group_by(Group, Q) %>%
dplyr::summarise(
Value.mean = mean(Value, na.rm = TRUE),
Value.min = quantile(Value, .errorbars[1], na.rm = TRUE),
Value.max = quantile(Value, .errorbars[2], na.rm = TRUE)
)
position_value <- "identity"
if (group_res$is_grouped) {
position_value <- position_jitter(width = 0.1)
}
p <- ggplot() +
geom_line(aes(x = Q, y = Value.mean, col = Group), size = .line.size, data = grouped_data, stat = "identity")
if (.points) {
p <- p + geom_point(aes(x = Q, y = Value, col = Group),
data = .data,
position = position_value, alpha = .5, size = .point.size
)
}
if (group_res$is_grouped) {
if (.add.points) {
p <- p +
geom_point(aes(x = Q, y = Value.mean, col = Group), data = grouped_data, size = .add.point.size)
}
if (!.errorbars.off) {
p <- p +
geom_errorbar(aes(x = Q, ymin = Value.min, ymax = Value.max, group = Group, col = Group),
data = grouped_data, width = .2, size = .line.size, position = position_dodge(0.1)
)
}
}
p <- p +
theme_pubr(legend = "right") + theme_cleveland2() +
labs(x = "Q values", y = "Diversity estimation", title = "Hill numbers", subtitle = "Sample diversity estimation using Hill numbers", color = .leg.title)
p
}
#' @export
vis.immunr_div <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5,
.legend = NA, ...) {
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Effective number of clonotypes"),
.title = "True diversity", .subtitle = "Sample diversity estimation using the true diversity index",
.legend = .legend, .leg.title = NA
)
}
#' @export
vis.immunr_ginisimp <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Gini-Simpson index"),
.title = "Gini-Simpson index", .subtitle = "Sample diversity estimation using the Gini-Simpson index",
.legend = NA, .leg.title = NA
)
}
#' @export
vis.immunr_invsimp <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5,
.legend = NA, ...) {
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Inverse Simpson index"),
.title = "Inverse Simpson index", .subtitle = "Sample diversity estimation using the inverse Simpson index",
.legend = .legend, .leg.title = NA
)
}
#' @export
vis.immunr_dxx <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE, .signif.label.size = 3.5,
.legend = NA, ...) {
perc_value <- round(.data[1, 2][1])
.data <- data.frame(Sample = row.names(.data), Value = .data[, 1])
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, paste0("D", perc_value)),
.title = paste0("D", perc_value, " diversity index"), .subtitle = paste0("Number of clonotypes occupying the ", perc_value, "% of repertoires"),
.legend = .legend, .leg.title = NA
)
}
#' @export
vis.immunr_rarefaction <- function(.data, .by = NA, .meta = NA,
.mean = TRUE, .errors = TRUE, .log = FALSE,
.labels = TRUE, ...) {
.muc.res <- .data
group_res <- process_metadata_arguments(.data, .by, .meta)
data_groups <- group_res$groups
group_column <- group_res$name
names(data_groups) <- group_res$group_names
is_grouped <- group_res$is_grouped
.muc.res$Group <- group_res$group_column
if ("Type" %in% colnames(.muc.res)) {
.muc.res$Type <- factor(.muc.res$Type, levels = c("interpolation", "extrapolation"), ordered = TRUE)
}
p <- ggplot() +
xlab("Sample size (clones)") +
ylab("Estimated diversity (unique clonotypes)") +
ggtitle("Rarefaction analysis") +
theme_pubr(legend = "right") +
theme_cleveland2()
if (.mean && is_grouped) {
data_proc <- .muc.res %>%
group_by(Group, Size) %>%
summarise(
MinVal = quantile(Mean, .025, na.rm = TRUE),
MaxVal = quantile(Mean, .975, na.rm = TRUE),
MeanVal = mean(Mean, na.rm = TRUE)
)
p <- p + geom_line(aes(x = Size, y = MeanVal, color = Group), data = data_proc)
if (.errors) {
p <- p + geom_ribbon(aes(x = Size, group = Group, ymin = MinVal, ymax = MaxVal, fill = Group), data = data_proc, alpha = 0.15)
}
} else {
colnames(.muc.res)[2] <- "Q1"
colnames(.muc.res)[4] <- "Q2"
if (.errors) {
p <- p + geom_ribbon(aes(x = Size, group = Sample, ymin = Q1, ymax = Q2), col = "grey80", data = .muc.res, alpha = 0.2)
}
if ("Type" %in% colnames(.muc.res)) {
p <- p +
geom_line(aes(x = Size, y = Mean, colour = Group, linetype = Type), size = 1, data = .muc.res)
} else {
p <- p +
geom_line(aes(x = Size, y = Mean, colour = Group), size = 1, data = .muc.res)
}
if (.labels) {
tmp <- .muc.res[tapply(1:nrow(.muc.res), .muc.res$Sample, tail, 1), ]
tmp <- tmp[order(tmp$Group), ]
p <- p + ggrepel::geom_label_repel(aes(x = Size, y = Mean, label = Sample, fill = Group), data = tmp)
}
}
if (.log) {
p <- p + scale_x_log10()
}
p + .tweak_col(length(unique(.muc.res$Group))) + .tweak_fill(length(unique(.muc.res$Group)))
}
##### Exploratory analysis #####
#' Visualise results of the exploratory analysis
#'
#' @concept explore
#'
#' @aliases vis.immunr_exp_vol vis.immunr_exp_count vis.immunr_exp_len vis.immunr_exp_clones
#' @description An utility function to visualise the output from \code{\link{repExplore}}.
#'
#' @importFrom reshape2 melt
#'
#' @param .data Output from \code{\link{repExplore}}.
#' @param .by Pass NA if you want to plot samples without grouping.
#'
#' You can pass a character vector with one or several column names from ".meta"
#' to group your data before plotting. In this case you should provide ".meta".
#'
#' You can pass a character vector that exactly matches the number of samples in
#' your data, each value should correspond to a sample's property. It will be used
#' to group data based on the values provided. Note that in this case you should
#' pass NA to ".meta".
#'
#' @param .meta A metadata object. An R dataframe with sample names and their properties,
#' such as age, serostatus or hla.
#'
#' @param .errorbars A numeric vector of length two with quantiles for error bars
#' on sectors. Disabled if ".errorbars.off" is TRUE.
#'
#' @param .errorbars.off If TRUE then plot CI bars for distances between each group.
#' Disabled if no group passed to the ".by" argument.
#' @param .points A logical value defining whether points will be visualised or not.
#' @param .test A logical vector whether statistical tests should be applied. See "Details" for more information.
#' @param .signif.label.size An integer value defining the size of text for p-value.
#' @param ... Not used here.
#'
#' @details
#' If data is grouped, then statistical tests for comparing means of groups will be performed, unless \code{.test = FALSE} is supplied.
#' In case there are only two groups, the Wilcoxon rank sum test (https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test) is performed
#' (R function \code{\link{wilcox.test}} with an argument \code{exact = FALSE}) for testing if there is a difference in mean rank values between two groups.
#' In case there more than two groups, the Kruskal-Wallis test (https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance) is performed (R function \code{\link{kruskal.test}}), that is equivalent to ANOVA for ranks and it tests whether samples from different groups originated from the same distribution.
#' A significant Kruskal-Wallis test indicates that at least one sample stochastically dominates one other sample.
#' Adjusted for multiple comparisons P-values are plotted on the top of groups.
#' P-value adjusting is done using the Holm method (https://en.wikipedia.org/wiki/Holm%E2%80%93Bonferroni_method) (also known as Holm-Bonferroni correction).
#' You can execute the command \code{?p.adjust} in the R console to see more.
#'
#' @return
#' A ggplot2 object.
#'
#' @seealso \link{repExplore} \link{vis}
#'
#' @examples
#' data(immdata)
#' repExplore(immdata$data, "volume") %>% vis()
#' repExplore(immdata$data, "count") %>% vis()
#' repExplore(immdata$data, "len") %>% vis()
#' repExplore(immdata$data, "clones") %>% vis()
#' @export
vis.immunr_exp_vol <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE,
.signif.label.size = 3.5, ...) {
.data <- rename_column(.data, "Volume", "Value")
vis_bar(
.data = .data, .by = .by, .meta = .meta,
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
.defgroupby = "Sample", .grouping.var = "Group",
.labs = c(NA, "Clonotypes"),
.title = "Number of clonotypes", .subtitle = "Number of unique clonotypes in the input data",
.legend = NA, .leg.title = NA
)
}
#' @export
vis.immunr_exp_count <- function(.data, .by = NA, .meta = NA, .logx = TRUE, .logy = TRUE, .labs = c("Abundance", "Number of clonotypes"),
.title = "Distribution of clonotype abundances", .subtitle = NULL,
.legend = TRUE, .leg.title = NA, ...) {
group_res <- process_metadata_arguments(.data, .by, .meta)
.data$Group <- group_res$group_column
if (is.na(.leg.title)) {
if (group_res$is_grouped) {
.leg.title <- "Group"
} else {
.leg.title <- "Sample"
}
}
p <- ggplot() +
geom_line(aes(x = Clone.num, y = Clonotypes, col = Group, group = Sample), data = .data, stat = "identity")
if (.logx) {
p <- p + scale_x_log10()
}
if (.logy) {
p <- p + scale_y_log10()
}
p <- p + labs(x = .labs[1], y = .labs[2], title = .title, subtitle = .subtitle, color = .leg.title) +
theme_pubr(legend = "right") + rotate_x_text() + theme_cleveland2()
p
}
#' @export
vis.immunr_exp_len <- function(.data, .by = NA, .meta = NA,
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
.points = TRUE, .test = TRUE,
.signif.label.size = 3.5, ...) {