-
Notifications
You must be signed in to change notification settings - Fork 10
/
plot_effects.R
582 lines (529 loc) · 26.7 KB
/
plot_effects.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
#' Plot cancer effects
#'
#' Visualize and compare cancer effects for variants of interest.
#'
#' @param effects Cancer effects table, as produced by \code{ces_variant()}. You can combine multiple tables via rbind()
#' to plot multiple effects per variant, such as to compare effects across subgroups.
#' @param topn Include up to this many variants. The highest-effect variants are plotted. (Or, if
#' \code{group_by} is gene, include up to this many groups. Groups are ranked by their
#' highest-effect variants.)
#' @param group_by If 'variant' (the default), one variant per row in the plot. If "gene" or some
#' other column name, variants will be plotted together accordingly.
#' @param y_label Y-axis labels for each group of variants. By default ("auto"), will be variant names
#' when \code{group_by = "variant"}, and the values in the group_by column otherwise.
#' @param color_by A single color to use for geom_point fill (default "darkseagreen4"). Or, the name of
#' a column that specifies color groupings. Can be used to distinguish points when multiple effects
#' are plotted per variant (for example, when comparing effects between subgroups), or to
#' highlight related groups of variants. A viridis color scale will be applied, unless ever single value
#' in the color column is interpretable as an R color, in which case the given colors will be used.
#' @param prevalence_method Show each variant's prevalence as a raw mutation count ("count", the default), or as
#' a percentage of samples with sequencing coverage at the site ("percent"). If the effects table
#' has the same number of samples covering every inference, you can choose "both".
#' @param color_label If color_by is supplying color names for scale_color_identity(), optionally
#' include color_label so that colors can be labeled in the plot legend.
#' @param legend.position Passed to ggplot's legend.position (none, left, right, top, bottom, or
#' coordinates). Use "none" to eliminate the legend. Defaults to "right".
#' @param legend_size_name The title for the point size scale (larger points = more prevalent variants).
#' @param legend_size_breaks Vector of specific mutation counts (or percentages) to depict in the point size legend.
#' Specify numeric values if you don't like what gets produced by the default ("auto"). Set to
#' FALSE or to a single desired point size to turn of size scaling.
#' @param legend_color_name The title for the point fill color scale.
#' @param viridis_option If using \code{color_by}, this argument
#' specifies which viridis color map to use. Ignored if you specify your own colors.
#' @param label_individual_variants When TRUE (default), individual variants within groups will be
#' labeled when group_by is not 'variant'. Set FALSE to not label variants, or specify a column
#' name that supplies a label for each row in the effects table. By default, variant names will be
#' used for labels. If group_by is exactly "gene", labels will be shortened to just the amino acid
#' changes. Some labels will be omitted (with a warning) if it seems there are too many to display
#' in the plot space.
#' @param order_by_effect When TRUE (default), variants are plotted in order of effect. When FALSE,
#' variants are plotted top-down in the order they are supplied.
#' @param show_ci TRUE/FALSE to depict confidence intervals in plot (default TRUE).
#' @param title Main title for the plot (by default, no title)
#' @param x_title Text for the X-axis label.
#' @param y_title Text for the Y-axis label.
#' @return A ggplot
#' @export
plot_effects = function(effects, topn = 30, group_by = 'variant',
title = '',
x_title = NULL, y_title = NULL,
y_label = 'auto',
color_by = 'darkseagreen4', color_label = NULL,
legend.position = 'right',
legend_size_name = 'auto',
legend_color_name = NULL,
viridis_option = 'cividis',
legend_size_breaks = 'auto',
label_individual_variants = TRUE,
order_by_effect = TRUE,
prevalence_method = 'auto',
show_ci = TRUE) {
# Verify ggplot2/ggrepel are installed, since they are not package dependencies
if (! require("ggplot2") || ! require("ggrepel")) {
stop("Packages needed for plotting are not installed. Run install.packages(c('ggplot2', 'ggrepel')).")
}
# Validate viridis_option
if(! is.character(viridis_option) || length(viridis_option) != 1) {
stop('viridis_option should be 1-length character.')
}
# Determine axis titles
if(is.null(x_title)) {
x_axis_title = 'Cancer effect (scaled selection coefficient)'
} else if (is.character(x_title) && length(x_title) == 1) {
x_axis_title = x_title
} else {
stop('x_title should be 1-length character (or NULL for default title).')
}
if(is.null(y_title)) {
y_axis_title = 'Somatic variant'
} else if (is.character(y_title) && length(y_title) == 1) {
y_axis_title = y_title
} else {
stop('y_title should be 1-length character (or NULL for default title).')
}
if(! is.character(title) || length(title) != 1) {
stop('title should be 1-length character')
}
# Validate that effects is table with required columns
if (! is(effects, 'data.table')) {
stop('effects should be a data.table of cancer effects.')
}
effects = copy(effects)
# For compound variants, there is no variant_id, just variant_name
if(! 'variant_id' %in% names(effects)) {
effects[, variant_id := variant_name]
}
required_cols = c('variant_name', 'variant_type', 'selection_intensity', 'included_with_variant', 'held_out')
if(identical(show_ci, TRUE)) {
required_cols = c(required_cols, c('ci_low_95', 'ci_high_95'))
} else if (! identical(show_ci, FALSE)) {
stop('Argument show_ci should be TRUE/FALSE.')
}
missing_cols = setdiff(required_cols, names(effects))
if(length(missing_cols) > 0) {
if(identical(missing_cols, 'held_out')) {
msg = paste0('Column held_out is missing from effects table. This column was added to effects output in cancereffectsizeR v2.8.0. ',
'If you are trying to use effects loaded from an older analysis, the solution is to re-run the ces_variant() call.')
warning(pretty_message(msg, emit = F))
}
stop("Missing required columns in effects table: ", paste(missing_cols, collapse = ', '), '.')
}
if(effects[, .N] == 0) {
stop('effects table has zero rows.')
}
# group_by can be variant (default), gene (also gets special behavior), or any other character/factor column name.
if(! is.character(group_by) || length(group_by) != 1) {
stop('group_by should be 1-length character.')
}
if(group_by == 'variant') {
# When grouping by variant, there is only 1 variant per variant group
effects[, variant_group := variant_id]
effects[, top_by_group := max(selection_intensity, na.rm = T), by = 'variant_group']
} else {
if(! group_by %in% names(effects)) {
stop('Specified group_by column ', group_by, ' is not present in effects table.')
}
effects[, variant_group := effects[[group_by]]]
if(! is.character(effects$variant_group) && ! is.factor(effects$variant_group)) {
effects$variant_group = as.factor(effects$variant_group)
}
has_na = effects[, anyNA(variant_group)]
if (has_na) {
effects = effects[! is.na(variant_group)]
msg = paste0('Some variants in effects table have NA values in group_by column ', group_by, '. ',
'These variants have been filtered out. If you want to include these in the plot, ',
'assign them non-NA.')
warning(pretty_message(msg, emit = F))
}
effects[, top_by_group := max(selection_intensity, na.rm = T), by = 'variant_group']
}
# Remove variants (or variant groups) outside of topn.
# Allow user to specificy topn = Inf in a variety of ways.
if(is.numeric(topn) && length(topn) == 1 && is.infinite(topn) && topn > 0) {
topn = NULL
}
if(length(topn) == 1 && is.na(topn)) {
topn = NULL
}
if(! is.null(topn)) {
if(! is.numeric(topn) || length(topn) != 1 || as.integer(topn) != topn) {
stop('topn should be a positive integer.')
}
if(uniqueN(effects$top_by_group) > topn) {
lowest_passing = sort(unique(effects$top_by_group), decreasing = T)[topn]
effects = effects[top_by_group >= lowest_passing]
}
}
# Deal with NA selection/CI
lowest_label = NULL
if(show_ci) {
lowest_real = effects[included_with_variant > 0, min(ci_low_95, na.rm = TRUE)]
values_to_check = unlist(effects[, .(selection_intensity, ci_low_95)])
} else {
lowest_real = effects[included_with_variant > 0, min(selection_intensity, na.rm = TRUE)]
values_to_check = effects$selection_intensity
}
even_lower = 10^floor(log10(lowest_real)) # rounding down to next factor of 10 below any lower CI (or effect)
if(anyNA(values_to_check) || any(values_to_check < lowest_real)) {
lowest_label = paste0(' <', format(even_lower, scientific = F, big.mark = ',')) # whitespace for aesthetics
if(show_ci) {
effects[is.na(ci_low_95) | ci_low_95 < lowest_real, ci_low_95 := even_lower]
}
effects[is.na(selection_intensity) | selection_intensity < lowest_real, selection_intensity := even_lower]
}
# Sort into desired plot order (top group will be top of plot, with variants ordered by selection within groups).
if (identical(order_by_effect, TRUE)) {
effects = effects[order(top_by_group, selection_intensity)]
} else if (identical(order_by_effect, FALSE)) {
effects = effects[.N:1] # Reverse given order so that they are plotted in top-down order.
} else {
stop('Argument order_by_effect should be TRUE/FALSE.')
}
# Use the chosen prevalence method to scale variant point sizes
effects[, num_samples := included_total + held_out]
if(identical(prevalence_method, 'auto')) {
# As stated in docs, we'll use count if sample numbers are similar enough (20%).
prevalence_method = ifelse(effects[, max(num_samples)/min(num_samples)] > 1.2, 'percent', 'count')
if(prevalence_method == 'percent') {
pretty_message("Depicting variant prevalence as percent of eligible samples that have mutation. If you prefer counts, set prevalence_method = \"count\".",
black = F)
}
}
if(identical(prevalence_method, 'count') || identical(prevalence_method, 'both')) {
effects[, prevalence := included_with_variant]
if(prevalence_method == 'both' && uniqueN(effects$num_samples) != 1) {
msg = paste0('Not all variants have sequencing coverage in the same number of samples, so ',
'prevalance_method \"both\" can\'t be used.')
stop(pretty_message(msg, emit = F))
}
} else if (identical(prevalence_method, 'percent')) {
# Rounding to match the rounding that will be applied to labels, to
# avoid possibly getting the same rounded label for two different breaks.
effects[, prevalence := round(included_with_variant / num_samples, 3)]
} else{
stop('prevalence_method should be "count", "percent", "both", or "auto".')
}
# legend_size_breaks controls point size and what point sizes are displayed in legend
if(identical(legend_size_breaks, 'auto')) {
if(effects[, .N] < 6) {
size_breaks = sort(unique(effects$prevalence))
} else {
ordered_prev = sort(unique(effects$prevalence))
first_break = ordered_prev[1]
last_break = max(ordered_prev)
num_middle_breaks_left = min(3, length(setdiff(ordered_prev, c(first_break, last_break))))
middle_breaks = numeric()
while(num_middle_breaks_left > 0) {
biggest_left = ordered_prev[length(ordered_prev)]
next_biggest = ordered_prev[length(ordered_prev) - 1]
ideal_spacing = (biggest_left - ordered_prev[1])/(num_middle_breaks_left + 1)
ideal_next_break = biggest_left - ideal_spacing
if(ideal_next_break > next_biggest) {
next_break = next_biggest
} else {
next_index = which.min(abs(ordered_prev - ideal_next_break))
next_break = ordered_prev[next_index]
i = 1
while(next_break/biggest_left > (1 + num_middle_breaks_left)/5 &&
next_index > 1 && length(next_break) > 1) {
next_break = ordered_prev[next_index - i]
i = i + 1
}
}
middle_breaks = c(middle_breaks, next_break)
ordered_prev = c(ordered_prev[ordered_prev < next_break], next_break)
num_middle_breaks_left = num_middle_breaks_left - 1
if(next_break == first_break) {
middle_breaks_left = 0
}
}
size_breaks = unique(sort(c(first_break, middle_breaks, last_break)))
}
} else if (identical(legend_size_breaks, FALSE)) {
size_breaks = 1.5
effects[, prevalence := as.numeric(prevalence)] # convert from integer to avoid warning
effects$prevalence = 1.5 # medium-small; we're going to do scale_size_identity()
} else if(is.numeric(legend_size_breaks)) {
size_breaks = legend_size_breaks
if(length(size_breaks) == 1) {
effects$prevalence = size_breaks # for scale_size_identity()
}
} else {
msg = paste0('legend_size_breaks should be "auto", numeric vector of prevalences to depict, or FALSE to make all points small, ',
'or a single numeric point size.')
stop(pretty_message(msg, emit = F))
}
if(identical(legend_size_name, 'auto')) {
if(prevalence_method == 'count'){
legend_size_name = 'Variant prevalence'
} else if (prevalence_method == 'percent') {
legend_size_name = 'Variant frequency\n(within covering samples, \nper effect inference)'
} else if (prevalence_method == 'both') {
legend_size_name = 'Variant prevalence\n(percent of samples)'
}
} else if(! is.character(legend_size_name) || length(legend_size_name) != 1) {
stop('legend_size_name should be 1-length character.')
}
# Look for parenthesized characters at the end of variant names, which are protein IDs, and insert new line.
effects[nchar(variant_name > 15) & variant_name %like% '^[^\\)]+\\(.*\\)$',
variant_name := gsub('^([^\\)]+)(\\(.*\\))$', '\\1\n\\2', variant_name)]
# Handle variant (or variant group) labels
if(identical(y_label, 'auto')) {
if(group_by == 'variant') {
if(uniqueN(effects$variant_name) == uniqueN(effects$variant_id)) {
# REMOVE for version 3
effects[, variant_group_label := gsub('_', ' ', variant_name)]
} else {
effects[, variant_group_label := variant_id] # unusual situation
}
} else {
effects[, variant_group_label := variant_group]
}
} else {
if(! y_label %in% names(effects)) {
stop('Column ', y_label, ' does not exist in effects table.')
}
effects[, variant_group_label := effects[[y_label]]]
}
# Validate color specification
# Other nice choices: "darkseagreen3", "lightskyblue4"
if(! is.character(color_by)) {
stop('color_by should be type character')
}
if(length(color_by) == 1) {
if(color_by %in% names(effects)) {
if(color_by %in% colors()) {
msg = paste0("This is kind of silly: You chose a value for ",
"color_by that is both a column name and an R color.")
stop(pretty_message(msg, emit = F))
}
effects[, point_fill := effects[[color_by]]]
use_fill_identity = FALSE
if(is.character(effects$point_fill)) {
effects[, is_color := point_fill %in% colors()]
# Help out user when only missing colors are NAs.
if(effects[is_color == F, .N > 0 && all(is.na(point_fill))]) {
stop("It seems like your color_by column is giving color names, but some rows have NA values.")
}
use_fill_identity = all(effects$is_color)
}
} else {
effects$point_fill = color_by
use_fill_identity = TRUE
}
} else {
stop("color_by should be an R color name (\"purple4\") or the name of a column in effects")
}
if(uniqueN(effects[, .(variant_group, variant_group_label)]) != uniqueN(effects$variant_group)) {
if(group_by == 'variant') {
stop('There is not exactly one unique label per variant. Check your y_label.')
} else {
stop("There is not exactly one unique label per group of variants. Check y_label.")
}
}
# When there are multiple variants with same y-position (that is, multiple variants in a variant
# group), nudge y-position so that points/CIs don't overlap.
effects[, y_nudge := scale((1:.N)/.N, center = T, scale = F), by = 'variant_group']
effects[y_nudge != 0, y_nudge := (y_nudge * .15) / max(y_nudge), by = 'variant_group']
if(show_ci) {
x_limits = c(min(effects$ci_low_95, na.rm = T), max(effects$ci_high_95, na.rm = T))
} else {
x_limits = c(min(effects$selection_intensity, na.rm = T), max(effects$selection_intensity, na.rm = T))
}
# Alternating rows of output will have darker and lighter dashed lines to connect to group names.
# The dashed lines go until highest lower CI in each group.
effects[, line_color := ifelse(.GRP %% 2 == 0, 'gray60', 'gray90'), by = 'variant_group']
# When just one variant per row, dashed lines go to lower CI. With multiple variants, we'll
# do the dashed line all the way across
if(identical(as.integer(effects[, .N, by = 'variant_group'][, unique(N)]), 1L)) {
if(show_ci) {
effects[, dash_end := ci_low_95]
} else {
effects[, dash_end := selection_intensity]
}
} else {
effects[, dash_end := Inf] # to end of visible plot
}
# For aesthetics, we'll eliminate the CI crossbars for groups that have lots of variants.
effects[, ci_width := ifelse(.N > 4, 0, .2), by = 'variant_group']
x_labeler = function(x) {
first_visible_label = which(! is.na(x))[1]
x = format(x, scientific = F, big.mark = ',')
if(! is.null(lowest_label)) {
x[first_visible_label] = lowest_label
}
return(x)
}
gg = ggplot(effects, aes(x = selection_intensity, y = variant_group)) +
geom_segment(aes(x = x_limits[1], xend = dash_end, y = variant_group, yend = variant_group),
color = effects$line_color, linetype = 'dotted', na.rm = T)
if(show_ci) {
gg = gg + geom_errorbar(aes(xmin = ci_low_95, xmax = ci_high_95), color = "azure4", na.rm = T,
position = position_nudge(x = 0, y = effects$y_nudge), width = effects$ci_width, linewidth = .25)
}
gg = gg + geom_point(shape = 21, color = 'gray20', aes(size = prevalence, fill = point_fill), na.rm = T,
position = position_nudge(x = 0, y = effects$y_nudge)) +
scale_x_log10(expand = expansion(mult = c(.01, .05)), labels = x_labeler) +
scale_y_discrete(limits = unique(effects$variant_group), labels = unique(effects$variant_group_label),
expand = expansion(add = 1)) +
labs(title = title, x = x_axis_title, y = y_axis_title) +
theme(axis.title.x = element_text(margin = margin(6, 0, 6, 0)),
axis.title.y = element_text(margin = margin(0, 6, 0, 6)),
axis.text.y = element_text(angle = 0, hjust = 1, vjust = 0.5, size = 8),
axis.text.x = element_text(size = 8),
axis.ticks.x = element_line(color = 'gray50'),
axis.ticks.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.position = legend.position,
legend.direction = 'vertical',
legend.title = element_text(size = 7), legend.text = element_text(size = 7),
plot.margin = margin(l = 6, r = 15, b = 6, unit = 'pt'),
plot.title = element_text(margin = margin(t = 6, b = 6)))
# Put a border around the legend if it's within the plot space
if(is.numeric(legend.position) && all(legend.position > 0) && all(legend.position < 1)) {
gg = gg + theme(legend.background = element_rect(fill = alpha(c("white"), 0.9), linewidth = .2, color = 'gray20'))
}
if(length(size_breaks) > 1) {
# If there is just one point fill color in the plot, make legend's size glyphs that color.
size_override = list()
if(use_fill_identity && uniqueN(effects$point_fill) == 1) {
only_color = effects$point_fill[1]
size_override = list(fill = only_color, alpha = 1)
}
size_labels = size_breaks
if(prevalence_method == 'percent') {
size_labels = scales::label_percent(accuracy = .1)(size_labels)
}
if(prevalence_method == 'both') {
# Have already verified that all effects have same number of samples
size_labels = paste0(size_breaks, ' (', scales::label_percent(accuracy = .1)(size_breaks/effects$num_samples[1]), ')')
}
size_range = c(1, 6)
if (length(size_breaks) < 3) {
size_range = c(1, length(size_breaks))
}
gg = gg + scale_size(breaks = size_breaks, labels = size_labels,
limits = c(min(effects$prevalence), max(effects$prevalence)),
guide = guide_legend(title.position = 'top', override.aes = size_override),
name = legend_size_name, range = size_range)
} else {
gg = gg + scale_size_identity()
}
# Validate label_individual_variants and decide whether individual labels are happening.
# Only worth labeling variants if there is more than one variant per variant_group/color grouping
if(identical(label_individual_variants, TRUE) &&
effects[, .N, by = c('variant_group', 'point_fill')][, all(N == 1)]) {
label_individual_variants = FALSE
}
if(identical(label_individual_variants, TRUE)) {
# Use variant_name (unless variant_id is necessary due to ambiguity) if nothing supplied,
# unless grouping by gene, in which case get aachange from gene name.
if(group_by == 'gene') {
effects[variant_type == 'aac', individual_label := sub('.*?[_ ]', '', variant_name)]
effects[variant_type != 'aac', individual_label := gsub('_', ' ', variant_name)]
} else if (uniqueN(effects$variant_id) != uniqueN(effects$variant_name)) {
effects[, individual_label := variant_id]
} else {
effects[, individual_label := gsub('_', ' ', variant_name)]
}
} else if(is.character(label_individual_variants) && length(label_individual_variants) == 1 &&
label_individual_variants %in% names(effects)) {
effects[, individual_label := effects[[label_individual_variants]]]
if(! is.character(effects$individual_label)) {
msg = paste0('Column specified for label_individual_variants (', label_individual_variants, ') is not type character.')
stop(pretty_message(msg, emit = F))
}
label_individual_variants = TRUE
} else if (! identical(label_individual_variants, FALSE)){
stop('label_individual_variants should be TRUE/FALSE or the name of a column in the effects table.')
}
if(label_individual_variants) {
# To-do: remove some labels when it seems like there will be way to many
num_variant_groups = uniqueN(effects$variant_group)
effects[, si_group_rank := frank(-selection_intensity), by = 'variant_group']
give_warning = FALSE
if(num_variant_groups > 25) {
label_text_size = 2
effects[si_group_rank > 3, individual_label := NA]
give_warning = max(effects$si_group_rank) > 3
} else if(num_variant_groups > 10) {
label_text_size = 2.25
effects[si_group_rank > 6, individual_label := NA]
give_warning = max(effects$si_group_rank) > 6
} else {
effects[si_group_rank > 15, individual_label := NA]
give_warning = max(effects$si_group_rank) > 15
effects[, individual_label_sizes := 2.5]
effects[si_group_rank > 6, individual_label_sizes := 2.25]
effects[si_group_rank > 10, individual_label_sizes := 2]
effects[, individual_label_sizes := min(individual_label_sizes), by = 'variant_group']
label_text_size = effects$individual_label_sizes
}
if(give_warning) {
msg = paste0('Some variant labels have been omitted due to the density of variants in the plot space.')
warning(pretty_message(msg, emit = F))
}
gg = gg + geom_label_repel(aes(label = individual_label), size = label_text_size, box.padding = .3, label.r = .2,
fill = alpha(c("white"), 0.9), label.size = .1, label.padding = .15,
segment.color = 'grey20', segment.size = .4,
position = position_nudge(x = 0, y = effects$y_nudge), na.rm = TRUE)
}
# Change axis label using group_by when not "variant" (unless user already explicitly specified
# via y_title).
if(group_by != 'variant' && is.null(y_title)) {
if (group_by == 'gene') {
gg = gg + labs(y = 'Gene') # auto-capitalize
} else {
gg = gg + labs(y = group_by)
}
}
# Validate legend_color_name
if(is.null(legend_color_name)) {
legend_color_name = color_by
} else if(! is.character(legend_color_name) || length(legend_color_name) != 1) {
stop('legend_color_name should be NULL or 1-length character.')
}
# Handle legend labels for colors.
if(! is.null(color_label)) {
if(! is.character(color_label)) {
stop('color_label should be type character.')
}
if(! color_label %in% names(effects)) {
stop("color_label column ", color_label, ' not found in effects table.')
}
effects[, fill_label := effects[[color_label]]]
if(! is.character(effects$fill_label)) {
stop("color_label column ", color_label, ' is not type character.')
}
if(uniqueN(effects[, .(point_fill, fill_label)]) != uniqueN(effects$fill_label)) {
stop("There is not a one-to-one correspondence between color names in color_by and labels in color_label.")
}
}
if(use_fill_identity) {
if(is.null(color_label)) {
gg = gg + scale_fill_identity()
} else {
gg = gg + scale_fill_identity(breaks = unique(effects$point_fill), labels = unique(effects$fill_label),
guide = guide_legend(), name = legend_color_name)
}
} else {
if(is.null(color_label)) {
if(is.numeric(effects$point_fill)) {
gg = gg + scale_fill_viridis_c(name = legend_color_name, option = viridis_option)
} else {
gg = gg + scale_fill_viridis_d(name = legend_color_name, begin = .2, end = .9, option = viridis_option)
}
} else {
unique_colors = unique(effects[, .(point_fill, fill_label)])
labels_by_color = setNames(unique_colors$fill_label, unique_colors$point_fill)
if(is.numeric(effects$point_fill)) {
gg = gg + scale_fill_viridis_c(name = legend_color_name, breaks = labels_by_color, labels = labels_by_color)
} else {
gg = gg + scale_fill_viridis_d(name = legend_color_name, labels = labels_by_color)
}
}
}
gg
}