-
Notifications
You must be signed in to change notification settings - Fork 10
/
plot_signature_effects.R
303 lines (272 loc) · 14.8 KB
/
plot_signature_effects.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#' Plot mutational source and effect attributions
#'
#' Compare the extent to which mutational signatures contribute mutations (mutational source share)
#' to the degree to which they contribute high-effect mutations (cancer effect share).
#'
#' @param mutational_effects Output from \link{mutational_signature_effects}(). To compare
#' groups of samples, supply a named list with each element corresponding to output
#' from a separate run of mutational_signature_effects().
#' @param signature_groupings A data.table of signature names and descriptions; signatures with
#' identical descriptions are grouped together. Only signatures present in the data get displayed.
#' Setting to "auto" (the default) uses the table returned by \code{\link{cosmic_signature_info}()}, which only
#' makes sense when using COSMIC signatures. A custom table should have columns "name",
#' "short_name", and "description". Additional options:
#' \itemize{
#' \item To force a signature group to appear in the plot even if it
#' has a low effect share, add a column called "prioritize" and set to TRUE where desired.
#' \item To make a signature appear in its own group, make its description unique.
#' \item Add a "color" column to manually specify colors for each group.
#' }
#' Alternatively, setting \code{signature_groupings = "cannataro"} applies the same signature
#' grouping and color palette as
#' \href{https://academic.oup.com/mbe/article/39/5/msac084/6570859}{Cannataro et al. 2022}.
#' @param viridis_option A viridis color mapping, specified with a single letter ('A' to 'H'). By
#' default, map 'G' (mako) is used.
#' @param num_sig_groups How many groups of signatures to display. Groups are ordered by their
#' highest effect shares, and the rest get lumped into an "other signatures" group.
#' @export
plot_signature_effects = function(mutational_effects = NULL,
signature_groupings = 'auto',
viridis_option = NULL,
num_sig_groups = 7) {
if(! require("ggplot2")) {
stop('Package ggplot2 must be installed for plotting.')
}
if(! is.numeric(num_sig_groups) || length(num_sig_groups) != 1 ||
num_sig_groups - as.integer(num_sig_groups) != 0 || num_sig_groups < 1) {
stop('num_sig_groups must be a positive integer.')
}
running_cannataro = identical(tolower(signature_groupings), 'cannataro')
using_cannataro_colors = is.null(viridis_option) && running_cannataro
if(is.null(viridis_option)) {
viridis_option = 'G'
} else if(! is.character(viridis_option) || length(viridis_option) != 1 || ! nchar(viridis_option) == 1) {
stop('Specify viridis color map with a single letter ("A"-"H")')
# Will leave the specific viridis character unvalidated, in case more maps are created in the future.
}
viridis_option = toupper(viridis_option)
# If num_sig_groups is not default, warn that num_sig_groups is ignored under cannataro
if(num_sig_groups != 7 && running_cannataro) {
warning('Ignoring num_sig_groups because signature_groupings = "cannataro".')
}
# assign signature_groupings to table according to signature_groupings parameter
if (identical(signature_groupings, 'auto')) {
signature_groupings = cosmic_signature_info()
}
# Check that mutational_effects is mutational_signature_effects() output, or a list of such outputs.
if (is(mutational_effects, 'list') && length(mutational_effects) == 2 &&
identical(names(mutational_effects), c("mutational_sources", "effect_shares"))) {
# Convert a single mutational effects output into 1-length list.
mutational_effects = list(mutational_effects)
sample_groupings = NA
} else if (is(mutational_effects, 'list') && ! is.null(names(mutational_effects)) &&
all(sapply(mutational_effects, function(x) length(x) == 2 &&
identical(names(x), c("mutational_sources", "effect_shares"))))) {
# Check that mutational_effects is a named list containing expected outputs.
# If so, extract names and put into sample_groupings.
sample_groupings = names(mutational_effects)
if(uniqueN(setdiff(sample_groupings, '')) != length(sample_groupings)) {
stop('mutational_effects list entries should have unique names')
}
} else {
stop('mutational_effects must be mutational_signature_effects() output or a named list of such outputs.')
}
# Create empty list to load data into
final_df_list = list()
df_weights = rbindlist(lapply(mutational_effects,
function(x) {
weights = x$mutational_sources$average_source_shares
data.table(type = 'SW', prop = weights, name = names(weights))
}), idcol = 'sample_group')
df_effects = rbindlist(lapply(mutational_effects,
function(x) {
effects = x$effect_shares$average_effect_shares
data.table(type = 'CEW', prop = effects, name = names(effects))
}), idcol = 'sample_group')
df_final = rbind(df_weights, df_effects)
if (running_cannataro) {
# group signatures into suggested categories as outlined in Cannataro et al.
signature_groupings = list(
"Deamination with age, clock-like (1)" = "SBS1",
"Unknown, clock-like (5)" = "SBS5",
"APOBEC (2, 13)" = c("SBS2", "SBS13"),
"Defective homologous recombination (3)" = "SBS3",
"Tobacco (4, 29)" = c("SBS4", "SBS29"),
"UV light (7a–d, 38)" = c("SBS7a", "SBS7b", "SBS7c", "SBS7d", "SBS38"),
"Prior treatment (11, 31, 32, 35)" = c("SBS11", "SBS31", "SBS32", "SBS35"),
"Mutagenic chemical exposure (22, 24, 42, 88)" = c("SBS22", "SBS24", "SBS42", "SBS88"),
"Alcohol-associated (16)" = "SBS16"
)
other_label = "Non-actionable and unknown signatures"
} else if(! is.data.table(signature_groupings)) {
stop('signature_groupings should be "auto", "cannataro", or a data.table describing the signatures (see docs).')
} else {
if(length(signature_groupings) != uniqueN(names(signature_groupings))) {
stop('Input signature_groupings table has repeated column names.')
}
required_cols = c('name', 'short_name', 'description')
missing_cols = setdiff(required_cols, names(signature_groupings))
if(length(missing_cols) > 0) {
stop('Missing columns in signature_groupings table: ', paste0(missing_cols, collapse = ', '), '.')
}
for(col in required_cols) {
if(! is.character(signature_groupings[[col]])) {
stop("Column ", col, " should be type character.")
}
}
if('other signatures' %in% signature_groupings$description) {
stop('signature_groupings has signatures with description \"other signatures\", which is reserved.')
}
other_label = 'other signatures'
all_effect_shares = rbindlist(lapply(mutational_effects, function(x) as.list(x$effect_shares$average_effect_shares)), idcol = 'cohort')
shares_melted = melt.data.table(all_effect_shares, id.vars = 'cohort', variable.factor = F, variable.name = 'name')
# Subset signature groupings to only include signatures that actually appear
signature_groupings = signature_groupings[name %in% shares_melted$name]
signature_groupings[is.na(short_name), short_name := '']
signature_groupings[is.na(description) | description == '',
let(description = short_name, short_name = '')]
shares_melted[signature_groupings, c('description', 'short_name') := .(description, short_name), on = 'name']
# Don't consider signatures in other groups (they'll be represented in the "other" group)
shares_melted = shares_melted[! is.na(short_name)]
shares_melted[, sig_grp_id := .GRP, by = 'description']
summed_shares = shares_melted[, .(in_grp_sum = sum(value), description = description[1]), by = c('cohort', 'sig_grp_id')]
prioritized_ids = character()
if('prioritize' %in% names(signature_groupings)) {
if(! is.logical(signature_groupings$prioritize)) {
stop('The optional prioritize column in signature_groupings is expected to be type logical.')
}
if(uniqueN(signature_groupings[, .(description, prioritize)]) != uniqueN(signature_groupings$description)) {
stop('Unusable prioritize column in signature_groupings table: Must have consistent value for all signatures with matching description..')
}
summed_shares[signature_groupings, prioritize := prioritize, on = 'description']
prioritized_ids = summed_shares[prioritize == T, unique(sig_grp_id)]
}
top_grp_ids = summed_shares[order(-in_grp_sum)][, unique(sig_grp_id)]
top_grp_ids = na.omit(unique(c(prioritized_ids, top_grp_ids))[1:num_sig_groups])
final_groupings = list()
for(id in top_grp_ids) {
grp_info = shares_melted[sig_grp_id == id]
short_names = paste(unique(grp_info$short_name), collapse = ', ')
descrip = grp_info$description[1]
if(short_names != '') {
short_names = paste0('(', short_names, ')')
}
curr_descrip = paste0(grp_info$description[1], ' ', short_names)
if(nchar(curr_descrip) > 45) {
curr_descrip = paste0(grp_info$description[1], '\n', short_names)
}
curr_sigs = unique(grp_info$name)
final_groupings[[curr_descrip]] = curr_sigs
}
if('color' %in% names(signature_groupings)) {
if(! is.character(signature_groupings$color)) {
stop('The optional color column in signature_groupings is expected to be type character.')
}
if(uniqueN(signature_groupings[, .(description, color)]) != uniqueN(signature_groupings$description)) {
stop('Unusable color column in signature_groupings table: Exactly one color must be associated with each signature group.')
}
sig_to_color = unique(signature_groupings[, .(name, color)])
df_final[sig_to_color, color := color, on = 'name']
df_final[! name %in% unlist(final_groupings), color := NA]
}
signature_groupings = final_groupings
}
# Signatures present in data but not represented in a signature grouping join "other" signatures
other_signatures = setdiff(df_effects$name, unlist(signature_groupings))
if(all(unlist(signature_groupings) %like% '^SBS') && ! all(other_signatures %like% '^SBS')) {
msg = paste0('Data contains some non-SBS signatures, but the signature_groupings table only describes SBS signatures. ',
'Non-SBS signatures will all be grouped with "', other_label, '". Consider updating ',
'the signature_groupings to include these signatures.')
warning(pretty_message(msg, emit = F))
}
signature_groupings[[other_label]] = other_signatures
# Unwind list to get a table matching signatures to their labels
signature_labels = rbindlist(lapply(1:length(signature_groupings),
function(x) data.table(name = signature_groupings[[x]],
label = names(signature_groupings)[x])))
# create group column and assign signature_groupings
# nested loop syntax is for cases when signature_groupings contains several signatures
# Ex: "UV light" = SBS7a-d and SBS38
df_final[signature_labels, group := label, on = 'name']
# first, we reorder and rename some values to make the graph pretty.
# reorder facets because the order is weird
df_final$facet = factor(df_final$sample_group,
levels = sample_groupings)
# rename values
df_final$weight_type = ifelse(df_final$type == "SW",
"Source\nshare",
"Effect\nshare")
# reorder weight bars manually to show Source Share, then Effect Share
df_final$weight_type = factor(
df_final$weight_type,
levels = c("Effect\nshare", "Source\nshare")
)
# Order signature groups by mean effect share (or use the cannataro order)
summed_shares = df_final[type == 'CEW' & group != other_label, .(summed_share = sum(prop)), by = c('group', 'sample_group')]
mean_share_order = summed_shares[, .(mean_share = mean(summed_share)), by = 'group'][order(mean_share), group]
final_order = c(other_label, mean_share_order)
df_final$group = factor(df_final$group, levels = final_order)
gg = ggplot(data = df_final) +
geom_bar(
mapping = aes(
y = weight_type,
fill = group,
weight = prop
),
position = 'fill',
width = .9,
color = 'black'
) +
xlab('Proportion') + ylab('') +
theme_classic() + theme(
axis.ticks.y = element_blank(),
axis.title.x = element_text(size = 10),
axis.text.y = element_text(size = 8),
axis.line.y = element_blank(),
legend.position = 'right') +
scale_x_continuous(n.breaks = 10, limits = c(-0.001, 1.001), expand = expansion(add = 0))
if(! any(is.na(df_final$facet))) {
gg = gg + facet_wrap(~ facet, ncol = 1, strip.position = 'left') +
theme(strip.background = element_blank(), strip.placement = 'outside',
strip.text.y.left = element_text(size = 12, angle = 0))
}
sig_legend_name = 'Signatures'
if(all(df_final$name %like% '^SBS')) {
sig_legend_name = 'Signatures (COSMIC SBS)'
}
if(using_cannataro_colors) {
# Use custom palette outlined in Cannataro et al.
cannataro_colors = c(
"Deamination with age, clock-like (1)" = "gray40",
"Unknown, clock-like (5)" = "gray60",
"APOBEC (2, 13)" = "#7570b3",
"Defective homologous recombination (3)" = "#e7298a",
"Tobacco (4, 29)" = "#a6761d",
"UV light (7a–d, 38)" = "#e6ab02",
"Prior treatment (11, 31, 32, 35)" = "#1b9e77",
"Mutagenic chemical exposure (22, 24, 42, 88)" = "#66a61e",
"Alcohol-associated (16)" = "#d95f02",
"Non-actionable and unknown signatures" = "black"
)
gg = gg + scale_fill_manual(name = sig_legend_name, values = cannataro_colors)
} else if(is.null(df_final$color)) {
gg = gg + scale_fill_viridis_d(name = sig_legend_name, option = viridis_option, begin = .2, direction = -1)
} else {
df_final[group == other_label, color := 'white']
group_to_color = setNames(unique(df_final$color), unique(df_final$group))
gg = gg + scale_fill_manual(name = sig_legend_name, values = group_to_color)
}
gg = gg + guides(fill = guide_legend(reverse=TRUE))
return(gg)
}
#' Get COSMIC signature descriptions
#'
#' Returns a table describing COSMIC signatures. All signatures from v3.0 to the latest release
#' (v3.4) are included, with information derived from the most recent information on the COSMIC
#' website. (Exception: A reported association between SBS16 and alcohol consumption, noted here, is
#' not mentioned on the COSMIC website.)
#'
#' @export
cosmic_signature_info = function() {
return(copy(cosmic_sbs_signature_etiology))
}