-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualization.R
1769 lines (1550 loc) · 77.9 KB
/
visualization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# helper functions ===
# find the name of the time column and its unit in a raw data frame
find_time_column <- function(df) {
  # Time columns are expected to be named "time.<unit>"; the capture group of
  # the pattern holds the unit part of the name.
  pattern <- "^time\\.(.*)$"
  candidates <- stringr::str_subset(names(df), pattern)

  # Exactly one matching column is required; zero or several matches are both
  # reported with the same error (the list of matches is appended).
  if (length(candidates) != 1) {
    stop(
      "unclear which column is the time column, consider an explicit 'iso_convert_time' call, found: ",
      str_c(candidates, collapse = ", "),
      call. = FALSE
    )
  }

  # For the single match, element 1 of the match result is the full column
  # name and element 2 is the captured unit.
  matched <- stringr::str_match(candidates, pattern)

  # Returns a list with the column name (`column`) and its unit (`unit`).
  list(column = candidates, unit = matched[2])
}
# plotting data ====
#' Prepare plotting data from continuous flow files
#'
#' This function helps with the preparation of plotting data from continuous flow data files (i.e. the raw chromatogram data). Call either explicitly and pass the result to \code{\link{iso_plot_continuous_flow_data}} or let \code{\link{iso_plot_continuous_flow_data}} take care of preparing the plotting data directly from the \code{iso_files}. If a \code{peak_table} is provided for peak annotation purposes, uses \code{\link{iso_combine_raw_data_with_peak_table}} to combine the raw data from the iso_files with the provided peak data table.
#'
#' @param iso_files collection of iso_file objects
#' @param data which masses and ratios to plot (e.g. \code{c("44", "45", "45/44")} - without the units), if omitted, all available masses and ratios are plotted. Note that ratios should be calculated using \code{\link{iso_calculate_ratios}} prior to plotting.
#' @param time_interval which time interval to plot
#' @param time_interval_units which units the time interval is in, default is "seconds"
#' @param filter any filter condition to apply to the data beyond the masses/ratio selection (param \code{data}) and time interval (param \code{time_interval}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()} (i.e. all file info is available for plotting aesthetics).
#' @param normalize whether to normalize the data (default is FALSE, i.e. no normalization). If TRUE, normalizes each trace across all files (i.e. normalized to the global max/min). This is particularly useful for overlay plotting different mass and/or ratio traces (\code{panel = NULL}). Note that zooming (if \code{zoom} is set) is applied after normalizing.
#' @param zoom if not set, automatically scales to the maximum range in the selected time_interval in each plotting panel. If set, scales by the indicated factor, i.e. values > 1 are zoom in, values < 1 are zoom out, baseline always remains the bottom anchor point. Note that zooming is always relative to the max in each zoom_group (by default \code{zoom_group = data}, i.e. each trace is zoomed separately). The maximum considered may be outside the visible time window. Note that for \code{zoom_group} other than \code{data} (e.g. \code{file_id} or \code{NULL}), zooming is relative to the max signal across all mass traces. Typically it makes most sense to set the \code{zoom_group} to the same variable as the planned \code{panel} parameter to the plotting function. Lastly, note that zooming only affects masses, ratios are never zoomed.
#' @param peak_table a data frame that describes the peaks in this chromatogram. By default, the chromatographic data is prepared WITHOUT peaks information. Supply this parameter to add in the peaks data. Typically via \code{\link{iso_get_peak_table}}. Note that the following parameters must also be set correctly IF \code{peak_table} is supplied and has non-standard column names: \code{file_id}, \code{rt}, \code{rt_start}, \code{rt_end}.
#' @inheritParams iso_combine_raw_data_with_peak_table
#' @family plot functions
#' @export
iso_prepare_continuous_flow_plot_data <- function(
  iso_files, data = c(), include_file_info = NULL,
  time_interval = c(), time_interval_units = "seconds",
  filter = NULL,
  normalize = FALSE, zoom = NULL, zoom_group = data,
  peak_table = NULL, file_id = default(file_id),
  rt = default(rt), rt_start = default(rt_start), rt_end = default(rt_end),
  rt_unit = NULL) {

  # Processing order (each step works on the result of the previous one):
  #  1. gather the raw data and join the requested file info by file_id
  #  2. keep only the requested masses/ratios (`data`)
  #  3. record the requested time window in the time_min/time_max columns
  #  4. apply the user supplied `filter` expression
  #  5. compute zoom baseline/max per zoom group (before time filtering)
  #  6. restrict to the time window, then normalize and/or zoom
  #  7. turn data / data_wo_units into factors ordered like `data`
  #  8. optionally combine with the peak table
  # The returned data frame carries the helper columns time_min, time_max,
  # is_ratio, data_wo_units, zoom_cutoff and normalized (plus baseline and
  # max_signal whenever zooming or normalizing is active).

  # safety checks
  if (!iso_is_continuous_flow(iso_files))
    stop("can only prepare continuous flow iso_files for plotting", call. = FALSE)

  # global vars
  # FIXME for CRAN

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE)
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # add in file info
  file_info <- iso_get_file_info(iso_files, select = !!enquo(include_file_info), quiet = TRUE)
  raw_data <- dplyr::left_join(raw_data, file_info, by = "file_id")

  # check for zoom_group column(s) existence
  # (zoom_group = NULL means one global group, represented by the constant 1)
  aes_quos <- list(zoom_group = enquo(zoom_group))
  if (rlang::quo_is_null(aes_quos$zoom_group)) aes_quos$zoom_group <- quo(1)
  check_expressions(raw_data, aes_quos$zoom_group)

  # only work with desired data (masses and ratios)
  select_data <- if (length(data) == 0) unique(raw_data$data) else as.character(data)
  missing <- setdiff(select_data, unique(raw_data$data))
  if (length(missing) > 0)
    stop("data not available in the provided iso_files (don't include units): ", str_c(missing, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, data %in% select_data)

  # time column
  time_info <- find_time_column(raw_data)

  # time interval (converted to the unit of the time column)
  if (length(time_interval) == 2) {
    time_interval <- scale_time(time_interval, to = time_info$unit, from = time_interval_units)
    raw_data <- mutate(raw_data, time_min = time_interval[1], time_max = time_interval[2])
  } else if (length(time_interval) > 0) {
    # any non-empty interval that does not have exactly two entries is invalid
    stop("time interval needs to be a vector with two numeric entries, found: ", str_c(time_interval, collapse = ", "), call. = FALSE)
  } else {
    # no interval requested: mark the window as unbounded
    raw_data <- mutate(raw_data, time_min = -Inf, time_max = Inf)
  }
  raw_data <- select(raw_data, 1:time_info$column, time_min, time_max, everything())

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
  }

  # border extrapolation function: for every row flagged as a border, replace
  # its time with the linearly interpolated time at which the signal crosses
  # the zoom cutoff
  extrapolate_border <- function(cutoff, border, change, time, value) {
    cutoff <- unique(cutoff)
    if (length(cutoff) != 1) stop("problematic cutoff", call. = FALSE)
    bi <- which(border) # border indices
    bi2 <- bi + 1 - change[bi] # end point of border
    bi1 <- bi - change[bi] # start point of border
    border_time <- (cutoff - value[bi1])/(value[bi2] - value[bi1]) * (time[bi2] - time[bi1]) + time[bi1]
    time[bi] <- border_time
    return(time)
  }

  # normalize data: rescale each data trace to its own min/max across all files
  normalize_data <- function(df) {
    group_by(df, data) %>%
      mutate(value = (value - min(value, na.rm = TRUE))/
               (max(value, na.rm = TRUE) - min(value, na.rm = TRUE))) %>%
      ungroup()
  }

  # plot data
  plot_data <-
    raw_data %>%
    # make ratio identification simple
    mutate(is_ratio = category == "ratio") %>%
    # add units to data for proper grouping
    mutate(
      data_wo_units = data,
      data = ifelse(!is.na(units), str_c(data, " [", units, "]"), data)
    ) %>%
    select(1:category, is_ratio, data, data_wo_units, everything()) %>%
    arrange(!!sym(time_info$column)) %>%
    # find global zoom cutoffs per group before time filtering (don't consider ratios)
    {
      if (!is.null(zoom)) {
        mutate(., ..zoom_group = !!aes_quos$zoom_group) %>%
          group_by(..zoom_group) %>%
          mutate(
            baseline = value[!is_ratio] %>% { if(length(.) == 0) NA else min(.) },
            max_signal = value[!is_ratio] %>% { if(length(.) == 0) NA else max(.) }) %>%
          ungroup() %>%
          select(-..zoom_group)
      } else .
    } %>%
    # time filtering
    { if (length(time_interval) == 2) dplyr::filter(., dplyr::between(!!sym(time_info$column), time_interval[1], time_interval[2])) else . } %>%
    # info fields
    mutate(zoom_cutoff = NA_real_, normalized = FALSE) %>%
    # normalizing
    {
      if (normalize) {
        normalize_data(.) %>%
          # zooming always based on the full (0 to 1) interval
          mutate(baseline = 0, max_signal = 1, normalized = TRUE)
      } else .
    } %>%
    # zooming
    {
      if (!is.null(zoom)) {
        # extrapolate data (multi-panel requires this instead of coord_cartesian)
        group_by(., file_id, data) %>%
          # note: this does not do perfectly extrapolating the line when only a single
          # data point is missing (extrapolation on the tail is missing) because it
          # would require adding another row to account both for gap and extrapolated
          mutate(
            zoom_cutoff = 1/zoom * max_signal + (1 - 1/zoom) * baseline, # cutoffs
            discard = ifelse(!is_ratio, value > zoom_cutoff, FALSE), # never zoom ratios
            change = c(0, diff(discard)), # check for where the cutoffs
            border = change == 1 | c(change[-1], 0) == -1, # identify borders
            gap = c(0, change[-dplyr::n()]) == 1, # identify gaps next to one border so ggplot knows the line is interrupted
            !!sym(time_info$column) := extrapolate_border(zoom_cutoff, border, change, !!sym(time_info$column), value), # calculate time at border
            value = ifelse(border, zoom_cutoff, ifelse(gap, NA, value)) # assign values
          ) %>%
          ungroup() %>%
          dplyr::filter(!discard | border | gap) %>%
          select(-discard, -change, -border, -gap)
        # #%>%
        # { # re-normalize after zooming if normalizing is turned on
        #   if (normalize) normalize_data(.) %>% mutate(zoom_cutoff = 1.0)
        #   else .
        # }
      } else .
    } %>%
    # switch to factors for proper grouping
    {
      # named vector: names = data with units, values = data without units
      data_levels <- tibble::deframe(select(., data, data_wo_units) %>% unique())
      # positions of the levels in the order requested via `data`
      data_sorting <- sapply(select_data, function(x) which(data_levels == x)) %>% unlist(use.names = FALSE)
      mutate(.,
             data = factor(data, levels = names(data_levels)[data_sorting]),
             data_wo_units = factor(data_wo_units, levels = unique(as.character(data_levels)[data_sorting])))
    } %>%
    dplyr::arrange(tp, data)

  # peaks table (only combined if one was supplied and it has rows)
  if (!is.null(peak_table) && nrow(peak_table) > 0) {
    plot_data <- iso_combine_raw_data_with_peak_table(
      plot_data, peak_table,
      file_id = !!enquo(file_id), data_trace = data,
      rt = !!enquo(rt), rt_start = !!enquo(rt_start), rt_end = !!enquo(rt_end),
      rt_unit = rt_unit)
  }

  # return
  return(plot_data)
}
# raw data plots =====
#' Plot raw data from isoreader files
#'
#' Convenience function for making quick standard plots for raw isoreader data.
#' Calls \code{\link{iso_plot_continuous_flow_data}}, \code{\link{iso_plot_dual_inlet_data}} and \code{\link{iso_plot_scan_data}} for data specific plotting (see those functions for parameter details). For customizing plotting calls, it is recommended to use \code{\link{iso_plot_continuous_flow_data}}, \code{\link{iso_plot_dual_inlet_data}} and \code{\link{iso_plot_scan_data}} directly.
#'
#' @param iso_files collection of iso_file objects
#' @param ... parameters for the data specific plotting functions
#' @inheritParams iso_show_default_processor_parameters
#' @family plot functions
#' @export
iso_plot_raw_data <- function(iso_files, ..., quiet = default(quiet)) {
  # only iso file objects (single files or lists of files) can be plotted
  if (!iso_is_object(iso_files)) {
    stop("can only plot iso files or lists of iso files", call. = FALSE)
  }

  # work with a file list from here on, regardless of what was passed in
  iso_files <- iso_as_file_list(iso_files)

  if (!quiet) {
    message(sprintf("Info: plotting data from %d data file(s)", length(iso_files)))
  }

  # hand off to the plotting function for the detected data type;
  # all additional arguments are forwarded unchanged
  if (iso_is_continuous_flow(iso_files)) {
    return(iso_plot_continuous_flow_data(iso_files, ...))
  }
  if (iso_is_dual_inlet(iso_files)) {
    return(iso_plot_dual_inlet_data(iso_files, ...))
  }
  if (iso_is_scan(iso_files)) {
    return(iso_plot_scan_data(iso_files, ...))
  }

  stop("plotting of this type of iso_files not yet supported", call. = FALSE)
}
# continuous flow ======
#' Plot chromatogram from continuous flow data
#'
#' This function provides easy plotting for mass and ratio chromatograms from continuous flow IRMS data. It can be called either directly with a set of \code{iso_file} objects, or with a data frame prepared for plotting chromatographic data (see \code{\link{iso_prepare_continuous_flow_plot_data}}).
#'
#' @param ... S3 method placeholder parameters, see class specific functions for details on parameters
#' @family plot functions
#' @export
iso_plot_continuous_flow_data <- function(...) {
  # S3 generic: the method is selected by the class of the first argument
  # passed through `...`.
  UseMethod("iso_plot_continuous_flow_data")
}
#' @export
iso_plot_continuous_flow_data.default <- function(x, ...) {
  # Fallback for unsupported inputs: always fails, naming the first class of x.
  msg <- paste0(
    "this function is not defined for objects of type '",
    class(x)[1],
    "'"
  )
  stop(msg, call. = FALSE)
}
#' @export
iso_plot_continuous_flow_data.iso_file <- function(iso_files, ...) {
  # Single file: convert to a file list and dispatch again on the result
  # (presumably an iso_file_list - confirm against iso_as_file_list).
  file_list <- iso_as_file_list(iso_files)
  iso_plot_continuous_flow_data(file_list, ...)
}
#' @inheritParams iso_prepare_continuous_flow_plot_data
#' @param peak_table a data frame that describes the peaks in this chromatogram. By default, the peak table from the \code{iso_files} is used if any peak features are requested in the plot (e.g. \code{peak_marker=TRUE} or \code{peak_bounds=TRUE}). If \code{peak_table} is supplied with non-standard column names, the following parameters must also be set correctly: \code{file_id}, \code{rt}, \code{rt_start}, \code{rt_end}, and potentially \code{rt_unit}.
#' @rdname iso_plot_continuous_flow_data
#' @export
iso_plot_continuous_flow_data.iso_file_list <- function(
  iso_files, data = c(),
  time_interval = c(), time_interval_units = "seconds",
  filter = NULL,
  normalize = FALSE, zoom = NULL,
  panel = data, color = file_id, linetype = NULL, label = file_id,
  peak_table = iso_get_peak_table(iso_files, quiet = TRUE), file_id = default(file_id),
  rt = default(rt), rt_start = default(rt_start), rt_end = default(rt_end),
  rt_unit = NULL,
  peak_marker = FALSE, peak_bounds = FALSE, peak_bgrd = FALSE,
  peak_label = NULL, peak_label_filter = NULL, peak_label_size = 2, peak_label_repel = 1) {

  # Two-step method: (1) prepare the plotting data frame from the iso_files,
  # (2) dispatch to the data frame method to draw it. Aesthetic and filter
  # expressions are captured as quosures and forwarded unevaluated.

  # safety checks
  if (!iso_is_continuous_flow(iso_files))
    stop("iso_plot_continuous_flow_data can only plot continuous flow iso_files", call. = FALSE)

  # need peak table?
  # The peak table argument is captured unevaluated so that the (potentially
  # expensive) default is only computed when a peak feature is requested.
  peak_table_quo <- enquo(peak_table)
  peak_label_quo <- enquo(peak_label)
  if (peak_marker || peak_bounds || peak_bgrd || !rlang::quo_is_null(peak_label_quo)) {
    peak_table <- rlang::eval_tidy(peak_table_quo)
  } else {
    peak_table <- NULL
  }

  # retrieve data (with all info so additional aesthetics are easy to include)
  # note: zooming is grouped by the same expression that is used for paneling
  panel_quo <- enquo(panel)
  plot_data <- iso_prepare_continuous_flow_plot_data(
    iso_files,
    data = data,
    include_file_info = everything(),
    time_interval = time_interval,
    time_interval_units = time_interval_units,
    filter = !!enquo(filter),
    normalize = normalize,
    zoom = zoom,
    zoom_group = !!panel_quo,
    peak_table = peak_table,
    file_id = !!enquo(file_id),
    rt = !!enquo(rt),
    rt_start = !!enquo(rt_start),
    rt_end = !!enquo(rt_end),
    rt_unit = rt_unit
  )

  # plot
  iso_plot_continuous_flow_data(
    plot_data,
    panel = !!panel_quo,
    color = !!enquo(color),
    linetype = !!enquo(linetype),
    label = !!enquo(label),
    peak_marker = peak_marker,
    peak_bounds = peak_bounds,
    # forward peak_bgrd too so the data frame method can report that peak
    # backgrounds are not implemented instead of the request being dropped
    peak_bgrd = peak_bgrd,
    peak_label = !!peak_label_quo,
    peak_label_filter = !!enquo(peak_label_filter),
    peak_label_size = peak_label_size,
    peak_label_repel = peak_label_repel
  )
}
#' @rdname iso_plot_continuous_flow_data
#' @param df a data frame of the chromatographic data prepared for plotting (see \code{\link{iso_prepare_continuous_flow_plot_data}})
#' @param panel whether to panel plot by anything - any column or complex expression is possible (see notes in the \code{filter} parameter for available raw data columns and \code{\link{iso_get_file_info}} for available file info columns) but the most commonly used options are \code{panel = NULL} (overlay all), \code{panel = data} (by mass/ratio data), \code{panel = file_id} (panel by files, alternatively use any appropriate file_info column or expression that's unique for each file). The default is panelling by the \code{data} column.
#' @param color whether to color plot by anything, options are the same as for \code{panel} but the default is \code{file_id}
#' @param linetype whether to differentiate by linetype, options are the same as for \code{panel} but the default is \code{NULL} (i.e. no linetype aesthetic). Note that a limited number of linetypes (6) is defined by default and the plot will fail if a higher number is required unless specified using \code{\link[ggplot2]{scale_linetype}}.
#' @param label this is primarily of use for turning the generated ggplots into interactive plots via \code{\link[plotly]{ggplotly}} as the \code{label} will be rendered as an additional mouseover label. Any unique file identifier is a useful choice, the default is \code{file_id}.
#' @param peak_marker whether to mark identified peaks with a vertical line at the peak retention time. Only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_marker = TRUE} but no peaks were identified.
#' @param peak_bounds whether to mark the boundaries of identified peaks with a vertical line at peak start and end retention times. Only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_bounds = TRUE} but no peaks were identified.
#' @param peak_bgrd NOT YET IMPLEMENTED whether to show the background of identified peaks from start to end retention times. Only works if a \code{peak_table} was provided that has \code{bgrdX_start} and \code{bgrdX_end} columns in the same units as the raw data.
#' @param peak_label whether to label identified peaks. Any valid column or complex expression is supported and ALL columns in the provided \code{peak_table} can be used in this expression. The easiest way to generate well constructed peak labels is via the \code{\link{iso_format}} function. To provide more space for peak labels, it is sometimes useful to use a \code{zoom} value smaller than 1 to zoom out a bit, e.g. \code{zoom = 0.9}. If peak labels overlap, consider changing \code{peak_label_size} and/or \code{peak_label_repel}. Note that this only works if a \code{peak_table} was provided to identify the peaks and will issue a warning if \code{peak_label} is set but no peaks were identified. Also note that peaks whose value at the peak retention time is not visible on the panel due to e.g. a high \code{zoom} value will not have a visible label either.
#' @param peak_label_filter a filter for the peak labels (if supplied). Can be useful for highlighting only a subset of peaks with peak labels (e.g. only one data trace, or only those in a certain portion of the chromatogram). Only interpreted if \code{peak_table} is set.
#' @param peak_label_size the font size for the peak labels. Depends largely on how much data is shown and how busy the chromatograms are. Default is a rather small font size (2), adjust as needed.
#' @param peak_label_repel how strongly the labels repel each other. Increase the value if large labels overlap (e.g. to 5 or 10).
#' @export
iso_plot_continuous_flow_data.data.frame <- function(
  df, panel = data, color = file_id, linetype = NULL, label = file_id,
  peak_marker = FALSE, peak_bounds = FALSE, peak_bgrd = FALSE,
  peak_label = NULL, peak_label_filter = NULL, peak_label_size = 2, peak_label_repel = 1
) {

  # Builds the chromatogram ggplot from a prepared plotting data frame.
  # Layers are added in this order: peak boundary rectangles, peak marker
  # lines, the chromatogram lines, peak labels, invisible "ghost" points that
  # pin the zoom frame, and finally facets plus color/linetype/label
  # aesthetics. Returns the ggplot object.

  # check for data
  if (nrow(df) == 0) stop("no data provided", call. = FALSE)

  # check for time column
  time_info <- find_time_column(df)

  # quos and other column checks
  aes_quos <- list(
    panel = enquo(panel), color = enquo(color), linetype = enquo(linetype),
    label = enquo(label), peak_label = enquo(peak_label)
  )
  # NOTE(review): the result is not used below; presumably this call is made
  # to verify that the required columns exist (cf. the baseline check further
  # down) - confirm against get_column_names
  aes_cols <- get_column_names(
    df,
    file_id = quo("file_id"),
    time_min = quo("time_min"), time_max = quo("time_max"),
    is_ratio = quo("is_ratio"), data = quo("data"), value = quo("value"))
  peak_cols <- c("peak_marker", "peak_point", "peak_start", "peak_end") %in% names(df)
  check_expressions(df, aes_quos$color, aes_quos$linetype, aes_quos$label, aes_quos$panel)

  # add panel column to allow expressions
  if (!quo_is_null(aes_quos$panel)) {
    df <- mutate(df, ..panel = !!aes_quos$panel)
  }

  # find overall plot parameters from data frame
  # (scalar && so that a missing column short-circuits to FALSE instead of
  # producing a zero-length condition)
  normalize <- col_in_df(df, "normalized") && df$normalized[1]
  # zooming is active if any row carries a cutoff (ratio rows never do, so
  # looking at the first row only is not sufficient)
  zoom <- col_in_df(df, "zoom_cutoff") && !all(is.na(df$zoom_cutoff))
  # normalization should be a single value across the whole data frame
  # (missing entries are tolerated)
  if (col_in_df(df, "normalized"))
    stopifnot(all(df$normalized == df$normalized[1], na.rm = TRUE))

  # generate plot
  p <- ggplot(df) +
    aes(!!sym(time_info$column), value, group = paste(file_id, data)) +
    scale_x_continuous(str_c("Time ", time_info$unit), expand = c(0, 0)) +
    scale_y_continuous(if(normalize) "Normalized Signal" else "Signal", expand = c(0, 0)) +
    theme_bw()

  # peak cols safety check: without the peak columns all peak features are
  # switched off and a warning is issued
  if (!all(peak_cols) && (peak_marker || peak_bounds || !quo_is_null(aes_quos$peak_label))) {
    peak_marker <- FALSE
    peak_bounds <- FALSE
    aes_quos$peak_label <- quo(NULL)
    glue::glue(
      "peak features requested but peak identifications seem to be missing - ",
      "ignoring all peak feature parameters. Please make sure to provide a peak_table.") %>%
      warning(immediate. = TRUE, call. = FALSE)
  }
  if (!quo_is_null(aes_quos$peak_label)) {
    check_expressions(df, aes_quos$peak_label)
  }

  # peak boundaries - consider making this an area background
  if (peak_bounds && nrow(dplyr::filter(df, peak_point > 0 & (peak_start | peak_end))) > 0) {
    p <- p +
      geom_rect(
        data = function(df) dplyr::filter(df, peak_point > 0 & (peak_start | peak_end)) %>%
          dplyr::group_by(peak_point) %>%
          dplyr::mutate(xmin = ifelse(any(peak_start),
                                      min((!!sym(time_info$column))[peak_start]), -Inf),
                        xmax = ifelse(any(peak_end),
                                      max((!!sym(time_info$column))[peak_end]), Inf)) %>%
          dplyr::ungroup() %>%
          dplyr::filter(!is.infinite(xmin) | !is.infinite(xmax)) %>%
          # collapse double entries without losing any other potentially important aesthetics info
          dplyr::select(-!!sym(time_info$column), -value, -peak_start, -peak_end, -peak_marker) %>%
          { if ("tp" %in% names(.)) dplyr::select(., -tp) else . } %>%
          unique(),
        mapping = aes(x = NULL, y = NULL, xmin = xmin, xmax = xmax, ymin = -Inf, ymax = Inf, color = NULL),
        fill = "grey20", color = NA, alpha = 0.1, show.legend = FALSE
      )
  }

  # peak markers
  if (peak_marker) {
    p <- p +
      geom_vline(
        data = function(df) dplyr::filter(df, peak_marker),
        mapping = aes(xintercept = !!sym(time_info$column), color = NULL),
        color = "black", linetype = 2
      )
  }

  # peak backgrounds
  # NOT YET IMPLEMENTED
  # note that this requires some scaling (just like the time units) to do it right
  if (peak_bgrd) {
    warning("sorry, peak bgrds are not yet implemented", call. = FALSE, immediate. = FALSE)
  }

  # draw chromatograms
  p <- p + geom_line()

  # peak labels
  if (!quo_is_null(aes_quos$peak_label)) {
    peak_label_filter_quo <- enquo(peak_label_filter)
    # only add the label layer if at least one label survives the filter
    has_any_labels <-
      dplyr::filter(df, peak_marker) %>%
      {
        if(!quo_is_null(peak_label_filter_quo))
          dplyr::filter(., !!peak_label_filter_quo)
        else
          .
      } %>%
      nrow()
    if (has_any_labels > 0) {
      p <- p +
        ggrepel::geom_label_repel(
          data = function(df)
            dplyr::filter(df, peak_marker) %>%
            {
              if(!quo_is_null(peak_label_filter_quo))
                dplyr::filter(., !!peak_label_filter_quo)
              else
                .
            },
          mapping = aes_(label = aes_quos$peak_label),
          show.legend = FALSE,
          force = peak_label_repel,
          #box.padding = 1,
          min.segment.length = 0,
          size = peak_label_size,
          segment.color = "black",
          segment.alpha = 0.5,
          segment.size = 0.5,
          direction = "both"
        )
    }
  }

  # zoom ghost points to make sure the zooming frame remains the same (if zoom is set)
  if (zoom) {
    panel_zoom_group <- if (!rlang::quo_is_null(aes_quos$panel)) quo(..panel) else quo(1)
    get_column_names(df, baseline = quo(baseline)) # check that baseline exists
    p <- p +
      # one size-0 point per panel at the baseline (bottom of the frame) ...
      geom_point(data = function(df)
        df %>%
          group_by(!!panel_zoom_group) %>%
          summarize(time = mean(!!sym(time_info$column), na.rm = TRUE), value = min(baseline, na.rm = TRUE)) %>%
          dplyr::filter(!is.na(value)),
        mapping = aes(x = time, y = value), inherit.aes = FALSE,
        size = 0, alpha = 1, show.legend = FALSE) +
      # ... and one at the zoom cutoff (top of the frame)
      geom_point(data = function(df)
        df %>%
          group_by(!!panel_zoom_group) %>%
          summarize(time = mean(!!sym(time_info$column), na.rm = TRUE), value = max(zoom_cutoff, na.rm = TRUE)) %>%
          dplyr::filter(!is.na(value)),
        mapping = aes(x = time, y = value), inherit.aes = FALSE,
        size = 0, alpha = 1, show.legend = FALSE)
  }

  # display full time scale
  if (!is.infinite(df$time_min[1]))
    p <- p + expand_limits(x = df$time_min[1])
  if (!is.infinite(df$time_max[1]))
    p <- p + expand_limits(x = df$time_max[1])

  # normalize plot y axis
  if (normalize)
    p <- p + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())

  # paneling
  if (!quo_is_null(aes_quos$panel))
    p <- p + facet_grid(..panel ~ ., scales = "free_y")

  # color
  if (!quo_is_null(aes_quos$color))
    p <- p %+% aes_(color = aes_quos$color)

  # linetype
  if (!quo_is_null(aes_quos$linetype))
    p <- p %+% aes_(linetype = aes_quos$linetype)

  # label
  if (!quo_is_null(aes_quos$label))
    p <- p %+% aes_(label = aes_quos$label)

  # return plot
  return(p)
}
# dual inlet ========
#' Plot mass data from dual inlet files
#'
#' @inheritParams iso_plot_continuous_flow_data
#' @param filter any filter condition to apply to the data beyond the masses/ratio selection (param \code{data}) and time interval (param \code{time_interval}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()} (i.e. all file info is available for plotting aesthetics).
#' @param panel whether to panel data by anything - any data column is possible (see notes in the \code{filter} parameter) but the most commonly used options are \code{panel = NULL} (overlay all), \code{panel = data} (by mass/ratio data), \code{panel = file_id} (panel by files, alternatively use any appropriate file_info column), and \code{panel = type} (panel by sample vs standard). Additionally it is possible to panel two variables against each other (i.e. use a \link[ggplot2]{facet_grid}), e.g. by specifying the formula \code{panel = data ~ file_id} (data in the panel rows, files in the panel columns) or \code{panel = data ~ type}. The default for this parameter is simple panelling by \code{data}.
#' @param shape whether to shape data points by anything, options are the same as for \code{panel} but the default is \code{type} (sample vs standard).
#' @param ... deprecated parameters
#' @note normalization is not useful for dual inlet data, except potentially between standard and sample - however, for this it is more meaningful to simply plot the relevant ratios together
#' @family plot functions
#' @export
iso_plot_dual_inlet_data <- function(
  iso_files, data = c(), filter = NULL,
  panel = data, color = file_id, linetype = NULL, shape = type, label = file_id,
  ...) {

  # Plots signal vs. cycle for dual inlet files as lines plus points. Returns
  # the ggplot object. `...` only exists to catch deprecated/unknown
  # parameters, both of which raise an error.

  # checks
  if (!iso_is_dual_inlet(iso_files))
    stop("iso_plot_dual_inlet_data can only plot dual inlet iso_files", call. = FALSE)

  # check for deprecated parameters
  dots <- list(...)
  old <- c("panel_by", "color_by", "linetype_by", "shape_by")
  if (any(old %in% names(dots))) {
    # names are joined with base paste() rather than an unqualified collapse()
    deprecated <- paste(old[old %in% names(dots)], collapse = "', '")
    msg <- glue::glue(
      "deprecated parameter(s): ",
      "'{deprecated}' ",
      "- please check the function documentation for details on ",
      "the updated parameters")
    stop(msg, call. = FALSE)
  }
  if (length(dots) > 0) {
    unknown <- paste(names(dots), collapse = "', '")
    msg <- glue::glue(
      "unkown parameter(s): ",
      "'{unknown}' ")
    stop(msg, call. = FALSE)
  }

  # global vars
  cycle <- value <- type <- data_wo_units <- NULL

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE, include_file_info = everything())
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # check for column existence
  aes_quos <- list(panel = enquo(panel), color = enquo(color),
                   linetype = enquo(linetype), shape = enquo(shape),
                   label = enquo(label))
  aes_cols <- list()
  check_expressions(raw_data, aes_quos$color, aes_quos$linetype, aes_quos$shape, aes_quos$label)
  if (quo_is_null(aes_quos$panel)) {
    # no panel
    aes_cols$panel <- c()
  } else if (quo_is_symbol(aes_quos$panel)) {
    # single symbol --> facet_wrap
    aes_cols <- c(aes_cols, get_column_names(raw_data, panel = aes_quos$panel))
  } else {
    # formula --> facet_grid
    aes_cols <- c(aes_cols, get_column_names(
      raw_data,
      panel_rows = aes_quos$panel %>% rlang::quo_squash() %>% rlang::f_lhs(),
      panel_cols = aes_quos$panel %>% rlang::quo_squash() %>% rlang::f_rhs()))
  }

  # only work with desired data (masses and ratios)
  select_data <- if (length(data) == 0) unique(raw_data$data) else as.character(data)
  missing <- setdiff(select_data, unique(raw_data$data))
  if (length(missing) > 0)
    stop("data not available in the provided iso_files: ", str_c(missing, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, data %in% select_data)

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
  }

  # plot data
  plot_data <-
    raw_data %>%
    # data with units and in correct order
    mutate(
      data_wo_units = data,
      data = ifelse(!is.na(units), str_c(data, " [", units, "]"), data)
    ) %>% {
      # named vector: names = data with units, values = data without units
      data_levels <- tibble::deframe(select(., data, data_wo_units) %>% unique())
      # positions of the levels in the order requested via `data`
      data_sorting <- sapply(select_data, function(x) which(data_levels == x)) %>% unlist(use.names = FALSE)
      mutate(., data = factor(data, levels = names(data_levels)[data_sorting]))
    }

  # generate plot
  # (group_quos collects every expression that has to be part of the line
  # grouping; it always includes the file_id)
  group_quos <- list(quo(file_id))
  p <- plot_data %>%
    ggplot() +
    aes(cycle, value) +
    geom_line() +
    geom_point(size = 2) +
    scale_x_continuous("Cycle", breaks = c(0:max(plot_data$cycle))) +
    scale_y_continuous("Signal") +
    theme_bw()

  # paneling
  if (!quo_is_null(aes_quos$panel)) {
    if (quo_is_symbol(aes_quos$panel))
      p <- p + facet_wrap(rlang::new_formula(NULL, sym(aes_cols$panel)), scales = "free_y")
    else
      p <- p + facet_grid(rlang::new_formula(sym(aes_cols$panel_rows), sym(aes_cols$panel_cols)), scales = "free_y")
  }

  # color
  if (!quo_is_null(aes_quos$color)) {
    p <- p %+% aes_(color = aes_quos$color)
    group_quos <- c(group_quos, aes_quos['color'])
  }

  # linetype
  if (!quo_is_null(aes_quos$linetype)) {
    p <- p %+% aes_(linetype = aes_quos$linetype)
    group_quos <- c(group_quos, aes_quos['linetype'])
  }

  # shape_by
  if (!quo_is_null(aes_quos$shape)) {
    p <- p %+% aes_(shape = aes_quos$shape)
    group_quos <- c(group_quos, aes_quos['shape'])
  }

  # group quo
  p <- p %+% aes_(group = quo(paste(!!!group_quos)))

  # label
  if (!quo_is_null(aes_quos$label))
    p <- p %+% aes_(label = aes_quos$label)

  # return plot
  return(p)
}
# scan ======
#' Plot data from scan files
#'
#' This function provides easy plotting for mass and ratio traces from IRMS scan data. It can be called either directly with a set of \code{iso_file} objects, or with a data frame prepared for plotting scan data (see \code{\link{iso_prepare_scan_plot_data}}).
#'
#' @param ... S3 method placeholder parameters, see class specific functions for details on parameters
#' @family plot functions
#' @export
iso_plot_scan_data <- function(...) {
  # S3 generic: the method is selected by the class of the first argument
  # passed through `...`.
  UseMethod("iso_plot_scan_data")
}
#' @export
iso_plot_scan_data.default <- function(x, ...) {
  # Fallback for unsupported inputs: always fails, naming the first class of x.
  msg <- paste0(
    "this function is not defined for objects of type '",
    class(x)[1],
    "'"
  )
  stop(msg, call. = FALSE)
}
#' @export
iso_plot_scan_data.iso_file <- function(iso_files, ...) {
  # Single file: convert to a file list and dispatch again on the result
  # (presumably an iso_file_list - confirm against iso_as_file_list).
  file_list <- iso_as_file_list(iso_files)
  iso_plot_scan_data(file_list, ...)
}
#' @inheritParams iso_prepare_scan_plot_data
#' @rdname iso_plot_scan_data
#' @export
iso_plot_scan_data.iso_file_list <- function(
  iso_files, data = c(), type, filter = NULL,
  x_interval = c(), y_interval = c(),
  panel = file_id, color = data, linetype = NULL, label = data, ...) {

  # Two-step method: prepare the scan plotting data frame from the iso_files,
  # then dispatch to the data frame method for the actual plot.

  # safety checks
  if (!iso_is_scan(iso_files)) {
    stop("iso_plot_scan_data can only plot scan iso_files", call. = FALSE)
  }

  # capture all expression-type arguments unevaluated so they can be
  # forwarded as is (type may be missing, which is passed along as such)
  filter_quo <- enquo(filter)
  type_quo <- rlang::enquo(type)
  panel_quo <- enquo(panel)
  color_quo <- enquo(color)
  linetype_quo <- enquo(linetype)
  label_quo <- enquo(label)

  # retrieve data (with all info so additional aesthetics are easy to include)
  scan_plot_data <- iso_prepare_scan_plot_data(
    iso_files,
    data = data,
    include_file_info = everything(),
    filter = !!filter_quo
  )

  # plot scan data
  iso_plot_scan_data(
    scan_plot_data,
    type = !!type_quo,
    x_interval = x_interval,
    y_interval = y_interval,
    panel = !!panel_quo,
    color = !!color_quo,
    linetype = !!linetype_quo,
    label = !!label_quo,
    ...
  )
}
#' @rdname iso_plot_scan_data
#' @param df a data frame of the scan data prepared for plotting (see \code{\link{iso_prepare_scan_plot_data}})
#' @param type which type of scan data to plot. Only required if there is more than one type of scan data.
#' @param x_interval optional constraints on x axis values
#' @param y_interval optional constraints on y axis values
#' @param ... additional parameters passed on to \link{iso_plot_data}
#' @inheritParams iso_prepare_scan_plot_data
#' @inheritParams iso_plot_data
#' @export
iso_plot_scan_data.data.frame <- function(
  df, type, x_interval = c(), y_interval = c(),
  panel = file_id, color = data, linetype = NULL, label = data, ...) {

  # nothing to plot without rows
  if (nrow(df) == 0) {
    stop("no data provided", call. = FALSE)
  }

  # capture the aesthetics unevaluated for forwarding to iso_plot_data
  panel_quo <- enquo(panel)
  color_quo <- enquo(color)
  linetype_quo <- enquo(linetype)
  label_quo <- enquo(label)

  # figure out which scan type to plot
  type_quo <- rlang::enquo(type)
  types <- unique(df$type)
  if (!rlang::quo_is_missing(type_quo)) {
    # a type was requested: narrow the data down to it
    selected_type <- rlang::eval_tidy(type_quo)
    df <- dplyr::filter(df, .data$type == !!selected_type)
    if (nrow(df) == 0) {
      stop(
        sprintf(
          "no data for type '%s'. Available type: '%s'",
          selected_type, paste(types, collapse = "', '")
        ),
        call. = FALSE
      )
    }
  } else if (length(types) > 1) {
    # no type requested but several present: the user has to pick one
    stop(
      sprintf(
        "found more than 1 type of scan file: '%s'. Please specify which type to plot by setting the 'type' parameter to one of these options: %s",
        paste(types, collapse = "', '"),
        paste(sprintf("'type = \"%s\"'", types), collapse = " or ")
      ),
      call. = FALSE
    )
  }

  # intervals, if supplied, must be numeric vectors with exactly two entries
  x_interval_invalid <-
    length(x_interval) > 0 && !(is.numeric(x_interval) && length(x_interval) == 2)
  if (x_interval_invalid) {
    stop("x interval needs to be a vector with two numeric entries, found: ", str_c(x_interval, collapse = ", "), call. = FALSE)
  }
  y_interval_invalid <-
    length(y_interval) > 0 && !(is.numeric(y_interval) && length(y_interval) == 2)
  if (y_interval_invalid) {
    stop("y interval needs to be a vector with two numeric entries, found: ", str_c(y_interval, collapse = ", "), call. = FALSE)
  }

  # if only the x interval is set, drop the data outside of it so the y scale
  # adjusts to the visible range; the points directly bordering the interval
  # are kept in each file_id/data group
  x_is_set <- length(x_interval) == 2 && is.numeric(x_interval)
  y_is_set <- length(y_interval) == 2 && is.numeric(y_interval)
  if (x_is_set && !y_is_set) {
    flagged <- dplyr::arrange(df, x)
    flagged <- dplyr::group_by(flagged, file_id, data)
    flagged <- dplyr::mutate(
      flagged,
      # outside the requested x interval?
      ..discard = x < x_interval[1] | x > x_interval[2],
      # +1 = switch from keep to discard, -1 = switch from discard to keep
      ..change = c(0, diff(..discard)),
      # first discarded point after / last discarded point before kept data
      ..border = ..change == 1 | c(..change[-1], 0) == -1
    )
    flagged <- dplyr::ungroup(flagged)
    flagged <- dplyr::filter(flagged, !..discard | ..border)
    df <- dplyr::select(flagged, -..discard, -..change, -..border)
  }

  # x axis label: type with units, or just the type if the units are ambiguous
  x_lab <- unique(sprintf("%s [%s]", df$type, df$x_units))
  if (length(x_lab) > 1) {
    x_lab <- unique(df$type)
  }

  # base plot
  p <- iso_plot_data(
    df, x, value, group = paste(file_id, data),
    color = !!color_quo, linetype = !!linetype_quo, label = !!label_quo,
    panel = !!panel_quo, lines = TRUE, ...
  ) +
    scale_x_continuous(expand = c(0, 0)) +
    labs(x = x_lab, y = "Signal")

  # zoom to the requested intervals (no y axis padding if y is constrained)
  zoom_x <- length(x_interval) == 2
  zoom_y <- length(y_interval) == 2
  if (zoom_x && zoom_y) {
    p <- p +
      coord_cartesian(xlim = x_interval, ylim = y_interval) +
      scale_y_continuous(expand = c(0, 0))
  } else if (zoom_x) {
    p <- p + coord_cartesian(xlim = x_interval)
  } else if (zoom_y) {
    p <- p +
      coord_cartesian(ylim = y_interval) +
      scale_y_continuous(expand = c(0, 0))
  }

  p
}
#' Prepare plotting data from scan files
#'
#' This function helps with the preparation of plotting data from scan files. Call either explicitly and pass the result to \code{\link{iso_plot_scan_data}} or let \code{\link{iso_plot_scan_data}} take care of preparing the plotting data directly from the \code{iso_files}.
#'
#' @param iso_files collection of iso_file objects
#' @param data which masses and ratios to plot (e.g. \code{c("44", "45", "45/44")} - without the units), if omitted, all available masses and ratios are plotted. Note that ratios should be calculated using \code{\link{iso_calculate_ratios}} prior to plotting.
#' @param include_file_info which file information to include (see \link[isoreader]{iso_get_file_info}). Use c(...) to select multiple, supports all \link[dplyr]{select} syntax including renaming columns.
#' @param filter any filter condition to apply to the data beyond the masses/ratio selection (param \code{data}). For details on the available data columns see \link[isoreader]{iso_get_raw_data} with parameters \code{gather = TRUE} and \code{include_file_info = everything()} (i.e. all file info is available for plotting aesthetics).
#' @return a data frame with the gathered raw data and the requested file info. Column \code{data} holds the data name with units appended (where units are available) and \code{data_wo_units} the name without units; both are factors ordered by the \code{data} selection.
#'
#' @family plot functions
#' @export
iso_prepare_scan_plot_data <- function(
  iso_files, data = c(), include_file_info = type, filter = NULL) {

  # safety checks
  if (!iso_is_scan(iso_files))
    stop("can only prepare scan iso_files for plotting", call. = FALSE)

  # collect raw data
  raw_data <- iso_get_raw_data(iso_files, gather = TRUE, quiet = TRUE)
  if (nrow(raw_data) == 0) stop("no raw data in supplied iso_files", call. = FALSE)

  # add in file info ('type' is required by the plotting function)
  file_info <- iso_get_file_info(iso_files, select = !!enquo(include_file_info), quiet = TRUE)
  if (!"type" %in% names(file_info)) {
    stop("'type' must be included in the file information", call. = FALSE)
  }
  raw_data <- dplyr::left_join(raw_data, file_info, by = "file_id")

  # only work with desired data
  available_data <- unique(raw_data$data)
  select_data <- if (length(data) == 0) available_data else as.character(data)
  missing_data <- setdiff(select_data, available_data)
  if (length(missing_data) > 0)
    stop("data not available in the provided iso_files (don't include units): ", str_c(missing_data, collapse = ", "), call. = FALSE)
  raw_data <- dplyr::filter(raw_data, .data$data %in% select_data)

  # general filter
  filter_quo <- enquo(filter)
  if (!quo_is_null(filter_quo)) {
    raw_data <- dplyr::filter(raw_data, !!filter_quo)
    if (nrow(raw_data) == 0) {
      sprintf("no data left with filter '%s'", rlang::as_label(filter_quo)) %>%
        stop(call. = FALSE)
    }
  }

  # plot data
  plot_data <-
    raw_data %>%
    # add units to data for proper grouping
    dplyr::mutate(
      data_wo_units = .data$data,
      data = ifelse(!is.na(.data$units), paste0(.data$data, " [", .data$units, "]"), .data$data)
    ) %>%
    dplyr::select(1:.data$category, .data$data, .data$data_wo_units, everything())

  # switch to factors for proper grouping
  # data_levels: names = data with units, values = data without units
  data_levels <- tibble::deframe(select(plot_data, data, data_wo_units) %>% unique())
  # order the levels by the requested data selection; a requested trace can
  # match several levels (same data recorded with different units) and can be
  # requested more than once, hence map() + unlist() + unique() instead of a
  # strict one-match-per-entry map_int()
  data_sorting <-
    purrr::map(select_data, ~which(data_levels == .x)) %>%
    unlist(use.names = FALSE) %>%
    unique()
  plot_data <- plot_data %>%
    dplyr::mutate(
      data = factor(data, levels = names(data_levels)[data_sorting]),
      data_wo_units = factor(data_wo_units, levels = unique(as.character(data_levels)[data_sorting]))
    )

  # return
  return(plot_data)
}
# reference peaks =========
#' Plot reference peaks
#'
#' Visualize how consistent the reference peaks are across a series of samples.
#'
#' @inheritParams iso_prepare_for_calibration
#' @param x which column to use for the x-axis
#' @param ratio which ratio column(s) to compare for the reference peaks (can be multiple)
#' @param group_id group identifier column(s) to clarify across which data groups the reference peak deviation should be calculated. By default calculates reference peak variations within each analysis.
#' @param is_ref_condition condition to identify which of the peaks are reference peaks (unless the peaks are prefiltered already). Must be a column or expression that evaluates to a logical (TRUE/FALSE).
#' @param within_group deprecated, use \code{group_id} to group accordingly
#' @param is_ref_used deprecated, set an aesthetic directly via the ... parameter passed on to \link{iso_plot_data}
#' @param ... additional parameters passed to \link{iso_plot_data}
#' @family plot functions
#' @export
iso_plot_ref_peaks <- function(dt, x, ratio, ..., group_id = file_id, is_ref_condition = TRUE, within_group = TRUE, is_ref_used = NULL) {

  # safety checks
  param_quos <-
    list(dt = enquo(dt), x = enquo(x), ratio = enquo(ratio), group_id = enquo(group_id),
         is_ref_condition = enquo(is_ref_condition))
  check_params <-
    c(
      dt = "no data table supplied",
      x = "no x axis value supplied",
      ratio = "no ratio column to compare reference peaks provided"
    )
  # named is_missing (not 'missing') to avoid shadowing base::missing() used below
  is_missing <- param_quos[names(check_params)] %>% map_lgl(quo_is_missing)
  if (any(is_missing)) {
    # message assembled with base paste() so that it does not depend on the
    # collapse() helper that glue renamed to glue_collapse()
    stop(
      "missing parameter(s) '",
      paste(names(check_params)[is_missing], collapse = "', '"),
      "':\n",
      paste0(" - ", check_params[is_missing], collapse = "\n"),
      call. = FALSE
    )
  }

  # evaluate dt
  dt <- rlang::eval_tidy(param_quos$dt)

  # warnings
  if (!missing(within_group))
    warning("'within_group' parameter is deprecated, use 'group_id' to group accordingly", immediate. = TRUE, call. = FALSE)
  if (!missing(is_ref_used))
    warning("'is_ref_used' parameter is deprecated, please set an aesthetic directly by using the ... passed on to iso_plot_data", immediate. = TRUE, call. = FALSE)

  # filter condition
  refs <- filter(dt, !!param_quos$is_ref_condition)
  if (nrow(refs) == 0)
    glue::glue("no data to visualize, check your data table and is_ref_condition filter ('{rlang::as_label(param_quos$is_ref_condition)}')") %>%
      stop(call. = FALSE)

  # ratios
  dt_cols <- get_column_names(dt, x = param_quos$x, ratio = param_quos$ratio, group_id = param_quos$group_id, n_reqs = list(group_id = "*", ratio = "+"))

  # calculate ratio deltas: relative deviation of each ratio from its group
  # mean, scaled by 1000 (i.e. in permil)
  mutate_quos <-
    map(dt_cols$ratio, ~quo( (!!sym(.x) / mean(!!sym(.x), na.rm = TRUE) - 1) * 1000)) %>%
    setNames(names(dt_cols$ratio))
  refs <- refs %>%
    group_by(!!!map(dt_cols$group_id, sym)) %>%
    mutate(!!!mutate_quos) %>%
    ungroup()

  # visualize
  iso_plot_data(
    refs, x = !!sym(dt_cols$x), y = c(!!!map(names(mutate_quos), sym)),
    ...,
    geom_bar(stat = "identity", position = "dodge")
  ) + labs(y = "Deviation from average [\U2030]")
}
# data and calibration plots =========
#' Plot calibration range
#'
#' This function is deprecated, please use \link{iso_plot_data} and \link{iso_mark_calibration_range} instead.
#' @param ... deprecated
#' @export
iso_plot_calibration_range <- function(...) {
  # deprecated stub: only emits the deprecation notice, arguments are ignored
  deprecation_msg <- paste0(
    "iso_plot_calibration_range was deprecated as part of a major change in ",
    "treatment of calibration ranges in isoprocessor version 0.3.8. ",
    "Please use iso_evaluate_calibration_range to calculate calibration ranges ",
    "for your terms of interest, iso_plot_data to visualize the data, ",
    "and iso_mark_calibration_range to highlight the calculated calibration ",
    "ranges visually."
  )
  warning(deprecation_msg, immediate. = TRUE, call. = FALSE)
}
#' Visualize the data
#'
#' General purpose convenience visualization function. Simply add other ggplot components after calling this function to customize more (e.g. with \link[ggplot2]{facet_wrap} or \link[ggplot2]{theme} calls). Make sure to specify \code{lines = TRUE} and/or \code{points = TRUE} to add the lines/points respectively. Accepts multiple y variables in which case they are plotted in a \link[ggplot2]{facet_wrap} with new variables \code{panel} holding the name of the y variable panels, \code{y_value} holding the values and \code{y_error} holding the error values (if \code{y_error} is supplied). Also always generates a new column called \code{variable} that holds the variable names (\code{y}) supplied to this function. All aesthetics parameters expect variables or expressions that are valid in the context of the \code{dt}. For convenience, all aesthetics can also be (re)-named on the fly with \code{c(new_name = expr)} and will include column units in the legend captions by default.
#'
#' @param dt data frame to plot data from
#' @param x the column or expression for the x-axis aesthetic. Can be a numeric, text or datetime column (text and datetime column labels will be automatically rotated by 90 degrees). For clarity of the plot, only one x variable or expression is allowed. Use named vector with \code{c(new_x = x)} to rename the x variable or expression on the fly. By default will show units in the axis names if there are any. If a datetime column is provided for \code{x}, parameters \code{date_breaks} (example: \code{date_breaks = "2 hours"}) and \code{date_labels} can be set to fine-tune the x-axis appearance. See \link[ggplot2]{scale_date} for additional details. Note that \code{x} can also be \code{x = NULL} for single data point plots - in this case the x axis is completely omitted.
#' @param y which columns/expressions to visualize. Combine with \code{c(y1, y2)} or use \link[dplyr]{select} syntax (e.g. \code{starts_with(...)}) to show multiple variables in a \link[ggplot2]{facet_wrap}. Use named vector with \code{c(new_y1 = y1)} to rename variables/expressions on the fly. By default will show units in the axis names if there are any.
#' @param group what to group by, multiple columns allowed (combine with \code{paste(...)}), usually not necessary if groupings are fully defined through other aesthetics
#' @param color variable to use for color aesthetic for the plot or constant value for the point and line color
#' @param fill variable to use for the fill aesthetic of the plot or constant value for the point fill
#' @param shape variable to use for shape aesthetic for the plot or constant value for the point shape
#' @param size variable to use for size aesthetic for the plot or constant value for the points size
#' @param linetype variable to use for linetype aesthetic for the plot or constant value for the line type
#' @param alpha variable to use for the opacity aesthetic for the plot or constant value for the point and line opacity (1 = 100\% opaque, 0 = completely transparent)
#' @param y_error an error column for drawing y error bars - if multiple \code{y} are provided, error needs to point to the same number of columns
#' @param lines whether to plot lines (FALSE by default)
#' @param points whether to plot points (FALSE by default)
#' @param label this is primarily of use for turning the generated ggplots into interactive plots via \code{\link[plotly]{ggplotly}} as the \code{label} will be rendered as an additional mouseover label.
#' @param panel whether to panel the data by anything. If using a single parameter (e.g. \code{panel = panel}), will generate a \link[ggplot2]{facet_wrap}. If using a formula (e.g. \code{panel = panel ~ .} or \code{panel = file_id ~ panel}), will generate a \link[ggplot2]{facet_grid}. The default for this parameter is to panel via facet grid by the y variable name but only if multiple \code{y} columns are provided. Otherwise will not generate any facets. If additional facet parameters are desired, please use \link[ggplot2]{facet_wrap} and \link[ggplot2]{facet_grid} directly.
#' @param panel_scales the \code{scales} parameter for the facets (if any are used)
#' @param date_breaks what breaks to use for the x axis if it is a datetime
#' @param date_labels datetime label pattern for x axis if it is a datetime
#' @param ... additional ggplot objects (e.g. \code{geom_smooth()}) that should be added to the plot PRIOR to the automatically generated layers for error bars, points and lines. Note that to add geoms on top, please use regular \code{iso_plot_data() + geom_smooth() + ...} syntax instead.
#' @family plot functions
#' @export
iso_plot_data <- function(
# aesthetics
dt, x, y,
# additional geom
...,
# optional arguments
y_error = NULL, group = NULL,
color = NULL, fill = NULL, shape = NULL, size = 4,
linetype = NULL, alpha = NULL, label = NULL,
# panels
panel = panel ~ ., panel_scales = "free_y",
# geoms
lines = FALSE, points = FALSE,
# styling
date_breaks = NULL, date_labels = "%d %b %H:%M") {
# safety checks
if (missing(dt)) stop("no data table supplied", call. = FALSE)
if (nrow(dt) == 0) stop("the provided data table has no data to plot (0 rows)", call. = FALSE)
if (missing(x)) stop("have to provide an x variable or expression to plot", call. = FALSE)
if (missing(y)) stop("have to provide at least one y variable or expression to plot", call. = FALSE)
#check additional geoms for issues