-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregate.R
116 lines (99 loc) · 3.16 KB
/
aggregate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#' Sample aggregation
#' @rdname aggregate
#' @description Aggregation of sample features based on a grouping variable.
#' @param d S4 object of class `AnalysisData`
#' @param cls info columns across which to aggregate the data
#' @return An S4 object of class `AnalysisData` containing the aggregated data.
#' @details
#' Sample aggregation allows the electronic pooling of sample features based on a grouping variable.
#' This is useful in situations such as the presence of technical replicates that can be aggregated to reduce the effects of pseudo replication.
#' @section Methods:
#' * `aggregateMean`: Aggregate sample features to the group mean.
#' * `aggregateMedian`: Aggregate sample features to the group median.
#' * `aggregateSum`: Aggregate sample features to the group total.
#' @examples
#' ## Each of the following examples shows the application of the aggregation method and then
#' ## a Principle Component Analysis is plotted to show it's effect on the data structure.
#'
#' ## Initial example data preparation
#' library(metaboData)
#'
#' d <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
#' occupancyMaximum(occupancy = 2/3)
#'
#' d %>%
#' plotPCA(cls = 'day')
#'
#' ## Mean aggregation
#' d %>%
#' aggregateMean(cls = c('day','class')) %>%
#' plotPCA(cls = 'day',ellipses = FALSE)
#'
#' ## Median aggregation
#' d %>%
#' aggregateMedian(cls = c('day','class')) %>%
#' plotPCA(cls = 'day',ellipses = FALSE)
#'
#' ## Sum aggregation
#' d %>%
#' aggregateSum(cls = c('day','class')) %>%
#' plotPCA(cls = 'day',ellipses = FALSE)
#' @export
setGeneric("aggregateMean", function(d,cls = 'class')
standardGeneric("aggregateMean")
)
#' @rdname aggregate
setMethod('aggregateMean',signature = 'AnalysisData',
function(d,cls = 'class'){
d <- aggregate(d,'mean',cls)
return(d)
}
)
#' @rdname aggregate
#' @export
setGeneric("aggregateMedian", function(d,cls = 'class')
standardGeneric("aggregateMedian")
)
#' @rdname aggregate
setMethod('aggregateMedian',signature = 'AnalysisData',
function(d, cls = 'class'){
d <- aggregate(d,'median',cls)
return(d)
}
)
#' @rdname aggregate
#' @export
setGeneric("aggregateSum", function(d,cls = 'class')
standardGeneric("aggregateSum")
)
#' @rdname aggregate
#' @importFrom dplyr arrange_all group_by_all
setMethod('aggregateSum',signature = 'AnalysisData',
function(d,cls = 'class'){
d <- aggregate(d,'sum',cls)
return(d)
}
)
aggregate <- function(d,method,cls){
aggregateMethod <- switch(method,
mean = mean,
median = median,
sum = sum)
sample_info <- d %>%
sinfo() %>%
select(all_of(cls))
dat(d) <- d %>%
dat() %>%
bind_cols(sample_info) %>%
gather('Feature','Intensity',-all_of(cls)) %>%
group_by(across(all_of(cls)),Feature) %>%
summarise(Intensity = aggregateMethod(Intensity)) %>%
ungroup() %>%
spread(Feature,Intensity) %>%
select(-all_of(cls))
sinfo(d) <- sample_info %>%
group_by_all() %>%
summarise() %>%
arrange_all()
return(d)
}