Skip to content

Commit 80181f5

Browse files
author
Jasen Finch
authored
Merge pull request #86 from jasenfinch/devel
v0.14.9
2 parents e17b352 + 882ea24 commit 80181f5

File tree

147 files changed

+1001
-263
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

147 files changed

+1001
-263
lines changed

DESCRIPTION

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: metabolyseR
22
Title: Methods for Pre-Treatment, Data Mining and Correlation Analyses of Metabolomics Data
3-
Version: 0.14.8
3+
Version: 0.14.9
44
Authors@R: person("Jasen", "Finch", email = "jsf9@aber.ac.uk", role = c("aut", "cre"))
55
Description: A tool kit for pre-treatment, modelling, feature selection and correlation analyses of metabolomics data.
66
URL: https://jasenfinch.github.io/metabolyseR
@@ -57,6 +57,7 @@ Collate: allClasses.R
5757
info.R
5858
join.R
5959
keep.R
60+
mds.R
6061
metabolyse.R
6162
metabolyseR.R
6263
modelling.R
@@ -80,6 +81,7 @@ Collate: allClasses.R
8081
remove.R
8182
randomForest.R
8283
rsd.R
84+
roc.R
8385
show-method.R
8486
split.R
8587
transform.R

NAMESPACE

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ export(keepClasses)
4343
export(keepFeatures)
4444
export(keepSamples)
4545
export(linearRegression)
46+
export(mds)
4647
export(metabolyse)
4748
export(metrics)
4849
export(modellingMethods)
@@ -72,13 +73,15 @@ export(preTreated)
7273
export(preTreatmentElements)
7374
export(preTreatmentMethods)
7475
export(preTreatmentParameters)
76+
export(proximity)
7577
export(randomForest)
7678
export(raw)
7779
export(reAnalyse)
7880
export(removeClasses)
7981
export(removeFeatures)
8082
export(removeSamples)
8183
export(response)
84+
export(roc)
8285
export(rsd)
8386
export(sinfo)
8487
export(split)
@@ -113,6 +116,7 @@ importFrom(crayon,green)
113116
importFrom(crayon,red)
114117
importFrom(crayon,yellow)
115118
importFrom(doFuture,registerDoFuture)
119+
importFrom(dplyr,across)
116120
importFrom(dplyr,arrange)
117121
importFrom(dplyr,arrange_all)
118122
importFrom(dplyr,bind_cols)
@@ -124,14 +128,18 @@ importFrom(dplyr,full_join)
124128
importFrom(dplyr,group_by)
125129
importFrom(dplyr,group_by_all)
126130
importFrom(dplyr,group_by_at)
131+
importFrom(dplyr,group_map)
127132
importFrom(dplyr,left_join)
128133
importFrom(dplyr,mutate)
129134
importFrom(dplyr,mutate_all)
130135
importFrom(dplyr,mutate_at)
136+
importFrom(dplyr,mutate_if)
131137
importFrom(dplyr,n)
138+
importFrom(dplyr,relocate)
132139
importFrom(dplyr,rename)
133140
importFrom(dplyr,rowwise)
134141
importFrom(dplyr,select)
142+
importFrom(dplyr,select_if)
135143
importFrom(dplyr,summarise)
136144
importFrom(dplyr,summarise_all)
137145
importFrom(dplyr,ungroup)

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# metabolyseR 0.14.9
2+
3+
* Suppressed name repair console message encountered during random forest permutation testing.
4+
5+
* Added the [`proximity()`](https://jasenfinch.github.io/metabolyseR/reference/modelling-accessors.html) method for extracting sample proximities from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.
6+
7+
* Added the [`mds()`](https://jasenfinch.github.io/metabolyseR/reference/mds.html) method to perform multidimensional scaling on sample proximities from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.
8+
9+
* Added the [`roc()`](https://jasenfinch.github.io/metabolyseR/reference/roc.html) method to calculate receiver-operator characteristic curves from the [`RandomForest`](https://jasenfinch.github.io/metabolyseR/reference/RandomForest-class.html) S4 class.
10+
111
# metabolyseR 0.14.8
212

313
* An error is now thrown during random forest classification when less than two classes are specified.

R/mds.R

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#' Multidimensional scaling (MDS)
2+
#' @rdname mds
3+
#' @description Multidimensional scaling of random forest proximities.
4+
#' @param x S4 object of class `RandomForest`, `Analysis` or a list
5+
#' @param dimensions The number of dimensions by which the data are to be represented.
6+
#' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data.
7+
#' @return
8+
#' A tibble containing the scaled dimensions.
9+
#' @examples
10+
#' library(metaboData)
11+
#'
12+
#' x <- analysisData(abr1$neg[,200:300],abr1$fact) %>%
13+
#' occupancyMaximum(cls = 'day') %>%
14+
#' transformTICnorm()
15+
#'
16+
#' rf <- randomForest(x,cls = 'day')
17+
#'
18+
#' mds(rf)
19+
#' @export
20+
21+
setGeneric("mds", function(x,dimensions = 2,idx = NULL)
22+
standardGeneric("mds")
23+
)
24+
25+
#' @rdname mds
26+
#' @importFrom dplyr mutate_if group_map select_if across
27+
28+
setMethod('mds',signature = 'RandomForest',
29+
function(x,dimensions = 2,idx = NULL){
30+
31+
group_vars <- switch(type(x),
32+
classification = c('Response','Comparison'),
33+
regression = 'Response',
34+
unsupervised = NULL)
35+
36+
dissimilarities <- x %>%
37+
proximity(idx = idx) %>%
38+
mutate(across(Sample1:Sample2,as.character)) %>%
39+
spread(Sample2,Proximity) %>%
40+
mutate_if(is.numeric,~ 1 - .x)
41+
42+
mds_dimensions <- dissimilarities %>%
43+
group_by_at(group_vars) %>%
44+
group_map(~ .x %>%
45+
select_if(is.numeric) %>%
46+
cmdscale(k = dimensions) %>%
47+
set_colnames(str_c('Dimension ',
48+
seq_len(dimensions))) %>%
49+
as_tibble() %>%
50+
bind_cols(select_if(.x %>%
51+
ungroup(),
52+
is.character)) %>%
53+
relocate(contains('Dimension'),
54+
.after = last_col()),
55+
.keep = TRUE
56+
) %>%
57+
bind_rows() %>%
58+
rename(Sample = Sample1)
59+
60+
if (is.null(idx)){
61+
mds_dimensions <- mds_dimensions %>%
62+
mutate(Sample = as.numeric(Sample)) %>%
63+
arrange(across(c(group_vars,'Sample')))
64+
}
65+
66+
return(mds_dimensions)
67+
}
68+
)
69+
70+
#' @rdname mds
71+
72+
setMethod('mds',signature = 'list',
73+
function(x,dimensions = 2,idx = NULL){
74+
object_classes <- x %>%
75+
map_chr(class)
76+
77+
if (FALSE %in% (object_classes == 'RandomForest')) {
78+
message(
79+
str_c('All objects contained within supplied list ',
80+
'that are not of class RandomForest will be ignored.'))
81+
}
82+
83+
x <- x[object_classes == 'RandomForest']
84+
85+
if (length(x) > 0) {
86+
x %>%
87+
map(mds,dimensions = dimensions,idx = idx) %>%
88+
bind_rows()
89+
} else {
90+
tibble()
91+
}
92+
93+
})
94+
95+
#' @rdname mds
96+
97+
setMethod('mds',signature = 'Analysis',
98+
function(x,dimensions = 2,idx = NULL){
99+
x %>%
100+
analysisResults('modelling') %>%
101+
mds(dimensions = dimensions,
102+
idx = idx)
103+
})

R/metabolyseR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ globalVariables(
77
'Occupancy','adjustedPvalue','adjusted.p.value','Label','response',
88
'-log10(p)','DF1','Sample1','Proximity','Dimension 1','Dimension 2',
99
'x','.level','sensitivity','specificity','Mode','RSD','Median','Colour',
10-
'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency','|r|'
10+
'Index','TIC','y','label','batch','correction','N','term','Metric','Frequency','|r|','idx_1','idx_2'
1111
))

R/modelling-accessors.R

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#' @param cls sample information column to use
66
#' @param metric importance metric for which to retrieve explanatory features
77
#' @param threshold threshold below which explanatory features are extracted
8+
#' @param idx sample information column to use for sample names. If `NULL`, the sample row number will be used. Sample names should be unique for each row of data.
89
#' @param ... arguments to parse to method for specific class
910
#' @section Methods:
1011
#' * `binaryComparisons`: Return a vector of all possible binary comparisons for a given sample information column.
@@ -13,6 +14,7 @@
1314
#' * `metrics`: Retrieve the model performance metrics for a random forest analysis
1415
#' * `importanceMetrics`: Retrieve the available feature importance metrics for a random forest analysis.
1516
#' * `importance`: Retrieve feature importance results.
17+
#' * `proximity`: Retrieve the random forest sample proximities.
1618
#' * `explanatoryFeatures`: Retrieve explanatory features.
1719
#' @examples
1820
#' library(metaboData)
@@ -40,6 +42,9 @@
4042
#' ## Retrieve the feature importance results
4143
#' importance(rf_analysis)
4244
#'
45+
#' ## Retrieve the sample proximities
46+
#' proximity(rf_analysis)
47+
#'
4348
#' ## Retrieve the explanatory features
4449
#' explanatoryFeatures(rf_analysis,metric = 'FalsePositiveRate',threshold = 0.05)
4550
#' @export
@@ -210,6 +215,98 @@ setMethod('importance',signature = 'Analysis',
210215
#' @rdname modelling-accessors
211216
#' @export
212217

218+
setGeneric("proximity", function(x,idx = NULL)
219+
standardGeneric("proximity")
220+
)
221+
222+
#' @rdname modelling-accessors
223+
#' @importFrom dplyr relocate
224+
225+
setMethod('proximity',signature = 'RandomForest',
226+
function(x,idx = NULL){
227+
228+
group_vars <- switch(type(x),
229+
classification = c('Response','Comparison'),
230+
regression = 'Response',
231+
unsupervised = NULL) %>%
232+
c(.,'Sample1','Sample2')
233+
234+
proximities <- x@proximities %>%
235+
group_by_at(group_vars) %>%
236+
summarise(Proximity = mean(Proximity),
237+
.groups = 'drop')
238+
239+
if (!is.null(idx)){
240+
sample_idx <- x %>%
241+
clsExtract(cls = idx)
242+
243+
if (any(duplicated(sample_idx))){
244+
stop(str_c('Duplicated sample names found in sample information column `',
245+
idx,
246+
'`. The specified sample names should be unique to each sample.'),
247+
call. = FALSE)
248+
}
249+
250+
sample_idx <- sample_idx %>%
251+
tibble(idx = .) %>%
252+
rowid_to_column(var = 'row')
253+
254+
proximities <- proximities %>%
255+
left_join(sample_idx,
256+
by = c('Sample1' = 'row')) %>%
257+
rename(idx_1 = idx) %>%
258+
left_join(sample_idx,
259+
by = c('Sample2' = 'row')) %>%
260+
rename(idx_2 = idx) %>%
261+
select(-Sample1,
262+
-Sample2,
263+
Sample1 = idx_1,
264+
Sample2 = idx_2) %>%
265+
relocate(Proximity,.after = Sample2)
266+
}
267+
268+
return(proximities)
269+
}
270+
)
271+
272+
#' @rdname modelling-accessors
273+
274+
setMethod('proximity',signature = 'list',
275+
function(x,idx = NULL){
276+
object_classes <- x %>%
277+
map_chr(class)
278+
279+
if (FALSE %in% (object_classes == 'RandomForest')) {
280+
message(
281+
str_c('All objects contained within supplied list ',
282+
'that are not of class RandomForest will be ignored.'))
283+
}
284+
285+
x <- x[object_classes == 'RandomForest']
286+
287+
if (length(x) > 0) {
288+
x %>%
289+
map(proximity,idx = idx) %>%
290+
bind_rows()
291+
} else {
292+
tibble()
293+
}
294+
295+
})
296+
297+
#' @rdname modelling-accessors
298+
299+
setMethod('proximity',signature = 'Analysis',
300+
function(x,idx = NULL){
301+
x %>%
302+
analysisResults(element = 'modelling') %>%
303+
proximity(idx = idx)
304+
})
305+
306+
307+
#' @rdname modelling-accessors
308+
#' @export
309+
213310
setGeneric('explanatoryFeatures', function(x,...)
214311
standardGeneric("explanatoryFeatures")
215312
)

0 commit comments

Comments
 (0)