/
cleanDataset.R
99 lines (90 loc) · 3.66 KB
/
cleanDataset.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#' Clean the dataset
#'
#' Clean the SummarizedExperiment by making sure that names of various fields are available and unique.
#'
#' @param se A \linkS4class{SummarizedExperiment} object or one of its subclasses.
#'
#' @return
#' A cleaned version of \code{se}.
#'
#' @details
#' Various \linkS4class{Panel}s assume that the row and column names of the input SummarizedExperiment are available and unique.
#' This function enforces that, adding consecutive integer names if not available and calling \code{\link{make.unique}} if they are duplicated.
#'
#' Various \linkS4class{Panel}s further assume that the \code{\link{assay}}, \code{\link{rowData}}, \code{\link{colData}} names are unique;
#' if this is not the case, \code{\link{selectInput}} behaves in unexpected (and incorrect) ways.
#' This function enforces that as well by running them through \code{\link{make.unique}}, after renaming any missing or empty assay names to \dQuote{unnamed}.
#'
#' For \linkS4class{SingleCellExperiment} objects, we additionally enforce uniqueness of the names of the \code{\link{reducedDims}}.
#'
#' All changes result in warnings as a \dQuote{sensible} object is not expected to require any work.
#'
#' @author Aaron Lun
#'
#' @name cleanDataset
#'
#' @examples
#' # Creating a very naughty SE.
#' se <- SummarizedExperiment(list(cbind(1:10, 2:11), cbind(2:11, 3:12)),
#' colData=DataFrame(A=1:2, A=3:4, check.names=FALSE),
#' rowData=DataFrame(B=1:10, B=1:10, check.names=FALSE))
#' se
#'
#' cleanDataset(se)
NULL
#' @export
#' @rdname cleanDataset
#' @importFrom SummarizedExperiment rowData colData rowData<- colData<- assays assayNames assayNames<-
setMethod("cleanDataset", "SummarizedExperiment", function(se) {
    # Row names: fill in consecutive integers when absent, de-duplicate otherwise.
    row_names <- rownames(se)
    if (is.null(row_names)) {
        # Nothing is really "missing" for a zero-row object, so stay silent.
        if (nrow(se) > 0) {
            warning("filling in the missing 'rownames(se)'")
        }
        # Coerce explicitly rather than relying on the setter to convert
        # the integer sequence to character names.
        rownames(se) <- as.character(seq_len(nrow(se)))
    } else if (anyDuplicated(row_names) > 0L) {
        warning("duplicated 'rownames(se)' detected, making them unique")
        rownames(se) <- make.unique(row_names)
    }

    # Column names: same treatment as the row names.
    col_names <- colnames(se)
    if (is.null(col_names)) {
        if (ncol(se) > 0) {
            warning("filling in the missing 'colnames(se)'")
        }
        colnames(se) <- as.character(seq_len(ncol(se)))
    } else if (anyDuplicated(col_names) > 0L) {
        warning("duplicated 'colnames(se)' detected, making them unique")
        colnames(se) <- make.unique(col_names)
    }

    # Field names of the row-level annotation.
    row_fields <- colnames(rowData(se))
    if (anyDuplicated(row_fields) > 0L) {
        warning("duplicated 'colnames(rowData(se))' detected, making them unique")
        colnames(rowData(se)) <- make.unique(row_fields)
    }

    # Field names of the column-level annotation.
    col_fields <- colnames(colData(se))
    if (anyDuplicated(col_fields) > 0L) {
        warning("duplicated 'colnames(colData(se))' detected, making them unique")
        colnames(colData(se)) <- make.unique(col_fields)
    }

    # Assay names: first make sure every assay has a non-empty name ...
    assay_names <- assayNames(se)
    if (is.null(assay_names)) {
        n_assays <- length(assays(se))
        if (n_assays > 0) {
            warning("empty 'assayNames(se)' detected, renaming to 'unnamed'")
            assayNames(se) <- rep("unnamed", n_assays)
        }
    } else {
        # Treat NA names as empty too; a bare `== ""` comparison yields NA
        # for them, which would make the `if` condition fail with an error.
        empty <- is.na(assay_names) | assay_names == ""
        if (any(empty)) {
            warning("empty 'assayNames(se)' detected, renaming to 'unnamed'")
            assayNames(se)[empty] <- "unnamed"
        }
    }

    # ... then de-duplicate, which also separates several 'unnamed' entries.
    assay_names <- assayNames(se)
    if (anyDuplicated(assay_names) > 0L) {
        warning("duplicated 'assayNames(se)' detected, making them unique")
        assayNames(se) <- make.unique(assay_names)
    }

    se
})
#' @export
#' @rdname cleanDataset
#' @importFrom SingleCellExperiment reducedDimNames reducedDimNames<-
setMethod("cleanDataset", "SingleCellExperiment", function(se) {
    # Let the next method in the dispatch chain do its cleaning first.
    se <- callNextMethod()

    # Then de-duplicate the reduced dimension names.
    has_dup_reddims <- anyDuplicated(reducedDimNames(se)) > 0L
    if (has_dup_reddims) {
        #nocov start
        warning("duplicated 'reducedDimNames(se)' detected, making them unique")
        unique_names <- make.unique(reducedDimNames(se))
        reducedDimNames(se) <- unique_names
        #nocov end
    }

    se
})