-
Notifications
You must be signed in to change notification settings - Fork 2
/
checkVarNames.R
92 lines (89 loc) · 3.24 KB
/
checkVarNames.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#### Check Names
#############################################################################
#' Make variable names conform to \code{SQLite} column name conventions.
#'
#' Inspects variable names for violations of \code{SQLite} column name conventions and
#' renames the offending variables in \code{GADSdat} or \code{all_GADSdat} objects.
#'
#' A column name is invalid in a \code{SQLite} data base if it
#' \itemize{
#' \item is a \code{SQLite} keyword (see \code{\link[eatDB]{sqlite_keywords}}) or
#' \item contains a \code{"."}.
#' }
#'
#' Invalid names are repaired by
#' \itemize{
#' \item appending the suffix \code{"Var"} to every \code{SQLite} keyword and
#' \item replacing every \code{"."} in a variable name with \code{"_"}.
#' }
#'
#' Keywords are matched case insensitively. If keywords are checked, the names are
#' additionally passed through \code{\link[base]{make.names}}. Every renaming is
#' reported via a \code{message}. Missing (\code{NA}) names result in an error.
#'
#'Apart from \code{SQLite} compatibility, variable names without \code{"."} have further advantages:
#'they can not be confused with \code{S3} methods in \code{R} and they do not cause issues
#'when importing from \code{Stata}.
#'
#'@param GADSdat \code{GADSdat} or \code{all_GADSdat} object.
#'@param checkKeywords Logical. Should \code{SQLite} keywords be checked and modified?
#'@param checkDots Logical. Should occurrences of \code{"."} be checked and modified?
#'
#'@return Returns the original object with updated variable names.
#'
#'@examples
#'# Change example data set (create an invalid variable name)
#' pisa2 <- changeVarNames(pisa, oldNames = "computer_age",
#' newNames = "computer.age")
#'
#' pisa3 <- checkVarNames(pisa2)
#'
#'@export
checkVarNames <- function(GADSdat, checkKeywords = TRUE, checkDots = TRUE) {
  # S3 generic: the method is selected by the class of 'GADSdat'.
  UseMethod("checkVarNames", GADSdat)
}
#'@export
checkVarNames.GADSdat <- function(GADSdat, checkKeywords = TRUE, checkDots = TRUE) {
  # Method for GADSdat objects: renames the variables consistently in the
  # meta data ("labels") and in the data ("dat") and returns the modified object.
  check_GADSdat(GADSdat)

  # Check one name at a time and hand the user's options on to the name-level
  # method; without forwarding, 'checkKeywords' and 'checkDots' would be ignored.
  # vapply() guarantees a plain character vector, also for empty input.
  fix_names <- function(var_names) {
    vapply(var_names, checkVarNames, character(1),
           checkKeywords = checkKeywords, checkDots = checkDots,
           USE.NAMES = FALSE)
  }

  GADSdat[["labels"]][, "varName"] <- fix_names(GADSdat[["labels"]][, "varName"])
  names(GADSdat[["dat"]]) <- fix_names(names(GADSdat[["dat"]]))
  GADSdat
}
#'@export
checkVarNames.all_GADSdat <- function(GADSdat, checkKeywords = TRUE, checkDots = TRUE) {
  # Method for all_GADSdat objects: renames the variables consistently in the
  # meta data ("allLabels") and in every data set of "datList" and returns the
  # modified object.
  check_all_GADSdat(GADSdat)

  # Check one name at a time and hand the user's options on to the name-level
  # method; without forwarding, 'checkKeywords' and 'checkDots' would be ignored.
  # vapply() guarantees a plain character vector, also for empty input.
  fix_names <- function(var_names) {
    vapply(var_names, checkVarNames, character(1),
           checkKeywords = checkKeywords, checkDots = checkDots,
           USE.NAMES = FALSE)
  }

  GADSdat[["allLabels"]][, "varName"] <- fix_names(GADSdat[["allLabels"]][, "varName"])
  GADSdat[["datList"]] <- lapply(GADSdat[["datList"]], function(df) {
    names(df) <- fix_names(names(df))
    df
  })
  GADSdat
}
#'@export
checkVarNames.data.frame <- function(GADSdat, checkKeywords = TRUE, checkDots = TRUE) {
  # Method for plain data.frames: only the column names are checked and renamed;
  # the data.frame with the updated names is returned.
  # The options are handed on explicitly, otherwise the name-level method would
  # always run with its defaults and ignore the user's choice.
  names(GADSdat) <- checkVarNames(names(GADSdat),
                                  checkKeywords = checkKeywords,
                                  checkDots = checkDots)
  GADSdat
}
#'@export
checkVarNames.character <- function(GADSdat, checkKeywords = TRUE, checkDots = TRUE) {
  # Name-level method: 'GADSdat' is a character vector of variable names.
  # Returns the (possibly modified) names and reports every renaming via message().
  check_logicalArgument(checkKeywords)
  check_logicalArgument(checkDots)
  if (anyNA(GADSdat)) {
    stop("Column names can not be NA.")
  }

  renamed <- GADSdat

  # SQLite keywords (matched case insensitively) get the suffix "Var";
  # afterwards all names are passed through make.names().
  if (checkKeywords) {
    reserved <- tolower(eatDB::sqlite_keywords)
    is_keyword <- tolower(GADSdat) %in% reserved
    renamed[is_keyword] <- paste0(GADSdat[is_keyword], "Var")
    renamed <- make.names(renamed)
  }

  # Every dot is replaced by an underscore.
  if (checkDots) {
    renamed <- gsub("\\.", "_", renamed)
  }

  # One message per name that differs from the input.
  for (idx in which(renamed != GADSdat)) {
    message(paste(GADSdat[idx], "has been renamed to", renamed[idx]))
  }
  renamed
}