Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Performance improvement - replace as.data.frame with plyr::quickdf

  • Loading branch information...
commit a946d3967fc49d62534276c588ed303a8a5cb4e6 1 parent 795aca5
@andrie authored
View
15 R/internal-xml.R
@@ -30,7 +30,7 @@ getSSSrecord <- function(xmlNode){
} else {
pto <- p[[1]]
}
- data.frame(
+ quickdf(list(
ident = as.character(xmlAttrs (xmlNode)["ident"]),
type = as.character(xmlAttrs (xmlNode)["type"]),
name = as.character(xmlValue (xmlNode[["name"]])[1]),
@@ -39,9 +39,9 @@ getSSSrecord <- function(xmlNode){
positionFinish = as.character(pto),
subfields = subfields,
width = width,
- hasValues = !is.null(xmlNode[["values"]]),
- stringsAsFactors = FALSE
- )
+ hasValues = !is.null(xmlNode[["values"]])
+ #stringsAsFactors = FALSE
+ ))
}
#is.character0 <- function(x){
@@ -59,19 +59,20 @@ getSSScodes <- function(x){
size <- xmlSize(x[["values"]])
if (is.null(x[["values"]])){
df <- data.frame(
- ident = as.character(xmlAttrs (x)["ident"]),
+ ident = as.character(xmlAttrs(x)["ident"]),
code = NA,
codevalues = NA,
stringsAsFactors = FALSE
)
} else {
+ #browser()
df <- data.frame(
- ident = rep(as.character(xmlAttrs (x)["ident"]), size),
+ ident = rep(unname(xmlAttrs (x)["ident"]), size),
code = as.character(xmlSApply(x[["values"]], xmlAttrs)),
codevalues = as.character(xmlSApply(x[["values"]], xmlValue)),
stringsAsFactors = FALSE
)
- df <- subset(df, df$codevalues!="character(0)")
+ df <- df[df$codevalues!="character(0)", ]
}
df
View
9 R/read-sss.R
@@ -27,13 +27,14 @@ readSSSmetadata <- function(SSSfilename){
#' @seealso readSSSmetadata, read.sss, readSSSdata
parseSSSmetadata <- function(XMLdoc){
r <- xmlRoot(XMLdoc)[["survey"]][["record"]]
- variables <- as.data.frame(
- do.call(rbind, lapply(xmlChildren(r), getSSSrecord)),
- stringsAsFactors=FALSE)
+ variables <- quickdf(
+ do.call(rbind, lapply(xmlChildren(r), getSSSrecord))
+ #stringsAsFactors=FALSE)
+ )
variables$positionFinish <- as.numeric(variables$positionFinish)
variables$positionStart <- as.numeric(variables$positionStart)
- codes <- as.data.frame(do.call(rbind, lapply(xmlChildren(r), getSSScodes)), stringsAsFactors=FALSE)
+ codes <- quickdf(do.call(rbind, lapply(xmlChildren(r), getSSScodes)))#, stringsAsFactors=FALSE)
list(variables=variables, codes=codes)
}
View
56 inst/tests/test-external.R
@@ -18,41 +18,43 @@ test_that("readSSSmetadata works", {
test_that("parseSSSmetadata works", {
test <- parseSSSmetadata(readSSSmetadata(filenameSSS))
- rest <- structure(list(variables = structure(list(ident = c("1", "2",
- "3", "4", "5", "6", "7", "99"), type = c("single", "multiple",
- "character", "multiple", "quantity", "logical", "single", "quantity"
- ), name = c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q99"),
+ rest <- structure(list(
+ variables = structure(list(
+ ident = c("1", "2", "3", "4", "5", "6", "7", "99"),
+ type = c("single", "multiple", "character", "multiple",
+ "quantity", "logical", "single", "quantity"),
+ name = c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q99"),
label = c("Number of visits", "Attractions visited", "Other attractions visited",
"Two favourite attractions visited", "Miles travelled", "Would come again",
- "When is that most likely to be", "Case weight"), positionStart = c(1,
- 2, 11, 41, 43, 46, 47, 48), positionFinish = c(1, 10, 40,
- 42, 45, 46, 47, 54), subfields = c("0", "0", "0", "2", "0",
- "0", "0", "0"), width = c(0, 0, 0, 1, 0, 0, 0, 0), hasValues = c(TRUE,
- TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)), .Names = c("ident",
- "type", "name", "label", "positionStart", "positionFinish", "subfields",
- "width", "hasValues"), row.names = c("variable", "variable1",
- "variable2", "variable3", "variable4", "variable5", "variable6",
- "variable7"), class = "data.frame"), codes = structure(list(ident = c("1",
- "1", "1", "2", "2", "2", "2", "2", "2", "3", "4", "4", "4", "4",
- "4", "4", "5", "5", "6", "7", "7", "7", "99"), code = c("1",
- "2", "3", "1", "2", "3", "4", "5", "9", NA, "1", "2", "3", "4",
- "5", "9", "500", "999", NA, "1", "2", "3", NA), codevalues = c("First visit",
- "Visited before within the year", "Visited before that", "Sherwood Forest",
+ "When is that most likely to be", "Case weight"),
+ positionStart = c(1, 2, 11, 41, 43, 46, 47, 48),
+ positionFinish = c(1, 10, 40, 42, 45, 46, 47, 54),
+ subfields = c("0", "0", "0", "2", "0", "0", "0", "0"),
+ width = c(0, 0, 0, 1, 0, 0, 0, 0),
+ hasValues = c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)),
+ .Names = c("ident", "type", "name", "label", "positionStart", "positionFinish",
+ "subfields", "width", "hasValues"),
+ row.names = c(NA, 8L),
+ class = "data.frame"),
+ codes = structure(list(
+ ident = c("1", "1", "1", "2", "2", "2", "2", "2", "2", "3",
+ "4", "4", "4", "4", "4", "4", "5", "5", "6", "7", "7", "7", "99"),
+ code = c("1", "2", "3", "1", "2", "3", "4", "5", "9", NA, "1", "2", "3", "4",
+ "5", "9", "500", "999", NA, "1", "2", "3", NA),
+ codevalues = c("First visit", "Visited before within the year", "Visited before that", "Sherwood Forest",
"Nottingham Castle", "\"Friar Tuck\" Restaurant", "\"Maid Marion\" Cafe",
"Mining museum", "Other", NA, "Sherwood Forest", "Nottingham Castle",
"\"Friar Tuck\" Restaurant", "\"Maid Marion\" Cafe", "Mining museum",
"Other", "500 or more", "Not stated", NA, "Within 3 months",
- "Between 3 months and 1 year", "More than 1 years time", NA)), .Names = c("ident",
- "code", "codevalues"), row.names = c("variable.1", "variable.2",
- "variable.3", "variable.11", "variable.21", "variable.31", "variable.4",
- "variable.5", "variable.6", "variable", "variable.12", "variable.22",
- "variable.32", "variable.41", "variable.51", "variable.61", "variable.23",
- "variable.33", "variable1", "variable.13", "variable.24", "variable.34",
- "variable2"), class = "data.frame")), .Names = c("variables",
- "codes"))
+ "Between 3 months and 1 year", "More than 1 years time", NA)),
+ .Names = c("ident", "code", "codevalues"),
+ row.names = c(NA, 23L),
+ class = "data.frame")),
+ .Names = c("variables", "codes"))
expect_is(test, "list")
- expect_equal(test, rest)
+ expect_equal(test$variables, rest$variables)
+ expect_equal(test$codes, rest$codes)
})
View
1  inst/tests/test-internal.R
@@ -10,3 +10,4 @@ filenameASC <-"sample.asc"
#context("Test internal functions")
#test_that("internal functions work")
+
Please sign in to comment.
Something went wrong with that request. Please try again.