Permalink
Browse files

Performance improvement - replace as.data.frame with plyr::quickdf

  • Loading branch information...
1 parent 795aca5 commit a946d3967fc49d62534276c588ed303a8a5cb4e6 @andrie committed Oct 30, 2011
Showing with 43 additions and 38 deletions.
  1. +8 −7 R/internal-xml.R
  2. +5 −4 R/read-sss.R
  3. +29 −27 inst/tests/test-external.R
  4. +1 −0 inst/tests/test-internal.R
View
@@ -30,7 +30,7 @@ getSSSrecord <- function(xmlNode){
} else {
pto <- p[[1]]
}
- data.frame(
+ quickdf(list(
ident = as.character(xmlAttrs (xmlNode)["ident"]),
type = as.character(xmlAttrs (xmlNode)["type"]),
name = as.character(xmlValue (xmlNode[["name"]])[1]),
@@ -39,9 +39,9 @@ getSSSrecord <- function(xmlNode){
positionFinish = as.character(pto),
subfields = subfields,
width = width,
- hasValues = !is.null(xmlNode[["values"]]),
- stringsAsFactors = FALSE
- )
+ hasValues = !is.null(xmlNode[["values"]])
+ #stringsAsFactors = FALSE
+ ))
}
#is.character0 <- function(x){
@@ -59,19 +59,20 @@ getSSScodes <- function(x){
size <- xmlSize(x[["values"]])
if (is.null(x[["values"]])){
df <- data.frame(
- ident = as.character(xmlAttrs (x)["ident"]),
+ ident = as.character(xmlAttrs(x)["ident"]),
code = NA,
codevalues = NA,
stringsAsFactors = FALSE
)
} else {
+ #browser()
df <- data.frame(
- ident = rep(as.character(xmlAttrs (x)["ident"]), size),
+ ident = rep(unname(xmlAttrs (x)["ident"]), size),
code = as.character(xmlSApply(x[["values"]], xmlAttrs)),
codevalues = as.character(xmlSApply(x[["values"]], xmlValue)),
stringsAsFactors = FALSE
)
- df <- subset(df, df$codevalues!="character(0)")
+ df <- df[df$codevalues!="character(0)", ]
}
df
View
@@ -27,13 +27,14 @@ readSSSmetadata <- function(SSSfilename){
#' @seealso readSSSmetadata, read.sss, readSSSdata
parseSSSmetadata <- function(XMLdoc){
r <- xmlRoot(XMLdoc)[["survey"]][["record"]]
- variables <- as.data.frame(
- do.call(rbind, lapply(xmlChildren(r), getSSSrecord)),
- stringsAsFactors=FALSE)
+ variables <- quickdf(
+ do.call(rbind, lapply(xmlChildren(r), getSSSrecord))
+ #stringsAsFactors=FALSE)
+ )
variables$positionFinish <- as.numeric(variables$positionFinish)
variables$positionStart <- as.numeric(variables$positionStart)
- codes <- as.data.frame(do.call(rbind, lapply(xmlChildren(r), getSSScodes)), stringsAsFactors=FALSE)
+ codes <- quickdf(do.call(rbind, lapply(xmlChildren(r), getSSScodes)))#, stringsAsFactors=FALSE)
list(variables=variables, codes=codes)
}
View
@@ -18,41 +18,43 @@ test_that("readSSSmetadata works", {
test_that("parseSSSmetadata works", {
test <- parseSSSmetadata(readSSSmetadata(filenameSSS))
- rest <- structure(list(variables = structure(list(ident = c("1", "2",
- "3", "4", "5", "6", "7", "99"), type = c("single", "multiple",
- "character", "multiple", "quantity", "logical", "single", "quantity"
- ), name = c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q99"),
+ rest <- structure(list(
+ variables = structure(list(
+ ident = c("1", "2", "3", "4", "5", "6", "7", "99"),
+ type = c("single", "multiple", "character", "multiple",
+ "quantity", "logical", "single", "quantity"),
+ name = c("Q1", "Q2", "Q3", "Q4", "Q5", "Q6", "Q7", "Q99"),
label = c("Number of visits", "Attractions visited", "Other attractions visited",
"Two favourite attractions visited", "Miles travelled", "Would come again",
- "When is that most likely to be", "Case weight"), positionStart = c(1,
- 2, 11, 41, 43, 46, 47, 48), positionFinish = c(1, 10, 40,
- 42, 45, 46, 47, 54), subfields = c("0", "0", "0", "2", "0",
- "0", "0", "0"), width = c(0, 0, 0, 1, 0, 0, 0, 0), hasValues = c(TRUE,
- TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)), .Names = c("ident",
- "type", "name", "label", "positionStart", "positionFinish", "subfields",
- "width", "hasValues"), row.names = c("variable", "variable1",
- "variable2", "variable3", "variable4", "variable5", "variable6",
- "variable7"), class = "data.frame"), codes = structure(list(ident = c("1",
- "1", "1", "2", "2", "2", "2", "2", "2", "3", "4", "4", "4", "4",
- "4", "4", "5", "5", "6", "7", "7", "7", "99"), code = c("1",
- "2", "3", "1", "2", "3", "4", "5", "9", NA, "1", "2", "3", "4",
- "5", "9", "500", "999", NA, "1", "2", "3", NA), codevalues = c("First visit",
- "Visited before within the year", "Visited before that", "Sherwood Forest",
+ "When is that most likely to be", "Case weight"),
+ positionStart = c(1, 2, 11, 41, 43, 46, 47, 48),
+ positionFinish = c(1, 10, 40, 42, 45, 46, 47, 54),
+ subfields = c("0", "0", "0", "2", "0", "0", "0", "0"),
+ width = c(0, 0, 0, 1, 0, 0, 0, 0),
+ hasValues = c(TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)),
+ .Names = c("ident", "type", "name", "label", "positionStart", "positionFinish",
+ "subfields", "width", "hasValues"),
+ row.names = c(NA, 8L),
+ class = "data.frame"),
+ codes = structure(list(
+ ident = c("1", "1", "1", "2", "2", "2", "2", "2", "2", "3",
+ "4", "4", "4", "4", "4", "4", "5", "5", "6", "7", "7", "7", "99"),
+ code = c("1", "2", "3", "1", "2", "3", "4", "5", "9", NA, "1", "2", "3", "4",
+ "5", "9", "500", "999", NA, "1", "2", "3", NA),
+ codevalues = c("First visit", "Visited before within the year", "Visited before that", "Sherwood Forest",
"Nottingham Castle", "\"Friar Tuck\" Restaurant", "\"Maid Marion\" Cafe",
"Mining museum", "Other", NA, "Sherwood Forest", "Nottingham Castle",
"\"Friar Tuck\" Restaurant", "\"Maid Marion\" Cafe", "Mining museum",
"Other", "500 or more", "Not stated", NA, "Within 3 months",
- "Between 3 months and 1 year", "More than 1 years time", NA)), .Names = c("ident",
- "code", "codevalues"), row.names = c("variable.1", "variable.2",
- "variable.3", "variable.11", "variable.21", "variable.31", "variable.4",
- "variable.5", "variable.6", "variable", "variable.12", "variable.22",
- "variable.32", "variable.41", "variable.51", "variable.61", "variable.23",
- "variable.33", "variable1", "variable.13", "variable.24", "variable.34",
- "variable2"), class = "data.frame")), .Names = c("variables",
- "codes"))
+ "Between 3 months and 1 year", "More than 1 years time", NA)),
+ .Names = c("ident", "code", "codevalues"),
+ row.names = c(NA, 23L),
+ class = "data.frame")),
+ .Names = c("variables", "codes"))
expect_is(test, "list")
- expect_equal(test, rest)
+ expect_equal(test$variables, rest$variables)
+ expect_equal(test$codes, rest$codes)
})
@@ -10,3 +10,4 @@ filenameASC <-"sample.asc"
#context("Test internal functions")
#test_that("internal functions work")
+

0 comments on commit a946d39

Please sign in to comment.