version 0.0-8.5

cran · May 21, 2011 · ad312e6 · ad312e6
1 parent c65f39f
commit ad312e6
Show file tree

Hide file tree

Showing 14 changed files with 144 additions and 96 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SGP
-Version: 0.0-8.0
-Date: 2011-5-5
+Version: 0.0-8.5
+Date: 2011-5-20
 Title: An R Package for the Calculation and Visualization of Student
         Growth Percentiles.
 Author@R: c(person("Damian", "Betebenner",
@@ -16,8 +16,8 @@ Author: Damian W. Betebenner <dbetebenner@nciea.org>, with
         Jonas (Virginia), Dr. Philip Olsen (Wisconsin), Nick Stroud
         (Wisconsin).
 Maintainer: Damian W. Betebenner <dbetebenner@nciea.org>
-Depends: R (>= 2.10), colorspace, data.table (>= 1.5.3), foreach, grid,
-        gridBase, gtools, plyr, quantreg, sn, splines
+Depends: R (>= 2.10), boot, colorspace, data.table (>= 1.5.3), foreach,
+        grid, gridBase, gtools, plyr, quantreg, sn, splines
 Suggests: pdf2
 Description: Functions to calculate student growth percentiles and
         percentile growth projections/trajectories for students using
@@ -35,6 +35,6 @@ LazyData: Yes
 License: CC BY-SA 3.0 US | CC BY-NC-SA 3.0 + file LICENSE
 Repository: CRAN
 Repository/R-Forge/Project: sgp
-Repository/R-Forge/Revision: 125
-Date/Publication: 2011-05-08 05:45:44
-Packaged: 2011-05-06 20:47:34 UTC; rforge
+Repository/R-Forge/Revision: 126
+Date/Publication: 2011-05-21 20:15:01
+Packaged: 2011-05-20 20:44:26 UTC; rforge
diff --git a/R/abcSGP.R b/R/abcSGP.R
@@ -22,12 +22,15 @@ function(sgp_object,
 		institution_level="GRADE",
 		demographic=c("GENDER", "ETHNICITY", "FREE_REDUCED_LUNCH_STATUS", "ELL_STATUS", "IEP_STATUS", "GIFTED_AND_TALENTED_PROGRAM_STATUS", "CATCH_UP_KEEP_UP_STATUS_INITIAL"),
 		institution_inclusion=list(STATE="STATE_ENROLLMENT_STATUS", DISTRICT_NUMBER="DISTRICT_ENROLLMENT_STATUS", SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS")),
-	confidence.interval.groups=list(institution="SCHOOL_NUMBER",
-		content="CONTENT_AREA",
-		time="YEAR",
-		institution_level=NULL,
-		demographic=NULL,
-		institution_inclusion=list(STATE=NULL, DISTRICT_NUMBER=NULL, SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS"))) {
+        confidence.interval.groups=list(TYPE="Bootstrap",
+                VARIABLES=c("SGP"),
+                QUANTILES=c(0.025, 0.975),
+                GROUPS=list(institution="SCHOOL_NUMBER",
+                content="CONTENT_AREA",
+                time="YEAR",
+                institution_level= NULL,
+                demographic=NULL,
+                institution_inclusion=list(STATE=NULL, DISTRICT_NUMBER=NULL, SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS")))) {
 
         started.at <- proc.time()
 	message(paste("Started abcSGP", date()), "\n")

diff --git a/R/analyzeSGP.R b/R/analyzeSGP.R
@@ -52,9 +52,9 @@ function(sgp_object,
 							panel.data.vnames=sgp.vnames,
 							grade.progression=k,
 							calculate.confidence.intervals=list(state=state,  
-								confidence.quantiles=NULL,
+								confidence.quantiles=c(0.16,0.84),
 								simulation.iterations=100, 
-								distribution="Skew-Normal", round=1))
+								distribution="Normal", round=1))
 					} ## END k loop
 				} else {
 					for (k in sgp.iter[["sgp.grade.sequences"]]) {

diff --git a/R/combineSGP.R b/R/combineSGP.R
@@ -15,7 +15,7 @@ function(sgp_object,
 	}
 
 
-	## Merge SGP with student data
+	## Merge SGPs with student data
 
 	if (sgp.percentiles) { 
 		tmp.list <- list() 
@@ -29,7 +29,6 @@ function(sgp_object,
 			key=paste(key(sgp_object[["Student"]]), collapse=","))[sgp_object[["Student"]]]
 	}
 
-
 	## Create SGP targets and merge with student data
 
 	if (sgp.projections.lagged) { 

diff --git a/R/studentGrowthPercentiles.R b/R/studentGrowthPercentiles.R
@@ -23,8 +23,8 @@ function(panel.data,           ## REQUIRED
 	convert.using.loss.hoss=TRUE,
 	goodness.of.fit=TRUE) {
 
-	started.at=proc.time()
-        message(paste("\tStarted studentGrowthPercentiles", date()))
+	started.at <- proc.time()
+	started.date <- date()
 
 	##########################################################
 	###
@@ -249,8 +249,14 @@ function(panel.data,           ## REQUIRED
 					textGrob(x=-17, y=50, "Empirical SGP Distribution", default.units="native", gp=gpar(cex=0.7), rot=90, vp="qq")))))
 	} 
 
-	.csem.score.simulator <- function(scale_scores, grade, content_area, state, distribution="Normal", round=1) {
+	.csem.score.simulator <- function(scale_scores, grade, content_area, year, state, distribution="Normal", round=1) {
+		GRADE <- CONTENT_AREA <- YEAR <- NULL ## To avoid R CMD check warnings
 		min.max <- stateData[[state]][["Achievement"]][["Knots_Boundaries"]][[content_area]][[paste("loss.hoss_", grade, sep="")]]
+		if ("YEAR" %in% names(stateData[[state]][["Assessment_Program_Information"]][["CSEM"]])) {
+			CSEM_Data <- subset(stateData[[state]][["Assessment_Program_Information"]][["CSEM"]], GRADE==grade & CONTENT_AREA==content_area & YEAR==year)
+		} else {
+			CSEM_Data <- subset(stateData[[state]][["Assessment_Program_Information"]][["CSEM"]], GRADE==grade & CONTENT_AREA==content_area)
+		}
 		CSEM_Data <- stateData[[state]][["Assessment_Program_Information"]][["CSEM"]][
 			stateData[[state]][["Assessment_Program_Information"]][["CSEM"]][["GRADE"]]==grade & 
 			stateData[[state]][["Assessment_Program_Information"]][["CSEM"]][["CONTENT_AREA"]]==content_area,]
@@ -412,7 +418,7 @@ function(panel.data,           ## REQUIRED
 	### Create Panel_Data based upon class of input data
 
 	if (is.matrix(panel.data)) {
-		Panel_Data <- as.data.frame(panel.data, stringsAsFactors=FALSE)
+		Panel_Data <- panel.data <- as.data.frame(panel.data, stringsAsFactors=FALSE)
 	}
 	if (identical(class(panel.data), "list")) {
         	if (!identical(class(panel.data[["Panel_Data"]]), "data.frame")) {
@@ -530,17 +536,21 @@ function(panel.data,           ## REQUIRED
 					set.seed(k)
 					if (k==1) {
 						tmp.csem.quantiles[[j]] <- data.frame(ID=tmp.data[["ID"]],
-						SGP_SIM_1=.get.quantiles(tmp.predictions, .csem.score.simulator(tmp.data[[tail(SS,1)]],
+						SGP_SIM_1=.get.quantiles(tmp.predictions, .csem.score.simulator(
+							tmp.data[[tail(SS,1)]],
 							tmp.last,
 							sgp.labels$my.subject,
+							sgp.labels$my.year,
 							calculate.confidence.intervals$state,
 							calculate.confidence.intervals$distribution,
 							calculate.confidence.intervals$round)))
 					} else {
 						tmp.csem.quantiles[[j]] <- cbind(tmp.csem.quantiles[[j]], 
-							.get.quantiles(tmp.predictions, .csem.score.simulator(tmp.data[[tail(SS,1)]],
+							.get.quantiles(tmp.predictions, .csem.score.simulator(
+								tmp.data[[tail(SS,1)]],
 								tmp.last,
 								sgp.labels$my.subject,
+								sgp.labels$my.year,
 								calculate.confidence.intervals$state,
 								calculate.confidence.intervals$distribution,
 								calculate.confidence.intervals$round)))
@@ -579,7 +589,7 @@ function(panel.data,           ## REQUIRED
 
 			if (!is.null(calculate.confidence.intervals$confidence.quantiles)) {
 				tmp.cq <- round(t(apply(simulation.data[, -1, with=FALSE], 1, quantile, probs = calculate.confidence.intervals$confidence.quantiles)))
-					colnames(tmp.cq) <- paste("CONFIDENCE_QUANTILE_", calculate.confidence.intervals$confidence.quantiles, sep="")
+					colnames(tmp.cq) <- paste("SGP_", calculate.confidence.intervals$confidence.quantiles, "_CONFIDENCE_BOUND", sep="")
 					quantile.data <- cbind(quantile.data, tmp.cq)
 			}
 			Simulated_SGPs[[tmp.path]] <- rbind.fill(Simulated_SGPs[[tmp.path]], .unget.data.table(simulation.data, ss.data)) 
@@ -598,8 +608,9 @@ function(panel.data,           ## REQUIRED
 		}
 	} ## End if calculate.sgps
 
-	### Announce Completion & Return SGP Object
+	### Start/Finish Message & Return SGP Object
 
+        message(paste("\tStarted studentGrowthPercentiles", started.date))
 	message(paste("\tSubject: ", sgp.labels$my.subject, ", Year: ", sgp.labels$my.year, ", Grade Progression: ", paste(tmp.gp, collapse=", "), " ", sgp.labels$my.extra.label, sep=""))
 	message(paste("\tFinished SGP Student Growth Percentile Analysis", date(), "in", timetaken(started.at), "\n")) 
 

diff --git a/R/studentGrowthProjections.R b/R/studentGrowthProjections.R
@@ -16,7 +16,7 @@ function(panel.data,	## REQUIRED
 	projcuts.digits=0) {
 
 	started.at=proc.time()
-        message(paste("\tStarted studentGrowthProjections", date()))
+	started.date <- date()
 
 	##########################################################
 	###
@@ -398,6 +398,7 @@ function(panel.data,	## REQUIRED
 
 	### Announce Completion & Return SGP Object
 
+        message(paste("\tStarted studentGrowthProjections", started.date))
 	message(paste("\tSubject: ", sgp.labels$my.subject, ", Year: ", sgp.labels$my.year, ", Grade Progression: ", paste(grade.progression, collapse=", "), " ", sgp.labels$my.extra.label, sep="")) 
 	message(paste("\tFinished studentGrowthProjections", date(), "in", timetaken(started.at), "\n")) 
 

diff --git a/R/summarizeSGP.R b/R/summarizeSGP.R
@@ -13,12 +13,15 @@ function(sgp_object,
 		institution_level="GRADE",
 		demographic=c("GENDER", "ETHNICITY", "FREE_REDUCED_LUNCH_STATUS", "ELL_STATUS", "IEP_STATUS", "GIFTED_AND_TALENTED_PROGRAM_STATUS", "CATCH_UP_KEEP_UP_STATUS"),
 		institution_inclusion=list(STATE="STATE_ENROLLMENT_STATUS", DISTRICT_NUMBER="DISTRICT_ENROLLMENT_STATUS", SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS")),
-	confidence.interval.groups=list(institution="SCHOOL_NUMBER",
+	confidence.interval.groups=list(TYPE="Bootstrap",
+		VARIABLES=c("SGP"),
+		QUANTILES=c(0.025, 0.975),
+		GROUPS=list(institution="SCHOOL_NUMBER",
 		content="CONTENT_AREA",
 		time="YEAR",
 		institution_level= NULL,
 		demographic=NULL,
-		institution_inclusion=list(STATE=NULL, DISTRICT_NUMBER=NULL, SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS"))) {
+		institution_inclusion=list(STATE=NULL, DISTRICT_NUMBER=NULL, SCHOOL_NUMBER="SCHOOL_ENROLLMENT_STATUS")))) {
 
         started.at <- proc.time()
         message(paste("Started summarizeSGP", date()))
@@ -54,6 +57,7 @@ function(sgp_object,
 	}
 
 	median_na <- function(x) median(as.numeric(x), na.rm=TRUE)
+	boot.median <- function(x,i) median(x[i], na.rm=TRUE)
 	mean_na <- function(x, result.digits=1) round(mean(as.numeric(x), na.rm=TRUE), digits=result.digits)
 	num_non_missing <- function(x) sum(!is.na(as.numeric(x)))
 
@@ -65,7 +69,7 @@ function(sgp_object,
 		for (i in seq(length(in.categories))) {
 			tmp.result[[i]] <-  round(100*sum(tmp[in.categories[[i]]])/sum(tmp[of.categories[[i]]]), digits=result.digits)
 		}
-	return(unlist(tmp.result))
+		return(unlist(tmp.result))
 	}
 
 	percent_at_above_target <- function(sgp, target, result.digits=1) {
@@ -74,23 +78,26 @@ function(sgp_object,
 		return(tmp.pct)
 	}
 
+	boot.sgp <- function(data, conf.quantiles=c(0.025, 0.975)) {
+		as.list(quantile(boot(data, boot.median, 100)$t, probs=conf.quantiles, na.rm=TRUE))
+	}
+
 	sgpSummary <- function(sgp.groups.to.summarize, confidence.interval.groups.to.summarize) {
   		SGP_SIM <- V1 <- .SD <- NULL  ## To prevent R CMD check warning
-		ListExpr <- parse(text=paste("quote(as.list(c(", paste(unlist(sgp.summaries), collapse=", "),")))",sep=""))
+		ListExpr <- parse(text=paste("quote(c(", paste(unlist(sgp.summaries), collapse=", "),"))",sep=""))
 		ByExpr <- parse(text=paste("quote(list(", paste(sgp.groups.to.summarize, collapse=", "), "))", sep=""))
 		tmp <- tmp.dt[, eval(eval(ListExpr)), by=eval(eval(ByExpr))]
-		names(tmp)[-seq(length(unlist(strsplit(as.character(sgp.groups.to.summarize), ", "))))] <- unlist(strsplit(names(sgp.summaries), "[.]"))
-		if (confidence.interval.groups.to.summarize) {
+		if (confidence.interval.groups.to.summarize & confidence.interval.groups$TYPE=="CSEM") {
 			SIM_ByExpr1 <- parse(text=paste("quote(list(", paste(unlist(strsplit(as.character(sgp.groups.to.summarize), ", "))
   				[!(unlist(strsplit(as.character(sgp.groups.to.summarize), ", "))) %in% key(tmp.dt)], collapse=", "), 
-			  	", ", paste(names(tmp.simulation.dt)[grep("SGP_SIM_", names(tmp.simulation.dt))], collapse=", "), "))", sep=""))
+		  		", ", paste(names(tmp.simulation.dt)[grep("SGP_SIM_", names(tmp.simulation.dt))], collapse=", "), "))", sep=""))
 			SIM_ByExpr2 <- parse(text=paste("quote(list(", paste(sgp.groups.to.summarize, collapse=", "), "))", sep=""))
-			tmp.sim <- tmp.dt[tmp.simulation.dt, eval(eval(SIM_ByExpr1))][, -(1:2), with=FALSE][,
-				lapply(.SD, median_na), by=eval(eval(SIM_ByExpr2))][, 
-				as.list(round(apply(.SD, 1, quantile, probs=c(0.025, 0.975)))), by=eval(eval(SIM_ByExpr2))]
-			names(tmp.sim)[(dim(tmp.sim)[2]-1):dim(tmp.sim)[2]] <- c("LOWER_MEDIAN_SGP_95_CONF_BOUND", "UPPER_MEDIAN_SGP_95_CONF_BOUND")
+				tmp.sim <- tmp.dt[tmp.simulation.dt, eval(eval(SIM_ByExpr1))][, -(1:2), with=FALSE][,
+					lapply(.SD, median_na), by=eval(eval(SIM_ByExpr2))][, 
+				as.list(round(apply(.SD, 1, quantile, probs=confidence.interval.groups$QUANTILES))), by=eval(eval(SIM_ByExpr2))]
 			tmp <- data.table(merge.data.frame(tmp, tmp.sim, by = unlist(strsplit(as.character(sgp.groups.to.summarize), ", ")),all=TRUE))
 		}
+		names(tmp)[-seq(length(unlist(strsplit(as.character(sgp.groups.to.summarize), ", "))))] <- sgp.summaries.names
 		message(paste("Finished with", sgp.groups.to.summarize))
 		return(tmp)
 	}
@@ -108,12 +115,25 @@ function(sgp_object,
 			key=paste(key(tmp.dt), collapse=","))
 	}
 
-	## Take subset of data
+	## Prepare data
 
 	tmp.dt <- data.table(STATE=state, sgp_object[["Student"]][CJ("VALID_CASE", content_areas, years), mult="all"], key="VALID_CASE, ID, CONTENT_AREA, YEAR")
 
 	if (!is.null(confidence.interval.groups)) {
-		tmp.simulation.dt <- combineSims(sgp_object); gc()
+		if (confidence.interval.groups$TYPE == "Bootstrap") {
+			tmp.list <- list()
+			tmp.quantiles <- paste("c(", paste(confidence.interval.groups$QUANTILES, collapse=", "), ")", sep="")
+			for (i in confidence.interval.groups$VARIABLES) {
+					tmp.list[[paste("MEDIAN_", i, "_QUANTILES", sep="")]] <- paste("boot.sgp(", i, ", ", tmp.quantiles, ")", sep="")
+			}
+			sgp.summaries <- c(sgp.summaries, tmp.list)
+		}
+		if (confidence.interval.groups$TYPE == "CSEM") {
+			tmp.simulation.dt <- combineSims(sgp_object); gc()
+		}
+		sgp.summaries.names <- c(head(names(sgp.summaries), -1), paste("MEDIAN_SGP_", confidence.interval.groups$QUANTILES, "_CONFIDENCE_BOUND", sep=""))
+	} else {
+		sgp.summaries.names <- names(sgp.summaries)
 	}
 
 	## Create summary tables
@@ -126,23 +146,23 @@ function(sgp_object,
 			group.format(summary.groups[["institution_inclusion"]][[i]]),
 			group.format(summary.groups[["demographic"]])), sep=""))
 
-  	if (!is.null(confidence.interval.groups) & i %in% confidence.interval.groups$institution) {
+  	if (!is.null(confidence.interval.groups[["GROUPS"]]) & i %in% confidence.interval.groups[["GROUPS"]][["institution"]]) {
 			ci.groups <- do.call(paste, c(expand.grid(i,
-				group.format(confidence.interval.groups[["content"]]),
-				group.format(confidence.interval.groups[["time"]]),
-				group.format(confidence.interval.groups[["institution_level"]]),
-				group.format(confidence.interval.groups[["institution_inclusion"]][[i]]),
-				group.format(confidence.interval.groups[["demographic"]])), sep=""))
+				group.format(confidence.interval.groups[["GROUPS"]][["content"]]),
+				group.format(confidence.interval.groups[["GROUPS"]][["time"]]),
+				group.format(confidence.interval.groups[["GROUPS"]][["institution_level"]]),
+				group.format(confidence.interval.groups[["GROUPS"]][["institution_inclusion"]][[i]]),
+				group.format(confidence.interval.groups[["GROUPS"]][["demographic"]])), sep=""))
 		}
 
-		if (!is.null(confidence.interval.groups) & i %in% confidence.interval.groups$institution) {
+		if (!is.null(confidence.interval.groups[["GROUPS"]]) & i %in% confidence.interval.groups[["GROUPS"]][["institution"]]) {
 			j <- k <- NULL ## To prevent R CMD check warnings
-  		sgp_object[["Summary"]][[i]] <- foreach(j=iter(sgp.groups), k=iter(sgp.groups %in% ci.groups), 
+  			sgp_object[["Summary"]][[i]] <- foreach(j=iter(sgp.groups), k=iter(sgp.groups %in% ci.groups), 
 				.options.multicore=list(preschedule = FALSE, set.seed = FALSE), .packages="data.table", .inorder=FALSE) %dopar% {return(sgpSummary(j, k))}
 			names(sgp_object[["Summary"]][[i]]) <- gsub(", ", "__", sgp.groups)
 		} else {
 			j <- k <- NULL ## To prevent R CMD check warnings
-    	sgp_object[["Summary"]][[i]] <- foreach(j=iter(sgp.groups), k=iter(rep(FALSE, length(sgp.groups))), 
+    			sgp_object[["Summary"]][[i]] <- foreach(j=iter(sgp.groups), k=iter(rep(FALSE, length(sgp.groups))), 
 				.options.multicore=list(preschedule = FALSE, set.seed = FALSE), .packages="data.table", .inorder=FALSE) %dopar% {return(sgpSummary(j, k))}
 			names(sgp_object[["Summary"]][[i]]) <- gsub(", ", "__", sgp.groups)
 		}

diff --git a/data/stateData.rda b/data/stateData.rda
diff --git a/inst/CITATION b/inst/CITATION
@@ -4,11 +4,11 @@ citEntry(entry = "Manual",
          title        = "{SGP}: An R Package for the Calculation and Visualization of Student Growth Percentiles",
          author       = "Damian W. Betebenner and Adam Van Iwaarden",
          year         = "2011",
-         note         = "R package version 0.0-8.0",
+         note         = "R package version 0.0-8.5",
          url          = "http://cran.r-project.org/web/packages/SGP/index.html",
 
          textVersion  = paste("Damian W. Betebenner & Adam Van Iwaarden (2011).",
                         "SGP: An R Package for the Calculation and Visualization of Student Growth Percentiles.",
-                        "(R package version 0.0-8.0.",
+                        "(R package version 0.0-8.5.",
                         "URL http://cran.r-project.org/web/packages/SGP/index.html")
 )
diff --git a/inst/NEWS b/inst/NEWS
@@ -1,3 +1,12 @@
+Changes in version: SGP_0.0-8.5
+
+  o Changed analyzeSGP so that it calculates plus/minus 1 standard error confidence bands on individual SGPs by default. 
+  o Added bootstrap confidence intervals to summarizeSGP. 
+  o Added West Virginia to stateData. 
+  o Added Nevada knots and boundaries to stateData. 
+  o studentGrowthPercentiles can now access CSEM by year from stateData if available. 
+  o Consolidated timing messages in studentGrowthPercentiles and studentGrowthProjections at end of function. 
+
 Changes in version: SGP_0.0-8.0
 
   o Implemented more intelligent error handling for missing arguments in studentGrowthPercentiles and studentGrowthProjections passed from analyzeSGP. 

diff --git a/man/SGP-package.Rd b/man/SGP-package.Rd
@@ -19,8 +19,8 @@ are summarized in a variety of ways to describe student growth. As of the 0.0-7.
 \tabular{ll}{
 Package: \tab SGP\cr
 Type: \tab Package\cr
-Version: \tab 0.0-8.0\cr
-Date: \tab 2011-5-5\cr
+Version: \tab 0.0-8.5\cr
+Date: \tab 2011-5-20\cr
 License: \tab CC BY-SA 3.0 US | CC BY-NC-SA 3.0 + file LICENSE\cr
 LazyLoad: \tab yes\cr
 }