Massive changes to geneorama

Using R oxygen2 now for documentation Took out several functions Added tests and a test suite Changed imports and dependencies
geneorama · Sep 5, 2014 · fcf66f5 · fcf66f5
1 parent 3efb11f
commit fcf66f5
Show file tree

Hide file tree

Showing 112 changed files with 3,652 additions and 1,871 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.Rproj.user
+.Rhistory
+.RData
diff --git a/.project b/.project
diff --git a/.settings/de.walware.r.core.prefs b/.settings/de.walware.r.core.prefs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,17 +1,15 @@
 Package: geneorama
 Type: Package
 Title: Gene's collection of miscellaneous functions
-Version: 1.3
+Version: 1.4
 Date: 2012-01-30
 Author: Gene Leynes
 Maintainer: Gene Leynes <rpackage@geneorama.com>
-Enhances: PBSmodelling, fImport, gtools, zoo
-Depends: data.table
-Imports: data.table
-Description: This is an assortment of utilities that I personally use on a 
-             regular basis. Documentation is minimal at this point.
-License: LGPL (>= 2.0, < 3) | Mozilla Public License
-LazyLoad: yes
-
-
-
+Depends:
+    R (>= 3.0.0),
+    data.table,
+    RUnit
+Description: This is an assortment of convenience utilities and documentation of
+    lessons learned (mostly the hard way) in R.  See FAQ.R in the doc
+    folder for "lessons learned".
+License: LGPL (>= 2.0, < 3) | Mozilla Public License + file LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,3 @@
 exportPattern("^[[:alpha:]]+")
+import(data.table)
+import(RUnit)
diff --git a/R/NAsummary.R b/R/NAsummary.R
@@ -1,25 +1,84 @@
-NAsummary <-
-function(df, include.nan=FALSE){
-	newdf = data.frame(
-			col=1:ncol(df),
-			Count =nrow(df),
-			nNA = sapply(df,function(x)length(x[is.na(x)]))
-	)
+#' @name   NAsummary
+#' @title  Summarize NA values in a matrix or data.frame (or data.table)
+#' @author Gene Leynes
+#' 
+#' @param df            A data.frame to be summarized
+#' @param include_nan   Do you want to also see Nan's along with NA's
+#'                      Defaults to FALSE
+#'
+#' @description
+#' 		Summarize the available information in a data.frame (or similar) quickly
+#'
+#' @details
+#' 		For a data frame, data.table, or matrix this function creates a summary
+#' 		of how many NA's and unique values there are in each column.  Useful for
+#' 		a quick summary of how complete your data is, and can be useful to pass
+#' 		to other commands; for example to remove columns that only have one
+#' 		unique value.
+#'
+#' @examples
+#'  ## Create an example data frame:
+#' 	set.seed(100)
+#' 	df <- data.frame(ID = 1:30,
+#' 					 col1 = sample(c(rep(NA, 5), rpois(25, 7))),
+#' 					 col2 = sample(letters[1:5], replace=TRUE),
+#' 					 col3 = sample(letters[1:5], replace=TRUE),
+#' 					 col4 = NA,
+#' 					 col5 = "OneValue",
+#' 					 col6 = sample(letters[1:5], replace=TRUE))
+#' 	## Create a summary:
+#' 	MySummary <- NAsummary(df)
+#' 	MySummary
+#' 	## Columns with only one unique value:
+#' 	rownames(MySummary)[MySummary$nUnique == 1]
+#' 	## Columns with 100% NA values:
+#' 	rownames(MySummary)[MySummary$rNA == 1]
+#'
+#'
+
+
+NAsummary <-function(df, include_nan=FALSE){
+	newdf = data.frame(col=1:ncol(df),
+					   Count =nrow(df),
+					   nNA = sapply(df,function(x)length(x[is.na(x)])))
 
 	newdf$rNA = newdf$nNA / newdf$Count
 	newdf$rNA = trunc(newdf$rNA*10000)/10000
 
-	if(include.nan){
+	if(include_nan){
 		newdf$nNan = sapply(df,function(x)length(x[is.nan(x)]))
 		newdf$rNan = newdf$nNan / newdf$Count
 		newdf$rNan = trunc(newdf$rNan*10000)/10000
 	}
 
-	newdf$nUnique = sapply(df,function(x)length(unique(x))) 
+	newdf$nUnique = sapply(df,function(x)length(unique(x)))
 
 	newdf$rUnique = newdf$nUnique / newdf$Count
 	newdf$rUnique = trunc(newdf$rUnique*10000)/10000
 
 	rownames(newdf) = colnames(df)
 	return(newdf)
 }
+
+
+if(FALSE){
+	rm(list=ls())
+	require(geneorama)
+	detach_nonstandard_packages()
+	source('R/NAsummary.R')
+	set.seed(100)
+	df <- data.frame(ID = 1:30,
+					 col1 = sample(c(rep(NA, 5), rpois(25, 7))),
+					 col2 = sample(letters[1:5], replace=TRUE),
+					 col3 = sample(letters[1:5], replace=TRUE),
+					 col4 = NA,
+					 col5 = "OneValue",
+					 col6 = sample(letters[1:5], replace=TRUE))
+	MySummary <- NAsummary(df)
+	MySummary
+	## Columns with only one unique value:
+	rownames(MySummary)[MySummary$nUnique == 1]
+	## Columns with 100% NA values:
+	rownames(MySummary)[MySummary$rNA == 1]
+}
+
diff --git a/R/ReadAssign.R b/R/ReadAssign.R
@@ -1,34 +1,114 @@
-
-##
-## Read an Rds file and assign the value to the global environment with a
-## the name implied from the filename
-##
-
-##
-## 2014-04-01   SJR   Updated to take a path and filename
-## 2014-05-05   GWL   Adding function to geneorama package
-## 2014-05-05   GWL   Adding $ to gsub pattern argument
-##
-
-## Usage:
-##     MyExampleData <- data.frame(x=letters, y=rnorm(length(letters)))
-##     saveRDS(MyExampleData, file="MyExampleData.Rds")
-##     rm(MyExampleData)
-##     ReadAssign("MyExampleData.Rds")
-##     ls()
-##     unlink("MyExampleData.Rds"); rm(MyExampleData)  ## Clean up
+#' @name   ReadAssign
+#' @title  Read in a list of files, and assign them to variables based on the
+#'         filename (or return as a list)
+#' @author Gene Leynes and Scott Rodgers
+#' 
+#' @param x               file name, and source of the future object name
+#' @param assignGlobal    Assign the object to the global space or return
+#'                        invisibly?  Default is TRUE, which assigns the 
+#'                        object globally.
+#'
+#' @description
+#' 		Read an Rds file and assign the value to the global environment with a
+#'		the name implied from the filename
+#'
+#' @details
+#' 		This function will open a serialized R object and assign the object to the
+#' 		name implied by the object.  For example a matrix saved as
+#' 		"Results/Yhat867.Rds" will be loaded and put into a matrix named
+#' 		"Yhat867" if you call \code{ReadAssign("Results/Yhat867.Rds")}.
+#'
+#' 		This function is particularly valuable when
+#' 		you use file names that match the object names you would use for the
+#' 		saved objects.
+#' 		For example, imagine that in a folder "results" you had 1,000 .Rds files
+#' 		named Vec0001.Rds, Vec0002.Rds ... up to Vec1000.Rds, and imagine that
+#' 		each file had one vector of length 60.  If you wanted to load Vec200.Rds
+#' 		to Vec299.Rds into memory you could get get the file names using
+#' 		\code{list.files}: \code{MyFiles <- list.files('results/', full.names=T,
+#' 		pattern = "vec2[0-9][0-9].Rds")}.  Then you could load those vectors
+#' 		into variables in the global enviornment using ReadAssign:
+#' 		\code{sapply(MyFiles, ReadAssign)}
+#' 		Since these happen to be the same length, you could also put them all
+#' 		into a matrix using this syntax:
+#' 		\code{MyMatrix <- sapply(MyFiles, ReadAssign, assignGlobal = F)}
+#'
+#' 		This pattern is useful when you 1) Use file names that match your
+#' 		object names 2) Have many files / objects and want to relaod all / some
+#' 		of them at different times.
+#'
+#' @note
+#' 		2014-04-01   SJR   Updated to take a path and filename
+#'
+#' 		2014-05-05   GWL   Adding function to geneorama package
+#'
+#' 		2014-05-05   GWL   Adding $ to gsub pattern argument
+#'
+#' @examples
+#' 		require(geneorama)
+#'		##----------------------------------------------------------------------
+#'		## Create example data, save it, then load it from disk using ReadAssign
+#'		##----------------------------------------------------------------------
+#' 		set.seed(27)
+#' 		## Generate Data
+#' 		MyExampleData <- data.frame(x=letters, y=rnorm(length(letters)))
+#' 		saveRDS(MyExampleData, file="MyExampleData.Rds")
+#' 		rm(MyExampleData)
+#' 		## MyExampleData" is no longer in the environment
+#' 		ls()
+#' 		## Now reload "MyExampleData"
+#' 		ReadAssign("MyExampleData.Rds")
+#' 		ls()
+#' 		## Clean up
+#' 		unlink("MyExampleData.Rds")
+#' 		rm(MyExampleData)
+#'
+#'
+#' 		\dontrun{
+#'		##----------------------------------------------------------------------
+#' 		## Example of making 600 files, and reloading 100 of them
+#'		##----------------------------------------------------------------------
+#'		mat <- matrix(rnorm(60000), 100, 600)
+#'      for(i in 1:ncol(mat)){
+#'          fp <-  file.path("ReadAssignExampleData",
+#'                           paste0("vec", sprintf("%03.f", i),".Rds"))
+#'          saveRDS(object = mat[,i],
+#'                  file = fp)
+#'      }
+#'      mat_recovered <- sapply(list.files('ReadAssignExampleData/',
+#'                                         full.names=T,
+#'                                         pattern = "vec3[0-9][0-9].Rds"),
+#'                              ReadAssign, assignGlobal = F)
+#' 		head(mat[,300:310])
+#' 		head(mat_recovered[,1:11])
+#' 		## Clean Up
+#' 		unlink("ReadAssignExampleData")
+#' 		rm(mat, mat_recovered)
+#' 		}
+#'
 
 ReadAssign <- function(x, assignGlobal = TRUE){
-    dat <- readRDS(x)
-    objname <- gsub("\\.Rds$", "", basename(x), ignore.case=TRUE)
-    if(assignGlobal){
-        assign(x=objname, value=dat, pos=.GlobalEnv)
-        invisible(objname)
-    } else {
-        return(dat)
-    }
+	dat <- readRDS(x)
+	objname <- gsub("\\.[Rr][Dd][Ss]$", "", basename(x), ignore.case=TRUE)
+	if(assignGlobal){
+		assign(x=objname, value=dat, pos=.GlobalEnv)
+		invisible(objname)
+	} else {
+		return(dat)
+	}
+}
+
+
+if(FALSE){
+	rm(list=ls())
+	source("R/ReadAssign.R")
+	source('tests/test.ReadAssign.R')
+	test.ReadAssign()
 }
 
 
 
 
+
+
+
diff --git a/R/TestGeneorama.R b/R/TestGeneorama.R
@@ -0,0 +1,33 @@
+#' @name   TestGeneorama
+#' @title  TestGeneorama
+#' @author Gene Leynes
+#'
+#' @description
+#' 		Run tests on the geneorama package to make sure the functions are
+#' 		working.
+#'
+#' @details
+#' 		Depends on RUnit
+#'
+#' @examples
+#' 		\dontrun{
+#' 		library(geneorama)
+#' 		TestGeneorama()
+#' 		}
+#'
+
+
+TestGeneorama <- function(){
+
+	# require(RUnit)
+
+	testsuite1 <- defineTestSuite(
+		"Geneorama main test suite",
+		# dirs = file.path(.path.package(package="RUnit"), "examples"),
+		dirs = file.path(".", "RTests"),
+		testFileRegexp = "^test.+\\.[rR]",
+		testFuncRegexp = "^test.+")
+
+	testResult <- runTestSuite(testsuite1)
+	printTextProtocol(testResult)
+}