Skip to content

Commit

Permalink
Massive changes to geneorama
Browse files Browse the repository at this point in the history
Using R oxygen2 now for documentation
Took out several functions
Added tests and a test suite
Changed imports and dependencies
  • Loading branch information
geneorama committed Sep 5, 2014
1 parent 3efb11f commit fcf66f5
Show file tree
Hide file tree
Showing 112 changed files with 3,652 additions and 1,871 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
19 changes: 0 additions & 19 deletions .project

This file was deleted.

3 changes: 0 additions & 3 deletions .settings/de.walware.r.core.prefs

This file was deleted.

20 changes: 9 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
Package: geneorama
Type: Package
Title: Gene's collection of miscellaneous functions
Version: 1.3
Version: 1.4
Date: 2012-01-30
Author: Gene Leynes
Maintainer: Gene Leynes <rpackage@geneorama.com>
Enhances: PBSmodelling, fImport, gtools, zoo
Depends: data.table
Imports: data.table
Description: This is an assortment of utilities that I personally use on a
regular basis. Documentation is minimal at this point.
License: LGPL (>= 2.0, < 3) | Mozilla Public License
LazyLoad: yes



Depends:
R (>= 3.0.0),
data.table,
RUnit
Description: This is an assortment of convenience utilities and documentation of
lessons learned (mostly the hard way) in R. See FAQ.R in the doc
folder for "lessons learned".
License: LGPL (>= 2.0, < 3) | Mozilla Public License + file LICENSE
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
exportPattern("^[[:alpha:]]+")
import(data.table)
import(RUnit)
77 changes: 68 additions & 9 deletions R/NAsummary.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,84 @@
NAsummary <-
function(df, include.nan=FALSE){
newdf = data.frame(
col=1:ncol(df),
Count =nrow(df),
nNA = sapply(df,function(x)length(x[is.na(x)]))
)
#' @name NAsummary
#' @title Summarize NA values in a matrix or data.frame (or data.table)
#' @author Gene Leynes
#'
#' @param df A data.frame to be summarized
#' @param include_nan Do you want to also see Nan's along with NA's
#' Defaults to FALSE
#'
#' @description
#' Summarize the available information in a data.frame (or similar) quickly
#'
#' @details
#' For a data frame, data.table, or matrix this function creates a summary
#' of how many NA's and unique values there are in each column. Useful for
#' a quick summary of how complete your data is, and can be useful to pass
#' to other commands; for example to remove columns that only have one
#' unique value.
#'
#' @examples
#' ## Create an example data frame:
#' set.seed(100)
#' df <- data.frame(ID = 1:30,
#' col1 = sample(c(rep(NA, 5), rpois(25, 7))),
#' col2 = sample(letters[1:5], replace=TRUE),
#' col3 = sample(letters[1:5], replace=TRUE),
#' col4 = NA,
#' col5 = "OneValue",
#' col6 = sample(letters[1:5], replace=TRUE))
#' ## Create a summary:
#' MySummary <- NAsummary(df)
#' MySummary
#' ## Columns with only one unique value:
#' rownames(MySummary)[MySummary$nUnique == 1]
#' ## Columns with 100% NA values:
#' rownames(MySummary)[MySummary$rNA == 1]
#'
#'


NAsummary <-function(df, include_nan=FALSE){
newdf = data.frame(col=1:ncol(df),
Count =nrow(df),
nNA = sapply(df,function(x)length(x[is.na(x)])))

newdf$rNA = newdf$nNA / newdf$Count
newdf$rNA = trunc(newdf$rNA*10000)/10000

if(include.nan){
if(include_nan){
newdf$nNan = sapply(df,function(x)length(x[is.nan(x)]))
newdf$rNan = newdf$nNan / newdf$Count
newdf$rNan = trunc(newdf$rNan*10000)/10000
}

newdf$nUnique = sapply(df,function(x)length(unique(x)))
newdf$nUnique = sapply(df,function(x)length(unique(x)))

newdf$rUnique = newdf$nUnique / newdf$Count
newdf$rUnique = trunc(newdf$rUnique*10000)/10000

rownames(newdf) = colnames(df)
return(newdf)
}


if(FALSE){
rm(list=ls())
require(geneorama)
detach_nonstandard_packages()
source('R/NAsummary.R')
set.seed(100)
df <- data.frame(ID = 1:30,
col1 = sample(c(rep(NA, 5), rpois(25, 7))),
col2 = sample(letters[1:5], replace=TRUE),
col3 = sample(letters[1:5], replace=TRUE),
col4 = NA,
col5 = "OneValue",
col6 = sample(letters[1:5], replace=TRUE))
MySummary <- NAsummary(df)
MySummary
## Columns with only one unique value:
rownames(MySummary)[MySummary$nUnique == 1]
## Columns with 100% NA values:
rownames(MySummary)[MySummary$rNA == 1]
}

134 changes: 107 additions & 27 deletions R/ReadAssign.R
Original file line number Diff line number Diff line change
@@ -1,34 +1,114 @@

##
## Read an Rds file and assign the value to the global environment with a
## the name implied from the filename
##

##
## 2014-04-01 SJR Updated to take a path and filename
## 2014-05-05 GWL Adding function to geneorama package
## 2014-05-05 GWL Adding $ to gsub pattern argument
##

## Usage:
## MyExampleData <- data.frame(x=letters, y=rnorm(length(letters)))
## saveRDS(MyExampleData, file="MyExampleData.Rds")
## rm(MyExampleData)
## ReadAssign("MyExampleData.Rds")
## ls()
## unlink("MyExampleData.Rds"); rm(MyExampleData) ## Clean up
#' @name ReadAssign
#' @title Read in a list of files, and assign them to variables based on the
#' filename (or return as a list)
#' @author Gene Leynes and Scott Rodgers
#'
#' @param x file name, and source of the future object name
#' @param assignGlobal Assign the object to the global space or return
#' invisibly? Default is TRUE, which assigns the
#' object globally.
#'
#' @description
#' Read an Rds file and assign the value to the global environment with a
#' the name implied from the filename
#'
#' @details
#' This function will open a serialized R object and assign the object to the
#' name implied by the object. For example a matrix saved as
#' "Results/Yhat867.Rds" will be loaded and put into a matrix named
#' "Yhat867" if you call \code{ReadAssign("Results/Yhat867.Rds")}.
#'
#' This function is particularly valuable when
#' you use file names that match the object names you would use for the
#' saved objects.
#' For example, imagine that in a folder "results" you had 1,000 .Rds files
#' named Vec0001.Rds, Vec0002.Rds ... up to Vec1000.Rds, and imagine that
#' each file had one vector of length 60. If you wanted to load Vec200.Rds
#' to Vec299.Rds into memory you could get get the file names using
#' \code{list.files}: \code{MyFiles <- list.files('results/', full.names=T,
#' pattern = "vec2[0-9][0-9].Rds")}. Then you could load those vectors
#' into variables in the global enviornment using ReadAssign:
#' \code{sapply(MyFiles, ReadAssign)}
#' Since these happen to be the same length, you could also put them all
#' into a matrix using this syntax:
#' \code{MyMatrix <- sapply(MyFiles, ReadAssign, assignGlobal = F)}
#'
#' This pattern is useful when you 1) Use file names that match your
#' object names 2) Have many files / objects and want to relaod all / some
#' of them at different times.
#'
#' @note
#' 2014-04-01 SJR Updated to take a path and filename
#'
#' 2014-05-05 GWL Adding function to geneorama package
#'
#' 2014-05-05 GWL Adding $ to gsub pattern argument
#'
#' @examples
#' require(geneorama)
#' ##----------------------------------------------------------------------
#' ## Create example data, save it, then load it from disk using ReadAssign
#' ##----------------------------------------------------------------------
#' set.seed(27)
#' ## Generate Data
#' MyExampleData <- data.frame(x=letters, y=rnorm(length(letters)))
#' saveRDS(MyExampleData, file="MyExampleData.Rds")
#' rm(MyExampleData)
#' ## MyExampleData" is no longer in the environment
#' ls()
#' ## Now reload "MyExampleData"
#' ReadAssign("MyExampleData.Rds")
#' ls()
#' ## Clean up
#' unlink("MyExampleData.Rds")
#' rm(MyExampleData)
#'
#'
#' \dontrun{
#' ##----------------------------------------------------------------------
#' ## Example of making 600 files, and reloading 100 of them
#' ##----------------------------------------------------------------------
#' mat <- matrix(rnorm(60000), 100, 600)
#' for(i in 1:ncol(mat)){
#' fp <- file.path("ReadAssignExampleData",
#' paste0("vec", sprintf("%03.f", i),".Rds"))
#' saveRDS(object = mat[,i],
#' file = fp)
#' }
#' mat_recovered <- sapply(list.files('ReadAssignExampleData/',
#' full.names=T,
#' pattern = "vec3[0-9][0-9].Rds"),
#' ReadAssign, assignGlobal = F)
#' head(mat[,300:310])
#' head(mat_recovered[,1:11])
#' ## Clean Up
#' unlink("ReadAssignExampleData")
#' rm(mat, mat_recovered)
#' }
#'

ReadAssign <- function(x, assignGlobal = TRUE){
dat <- readRDS(x)
objname <- gsub("\\.Rds$", "", basename(x), ignore.case=TRUE)
if(assignGlobal){
assign(x=objname, value=dat, pos=.GlobalEnv)
invisible(objname)
} else {
return(dat)
}
dat <- readRDS(x)
objname <- gsub("\\.[Rr][Dd][Ss]$", "", basename(x), ignore.case=TRUE)
if(assignGlobal){
assign(x=objname, value=dat, pos=.GlobalEnv)
invisible(objname)
} else {
return(dat)
}
}


if(FALSE){
rm(list=ls())
source("R/ReadAssign.R")
source('tests/test.ReadAssign.R')
test.ReadAssign()
}







33 changes: 33 additions & 0 deletions R/TestGeneorama.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#' @name TestGeneorama
#' @title TestGeneorama
#' @author Gene Leynes
#'
#' @description
#' Run tests on the geneorama package to make sure the functions are
#' working.
#'
#' @details
#' Depends on RUnit
#'
#' @examples
#' \dontrun{
#' library(geneorama)
#' TestGeneorama()
#' }
#'


TestGeneorama <- function(){

# require(RUnit)

testsuite1 <- defineTestSuite(
"Geneorama main test suite",
# dirs = file.path(.path.package(package="RUnit"), "examples"),
dirs = file.path(".", "RTests"),
testFileRegexp = "^test.+\\.[rR]",
testFuncRegexp = "^test.+")

testResult <- runTestSuite(testsuite1)
printTextProtocol(testResult)
}
Loading

0 comments on commit fcf66f5

Please sign in to comment.