From 25f487bf61ff682c2939f050c734d639d62bfe28 Mon Sep 17 00:00:00 2001
From: Kazutaka Doi <kztkdi@gmail.com>
Date: Mon, 2 Nov 2015 11:31:54 +0000
Subject: [PATCH] version 0.0.4

---
 DESCRIPTION            |  8 ++---
 MD5                    | 42 +++++++++++++++--------
 NAMESPACE              |  7 ++++
 NEWS                   | 10 ++++++
 R/AIC.epifit.R         |  6 ++--
 R/calcAge.R            | 20 +++++++++++
 R/convertNA.R          | 40 ++++++++++++++++++++++
 R/countNA.R            | 30 ++++++++++++++++
 R/epifit-package.R     | 10 ++++--
 R/epifit.R             |  4 +++
 R/extractVariable.R    | 34 +++++++++++++++++++
 R/modules.R            |  8 ++---
 R/print.epifit.R       |  4 +--
 R/pullOneValue.R       | 77 ++++++++++++++++++++++++++++++++++++++++++
 R/pytable.R            |  7 ++--
 R/removeVariable.R     | 33 ++++++++++++++++++
 man/AIC.epifit.Rd      | 10 +++---
 man/calcAge.Rd         | 25 ++++++++++++++
 man/convertNA.Rd       | 33 ++++++++++++++++++
 man/countNA.Rd         | 27 +++++++++++++++
 man/epifit-package.Rd  | 10 ++++--
 man/epifit.Rd          |  4 +++
 man/extractVariable.Rd | 26 ++++++++++++++
 man/print.epifit.Rd    |  2 +-
 man/pullOneValue.Rd    | 26 ++++++++++++++
 man/pytable.Rd         |  9 ++---
 man/removeVariable.Rd  | 31 +++++++++++++++++
 src/select.h           |  4 +--
 28 files changed, 496 insertions(+), 51 deletions(-)
 create mode 100644 R/calcAge.R
 create mode 100644 R/convertNA.R
 create mode 100644 R/countNA.R
 create mode 100644 R/extractVariable.R
 create mode 100644 R/pullOneValue.R
 create mode 100644 R/removeVariable.R
 create mode 100644 man/calcAge.Rd
 create mode 100644 man/convertNA.Rd
 create mode 100644 man/countNA.Rd
 create mode 100644 man/extractVariable.Rd
 create mode 100644 man/pullOneValue.Rd
 create mode 100644 man/removeVariable.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 315d4e9..fc2fe39 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: epifit
 Type: Package
 Title: Flexible Modelling Functions for Epidemiological Data Analysis
-Version: 0.0.3
-Date: 2015-10-01
+Version: 0.0.4
+Date: 2015-11-01
 Author: Kazutaka Doi [aut,cre],
         Kei Sakabe [ctb],
         Masataka Taruri [ctb]
@@ -18,6 +18,6 @@ NeedsCompilation: yes
 ByteCompile: true
 Imports: stats, MASS
 Suggests: survival
-Packaged: 2015-10-01 05:17:08 UTC; kazutaka
+Packaged: 2015-11-02 00:18:26 UTC; kazutaka
 Repository: CRAN
-Date/Publication: 2015-10-01 13:18:48
+Date/Publication: 2015-11-02 11:31:54
diff --git a/MD5 b/MD5
index 861d96c..9204e20 100644
--- a/MD5
+++ b/MD5
@@ -1,17 +1,29 @@
-17e089ae61960affeed3cf70774d53c6 *DESCRIPTION
-844d6a49b62cabc6fd2f30cd09f77c9c *NAMESPACE
-fec108d04c0a0b85b6e06cd62ac0aacc *NEWS
-cf1dac4f20bce5aa8bd60f336637bf49 *R/AIC.epifit.R
-da7c1814ff539a578dbfcefacf000384 *R/epifit-package.R
-9ead0dd58721e9c9805931e02330101b *R/epifit.R
-7d9d4a1c0b9815b4499e8e152ef47141 *R/modules.R
-37ae334b0499226a589d257ef3f52c8d *R/print.epifit.R
-e34d0d507780fee1ef1dfbceb84ab8bb *R/pytable.R
-794302dcda40fab09aeefc1adbc7d0e9 *man/AIC.epifit.Rd
-401433cb87f447517b133bc5cbddb474 *man/epifit-package.Rd
-ccd78ca14885329dd6a6adbfacb3707b *man/epifit.Rd
-43c7fb88046f9a517b7f1198a54fdc06 *man/print.epifit.Rd
-3f553022602d8b5ceb73be02569b1e36 *man/pytable.Rd
+32288f3179fb0440d8bb05d7d96f2c31 *DESCRIPTION
+a699ed81a8e2db7f92bdc7e0ff0bb806 *NAMESPACE
+362daac24dba844ee72e3f1461bea920 *NEWS
+c731dbd4287f76ea58ae568b3d48d854 *R/AIC.epifit.R
+f32940a38c14fd83179553d8faadd087 *R/calcAge.R
+02ee08e34ba9280107c612a2a73e3952 *R/convertNA.R
+f9aa47f0b45af23b6eb222e3fc2209c9 *R/countNA.R
+2d44f2e4f9112198446298c4f9c9981d *R/epifit-package.R
+4234881f5192ff233644e3955d9e9e50 *R/epifit.R
+79f74128ec4c844a22e9ba765d8ac074 *R/extractVariable.R
+d10ee87a519163a91223e3c63d797964 *R/modules.R
+b132e15e4400b329842ec658d23dd409 *R/print.epifit.R
+f19bbdaf163a98894d1c2b8fa8e33d97 *R/pullOneValue.R
+a2ea075936dd35c3facb5e9403c96664 *R/pytable.R
+83ae3b79163122da2738af6d4cf59d88 *R/removeVariable.R
+93c35ed9744ac0a06725220f3426201a *man/AIC.epifit.Rd
+f5049795835ecd9e77bc77b18dc957d4 *man/calcAge.Rd
+6b48b0d2d2e53cba883f81842498721d *man/convertNA.Rd
+e25fdbe2bc9fdcd6fa3b753589530eec *man/countNA.Rd
+0ab2ced76107b8f00318fa90db3e1b39 *man/epifit-package.Rd
+389b6f92e3f6b9561030d92062429bc2 *man/epifit.Rd
+938d7770a1dcb6b47513dedd752fea37 *man/extractVariable.Rd
+766d530d0dee1c69fc3e4b1fd1702cb8 *man/print.epifit.Rd
+72b6190d3f8d5546a761198c6355dcec *man/pullOneValue.Rd
+6b8964041f63404dbbbbae1c346a59c6 *man/pytable.Rd
+1dc9b6b6d926511434d611a23abdcac1 *man/removeVariable.Rd
 e3d3cb360fbfdb3c6974e14eb5f09870 *src/Makevars
 7cdf814e4e9d585eefe5531871ee692e *src/select.cpp
-6312d67a2185d19e4e6a7d917658eeae *src/select.h
+1d57bc1890ad4a58e17a905098a65968 *src/select.h
diff --git a/NAMESPACE b/NAMESPACE
index 640d54d..71278b8 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,8 +2,14 @@
 
 S3method(AIC,epifit)
 S3method(print,epifit)
+export(calcAge)
+export(convertNA)
+export(countNA)
 export(epifit)
+export(extractVariable)
+export(pullOneValue)
 export(pytable)
+export(removeVariable)
 importFrom(MASS,ginv)
 importFrom(stats,dbinom)
 importFrom(stats,dgamma)
@@ -21,3 +27,4 @@ importFrom(stats,optim)
 importFrom(stats,pchisq)
 importFrom(stats,pnorm)
 useDynLib(epifit)
+useDynLib(epifit,Rf_select)
diff --git a/NEWS b/NEWS
index 583456c..beb88f3 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,15 @@
 NEWS for the epifit package
 
+---------------------------------------------------------------------
+epifit 0.0.4 (2015/11/01)
+
+* Added removeVariable function
+* Added extractVariable function
+* Added countNA function
+* Added calcAge function
+* Added pullOneValue function
+* Added convertNA function
+
 ---------------------------------------------------------------------
 epifit 0.0.3 (2015/10/01)
 
diff --git a/R/AIC.epifit.R b/R/AIC.epifit.R
index 791cc29..aec7f10 100644
--- a/R/AIC.epifit.R
+++ b/R/AIC.epifit.R
@@ -1,10 +1,10 @@
-##' Function for calculating Akaike's \sQuote{An Information Criterion} (AIC) from epifit object
+##' Function for calculating Akaike's \sQuote{An Information Criterion} (AIC) from epifit object.
 ##'
 ##' Function called from generic function AIC in \pkg{stats} when the argument is epifit object.
-##' @title Akaike's An Information Criterion
-##' @param object a fitted epifit object for which there exists a 
+##' @param object a fitted epifit object.
 ##' @param ... not used in this version, only for compatibility purpose with generic function \code{AIC} currently.
 ##' @param k numeric, the \emph{penalty} per parameter to be used; the default \code{k = 2} is the classical AIC.
+##' @return a numeric AIC value.
 ##' @seealso \code{\link[stats]{AIC}}
 ##' @examples
 ##' library(survival)
diff --git a/R/calcAge.R b/R/calcAge.R
new file mode 100644
index 0000000..6bbb86e
--- /dev/null
+++ b/R/calcAge.R
@@ -0,0 +1,20 @@
+##' Calculate the difference between two dates in terms of unit of time.
+##'
+##' This function calculates the difference between two dates in terms of unit of time, and age can be obtained when \sQuote{year} is specified as unit argument.
+##' @param birthday a character or character vector specifying birthday or base date.
+##' @param targetdate a character specifying current or target date.
+##' @param unit a character specifying unit for calculating the difference between the two dates. Values of "year", "month" and "day" are supported.
+##' @return a vector of age
+##' @examples calcAge("1963-2-3")
+##' calcAge("1970-1-1", unit="day")
+##' @export
+calcAge <- function(birthday, targetdate=Sys.Date(), unit="year"){
+  ## Number of whole `unit` steps from one base date up to targetdate; any
+  ## error raised while parsing or sequencing the dates yields NA instead.
+  countSteps <- function(x){
+    tryCatch(length(seq(as.Date(x), as.Date(targetdate), unit)) - 1,
+             error=function(e){NA_real_})
+  }
+  ## vapply keeps the result numeric even for empty or all-invalid input.
+  vapply(birthday, countSteps, numeric(1), USE.NAMES=FALSE)
+}
diff --git a/R/convertNA.R b/R/convertNA.R
new file mode 100644
index 0000000..b9e0fc1
--- /dev/null
+++ b/R/convertNA.R
@@ -0,0 +1,40 @@
+##' Convert a character pattern into NA in character and vice versa.
+##'
+##' Convert a character pattern into NA in character and vice versa.
+##' @param data a data.frame whose character variables are converted.
+##' @param na.character a character vector specifying missing character.
+##' @param reverse a logical value specifying reverse replacement that NA is replaced with the first element of na.character.
+##' @return a data.frame with NA replacement.
+##' @seealso
+##' \code{\link{countNA}}
+##' @examples
+##' dat <- data.frame(a=c("","2","3"),b=c("4", NA, "."), stringsAsFactors=FALSE)
+##' dat2 <- convertNA(dat)
+##' dat3 <- convertNA(dat2, na.character=".", reverse=TRUE)
+##' dat
+##' dat2
+##' dat3
+##' @export
+convertNA <- function(data=NULL, na.character=c("", "."), reverse=FALSE){
+
+  if (is.null(data) || !is.data.frame(data))
+    stop("data is not specified or not data.frame")
+
+  ## seq_len() keeps the loop empty for a data.frame without columns.
+  for(i in seq_len(ncol(data))){
+    column <- data[[i]]
+    ## Only character variables are converted; other types are left as is.
+    if(!is.character(column))
+      next
+    ## Convert the whole column at once (also correct for zero rows).
+    if(reverse){
+      ## NA becomes the first missing-value marker.
+      column[is.na(column)] <- na.character[1]
+    } else {
+      ## Any of the missing-value markers becomes NA.
+      column[column %in% na.character] <- NA
+    }
+    data[[i]] <- column
+  }
+  return(data)
+}
diff --git a/R/countNA.R b/R/countNA.R
new file mode 100644
index 0000000..1ad8ab2
--- /dev/null
+++ b/R/countNA.R
@@ -0,0 +1,30 @@
+##' Count NA in variables.
+##'
+##' Count NA, and calculate NA proportion in data.frame.
+##' @param data a data.frame to summarize.
+##' @return a matrix with one row per variable giving the NA count, the total number of rows and the NA percentage.
+##' @seealso
+##' \code{\link{convertNA}}
+##' @examples
+##' df <- data.frame(id=1:1000, cov1=rnorm(1000), cov2=runif(1000))
+##' df$cov1 <- ifelse(df$cov1 < 0, NA, df$cov1)
+##' df$cov2 <- ifelse(df$cov2 < 0.2, NA, df$cov2)
+##' countNA(df)
+##' @export
+countNA <- function(data=NULL){
+
+  if (is.null(data) || !is.data.frame(data))
+    stop("data is not specified or not data.frame")
+
+  total <- nrow(data)
+  ## NA count per variable (is.na() is also TRUE for NaN). vapply() over the
+  ## columns also handles a data.frame without any column.
+  nmiss <- vapply(data, function(x){sum(is.na(x))}, numeric(1),
+                  USE.NAMES=FALSE)
+
+  ## One row per variable: NA count, number of rows, NA percentage.
+  result <- matrix(c(nmiss, rep(total, length(nmiss)), nmiss/total*100),
+                   nrow=length(nmiss), ncol=3)
+  dimnames(result) <- list(colnames(data), c("missing", "total", "percent(%)"))
+  return(result)
+}
diff --git a/R/epifit-package.R b/R/epifit-package.R
index d74b2da..ba85553 100644
--- a/R/epifit-package.R
+++ b/R/epifit-package.R
@@ -1,4 +1,4 @@
-#' Flexible Modelling Functions for Epidemiological Data Analysis
+#' Flexible Modelling Functions for Epidemiological Data Analysis.
 #'
 #' Provides flexible model fitting used in epidemiological data analysis
 #' by a unified model specification, along with some data manipulation functions.
@@ -14,8 +14,14 @@
 #' Author: Kazutaka Doi, Kei Sakabe and Masataka Taguri
 #' Maintainer: Kazutaka Doi \email{kztkdi@@gmail.com}
 #' @seealso
+#' \code{\link{calcAge}},
+#' \code{\link{convertNA}},
+#' \code{\link{countNA}},
 #' \code{\link{epifit}},
-#' \code{\link{pytable}}
+#' \code{\link{extractVariable}},
+#' \code{\link{pullOneValue}},
+#' \code{\link{pytable}},
+#' \code{\link{removeVariable}}
 #' @useDynLib epifit
 #' @importFrom stats dbinom dgamma dnbinom dnorm dpois dweibull integrate na.exclude na.fail na.omit na.pass nlm optim pchisq pnorm
 #' @importFrom MASS ginv
diff --git a/R/epifit.R b/R/epifit.R
index 1d8527d..b453729 100644
--- a/R/epifit.R
+++ b/R/epifit.R
@@ -25,8 +25,12 @@
 #' @param verbatim a integer value from 0 (minimum) to 2 (maximum) controlling the amount of information printed during calculation.
 #' @param ... for the arguments used in the inner functions (currently not used).
 #' @return a list containing the result of model fitting including parameter estimates, variance of parameter estimates, log likelihood and so on.
+#' @useDynLib epifit Rf_select
 #' @references DeLong, D. M., Guirguis, G.H., and So, Y.C. (1994). Efficient computation of subset selection probabilities with application to Cox regression. \emph{Biometrika} \strong{81}, 607-611.
 #' @references Gail, M. H., Lubin, J. H., and Rubinstein, L. V. (1981). Likelihood calculations for matched case-control studies and survival studies with tied death times. \emph{Biometrika} \strong{68}, 703-707.
+#' @seealso
+#' \code{\link{AIC.epifit}},
+#' \code{\link{print.epifit}}
 #' @examples
 #' library(survival)
 #' 
diff --git a/R/extractVariable.R b/R/extractVariable.R
new file mode 100644
index 0000000..b17cb71
--- /dev/null
+++ b/R/extractVariable.R
@@ -0,0 +1,34 @@
+##' Extract variables according to mode from data.frame.
+##'
+##' This function extracts variables which match the specified mode from a data.frame, and makes a new data.frame.
+##' @param data a data.frame from which variables of the specified mode are extracted.
+##' @param mode a character specifying object type. Object modes of \sQuote{numeric}, \sQuote{character}, \sQuote{factor}, and \sQuote{logical} are supported.
+##' @return a data.frame which includes only specified mode of variables.
+##' @examples
+##' df <- data.frame(id=seq(1,10), str=letters[1:10], fac=factor(seq(1,10)), stringsAsFactors=FALSE)
+##' extractVariable(df)
+##' extractVariable(df, mode="character")
+##' extractVariable(df, mode="factor")
+##' @export
+extractVariable <- function(data=NULL, mode="numeric"){
+
+  if(is.null(data)||!is.data.frame(data))
+    stop("data is not specified or not data.frame")
+
+  ## The type predicate is looked up by name, e.g. mode="factor" selects
+  ## is.factor(); a mode without a matching is.<mode> function is rejected.
+  funcname <- paste("is.", mode, sep="")
+  if(!exists(funcname, mode="function", envir=.BaseNamespaceEnv)){
+    stop("Invalid mode")
+  }
+  func <- get(funcname, mode="function", envir=.BaseNamespaceEnv)
+
+  ## Apply the predicate to every column. vapply() yields logical(0) for a
+  ## data.frame without columns, so the empty case needs no special handling,
+  ## and it insists on exactly one TRUE/FALSE per column.
+  idx <- vapply(data, func, logical(1), USE.NAMES=FALSE)
+
+  ## drop=FALSE keeps the result a data.frame even when a single column
+  ## (or none) is selected.
+  return(data[,idx,drop=FALSE])
+}
diff --git a/R/modules.R b/R/modules.R
index bdf5d0a..4e42803 100644
--- a/R/modules.R
+++ b/R/modules.R
@@ -133,7 +133,7 @@ SolveDependence <- function(vec_depvar, lst_eqnassigned, lst_eqndepend, vec_eqns
           unsolved <- unsolved[unsolved != lst_eqndepend[[i]]]
           resfml <- c(SolveDependence(lst_eqndepend[[i]], lst_eqnassigned, lst_eqndepend, vec_eqns), resfml)
         } else if(length(lst_eqndepend[[i]]) > 1){
-          unsolved <- RemoveVariable(unsolved, lst_eqndepend[[i]])
+          unsolved <- RemoveVariableName(unsolved, lst_eqndepend[[i]])
           resfml <- c(SolveDependence(lst_eqndepend[[i]], lst_eqnassigned, lst_eqndepend, vec_eqns), resfml)
         }
         break
@@ -192,7 +192,7 @@ InnerInsertFormula <- function(psd_target, chr_var, psd_fml){
 }
 
 ## remove some variable names from variable list
-RemoveVariable <- function(varlist, remove){
+RemoveVariableName <- function(varlist, remove){
   flag <- rep(TRUE, length(varlist))
   for(i in 1:length(remove)){
     for(j in 1:length(varlist)){
@@ -301,7 +301,7 @@ GetParamPosition <- function(param, paramlist){
       if(x == paramlist[i])
         return(i)
     }
-  })
+  }, USE.NAMES=FALSE)
 }
 
 ## Make epifit result object from optim function
@@ -538,7 +538,7 @@ LogCoxLikelihood <- function(init, parameters, equations, itereq, envs, time1nam
           } else if(ties=="discrete"){
             
             if(tieevent > 0){
-              phazard[tiebegin] <- prod(phazard[tiebegin:(tiebegin+tieevent-1)])/.Call("Rf_select", tieevent, nsubject-tiebegin+1, phazard[tiebegin:nsubject])
+              phazard[tiebegin] <- prod(phazard[tiebegin:(tiebegin+tieevent-1)])/.Call(Rf_select, tieevent, nsubject-tiebegin+1, phazard[tiebegin:nsubject])
               status[tiebegin] <- 1 # regard as event
               riskset[tiebegin] <- 1
               status[(tiebegin+1):(i-1)] <- 0
diff --git a/R/print.epifit.R b/R/print.epifit.R
index 0d7d684..24354ab 100644
--- a/R/print.epifit.R
+++ b/R/print.epifit.R
@@ -1,7 +1,7 @@
-##' Print function for epifit object
+##' Print function for epifit object.
 ##'
 ##' This function print result of function \code{\link{epifit}}
-##' 
+##'
 ##' @param x Object of class \code{epifit}.
 ##' @param digits a non-null value for digits specifies the minimum number of significant digits to be printed in values. The default,  uses \code{max(\link[base:options]{getOption}}(digits - 4, 3)).
 ##' @param ... Further arguments passed to or from other methods.
diff --git a/R/pullOneValue.R b/R/pullOneValue.R
new file mode 100644
index 0000000..780ad80
--- /dev/null
+++ b/R/pullOneValue.R
@@ -0,0 +1,77 @@
+##' Pull one set of values from variables included in data which are thought to include essentially the same information.
+##'
+##' When some part of the data is missing, the missing information may be recovered from another source of information. This function scans all variables which are thought to include essentially the same information, and pulls value which is not missing.
+##' @param data a data.frame in which variables are included
+##' @param varlist a character vector of variable list which is assumed to contain the same information.
+##' @param check a logical value specifying whether to scan all variables in varlist for incompatible values or not.
+##' @return a vector including one set of values obtained from varlist.
+##' @examples
+##' dat <- data.frame(a1=c(NA,2,3), a2=c(1,NA,2), a3=c(1,2,NA), b=c(10,11,20))
+##' pullOneValue(dat, c("a1", "a2", "a3"))
+##' @export
+pullOneValue <- function(data=NULL, varlist=c(""), check=TRUE){
+
+  if(is.null(data) || !is.data.frame(data))
+    stop("data must be data.frame")
+
+  allvar <- colnames(data)
+
+  ## Every variable in varlist (which must not be empty) has to be in data.
+  if(length(varlist) == 0 || !all(varlist %in% allvar))
+    stop("at least one variable in varlist is not included in data")
+
+  ## Column positions of the candidate variables, in varlist order.
+  idx <- match(varlist, allvar)
+
+  ## A single variable needs no merging and is returned unchanged.
+  if(length(idx) < 2)
+    return(data[,idx])
+
+  n <- nrow(data)
+  first <- data[,idx[1]]
+
+  ## The first variable fixes the result type; the others must match it.
+  if(is.numeric(first)){
+    sametype <- is.numeric
+    res <- as.numeric(rep(NA, n))
+  } else if(is.character(first)){
+    sametype <- is.character
+    res <- as.character(rep(NA, n))
+  } else if(is.factor(first)){
+    sametype <- is.factor
+    res <- as.factor(rep(NA, n))
+    levels(res) <- levels(first)
+  } else {
+    stop("unsupported variable type")
+  }
+
+  for(j in idx[-1]){
+    if(!sametype(data[,j]))
+      stop("mode of variables are different")
+  }
+
+  if(is.factor(first))
+    warning("factor is not fully supported")
+
+  ## Scan the candidates row by row, in varlist order.
+  for(i in seq_len(n)){
+    for(j in idx){
+      value <- data[i,j]
+      if(is.na(value) || is.nan(value))
+        next
+      if(is.na(res[i])){
+        res[i] <- value
+        ## Without checking there is no need to look at the other variables.
+        if(!check)
+          break
+      } else if(res[i] != value){
+        ## A later variable disagrees: report every candidate of the row.
+        cat("Imcompatible data exists in row:", i,"\n")
+        for(k in idx){
+          cat(allvar[k], " = ", data[i,k], "\n")
+        }
+      }
+    }
+  }
+  return(res)
+}
diff --git a/R/pytable.R b/R/pytable.R
index 4739d3c..07f39d1 100644
--- a/R/pytable.R
+++ b/R/pytable.R
@@ -1,10 +1,9 @@
 #' Make person-year table from individual data
 #'
 #' This function creates the person-years table from event, time and covariate data. The number of event and time of some observations with the same covariate data are summed up, and made into one observation.
-#' @title Make person-year table from individual data
-#' @param event a vector specifying number of event
-#' @param time a vector specifying time variable
-#' @param cov vector or matrix or data.frame of covariates
+#' @param event a vector specifying number of event.
+#' @param time a vector specifying time variable.
+#' @param cov vector or matrix or data.frame of covariates.
 #' @param scale a scaling for person-year. The value of 365.25 will make person-year table from time variable recoded as days.
 #' @return a person-year data.frame
 #' @examples
diff --git a/R/removeVariable.R b/R/removeVariable.R
new file mode 100644
index 0000000..bf95b1f
--- /dev/null
+++ b/R/removeVariable.R
@@ -0,0 +1,33 @@
+##' Remove variables containing NaNs or NAs from data.frames.
+##'
+##' In reading data, incomplete variables are sometimes included, and only NAs or NaNs are included in some variables.
+##' This function removes such variables in the data.frame. In addition to NAs or NaNs, variables which contain specified keyword can also be removed. 
+##' @param data a data.frame from which variables are removed.
+##' @param string a flag character specifying variables to be removed.
+##' @param na.prop a numeric value specifying NA threshold proportion for removing variables. The default threshold is 1.0, meaning that variables including only NAs are removed.
+##' @param nan.prop a numeric value specifying NaN threshold proportion for removing variables. The default threshold is 1.0, meaning that variables including only NaNs are removed.
+##' @return a data.frame from which some variables are removed.
+##' @examples
+##' df <- data.frame(imcomp=rep(NA, 10), imcomp2=rep(NaN, 10), cov1=rnorm(10), NO_USE=rnorm(10))
+##' df2 <- removeVariable(df, string="NO_USE")
+##' str(df)
+##' str(df2)
+##' @export
+removeVariable <- function(data=NULL, string=NA, na.prop=1.0, nan.prop=1.0){
+
+  if(is.null(data)||!is.data.frame(data))
+    stop("data is not specified or not data.frame")
+
+  n <- nrow(data)
+  varname <- colnames(data)
+  ## string=NA means "no keyword": names are matched only for a real pattern.
+  usekey <- !is.na(string[1])
+
+  ## TRUE for each variable to keep; seq_along() copes with zero columns.
+  idx <- vapply(seq_along(varname), function(i){
+    !((usekey && grepl(string, varname[i])) ||
+      sum(is.na(data[,i])) >= na.prop*n || sum(is.nan(data[,i])) >= nan.prop*n)
+  }, logical(1))
+  ## drop=FALSE returns a data.frame even when a single variable remains.
+  return(data[,idx,drop=FALSE])
+}
diff --git a/man/AIC.epifit.Rd b/man/AIC.epifit.Rd
index ab3370a..ec2b49c 100644
--- a/man/AIC.epifit.Rd
+++ b/man/AIC.epifit.Rd
@@ -2,21 +2,21 @@
 % Please edit documentation in R/AIC.epifit.R
 \name{AIC.epifit}
 \alias{AIC.epifit}
-\title{Akaike's An Information Criterion}
+\title{Function for calculating Akaike's \sQuote{An Information Criterion} (AIC) from epifit object.}
 \usage{
 \method{AIC}{epifit}(object, ..., k = 2)
 }
 \arguments{
-\item{object}{a fitted epifit object for which there exists a}
+\item{object}{a fitted epifit object.}
 
 \item{...}{not used in this version, only for compatibility purpose with generic function \code{AIC} currently.}
 
 \item{k}{numeric, the \emph{penalty} per parameter to be used; the default \code{k = 2} is the classical AIC.}
 }
-\description{
-Function for calculating Akaike's \sQuote{An Information Criterion} (AIC) from epifit object
+\value{
+a numeric AIC value.
 }
-\details{
+\description{
 Function called from generic function AIC in \pkg{stats} when the argument is epifit object.
 }
 \examples{
diff --git a/man/calcAge.Rd b/man/calcAge.Rd
new file mode 100644
index 0000000..16c7dc7
--- /dev/null
+++ b/man/calcAge.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/calcAge.R
+\name{calcAge}
+\alias{calcAge}
+\title{Calculate the difference between two dates in terms of unit of time.}
+\usage{
+calcAge(birthday, targetdate = Sys.Date(), unit = "year")
+}
+\arguments{
+\item{birthday}{a character or character vector specifying birthday or base date.}
+
+\item{targetdate}{a character specifying current or target date.}
+
+\item{unit}{a character specifying unit for calculating the difference between the two dates. Values of "year", "month" and "day" are supported.}
+}
+\value{
+a vector of age
+}
+\description{
+This function calculates the difference between two dates in terms of unit of time, and age can be obtained when \sQuote{year} is specified as unit argument.
+}
+\examples{
+calcAge("1963-2-3")
+}
+
diff --git a/man/convertNA.Rd b/man/convertNA.Rd
new file mode 100644
index 0000000..20cf384
--- /dev/null
+++ b/man/convertNA.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/convertNA.R
+\name{convertNA}
+\alias{convertNA}
+\title{Convert a character pattern into NA in character and vice versa.}
+\usage{
+convertNA(data = NULL, na.character = c("", "."), reverse = FALSE)
+}
+\arguments{
+\item{data}{a data.frame whose character variables are converted.}
+
+\item{na.character}{a character vector specifying missing character.}
+
+\item{reverse}{a logical value specifying reverse replacement that NA is replaced with the first element of na.character.}
+}
+\value{
+a data.frame with NA replacement.
+}
+\description{
+Convert a character pattern into NA in character and vice versa.
+}
+\examples{
+dat <- data.frame(a=c("","2","3"),b=c("4", NA, "."), stringsAsFactors=FALSE)
+dat2 <- convertNA(dat)
+dat3 <- convertNA(dat2, na.character=".", reverse=TRUE)
+dat
+dat2
+dat3
+}
+\seealso{
+\code{\link{countNA}}
+}
+
diff --git a/man/countNA.Rd b/man/countNA.Rd
new file mode 100644
index 0000000..807abf8
--- /dev/null
+++ b/man/countNA.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/countNA.R
+\name{countNA}
+\alias{countNA}
+\title{Count NA in variables.}
+\usage{
+countNA(data = NULL)
+}
+\arguments{
+\item{data}{a data.frame to summarize.}
+}
+\value{
+a matrix with one row per variable giving the NA count, the total number of rows and the NA percentage.
+}
+\description{
+Count NA, and calculate NA proportion in data.frame.
+}
+\examples{
+df <- data.frame(id=1:1000, cov1=rnorm(1000), cov2=runif(1000))
+df$cov1 <- ifelse(df$cov1 < 0, NA, df$cov1)
+df$cov2 <- ifelse(df$cov2 < 0.2, NA, df$cov2)
+countNA(df)
+}
+\seealso{
+\code{\link{convertNA}}
+}
+
diff --git a/man/epifit-package.Rd b/man/epifit-package.Rd
index 5612e0b..d81ff34 100644
--- a/man/epifit-package.Rd
+++ b/man/epifit-package.Rd
@@ -4,7 +4,7 @@
 \name{epifit-package}
 \alias{epifit-package}
 \alias{epifit-pkg}
-\title{Flexible Modelling Functions for Epidemiological Data Analysis}
+\title{Flexible Modelling Functions for Epidemiological Data Analysis.}
 \description{
 Provides flexible model fitting used in epidemiological data analysis
 by a unified model specification, along with some data manipulation functions.
@@ -18,8 +18,14 @@ Author: Kazutaka Doi, Kei Sakabe and Masataka Taguri
 Maintainer: Kazutaka Doi \email{kztkdi@gmail.com}
 }
 \seealso{
+\code{\link{calcAge}},
+\code{\link{convertNA}},
+\code{\link{countNA}},
 \code{\link{epifit}},
-\code{\link{pytable}}
+\code{\link{extractVariable}},
+\code{\link{pullOneValue}},
+\code{\link{pytable}},
+\code{\link{removeVariable}}
 }
 \keyword{models}
 
diff --git a/man/epifit.Rd b/man/epifit.Rd
index 9ce6e8e..bd5f2d6 100644
--- a/man/epifit.Rd
+++ b/man/epifit.Rd
@@ -140,4 +140,8 @@ DeLong, D. M., Guirguis, G.H., and So, Y.C. (1994). Efficient computation of sub
 
 Gail, M. H., Lubin, J. H., and Rubinstein, L. V. (1981). Likelihood calculations for matched case-control studies and survival studies with tied death times. \emph{Biometrika} \strong{68}, 703-707.
 }
+\seealso{
+\code{\link{AIC.epifit}},
+\code{\link{print.epifit}}
+}
 
diff --git a/man/extractVariable.Rd b/man/extractVariable.Rd
new file mode 100644
index 0000000..dbc7cde
--- /dev/null
+++ b/man/extractVariable.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/extractVariable.R
+\name{extractVariable}
+\alias{extractVariable}
+\title{Extract variables according to mode from data.frame.}
+\usage{
+extractVariable(data = NULL, mode = "numeric")
+}
+\arguments{
+\item{data}{a data.frame from which variables of the specified mode are extracted.}
+
+\item{mode}{a character specifying object type. Object modes of \sQuote{numeric}, \sQuote{character}, \sQuote{factor}, and \sQuote{logical} are supported.}
+}
+\value{
+a data.frame which includes only specified mode of variables.
+}
+\description{
+This function extracts variables which match the specified mode from a data.frame, and makes a new data.frame.
+}
+\examples{
+df <- data.frame(id=seq(1,10), str=letters[1:10], fac=factor(seq(1,10)), stringsAsFactors=FALSE)
+extractVariable(df)
+extractVariable(df, mode="character")
+extractVariable(df, mode="factor")
+}
+
diff --git a/man/print.epifit.Rd b/man/print.epifit.Rd
index c24663e..d313356 100644
--- a/man/print.epifit.Rd
+++ b/man/print.epifit.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/print.epifit.R
 \name{print.epifit}
 \alias{print.epifit}
-\title{Print function for epifit object}
+\title{Print function for epifit object.}
 \usage{
 \method{print}{epifit}(x, digits = max(options()$digits - 4, 3), ...)
 }
diff --git a/man/pullOneValue.Rd b/man/pullOneValue.Rd
new file mode 100644
index 0000000..9366d03
--- /dev/null
+++ b/man/pullOneValue.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/pullOneValue.R
+\name{pullOneValue}
+\alias{pullOneValue}
+\title{Pull one set of values from variables included in data which are thought to include essentially the same information.}
+\usage{
+pullOneValue(data = NULL, varlist = c(""), check = TRUE)
+}
+\arguments{
+\item{data}{a data.frame in which variables are included}
+
+\item{varlist}{a character vector of variable list which is assumed to contain the same information.}
+
+\item{check}{a logical value specifying whether to scan all variables in varlist for incompatible values or not.}
+}
+\value{
+a vector including one set of values obtained from varlist.
+}
+\description{
+When some part of the data is missing, the missing information may be recovered from another source of information. This function scans all variables which are thought to include essentially the same information, and pulls value which is not missing.
+}
+\examples{
+dat <- data.frame(a1=c(NA,2,3), a2=c(1,NA,2), a3=c(1,2,NA), b=c(10,11,20))
+pullOneValue(dat, c("a1", "a2", "a3"))
+}
+
diff --git a/man/pytable.Rd b/man/pytable.Rd
index 4a43c01..7c1464a 100644
--- a/man/pytable.Rd
+++ b/man/pytable.Rd
@@ -7,11 +7,11 @@
 pytable(event, time, cov, scale = 1)
 }
 \arguments{
-\item{event}{a vector specifying number of event}
+\item{event}{a vector specifying number of event.}
 
-\item{time}{a vector specifying time variable}
+\item{time}{a vector specifying time variable.}
 
-\item{cov}{vector or matrix or data.frame of covariates}
+\item{cov}{vector or matrix or data.frame of covariates.}
 
 \item{scale}{a scaling for person-year. The value of 365.25 will make person-year table from time variable recoded as days.}
 }
@@ -19,9 +19,6 @@ pytable(event, time, cov, scale = 1)
 a person-year data.frame
 }
 \description{
-Make person-year table from individual data
-}
-\details{
 This function creates the person-years table from event, time and covariate data. The number of event and time of some observations with the same covariate data are summed up, and made into one observation.
 }
 \examples{
diff --git a/man/removeVariable.Rd b/man/removeVariable.Rd
new file mode 100644
index 0000000..e46a363
--- /dev/null
+++ b/man/removeVariable.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/removeVariable.R
+\name{removeVariable}
+\alias{removeVariable}
+\title{Remove variables containing NaNs or NAs from data.frames.}
+\usage{
+removeVariable(data = NULL, string = NA, na.prop = 1, nan.prop = 1)
+}
+\arguments{
+\item{data}{a data.frame from which variables are removed.}
+
+\item{string}{a flag character specifying variables to be removed.}
+
+\item{na.prop}{a numeric value specifying NA threshold proportion for removing variables. The default threshold is 1.0, meaning that variables including only NAs are removed.}
+
+\item{nan.prop}{a numeric value specifying NaN threshold proportion for removing variables. The default threshold is 1.0, meaning that variables including only NaNs are removed.}
+}
+\value{
+a data.frame from which some variables are removed.
+}
+\description{
+In reading data, incomplete variables are sometimes included, and only NAs or NaNs are included in some variables.
+This function removes such variables in the data.frame. In addition to NAs or NaNs, variables which contain specified keyword can also be removed.
+}
+\examples{
+df <- data.frame(imcomp=rep(NA, 10), imcomp2=rep(NaN, 10), cov1=rnorm(10), NO_USE=rnorm(10))
+df2 <- removeVariable(df, string="NO_USE")
+str(df)
+str(df2)
+}
+
diff --git a/src/select.h b/src/select.h
index b474568..4e9e243 100644
--- a/src/select.h
+++ b/src/select.h
@@ -1,4 +1,2 @@
 double inner_select(int m, int n, SEXP hazard);
-extern "C"{
-  SEXP Rf_select(SEXP m, SEXP n, SEXP hazard);
-}
+extern "C" SEXP Rf_select(SEXP m, SEXP n, SEXP hazard);