version 0.2.0

cran · Aug 10, 2020 · 56e90f1 · 56e90f1
1 parent f50d188
commit 56e90f1
Show file tree

Hide file tree

Showing 13 changed files with 257 additions and 25 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: htestClust
 Title: Reweighted Marginal Hypothesis Tests for Clustered Data
-Version: 0.1.0
+Version: 0.2.0
 Authors@R: c(
     person(given = "Mary", family = "Gregg", role = c("aut", "cre"), email = "megregg07@gmail.com", comment = c(ORCID = "0000-0003-2991-6939")),
     person(given = "Somnath", family = "Datta", role = "aut", comment = c(ORCID = "0000-0003-4381-1842")),
@@ -23,12 +23,12 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.0
-Imports: bootstrap, MASS, stats
+Imports: bootstrap, graphics, MASS, stats
 NeedsCompilation: no
-Packaged: 2020-06-26 17:51:02 UTC; konan
+Packaged: 2020-08-07 14:24:05 UTC; konan
 Author: Mary Gregg [aut, cre] (<https://orcid.org/0000-0003-2991-6939>),
   Somnath Datta [aut] (<https://orcid.org/0000-0003-4381-1842>),
   Doug Lorenz [aut] (<https://orcid.org/0000-0001-8114-0926>)
 Maintainer: Mary Gregg <megregg07@gmail.com>
 Repository: CRAN
-Date/Publication: 2020-06-29 09:20:03 UTC
+Date/Publication: 2020-08-10 10:40:02 UTC
diff --git a/MD5 b/MD5
@@ -1,26 +1,28 @@
-3cc60efbfd435613e8be605f86099e4f *DESCRIPTION
+81e5149291da85a12e3283022f82373f *DESCRIPTION
 1e948abc4d712e2f7e19b6504c9c1e43 *LICENSE
-7bafab97bfe489b4dbb8f5d8e96a8ba5 *NAMESPACE
+7694728a3a92d1e5e945a72da7f784c2 *NAMESPACE
 f1d196bc8342ad54095bc27ff7def1b9 *R/chisqtestClust.R
-3fea411b759ac26b5d190bee05f82fa5 *R/cortestClust.R
+b73f2070f0d8d49ea3dc798af5d5a5f1 *R/cortestClust.R
+48b8cd3976a95ac2bec90552f70bb168 *R/icsPlot.R
 a9b38b2dd2b7620a0325b352c19144bc *R/icstestClust.R
 59ba89c00c2f2d61c5feed98895d75d7 *R/levenetestClust.R
 e3309fd113e3929daa79b8deb0a8a5a2 *R/mcnemartestClust.R
 4f3691ddf614428535ce125541296934 *R/onewaytestClust.R
-995e1ee7a7fb3d7f62a13bc0e563636c *R/proptestClust.R
-80a3e81d7bbb530ba17e49e50d9c0bb2 *R/screen8.R
-9a4d464283bb0c999fc7f0a85e607a23 *R/ttestClust.R
+17559bd80d295a2eddba60b2483028b9 *R/proptestClust.R
+dce3cff6b2dc3f72bd7ccdbeaed8ef6b *R/screen8.R
+4f0974235e9777afda858fea522c775c *R/ttestClust.R
 3b65f548f4fc40eaa0f2797d8a59a58e *R/vartestClust.R
 32f99fb28ddd28a6952eb88a81e1a0ae *R/wilcoxtestClust.R
 3d3155cb419d968eb3925051087fb050 *data/screen8.RData
 694cb99e568a7b9c182dc48b8f290818 *man/chisqtestClust.Rd
-48d8b135d31512345f947870ea9ab7bd *man/cortestClust.Rd
+59941499330ffcdee690b37e10c837e4 *man/cortestClust.Rd
+a6c924e402bb0a8cbfedb315f4c4b3ed *man/icsPlot.Rd
 e0cf36c70bd98cbd754f56eb5c81cf22 *man/icstestClust.Rd
 bdc625d1baed30898f337e2bd3f8aa7c *man/levenetestClust.Rd
 7719644cd7376910fce28d28f2445844 *man/mcnemartestClust.Rd
 4902ca15baad88a2d69af43a1d838c84 *man/onewaytestClust.Rd
-ad5ceb662f275e6de8dab9eef5f4c07b *man/proptestClust.Rd
-4d96c81f4610c8fecc3e382b076b737f *man/screen8.Rd
-98611540cc68b04603120cd2d674b973 *man/ttestClust.Rd
+d58b87b9620a3fbe9925a108bb9ed33d *man/proptestClust.Rd
+e78d2207c47e5f76d61727a06f4ed945 *man/screen8.Rd
+6bc7ff469573f1a374ac5022654048e9 *man/ttestClust.Rd
 0b2ec803af4d15c8081423438355fbbe *man/vartestClust.Rd
 81f2d8a25d4e82732498a0761fac231a *man/wilcoxtestClust.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ S3method(wilcoxtestClust,default)
 S3method(wilcoxtestClust,formula)
 export(chisqtestClust)
 export(cortestClust)
+export(icsPlot)
 export(icstestClust)
 export(levenetestClust)
 export(mcnemartestClust)

diff --git a/R/cortestClust.R b/R/cortestClust.R
@@ -4,7 +4,7 @@
 
 #' Test for Marginal Association Between Paired Clustered Data
 #'
-#' Test for association between paired samples in clustered data with potentially
+#' Test for marginal association between paired samples in clustered data with potentially
 #' informative cluster size.
 #'
 #' @param x,y numeric vectors of data values.
@@ -52,9 +52,10 @@
 #'
 #' @examples
 #' data(screen8)
+#' ## test if math and reading scores are marginally correlated using vectors
 #' cortestClust(screen8$read, screen8$math, screen8$sch.id)
 #'
-#' ## Formula interface.
+#' ## formula interface
 #' cortestClust(~ math + read, sch.id, data=screen8, method="kendall")
 #'
 #' @export

diff --git a/R/icsPlot.R b/R/icsPlot.R
@@ -0,0 +1,151 @@
+#######################################################################
+## Function for icsPlot
+##
+## Plot measures of location by cluster size
+## Quantitative  variable: mean, median, SD, variance, IQR, range
+##
+## Categorical variable: barplot
+##
+## Function should operate on vectors (of individual observations) or table
+#######################################################################
+
+#' Visualize Informative Cluster Size
+#'
+#' Function to visualize informative cluster size. Plots within-cluster summary statistic from
+#' quantitative variables against the size of each cluster. For categorical variables, a barplot of
+#' category proportions for quantiles of cluster size is produced.
+#'
+#' @param x  vector of data values. Alternatively a two-dimensional table or matrix.
+#' @param id a vector which identifies the clusters, with length equal to length of \code{x}; ignored if \code{x} is a matrix or table.
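+#' @param FUN the name of the function that produces the desired intra-cluster summary statistic; one of \code{"mean"} (the default), \code{"median"}, \code{"var"}, \code{"sd"}, \code{"range"}, \code{"IQR"} or \code{"prop"}. Only \code{"prop"} is accepted when \code{x} is a factor.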
+#' @param breaks a single number (at least 2) giving the number of cluster-size quantile groups used for the barplot of categorical variables with more than two categories; defaults to 4 (quartiles).
+#' @param xlab a label for the x axis, defaults to "cluster size".
+#' @param ylab a label for the y axis, defaults to a description of \code{FUN} of \code{x}.
+#' @param legend a logical indicating whether a legend should be included in a barplot.
+#' @param ... further arguments to be passed to or from methods.
+#'
+#' @details If \code{x} is a matrix or table and \code{x} has exactly two columns, the first column should contain the cluster
+#'  sizes and the second column the respective intra-cluster summary statistic (e.g., mean, variance) that will be plotted
+#'  against cluster size.
+#'
+#'  If \code{x} has more than two columns, the first column is assumed to contain the cluster size
+#'  and the subsequent columns the counts of intra-cluster observations belonging to the different categorical variable levels.
+#'  If there are exactly two categorical levels (i.e., \code{x} has exactly three columns), a scatterplot of the proportion of
+#'  intracluster observations belonging to the first category will be plotted against the cluster size. If the number of
+#'  categories is > 2, a barplot of category proportions against quantiles of cluster size is produced.
+#'
+#'  Standard graphical parameters can be passed to \code{icsPlot} through the \code{...} argument.
+#'
+#' @examples
+#' data(screen8)
+#' ## VECTOR INPUT
+#' ## plot average math score by cluster size
+#' icsPlot(x = screen8$math, id = screen8$sch.id, pch = 20)
+#'
+#' ## plot proportion of females by cluster size
+#' icsPlot(screen8$gender, screen8$sch.id, pch = 20, main = "Female proportion by cluster size")
+#'
+#' ## barchart of activity proportion by quartile of cluster size
+#' icsPlot(x = screen8$activity, id = screen8$sch.id)
+#'
+#' ## TABLE INPUT
+#' ## Plot intra-cluster variance of math score by cluster size
+#' cl.size <- as.numeric(table(screen8$sch.id))
+#' tab1 <- cbind(cl.size, aggregate(screen8$math, list(screen8$sch.id), var)[,2])
+#' colnames(tab1) <- c("cl.size", "variance")
+#' icsPlot(x = tab1, pch = 17, main = "math score variance by cluster size")
+#'
+#' ## barchart of activity proportion across five quantiles of cluster size
+#' tab2 <- cbind(cl.size, table(screen8$sch.id, screen8$activity))
+#' icsPlot(tab2, breaks = 5)
+#'
+#' @export
+
+icsPlot <- function(x, id, FUN = c("mean", "median", "var", "sd", "range", "IQR", "prop"),
+                    breaks, xlab = NULL, ylab = NULL, legend = c(TRUE, FALSE), ...) {
+  ## Visualize informative cluster size.
+  ##   x          : vector of observations, or a matrix/table whose first
+  ##                column holds the cluster sizes
+  ##   id         : cluster identifier of each observation (vector input only)
+  ##   FUN        : within-cluster summary for non-factor 'x'; only "prop" is
+  ##                accepted when 'x' is a factor
+  ##   breaks     : number of cluster-size quantile groups for barplots
+  ##                (4 when not supplied)
+  ##   xlab, ylab : axis labels; defaults are derived from the inputs
+  ##   legend     : logical, should a barplot carry a legend?
+  ##   ...        : passed on to graphics::plot() / graphics::barplot()
+  ## Value: whatever the underlying plot()/barplot() call returns.
+
+  ## 'breaks' must be one finite number; test the length first so that the
+  ## scalar '||' chain never sees a vector
+  if (!missing(breaks) &&
+      (!is.numeric(breaks) || length(breaks) != 1L || !is.finite(breaks) || breaks < 2))
+    stop("'breaks' must be a single number >= 2")
+
+  ## user label if given, otherwise the default; always a single string
+  pick.label <- function(user, default) {
+    paste(if (is.null(user)) default else user)[1L]
+  }
+  ## bin cluster sizes into 'k' quantile groups
+  size.group <- function(size, k) {
+    cut(size, breaks = stats::quantile(size, probs = seq(0, 1, by = 1/k), na.rm = TRUE),
+        include.lowest = TRUE, right = FALSE)
+  }
+
+  xLAB <- pick.label(xlab, "cluster size")
+  ## The default 'legend' is the length-2 vector c(TRUE, FALSE); barplot()
+  ## needs a single logical, so reduce it to its first element. Non-logical
+  ## values (e.g. a character vector of legend labels) are passed through.
+  legend.in <- if (is.logical(legend)) isTRUE(legend[1L]) else legend
+
+  ##  TABLE INPUT
+  if (is.table(x) || is.matrix(x)) { ## first column cluster size, second column desired summary
+    if (length(dim(x)) != 2L || ncol(x) < 2L)
+      stop("'x' must have at least two columns when supplied as a matrix or table")
+    ## NUMERIC
+    if (ncol(x) == 2L) {
+      yLAB <- pick.label(ylab, colnames(x)[2])
+      graphics::plot(x[, 1], x[, 2], xlab = xLAB, ylab = yLAB, ...)
+    } else {
+      ## FACTOR
+      ## first column cluster size, additional columns are counts from the
+      ## respective clusters across categorical levels
+      if (!(all(x >= 0)))
+        stop("elements of x must be nonnegative when plotting categorical variable")
+      ## drop = FALSE keeps a single-cluster input two-dimensional
+      tmp.tab <- x[, -1, drop = FALSE]
+      if (ncol(tmp.tab) == 2) {
+        ## two levels: scatter plot of the proportion of the first level
+        px <- prop.table(tmp.tab, margin = 1)[, 1]
+        yLAB <- pick.label(ylab, paste("proportion", paste(colnames(tmp.tab)[1])))
+        graphics::plot(x[, 1], px, xlab = xLAB, ylab = yLAB, ...)
+      } else {
+        ## > two levels: group clusters by cluster size quantile (quartiles
+        ## unless 'breaks' is given) and draw a bar chart
+        if (missing(breaks))
+          breaks <- 4
+        cl.size.cat <- size.group(x[, 1], breaks)
+        tmp <- data.frame(tmp.tab, cl.size.cat)
+        n.cat <- ncol(tmp) - 1L
+        ## total count per category within each cluster-size group
+        count <- stats::aggregate(list(tmp[, seq_len(n.cat)]), list(tmp[, ncol(tmp)]), sum)
+        ## categories in rows, cluster-size groups in columns
+        tmptab <- apply(count[, 2:ncol(count)], 1, function(x) x/sum(x))
+        colnames(tmptab) <- count[, 1]
+        yLAB <- pick.label(ylab, "proportion")
+        graphics::barplot(as.table(tmptab), legend.text = legend.in, xlab = xLAB, ylab = yLAB, ...)
+      }
+    }
+  } else {
+    ## VECTOR INPUT: each value in x and id is an observation
+    if (length(x) != length(id))
+      stop("'x' and 'id' must have the same length")
+    FUNin <- match.arg(FUN)
+    cl.size <- as.numeric(table(id))
+    DNAME <- deparse(substitute(x))
+
+    if (is.factor(x)) {
+      if (!missing(FUN) && FUNin != "prop")
+        stop(paste("'FUN = ", paste0(paste(FUN),"'"), "invalid for factors"))
+      nx <- levels(x)
+      if (length(nx) == 2) {
+        ## two levels: scatter plot of the proportion of the first level
+        px <- prop.table(table(id, x), margin = 1)[, 1]
+        yLAB <- pick.label(ylab, paste("proportion", paste(DNAME, " = ", paste(nx[1]))))
+        graphics::plot(cl.size, px, xlab = xLAB, ylab = yLAB, ...)
+      } else {
+        ## > two levels: group observations by the quantile of their cluster's
+        ## size (quartiles unless 'breaks' is given) and draw a bar chart
+        if (missing(breaks))
+          breaks <- 4
+        ## Look up each observation's cluster size through its own id, so the
+        ## result is right even when the data are not sorted by cluster.
+        cl.size.exp <- cl.size[as.integer(as.factor(id))]
+        cl.size.cat <- size.group(cl.size.exp, breaks)
+        counts <- table(x, cl.size.cat)
+
+        yLAB <- pick.label(ylab, "proportion")
+        graphics::barplot(prop.table(counts, margin = 2), legend.text = legend.in,
+                          xlab = xLAB, ylab = yLAB, ...)
+      }
+    } else {
+      ## "range" is plotted as max - min; one value per cluster keeps the
+      ## result aligned with 'cl.size'
+      sumval <- if (FUNin == "range") {
+        tapply(x, list(id), function(v) diff(range(v)))
+      } else {
+        tapply(x, list(id), FUNin)
+      }
+      yLAB <- pick.label(ylab, paste(FUNin, DNAME))
+      graphics::plot(cl.size, sumval, xlab = xLAB, ylab = yLAB, ...)
+    }
+  }
+}
diff --git a/R/proptestClust.R b/R/proptestClust.R
@@ -51,7 +51,7 @@
 #' @examples
 #' data(screen8)
 #' ## using vectors
-#' ## math proficientcy determined by score >= 65
+#' ## suppose math proficiency is determined by score >= 65
 #' ## is the marginal proportion of students proficient in math at least 75%?
 #' screen8$math.p <- 1*(screen8$math>=65)
 #' proptestClust(screen8$math.p, screen8$sch.id, p = .75, alternative = "great")
@@ -76,7 +76,7 @@ proptestClust <- function(x, id, p = NULL, alternative = c("two.sided", "less",
     x <- x[stats::complete.cases(x),]
     if (!(all(rowSums(x)>0)))
       stop("all clusters must have counts > 0")
-    if (!(all(x)>=0))
+    if (!(all(x>=0)))
       stop("elements of x must be nonnegative ")
     m <- nrow(x)
     xbar <- x[,2]/rowSums(x)

diff --git a/R/screen8.R b/R/screen8.R
@@ -1,6 +1,6 @@
 #' Example data for informative cluster size
 #'
-#' Simulated hypothetical clustered data created for illustration of functions in the \code{htest.clust} package.
+#' Simulated hypothetical clustered data created for illustration of functions in the \code{htestClust} package.
 #'
 #' @name screen8
 #' @docType data

diff --git a/R/ttestClust.R b/R/ttestClust.R
@@ -55,7 +55,7 @@
 #' ttestClust(x=screen8$math, idx=screen8$sch.id, mu = 70)
 #'
 #' ## paired test
-#' ## Test is marginal math scores have equal mean to marginal reading scores
+#' ## Test equality of marginal means in math and reading scores
 #' ttestClust(x=screen8$math, y=screen8$read, idx=screen8$sch.id, paired=TRUE)
 #'
 #' ## unpaired test

diff --git a/man/cortestClust.Rd b/man/cortestClust.Rd
diff --git a/man/icsPlot.Rd b/man/icsPlot.Rd
diff --git a/man/proptestClust.Rd b/man/proptestClust.Rd
diff --git a/man/screen8.Rd b/man/screen8.Rd