initial commit

hrbrmstr · Mar 3, 2017 · 380b69d · 380b69d
1 parent 821023a
commit 380b69d
Show file tree

Hide file tree

Showing 31 changed files with 1,088 additions and 12 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,3 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^README\.Rmd$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,26 @@
 Package: scimple
-Title: scimple TITLE
+Title: Tidy Simultaneous Confidence Intervals for Multinomial Proportions
 Version: 0.1.0
 Encoding: UTF-8
-Authors@R: c(person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"))
+Authors@R: c(
+      person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"),
+      person("M", "Subbiah", role = c("aut"), comment = c("Original package author"))
+    )
 Maintainer: Bob Rudis <bob@rud.is>
-Description: scimple DESCRIPTION
-Depends: R (>= 3.2.0)
-License: AGPL
+Description: Methods for obtaining simultaneous confidence intervals for multinomial 
+    proportions have been proposed by many authors and the present study include a variety 
+    of widely applicable procedures. Seven classical methods (Wilson, Quesenberry and 
+    Hurst, Goodman, Wald with and without continuity correction, Fitzpatrick and Scott, 
+    Sison and Glaz) and Bayesian Dirichlet models are included in the package. The 
+    advantage of MCMC pack has been exploited to derive the Dirichlet posterior directly 
+    and this also helps in handling the Dirichlet prior parameters. This package is 
+    prepared to have equal and unequal values for the Dirichlet prior distribution that 
+    will provide better scope for data analysis and associated sensitivity analysis.
+Depends: R (>= 3.2.0), MCMCpack
+License: GPL-2
 URL: https://github.com/hrbrmstr/scimple
 BugReports: https://github.com/hrbrmstr/scimple/issues
 LazyData: true
 Suggests: testthat
-Imports: purrr
+Imports: dplyr, tibble, stats, purrr
+RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,19 @@
+# Generated by roxygen2: do not edit by hand
+
+export(scimp_bmde)
+export(scimp_bmdu)
+export(scimp_fs)
+export(scimp_goodman)
+export(scimp_qh)
+export(scimp_sg)
+export(scimp_wald)
+export(scimp_waldcc)
+export(scimp_wilson)
+export(scimple_ci)
+import(MCMCpack)
+import(stats)
+import(tibble)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(purrr,map)
+importFrom(purrr,map_df)
diff --git a/R/aaa.r b/R/aaa.r
@@ -0,0 +1,156 @@
+SG <- function(x,alpha) {
+  t1=proc.time()
+
+  sgp=function(c) {
+    s=sum(x)  ##SUM(Cell_Counts)
+    k=length(x)
+    b= x-c
+    a= x+c
+    ###FINDING FACTORIAL MOMENTS-TRUNCATED POISSON
+    fm1=0
+    fm2=0
+    fm3=0
+    fm4=0
+    for (i in 1:k)
+    {
+      fm1[i]=x[i]*(ppois(a[i]-1,x[i])-ppois(b[i]-2,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
+      fm2[i]=x[i]^2*(ppois(a[i]-2,x[i])-ppois(b[i]-3,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
+      fm3[i]=x[i]^3*(ppois(a[i]-3,x[i])-ppois(b[i]-4,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
+      fm4[i]=x[i]^4*(ppois(a[i]-4,x[i])-ppois(b[i]-5,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
+    }
+
+    ##FINDING CENTRAL MOMENTS-TRUNCATED POISSON
+    m1=0
+    m2=0
+    m3=0
+    m4=0
+    m4t=0
+    for (i in 1:k)
+    {
+      m1[i]=fm1[i]
+      m2[i]=fm2[i]+fm1[i]-(fm1[i]*fm1[i])
+      m3[i]=fm3[i]+fm2[i]*(3-(3*fm1[i]))+(fm1[i]-(3*fm1[i]*fm1[i])+(2*fm1[i]^3))
+      m4[i]=fm4[i]+fm3[i]*(6-(4*fm1[i]))+fm2[i]*(7-(12*fm1[i])+(6*fm1[i]^2))+fm1[i]-(4*fm1[i]^2)+(6*fm1[i]^3)-(3*fm1[i]^4)
+      m4t[i] = m4[i]-(3*m2[i]^2)#Temporary Variable for next step
+    }
+    s1=sum(m1)
+    s2=sum(m2)
+    s3=sum(m3)
+    s4=sum(m4t)
+    ##FINDING GAMMAS ---> EDGEWORTH EXPANSION
+    g1=s3/(s2^(3/2))
+    g2=s4/(s2^2)
+
+    ##FINDING CHEBYSHEV-HERMITE POLYNOMIALS ---> EDGEWORTH EXPANSION
+    z=(s-s1)/sqrt(s2)
+    z2=z^2
+    z3=z^3
+    z4=z^4
+    z6=z^6
+    poly=1+g1*(z3-(3*z))/6+g2*(z4-(6*z2)+3)/24+(g1^2)*(z6-(15*z4)+(45*z2)-15)/72
+    f=poly*exp(-z2/2)/sqrt(2*pi)
+
+    ##FINDING PROBABILITY FUNCTION BASED ON 'c'
+    pc=0
+    for (i in 1:k) pc[i] <- ppois(a[i],x[i])-ppois(b[i]-1,x[i])
+    pcp = prod(pc) #PRODUCT OF pc THAT HAS k ELEMENTS
+    pps = 1/dpois(s,s)#POISSON PROB FOR s WITH PARAMETER AS s
+    rp=pps*pcp*f/sqrt(s2)##REQUIRED PROBABILITY
+    rp
+  }
+  proc.time()-t1
+  t=proc.time()
+  y=0
+  s=sum(x)
+  M1=1
+  M2=s
+  c=M1:M2
+  M=length(c)
+
+  for (i in 1:M) y[i] <- round(sgp(c[i]), 4)
+
+  j=1
+  vc=0
+  while(j<=M){
+    if (y[j]<1-alpha && 1-alpha < y[j+1])
+      vc=j  else
+        vc=vc
+      j = j+1
+  }
+
+  # vc##REQUIRED VALUE OF C
+  delta <- ((1-alpha)-y[vc])/(y[vc+1]-y[vc])
+
+  ##FINDING LIMITS
+  sp <- x/s#SAMPLE PROPORTION
+  LL <- round(sp-(vc/s),4)#LOWER LIMIT
+  UL <- round(sp+(vc/s)+(2*delta/s),4)#UPPER LIMIT
+  LLA <- ULA <- 0
+  for (r in 1:length(x)) {
+    if ( LL [r]< 0) LLA[r] = 0 else LLA[r]=LL[r]
+    if (UL[r] > 1) ULA[r] = 1 else ULA[r]=UL[r]
+  }
+  diA <- ULA-LLA##FIND LENGTH OF CIs
+  VOL <- round(prod(diA),8)##PRODUCT OF LENGTH OF CIs
+
+  data_frame(
+    method = "sg",
+    lower_limit = LL,
+    upper_limit = UL,
+    adj_ll = LLA,
+    adj_ul = ULA,
+    volume = VOL
+  )
+
+}
+
+
+BMDU <- function(x, d, seed=1492) {
+
+  set.seed(seed)
+
+  k <- length(x)
+
+  for(m in 1:k) {
+    if(x[m]<0) { warning('Arguments must be non-negative integers') }
+  }
+
+  if(d>=1 && d<=k) {
+    m=0
+    l=0
+    u=0
+    diff=0
+    s=sum(x)
+    s1=floor(k/d)
+    d1=runif(s1,0,1)###First half of the vector
+    d2=runif(k-s1,1,2)###Second half of the vector
+    a=c(d1,d2)
+    p=x+a###Prior for Dirichlet
+    dr=rdirichlet(10000, p)###Posterior
+    for(j in 1:k) {
+      l[j]=round(quantile(dr[,j],0.025),4)###Lower Limit
+      u[j]=round(quantile(dr[,j],0.975),4)###Upper Limit
+      m[j]=round(mean(dr[,j]),4)###Point Estimate
+      diff[j]=u[j]-l[j]
+    }
+
+    data_frame(
+      method = "bmdu",
+      lower_limit = l,
+      upper_limit = m,
+      volume = prod(diff),
+      mean = m
+    )
+
+  } else {
+    warning('Size of the division should be less than the size of the input matrix')
+    data_frame(
+      method = "bmdu",
+      lower_limit = l,
+      upper_limit = m,
+      volume = prod(diff),
+      mean = m
+    )
+  }
+
+}
diff --git a/R/bmde.r b/R/bmde.r
@@ -0,0 +1,46 @@
+#' Bayesian Multinomial Dirichlet Model (Equal Prior)
+#'
+#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
+#'
+#' @md
+#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
+#' @param p equal value for the Dirichlet prior parameter - positive real number
+#' @param seed random seed for reproducible results
+#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
+#' @author Dr M Subbiah
+#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
+#' @export
+#' @examples
+#' y <- c(44, 55, 43, 32, 67, 78)
+#' z <- 1
+#' scimp_bmde(y, z)
+scimp_bmde <- function(x, p, seed=1492) {
+
+  k <- length(x)
+  n_r <- 10000
+  for(m in 1:k) {
+    if(x[m]<0) {warning('Arguments must be non-negative integers') }
+  }
+
+  set.seed(seed)
+
+  po <- x+p
+  dr <- rdirichlet(n_r,po)
+  a <- l <- u <- dif <- 0
+
+  for(j in 1:k) {
+    a[j] <- round(mean(dr[,j]), 4)
+    l[j] <- round(quantile(dr[,j], 0.025),4)
+    u[j] <- round(quantile(dr[,j], 0.975),4)
+    dif[j] <- u[j] - l[j]
+  }
+
+  data_frame(
+    method = "bmde",
+    lower_limit = l,
+    upper_limit = u,
+    volume = prod(dif),
+    mean = a
+  )
+
+}
diff --git a/R/bmdu.r b/R/bmdu.r
@@ -0,0 +1,19 @@
+#' Bayesian Multinomial Dirichlet Model (Unequal Prior)
+#'
+#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
+#'
+#' @md
+#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
+#' @param d number of divisions required to split the prior vector of Dirichlet distribution to assign unequal values from U(0,1) and U(1,2)
+#' @param seed random seed for reproducible results
+#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
+#' @author Dr M Subbiah
+#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
+#' @export
+#' @examples
+#' y <- c(44, 55, 43, 32, 67, 78)
+#' z <- 2
+#' scimp_bmdu(y, z)
+scimp_bmdu <- function(x, d, seed=1492) {
+  return(BMDU(x, d, seed))
+}
diff --git a/R/fitz-scott.r b/R/fitz-scott.r
@@ -0,0 +1,47 @@
+#' Fitzpatrick and Scott Confidence Interval
+#'
+#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Fitzpatrick and Scott (1987)
+#'
+#' @md
+#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
+#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
+#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
+#' @author Dr M Subbiah
+#' @references Fitzpatrick, S. and Scott, A. (1987). Quick simultaneous confidence interval for multinomial proportions. Journal of American Statistical Association 82(399): 875-878.
+#' @export
+#' @examples
+#' y <- c(44, 55, 43, 32, 67, 78)
+#' z <- 0.05
+#' scimp_fs(y, z)
+scimp_fs <- function(inpmat, alpha) {
+
+  k <- length(inpmat)
+  s <-  sum(inpmat)
+  zval <- abs(qnorm(1 - (alpha/2)))
+  pi <- inpmat/s
+
+  fs_ll <- pi - (zval / (2 * sqrt(s)))
+  fs_ul <- pi + (zval / (2 * sqrt(s)))
+
+  adj_ll <- adj_ul <- 0
+
+  for (r in 1:length(inpmat)) {
+    if (fs_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- fs_ll[r]
+    if (fs_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- fs_ul[r]
+  }
+
+  ci_length <- adj_ul - adj_ll
+  volume <- round(prod(ci_length), 8)
+
+  data_frame(
+    method = "fs",
+    lower_limit = fs_ll,
+    upper_limit = fs_ul,
+    adj_ll = adj_ll,
+    adj_ul = adj_ul,
+    volume = volume
+  ) -> ret
+
+  ret
+
+}
diff --git a/R/goodman.r b/R/goodman.r
@@ -0,0 +1,47 @@
+#' Goodman Confidence Interval
+#'
+#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Goodman (1965)
+#'
+#' @md
+#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
+#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
+#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
+#' @author Dr M Subbiah
+#' @references Goodman, L.A. (1965). On Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics 7: 247-254.
+#' @export
+#' @examples
+#' y <- c(44, 55, 43, 32, 67, 78)
+#' z <- 0.05
+#' scimp_goodman(y, z)
+scimp_goodman <- function(inpmat, alpha) {
+
+  k <- length(inpmat)
+  s <- sum(inpmat)
+  chi <- qchisq(1 - (alpha/k), df = 1)
+  pi <- inpmat/s
+
+  goodman_ul <- (chi + 2*inpmat + sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s))
+  goodman_ll <- (chi + 2*inpmat - sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s))
+
+  adj_ll <- adj_ul <- 0
+
+  for (r in 1:length(inpmat)) {
+    if (goodman_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- goodman_ll[r]
+    if (goodman_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- goodman_ul[r]
+  }
+
+  ci_length <- adj_ul - adj_ll
+  volume <- round(prod(ci_length), 8)
+
+  data_frame(
+    method = "goodman",
+    lower_limit = goodman_ll,
+    upper_limit = goodman_ul,
+    adj_ll = adj_ll,
+    adj_ul = adj_ul,
+    volume = volume
+  ) -> ret
+
+  ret
+
+}
diff --git a/R/package.r b/R/package.r
@@ -1,7 +1,19 @@
-#' Tools to ...
+#' Simultaneous Confidence Intervals for Multinomial Proportions
+#'
+#' Methods for obtaining simultaneous confidence intervals for multinomial proportions have
+#' been proposed by many authors and the present study include a variety of widely
+#' applicable procedures. Seven classical methods (Wilson, Quesenberry and Hurst, Goodman,
+#' Wald with and without continuity correction, Fitzpatrick and Scott, Sison and Glaz)
+#' and Bayesian Dirichlet models are included in the package. The advantage of MCMC pack
+#' has been exploited to derive the Dirichlet posterior directly and this also helps in
+#' handling the Dirichlet prior parameters. This package is prepared to have equal and
+#' unequal values for the Dirichlet prior distribution that will provide better scope for
+#' data analysis and associated sensitivity analysis.
 #'
 #' @name scimple
 #' @docType package
-#' @author Bob Rudis (bob@@rud.is)
-#' @import purrr
+#' @author Dr M.Subbiah [primary], Bob Rudis (bob@@rud.is) [tidy version]
+#' @import tibble stats MCMCpack
+#' @importFrom dplyr mutate select
+#' @importFrom purrr map map_df
 NULL