Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
hrbrmstr committed Mar 3, 2017
1 parent 821023a commit 380b69d
Show file tree
Hide file tree
Showing 31 changed files with 1,088 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
@@ -0,0 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$
24 changes: 18 additions & 6 deletions DESCRIPTION
@@ -1,14 +1,26 @@
Package: scimple
Title: scimple TITLE
Title: Tidy Simultaneous Confidence Intervals for Multinomial Proportions
Version: 0.1.0
Encoding: UTF-8
Authors@R: c(person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"))
Authors@R: c(
person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"),
person("M", "Subbiah", role = c("aut"), comment = c("Original package author"))
)
Maintainer: Bob Rudis <bob@rud.is>
Description: scimple DESCRIPTION
Depends: R (>= 3.2.0)
License: AGPL
Description: Methods for obtaining simultaneous confidence intervals for multinomial
proportions have been proposed by many authors and the present study include a variety
of widely applicable procedures. Seven classical methods (Wilson, Quesenberry and
Hurst, Goodman, Wald with and without continuity correction, Fitzpatrick and Scott,
Sison and Glaz) and Bayesian Dirichlet models are included in the package. The
advantage of MCMC pack has been exploited to derive the Dirichlet posterior directly
and this also helps in handling the Dirichlet prior parameters. This package is
prepared to have equal and unequal values for the Dirichlet prior distribution that
will provide better scope for data analysis and associated sensitivity analysis.
Depends: R (>= 3.2.0), MCMCpack
License: GPL-2
URL: https://github.com/hrbrmstr/scimple
BugReports: https://github.com/hrbrmstr/scimple/issues
LazyData: true
Suggests: testthat
Imports: purrr
Imports: dplyr, tibble, stats, purrr
RoxygenNote: 6.0.1
19 changes: 19 additions & 0 deletions NAMESPACE
@@ -0,0 +1,19 @@
# Generated by roxygen2: do not edit by hand

export(scimp_bmde)
export(scimp_bmdu)
export(scimp_fs)
export(scimp_goodman)
export(scimp_qh)
export(scimp_sg)
export(scimp_wald)
export(scimp_waldcc)
export(scimp_wilson)
export(scimple_ci)
import(MCMCpack)
import(stats)
import(tibble)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(purrr,map)
importFrom(purrr,map_df)
156 changes: 156 additions & 0 deletions R/aaa.r
@@ -0,0 +1,156 @@
SG <- function(x,alpha) {
t1=proc.time()

sgp=function(c) {
s=sum(x) ##SUM(Cell_Counts)
k=length(x)
b= x-c
a= x+c
###FINDING FACTORIAL MOMENTS-TRUNCATED POISSON
fm1=0
fm2=0
fm3=0
fm4=0
for (i in 1:k)
{
fm1[i]=x[i]*(ppois(a[i]-1,x[i])-ppois(b[i]-2,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
fm2[i]=x[i]^2*(ppois(a[i]-2,x[i])-ppois(b[i]-3,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
fm3[i]=x[i]^3*(ppois(a[i]-3,x[i])-ppois(b[i]-4,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
fm4[i]=x[i]^4*(ppois(a[i]-4,x[i])-ppois(b[i]-5,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i]))
}

##FINDING CENTRAL MOMENTS-TRUNCATED POISSON
m1=0
m2=0
m3=0
m4=0
m4t=0
for (i in 1:k)
{
m1[i]=fm1[i]
m2[i]=fm2[i]+fm1[i]-(fm1[i]*fm1[i])
m3[i]=fm3[i]+fm2[i]*(3-(3*fm1[i]))+(fm1[i]-(3*fm1[i]*fm1[i])+(2*fm1[i]^3))
m4[i]=fm4[i]+fm3[i]*(6-(4*fm1[i]))+fm2[i]*(7-(12*fm1[i])+(6*fm1[i]^2))+fm1[i]-(4*fm1[i]^2)+(6*fm1[i]^3)-(3*fm1[i]^4)
m4t[i] = m4[i]-(3*m2[i]^2)#Temporary Variable for next step
}
s1=sum(m1)
s2=sum(m2)
s3=sum(m3)
s4=sum(m4t)
##FINDING GAMMAS ---> EDGEWORTH EXPANSION
g1=s3/(s2^(3/2))
g2=s4/(s2^2)

##FINDING CHEBYSHEV-HERMITE POLYNOMIALS ---> EDGEWORTH EXPANSION
z=(s-s1)/sqrt(s2)
z2=z^2
z3=z^3
z4=z^4
z6=z^6
poly=1+g1*(z3-(3*z))/6+g2*(z4-(6*z2)+3)/24+(g1^2)*(z6-(15*z4)+(45*z2)-15)/72
f=poly*exp(-z2/2)/sqrt(2*pi)

##FINDING PROBABILITY FUNCTION BASED ON 'c'
pc=0
for (i in 1:k) pc[i] <- ppois(a[i],x[i])-ppois(b[i]-1,x[i])
pcp = prod(pc) #PRODUCT OF pc THAT HAS k ELEMENTS
pps = 1/dpois(s,s)#POISSON PROB FOR s WITH PARAMETER AS s
rp=pps*pcp*f/sqrt(s2)##REQUIRED PROBABILITY
rp
}
proc.time()-t1
t=proc.time()
y=0
s=sum(x)
M1=1
M2=s
c=M1:M2
M=length(c)

for (i in 1:M) y[i] <- round(sgp(c[i]), 4)

j=1
vc=0
while(j<=M){
if (y[j]<1-alpha && 1-alpha < y[j+1])
vc=j else
vc=vc
j = j+1
}

# vc##REQUIRED VALUE OF C
delta <- ((1-alpha)-y[vc])/(y[vc+1]-y[vc])

##FINDING LIMITS
sp <- x/s#SAMPLE PROPORTION
LL <- round(sp-(vc/s),4)#LOWER LIMIT
UL <- round(sp+(vc/s)+(2*delta/s),4)#UPPER LIMIT
LLA <- ULA <- 0
for (r in 1:length(x)) {
if ( LL [r]< 0) LLA[r] = 0 else LLA[r]=LL[r]
if (UL[r] > 1) ULA[r] = 1 else ULA[r]=UL[r]
}
diA <- ULA-LLA##FIND LENGTH OF CIs
VOL <- round(prod(diA),8)##PRODUCT OF LENGTH OF CIs

data_frame(
method = "sg",
lower_limit = LL,
upper_limit = UL,
adj_ll = LLA,
adj_ul = ULA,
volume = VOL
)

}


BMDU <- function(x, d, seed=1492) {

set.seed(seed)

k <- length(x)

for(m in 1:k) {
if(x[m]<0) { warning('Arguments must be non-negative integers') }
}

if(d>=1 && d<=k) {
m=0
l=0
u=0
diff=0
s=sum(x)
s1=floor(k/d)
d1=runif(s1,0,1)###First half of the vector
d2=runif(k-s1,1,2)###Second half of the vector
a=c(d1,d2)
p=x+a###Prior for Dirichlet
dr=rdirichlet(10000, p)###Posterior
for(j in 1:k) {
l[j]=round(quantile(dr[,j],0.025),4)###Lower Limit
u[j]=round(quantile(dr[,j],0.975),4)###Upper Limit
m[j]=round(mean(dr[,j]),4)###Point Estimate
diff[j]=u[j]-l[j]
}

data_frame(
method = "bmdu",
lower_limit = l,
upper_limit = m,
volume = prod(diff),
mean = m
)

} else {
warning('Size of the division should be less than the size of the input matrix')
data_frame(
method = "bmdu",
lower_limit = l,
upper_limit = m,
volume = prod(diff),
mean = m
)
}

}
46 changes: 46 additions & 0 deletions R/bmde.r
@@ -0,0 +1,46 @@
#' Bayesian Multinomial Dirichlet Model (Equal Prior)
#'
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
#'
#' @md
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
#' @param p equal value for the Dirichlet prior parameter - positive real number
#' @param seed random seed for reproducible results
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
#' @author Dr M Subbiah
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 1
#' scimp_bmde(y, z)
scimp_bmde <- function(x, p, seed=1492) {

k <- length(x)
n_r <- 10000
for(m in 1:k) {
if(x[m]<0) {warning('Arguments must be non-negative integers') }
}

set.seed(seed)

po <- x+p
dr <- rdirichlet(n_r,po)
a <- l <- u <- dif <- 0

for(j in 1:k) {
a[j] <- round(mean(dr[,j]), 4)
l[j] <- round(quantile(dr[,j], 0.025),4)
u[j] <- round(quantile(dr[,j], 0.975),4)
dif[j] <- u[j] - l[j]
}

data_frame(
method = "bmde",
lower_limit = l,
upper_limit = u,
volume = prod(dif),
mean = a
)

}
19 changes: 19 additions & 0 deletions R/bmdu.r
@@ -0,0 +1,19 @@
#' Bayesian Multinomial Dirichlet Model (Unequal Prior)
#'
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
#'
#' @md
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
#' @param d number of divisions required to split the prior vector of Dirichlet distribution to assign unequal values from U(0,1) and U(1,2)
#' @param seed random seed for reproducible results
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
#' @author Dr M Subbiah
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 2
#' scimp_bmdu(y, z)
scimp_bmdu <- function(x, d, seed=1492) {
return(BMDU(x, d, seed))
}
47 changes: 47 additions & 0 deletions R/fitz-scott.r
@@ -0,0 +1,47 @@
#' Fitzpatrick and Scott Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Fitzpatrick and Scott (1987)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Fitzpatrick, S. and Scott, A. (1987). Quick simultaneous confidence interval for multinomial proportions. Journal of American Statistical Association 82(399): 875-878.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_fs(y, z)
scimp_fs <- function(inpmat, alpha) {

k <- length(inpmat)
s <- sum(inpmat)
zval <- abs(qnorm(1 - (alpha/2)))
pi <- inpmat/s

fs_ll <- pi - (zval / (2 * sqrt(s)))
fs_ul <- pi + (zval / (2 * sqrt(s)))

adj_ll <- adj_ul <- 0

for (r in 1:length(inpmat)) {
if (fs_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- fs_ll[r]
if (fs_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- fs_ul[r]
}

ci_length <- adj_ul - adj_ll
volume <- round(prod(ci_length), 8)

data_frame(
method = "fs",
lower_limit = fs_ll,
upper_limit = fs_ul,
adj_ll = adj_ll,
adj_ul = adj_ul,
volume = volume
) -> ret

ret

}
47 changes: 47 additions & 0 deletions R/goodman.r
@@ -0,0 +1,47 @@
#' Goodman Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Goodman (1965)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Goodman, L.A. (1965). On Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics 7: 247-254.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_goodman(y, z)
scimp_goodman <- function(inpmat, alpha) {

k <- length(inpmat)
s <- sum(inpmat)
chi <- qchisq(1 - (alpha/k), df = 1)
pi <- inpmat/s

goodman_ul <- (chi + 2*inpmat + sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s))
goodman_ll <- (chi + 2*inpmat - sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s))

adj_ll <- adj_ul <- 0

for (r in 1:length(inpmat)) {
if (goodman_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- goodman_ll[r]
if (goodman_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- goodman_ul[r]
}

ci_length <- adj_ul - adj_ll
volume <- round(prod(ci_length), 8)

data_frame(
method = "goodman",
lower_limit = goodman_ll,
upper_limit = goodman_ul,
adj_ll = adj_ll,
adj_ul = adj_ul,
volume = volume
) -> ret

ret

}
18 changes: 15 additions & 3 deletions R/package.r
@@ -1,7 +1,19 @@
#' Tools to ...
#' Simultaneous Confidence Intervals for Multinomial Proportions
#'
#' Methods for obtaining simultaneous confidence intervals for multinomial proportions have
#' been proposed by many authors and the present study include a variety of widely
#' applicable procedures. Seven classical methods (Wilson, Quesenberry and Hurst, Goodman,
#' Wald with and without continuity correction, Fitzpatrick and Scott, Sison and Glaz)
#' and Bayesian Dirichlet models are included in the package. The advantage of MCMC pack
#' has been exploited to derive the Dirichlet posterior directly and this also helps in
#' handling the Dirichlet prior parameters. This package is prepared to have equal and
#' unequal values for the Dirichlet prior distribution that will provide better scope for
#' data analysis and associated sensitivity analysis.
#'
#' @name scimple
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr
#' @author Dr M.Subbiah [primary], Bob Rudis (bob@@rud.is) [tidy version]
#' @import tibble stats MCMCpack
#' @importFrom dplyr mutate select
#' @importFrom purrr map map_df
NULL

0 comments on commit 380b69d

Please sign in to comment.