Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
31 changed files
with
1,088 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^README\.Rmd$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,26 @@ | ||
Package: scimple | ||
Title: scimple TITLE | ||
Title: Tidy Simultaneous Confidence Intervals for Multinomial Proportions | ||
Version: 0.1.0 | ||
Encoding: UTF-8 | ||
Authors@R: c(person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is")) | ||
Authors@R: c( | ||
person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"), | ||
person("M", "Subbiah", role = c("aut"), comment = c("Original package author")) | ||
) | ||
Maintainer: Bob Rudis <bob@rud.is> | ||
Description: scimple DESCRIPTION | ||
Depends: R (>= 3.2.0) | ||
License: AGPL | ||
Description: Methods for obtaining simultaneous confidence intervals for multinomial | ||
proportions have been proposed by many authors and the present study include a variety | ||
of widely applicable procedures. Seven classical methods (Wilson, Quesenberry and | ||
Hurst, Goodman, Wald with and without continuity correction, Fitzpatrick and Scott, | ||
Sison and Glaz) and Bayesian Dirichlet models are included in the package. The | ||
advantage of MCMC pack has been exploited to derive the Dirichlet posterior directly | ||
and this also helps in handling the Dirichlet prior parameters. This package is | ||
prepared to have equal and unequal values for the Dirichlet prior distribution that | ||
will provide better scope for data analysis and associated sensitivity analysis. | ||
Depends: R (>= 3.2.0), MCMCpack | ||
License: GPL-2 | ||
URL: https://github.com/hrbrmstr/scimple | ||
BugReports: https://github.com/hrbrmstr/scimple/issues | ||
LazyData: true | ||
Suggests: testthat | ||
Imports: purrr | ||
Imports: dplyr, tibble, stats, purrr | ||
RoxygenNote: 6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(scimp_bmde) | ||
export(scimp_bmdu) | ||
export(scimp_fs) | ||
export(scimp_goodman) | ||
export(scimp_qh) | ||
export(scimp_sg) | ||
export(scimp_wald) | ||
export(scimp_waldcc) | ||
export(scimp_wilson) | ||
export(scimple_ci) | ||
import(MCMCpack) | ||
import(stats) | ||
import(tibble) | ||
importFrom(dplyr,mutate) | ||
importFrom(dplyr,select) | ||
importFrom(purrr,map) | ||
importFrom(purrr,map_df) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
SG <- function(x,alpha) { | ||
t1=proc.time() | ||
|
||
sgp=function(c) { | ||
s=sum(x) ##SUM(Cell_Counts) | ||
k=length(x) | ||
b= x-c | ||
a= x+c | ||
###FINDING FACTORIAL MOMENTS-TRUNCATED POISSON | ||
fm1=0 | ||
fm2=0 | ||
fm3=0 | ||
fm4=0 | ||
for (i in 1:k) | ||
{ | ||
fm1[i]=x[i]*(ppois(a[i]-1,x[i])-ppois(b[i]-2,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i])) | ||
fm2[i]=x[i]^2*(ppois(a[i]-2,x[i])-ppois(b[i]-3,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i])) | ||
fm3[i]=x[i]^3*(ppois(a[i]-3,x[i])-ppois(b[i]-4,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i])) | ||
fm4[i]=x[i]^4*(ppois(a[i]-4,x[i])-ppois(b[i]-5,x[i]))/(ppois(a[i],x[i])-ppois(b[i]-1,x[i])) | ||
} | ||
|
||
##FINDING CENTRAL MOMENTS-TRUNCATED POISSON | ||
m1=0 | ||
m2=0 | ||
m3=0 | ||
m4=0 | ||
m4t=0 | ||
for (i in 1:k) | ||
{ | ||
m1[i]=fm1[i] | ||
m2[i]=fm2[i]+fm1[i]-(fm1[i]*fm1[i]) | ||
m3[i]=fm3[i]+fm2[i]*(3-(3*fm1[i]))+(fm1[i]-(3*fm1[i]*fm1[i])+(2*fm1[i]^3)) | ||
m4[i]=fm4[i]+fm3[i]*(6-(4*fm1[i]))+fm2[i]*(7-(12*fm1[i])+(6*fm1[i]^2))+fm1[i]-(4*fm1[i]^2)+(6*fm1[i]^3)-(3*fm1[i]^4) | ||
m4t[i] = m4[i]-(3*m2[i]^2)#Temporary Variable for next step | ||
} | ||
s1=sum(m1) | ||
s2=sum(m2) | ||
s3=sum(m3) | ||
s4=sum(m4t) | ||
##FINDING GAMMAS ---> EDGEWORTH EXPANSION | ||
g1=s3/(s2^(3/2)) | ||
g2=s4/(s2^2) | ||
|
||
##FINDING CHEBYSHEV-HERMITE POLYNOMIALS ---> EDGEWORTH EXPANSION | ||
z=(s-s1)/sqrt(s2) | ||
z2=z^2 | ||
z3=z^3 | ||
z4=z^4 | ||
z6=z^6 | ||
poly=1+g1*(z3-(3*z))/6+g2*(z4-(6*z2)+3)/24+(g1^2)*(z6-(15*z4)+(45*z2)-15)/72 | ||
f=poly*exp(-z2/2)/sqrt(2*pi) | ||
|
||
##FINDING PROBABILITY FUNCTION BASED ON 'c' | ||
pc=0 | ||
for (i in 1:k) pc[i] <- ppois(a[i],x[i])-ppois(b[i]-1,x[i]) | ||
pcp = prod(pc) #PRODUCT OF pc THAT HAS k ELEMENTS | ||
pps = 1/dpois(s,s)#POISSON PROB FOR s WITH PARAMETER AS s | ||
rp=pps*pcp*f/sqrt(s2)##REQUIRED PROBABILITY | ||
rp | ||
} | ||
proc.time()-t1 | ||
t=proc.time() | ||
y=0 | ||
s=sum(x) | ||
M1=1 | ||
M2=s | ||
c=M1:M2 | ||
M=length(c) | ||
|
||
for (i in 1:M) y[i] <- round(sgp(c[i]), 4) | ||
|
||
j=1 | ||
vc=0 | ||
while(j<=M){ | ||
if (y[j]<1-alpha && 1-alpha < y[j+1]) | ||
vc=j else | ||
vc=vc | ||
j = j+1 | ||
} | ||
|
||
# vc##REQUIRED VALUE OF C | ||
delta <- ((1-alpha)-y[vc])/(y[vc+1]-y[vc]) | ||
|
||
##FINDING LIMITS | ||
sp <- x/s#SAMPLE PROPORTION | ||
LL <- round(sp-(vc/s),4)#LOWER LIMIT | ||
UL <- round(sp+(vc/s)+(2*delta/s),4)#UPPER LIMIT | ||
LLA <- ULA <- 0 | ||
for (r in 1:length(x)) { | ||
if ( LL [r]< 0) LLA[r] = 0 else LLA[r]=LL[r] | ||
if (UL[r] > 1) ULA[r] = 1 else ULA[r]=UL[r] | ||
} | ||
diA <- ULA-LLA##FIND LENGTH OF CIs | ||
VOL <- round(prod(diA),8)##PRODUCT OF LENGTH OF CIs | ||
|
||
data_frame( | ||
method = "sg", | ||
lower_limit = LL, | ||
upper_limit = UL, | ||
adj_ll = LLA, | ||
adj_ul = ULA, | ||
volume = VOL | ||
) | ||
|
||
} | ||
|
||
|
||
BMDU <- function(x, d, seed=1492) { | ||
|
||
set.seed(seed) | ||
|
||
k <- length(x) | ||
|
||
for(m in 1:k) { | ||
if(x[m]<0) { warning('Arguments must be non-negative integers') } | ||
} | ||
|
||
if(d>=1 && d<=k) { | ||
m=0 | ||
l=0 | ||
u=0 | ||
diff=0 | ||
s=sum(x) | ||
s1=floor(k/d) | ||
d1=runif(s1,0,1)###First half of the vector | ||
d2=runif(k-s1,1,2)###Second half of the vector | ||
a=c(d1,d2) | ||
p=x+a###Prior for Dirichlet | ||
dr=rdirichlet(10000, p)###Posterior | ||
for(j in 1:k) { | ||
l[j]=round(quantile(dr[,j],0.025),4)###Lower Limit | ||
u[j]=round(quantile(dr[,j],0.975),4)###Upper Limit | ||
m[j]=round(mean(dr[,j]),4)###Point Estimate | ||
diff[j]=u[j]-l[j] | ||
} | ||
|
||
data_frame( | ||
method = "bmdu", | ||
lower_limit = l, | ||
upper_limit = m, | ||
volume = prod(diff), | ||
mean = m | ||
) | ||
|
||
} else { | ||
warning('Size of the division should be less than the size of the input matrix') | ||
data_frame( | ||
method = "bmdu", | ||
lower_limit = l, | ||
upper_limit = m, | ||
volume = prod(diff), | ||
mean = m | ||
) | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' Bayesian Multinomial Dirichlet Model (Equal Prior) | ||
#' | ||
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups. | ||
#' | ||
#' @md | ||
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers | ||
#' @param p equal value for the Dirichlet prior parameter - positive real number | ||
#' @param seed random seed for reproducible results | ||
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean | ||
#' @author Dr M Subbiah | ||
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London. | ||
#' @export | ||
#' @examples | ||
#' y <- c(44, 55, 43, 32, 67, 78) | ||
#' z <- 1 | ||
#' scimp_bmde(y, z) | ||
scimp_bmde <- function(x, p, seed=1492) { | ||
|
||
k <- length(x) | ||
n_r <- 10000 | ||
for(m in 1:k) { | ||
if(x[m]<0) {warning('Arguments must be non-negative integers') } | ||
} | ||
|
||
set.seed(seed) | ||
|
||
po <- x+p | ||
dr <- rdirichlet(n_r,po) | ||
a <- l <- u <- dif <- 0 | ||
|
||
for(j in 1:k) { | ||
a[j] <- round(mean(dr[,j]), 4) | ||
l[j] <- round(quantile(dr[,j], 0.025),4) | ||
u[j] <- round(quantile(dr[,j], 0.975),4) | ||
dif[j] <- u[j] - l[j] | ||
} | ||
|
||
data_frame( | ||
method = "bmde", | ||
lower_limit = l, | ||
upper_limit = u, | ||
volume = prod(dif), | ||
mean = a | ||
) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#' Bayesian Multinomial Dirichlet Model (Unequal Prior) | ||
#' | ||
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups. | ||
#' | ||
#' @md | ||
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers | ||
#' @param d number of divisions required to split the prior vector of Dirichlet distribution to assign unequal values from U(0,1) and U(1,2) | ||
#' @param seed random seed for reproducible results | ||
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean | ||
#' @author Dr M Subbiah | ||
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London. | ||
#' @export | ||
#' @examples | ||
#' y <- c(44, 55, 43, 32, 67, 78) | ||
#' z <- 2 | ||
#' scimp_bmdu(y, z) | ||
scimp_bmdu <- function(x, d, seed=1492) { | ||
return(BMDU(x, d, seed)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#' Fitzpatrick and Scott Confidence Interval | ||
#' | ||
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Fitzpatrick and Scott (1987) | ||
#' | ||
#' @md | ||
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data | ||
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution | ||
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals | ||
#' @author Dr M Subbiah | ||
#' @references Fitzpatrick, S. and Scott, A. (1987). Quick simultaneous confidence interval for multinomial proportions. Journal of American Statistical Association 82(399): 875-878. | ||
#' @export | ||
#' @examples | ||
#' y <- c(44, 55, 43, 32, 67, 78) | ||
#' z <- 0.05 | ||
#' scimp_fs(y, z) | ||
scimp_fs <- function(inpmat, alpha) { | ||
|
||
k <- length(inpmat) | ||
s <- sum(inpmat) | ||
zval <- abs(qnorm(1 - (alpha/2))) | ||
pi <- inpmat/s | ||
|
||
fs_ll <- pi - (zval / (2 * sqrt(s))) | ||
fs_ul <- pi + (zval / (2 * sqrt(s))) | ||
|
||
adj_ll <- adj_ul <- 0 | ||
|
||
for (r in 1:length(inpmat)) { | ||
if (fs_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- fs_ll[r] | ||
if (fs_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- fs_ul[r] | ||
} | ||
|
||
ci_length <- adj_ul - adj_ll | ||
volume <- round(prod(ci_length), 8) | ||
|
||
data_frame( | ||
method = "fs", | ||
lower_limit = fs_ll, | ||
upper_limit = fs_ul, | ||
adj_ll = adj_ll, | ||
adj_ul = adj_ul, | ||
volume = volume | ||
) -> ret | ||
|
||
ret | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#' Goodman Confidence Interval | ||
#' | ||
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Goodman (1965) | ||
#' | ||
#' @md | ||
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data | ||
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution | ||
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals | ||
#' @author Dr M Subbiah | ||
#' @references Goodman, L.A. (1965). On Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics 7: 247-254. | ||
#' @export | ||
#' @examples | ||
#' y <- c(44, 55, 43, 32, 67, 78) | ||
#' z <- 0.05 | ||
#' scimp_goodman(y, z) | ||
scimp_goodman <- function(inpmat, alpha) { | ||
|
||
k <- length(inpmat) | ||
s <- sum(inpmat) | ||
chi <- qchisq(1 - (alpha/k), df = 1) | ||
pi <- inpmat/s | ||
|
||
goodman_ul <- (chi + 2*inpmat + sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s)) | ||
goodman_ll <- (chi + 2*inpmat - sqrt(chi*chi + 4*inpmat*chi*(1 - pi)))/(2*(chi+s)) | ||
|
||
adj_ll <- adj_ul <- 0 | ||
|
||
for (r in 1:length(inpmat)) { | ||
if (goodman_ll[r] < 0) adj_ll[r] <- 0 else adj_ll[r] <- goodman_ll[r] | ||
if (goodman_ul[r] > 1) adj_ul[r] <- 1 else adj_ul[r] <- goodman_ul[r] | ||
} | ||
|
||
ci_length <- adj_ul - adj_ll | ||
volume <- round(prod(ci_length), 8) | ||
|
||
data_frame( | ||
method = "goodman", | ||
lower_limit = goodman_ll, | ||
upper_limit = goodman_ul, | ||
adj_ll = adj_ll, | ||
adj_ul = adj_ul, | ||
volume = volume | ||
) -> ret | ||
|
||
ret | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,19 @@ | ||
#' Tools to ... | ||
#' Simultaneous Confidence Intervals for Multinomial Proportions | ||
#' | ||
#' Methods for obtaining simultaneous confidence intervals for multinomial proportions have | ||
#' been proposed by many authors and the present study include a variety of widely | ||
#' applicable procedures. Seven classical methods (Wilson, Quesenberry and Hurst, Goodman, | ||
#' Wald with and without continuity correction, Fitzpatrick and Scott, Sison and Glaz) | ||
#' and Bayesian Dirichlet models are included in the package. The advantage of MCMC pack | ||
#' has been exploited to derive the Dirichlet posterior directly and this also helps in | ||
#' handling the Dirichlet prior parameters. This package is prepared to have equal and | ||
#' unequal values for the Dirichlet prior distribution that will provide better scope for | ||
#' data analysis and associated sensitivity analysis. | ||
#' | ||
#' @name scimple | ||
#' @docType package | ||
#' @author Bob Rudis (bob@@rud.is) | ||
#' @import purrr | ||
#' @author Dr M.Subbiah [primary], Bob Rudis (bob@@rud.is) [tidy version] | ||
#' @import tibble stats MCMCpack | ||
#' @importFrom dplyr mutate select | ||
#' @importFrom purrr map map_df | ||
NULL |
Oops, something went wrong.