version 1.2.4

cran · Jan 13, 2016 · 8185a6a · 8185a6a
1 parent 25d3752
commit 8185a6a
Show file tree

Hide file tree

Showing 20 changed files with 788 additions and 515 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,15 @@
 Package: StatMatch
 Type: Package
 Title: Statistical Matching
-Version: 1.2.3
-Date: 2015-01-28
+Version: 1.2.4
+Date: 2016-01-13
 Author: Marcello D'Orazio
 Maintainer: Marcello D'Orazio <madorazi@istat.it>
 Depends: R (>= 2.7.0), proxy, clue, survey, RANN, lpSolve
 Suggests: MASS, Hmisc
 Description: Integration of two data sources referred to the same target population which share a number of common variables (aka data fusion). Some functions can also be used to impute missing values in data sets through hot deck imputation methods. Methods to perform statistical matching when dealing with data from complex sample surveys are available too.
 License: GPL (>= 2)
-Packaged: 2015-01-29 16:23:10 UTC; UTENTE
 NeedsCompilation: no
+Packaged: 2016-01-13 09:03:21 UTC; UTENTE
 Repository: CRAN
-Date/Publication: 2015-01-29 18:07:42
+Date/Publication: 2016-01-13 13:54:07
diff --git a/MD5 b/MD5
@@ -1,11 +1,11 @@
-4dc0c84c4be386fff3414f7dcc721d41 *DESCRIPTION
-6b8eae1e7729573247b5561869415ec7 *NAMESPACE
-ba233fd587cb6cba94d4865c6ab0f4ac *NEWS
-bf898fdafb7c85897c400a9c103d1e99 *R/Fbwidths.by.x.R
-5a25d8b618de1e94d3b2604a6b2fa18e *R/Frechet.bounds.cat.R
+a79f162060899fd4960baa31d562ecd4 *DESCRIPTION
+3b52596f6a8f3929b305b879271ed878 *NAMESPACE
+bba93e7b0b2d970acdf0f037900d48f5 *NEWS
+62bf417fe076625a0679e0c4940d9625 *R/Fbwidths.by.x.R
+fdd76b66e9d4267ad479ea33882cc584 *R/Frechet.bounds.cat.2.R
 e75519228cef04aaae11e563c9c91675 *R/NND.hotdeck.R
-87f2a5149df358fdca93ba79cf9153c0 *R/RANDwNND.hotdeck.r
-053f440cc50632e1d1338429423124d2 *R/comb.samples.R
+87f2a5149df358fdca93ba79cf9153c0 *R/RANDwNND.hotdeck.R
+39b227871e1960df068a5e3539091e5b *R/comb.samples.R
 1e26a60b90775c9598037cddb8bd7dbf *R/comp.prop.R
 9396068abb28c862c4574879b36b1845 *R/create.fused.R
 f1b1eeff97c344cba58146a9e264b17e *R/fact2dummy.R
@@ -14,21 +14,22 @@ f1b1eeff97c344cba58146a9e264b17e *R/fact2dummy.R
 67588f85e4361c767744277e9a89c633 *R/mahalanobis.dist.R
 01ad43b31e3f870348a8ec9e6be7a607 *R/maximum.dist.R
 c5c6f4bff82a516ed5c55dded9cc4489 *R/mixed.mtc.R
+e67cbe52ae4305e1ea873fd6c7e0282a *R/pBayes.R
 35475ec952695f9021ac36f58c30c69e *R/pw.assoc.R
 c7fadea54abb8511cf9c17c97c9d8362 *R/rankNND_hotdeck.R
-fc36cfa1f8d8fd953c24427f60eebac9 *build/vignette.rds
+afe65c65bee50c4d98f942a115c8792b *build/vignette.rds
 e324fe9c04c91f84845bc71d9bddc05e *data/samp.A.rda
 5e4f59e4955cab46754444b8c07ad227 *data/samp.B.rda
 b643d3bc6da0098b5080972f85e65555 *data/samp.C.rda
-e3fb80e4c15929fdc60c1f140c9df895 *inst/doc/Statistical_Matching_with_StatMatch.R
-f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *inst/doc/Statistical_Matching_with_StatMatch.Rnw
-7d42f5878cb4f258ff5e64d57494541f *inst/doc/Statistical_Matching_with_StatMatch.pdf
-e5478d8aee547e524fcd96e939622107 *man/Fbwidths.by.x.Rd
-33fb9f3b482f5fd3f1284aebe3e173f0 *man/Frechet.bounds.cat.Rd
+3e9d59eb953aa8f273fc24675a24d43a *inst/doc/Statistical_Matching_with_StatMatch.R
+d86d64b343788ae7a39106f919d2e614 *inst/doc/Statistical_Matching_with_StatMatch.Rnw
+7995bad2df674da03265e34b0bdbfc3f *inst/doc/Statistical_Matching_with_StatMatch.pdf
+1abafa3ba2fd2f7041054089e92df8ca *man/Fbwidths.by.x.Rd
+249b4447f2fbcd9ef7ac975f0f73babe *man/Frechet.bounds.cat.Rd
 b3c7e4c79cda13bbd50c9aeb4e844313 *man/NND.hotdeck.Rd
 ea5d75d4ad15ff04296ab26f46ad5c63 *man/RANDwNND.hotdeck.Rd
 f458cf6f291bdc9e7684e5b7adf3b87f *man/StatMatch-package.Rd
-4dd29d7330276d7b5f1fa6876ba61688 *man/comb.samples.Rd
+8a962b6dc8bdc991327c94001e8b1e0a *man/comb.samples.Rd
 d4e409e6af99a533b9ab07103d3ba1f7 *man/comp.prop.Rd
 c65f897e1e12c0cb580bce8b6ccbeac2 *man/create.fused.Rd
 4edfaac6036bc02732ec423381f1f97c *man/fact2dummy.Rd
@@ -37,9 +38,10 @@ fc02cd12885ba2fd96e3694c06032cd8 *man/harmonize.x.Rd
 034fe85bbbb0151b0794dab5510abef9 *man/mahalanobis.dist.Rd
 a963eb0f68b38f877157a8176a4931c6 *man/maximum.dist.Rd
 4d98cb46543a87e9f0b41283486bf34f *man/mixed.mtc.Rd
-3bc0329afe67f18213380eee75e566e3 *man/pw.assoc.Rd
+7b00f9760dcd5759c8d00c7682dccba6 *man/pBayes.Rd
+ff23c6880899ea4f08b68bf7cf8b8884 *man/pw.assoc.Rd
 578253c8d5832bb53f37124b6ac70c6c *man/rankNND.hotdeck.Rd
 c5b937de4c9e01611c17ca77bc1d080e *man/samp.A.Rd
 a5ab43011b41de0702ed820aadde665e *man/samp.B.Rd
 7138246146d10e37905fa723911005a7 *man/samp.C.Rd
-f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *vignettes/Statistical_Matching_with_StatMatch.Rnw
+d86d64b343788ae7a39106f919d2e614 *vignettes/Statistical_Matching_with_StatMatch.Rnw
diff --git a/NAMESPACE b/NAMESPACE
@@ -12,3 +12,8 @@ import(
   RANN,
   lpSolve
 )
+ importFrom("stats", "as.formula", "chisq.test", "coefficients", "cor",
+     "cov2cor", "lm", "mahalanobis", "model.frame",
+     "model.matrix", "qchisq", "residuals", "rnorm", "runif",
+     "sd", "var", "weights", "xtabs", "ftable", "loglin")
+ importFrom("utils", "combn")
diff --git a/NEWS b/NEWS
@@ -1,3 +1,11 @@
+1.2.4 added the new function pBayes for applying the pseudo-Bayes estimator to sparse contingency tables
+
+		modified comb.samples to handle a continuous target variable (Y or Z)
+
+		Faster versions of Frechet.bounds.cat and Fbwidths.by.x.
+
+		Fbwidths.by.x now provides a richer output.
+
 1.2.3 corrected a bug in RANDwNND.hotdeck. Thanks to Kirill Muller
 
 1.2.2 added 3 data sets used in the function's help pages and in the vignette

diff --git a/R/Fbwidths.by.x.R b/R/Fbwidths.by.x.R
@@ -1,75 +1,114 @@
-'Fbwidths.by.x' <-
-function(tab.x, tab.xy, tab.xz)
+Fbwidths.by.x <-
+function (tab.x, tab.xy, tab.xz, compress.sum=FALSE) 
 {
+    N <- sum(tab.xy) + sum(tab.xz)
     prop.x <- prop.table(tab.x)
     prop.xy <- prop.table(tab.xy)
     prop.xz <- prop.table(tab.xz)
-
+    
     lab.x <- names(dimnames(tab.x))
-    if(all(nchar(lab.x)==0)) lab.x <- paste("x",1:length(lab.x), sep="")
+    if (all(nchar(lab.x) == 0)) 
+        lab.x <- paste("x", 1:length(lab.x), sep = "")
     names(attr(tab.x, "dimnames")) <- lab.x
-
+    
     lab.xy <- names(dimnames(tab.xy))
-    if(all(nchar(lab.xy)==0)) lab.xy <- c(lab.x, "y")
+    if (all(nchar(lab.xy) == 0)) 
+        lab.xy <- c(lab.x, "y")
     names(attr(tab.xy, "dimnames")) <- lab.xy
+
     lab.y <- setdiff(lab.xy, lab.x)
     p.y <- match(lab.y, lab.xy)
-
+    
     lab.xz <- names(dimnames(tab.xz))
-    if(all(nchar(lab.xz)==0)) lab.xz <- c(lab.x, "z")
+    if (all(nchar(lab.xz) == 0)) 
+        lab.xz <- c(lab.x, "z")
     names(attr(tab.xz, "dimnames")) <- lab.xz
+
     lab.z <- setdiff(lab.xz, lab.x)
     p.z <- match(lab.z, lab.xz)
-
-##
+#    
     n.x <- length(lab.x)
     appo.var <- as.list(lab.x)
-    for(k in 2:n.x){
+    for (k in 2:n.x) {
         b <- combn(lab.x, k)
-        b <- data.frame(b, stringsAsFactors=FALSE)
+        b <- data.frame(b, stringsAsFactors = FALSE)
         appo.var <- c(appo.var, as.list(b))
     }
-
+    
     H <- length(appo.var)
     out.rng <- as.list(as.numeric(H))
-#    av.rng <- matrix(NA, H, 4)
-    av.rng <- matrix(NA, H, 3)
-
-    for(h in 1:H){
+    av.rng <- matrix(NA, H, 8)
+#    av.rng <- matrix(NA, H, 9)
+#    all.H <- matrix(NA, H, 5)
+#    all.U <- matrix(NA, H, 2)
+
+    for (h in 1:H) {
         lab <- appo.var[[h]]
+
         p.x <- match(lab, lab.x)
+
         xx <- margin.table(prop.x, p.x)
-        av.rng[h,1] <- length(xx)
-        av.rng[h,2] <- sum(xx==0)
-
-        p.xy <- match(c(lab,lab.y), lab.xy)
+        av.rng[h, 1] <- length(xx)
+        av.rng[h, 2] <- sum(xx == 0)
+        p.xy <- match(c(lab, lab.y), lab.xy)
         xy <- margin.table(prop.xy, p.xy)
-
+        av.rng[h, 3] <- length(xy)
+        av.rng[h, 4] <- sum(xy == 0)
         p.xz <- match(c(lab, lab.z), lab.xz)
         xz <- margin.table(prop.xz, p.xz)
+        av.rng[h, 5] <- length(xz)
+        av.rng[h, 6] <- sum(xz == 0)
 
-        fb <- Frechet.bounds.cat(xx, xy, xz, print.f="tables")
+        fb <- Frechet.bounds.cat(xx, xy, xz, print.f = "tables")
         appo <- data.frame(fb$low.cx)
-        out.rng[[h]] <- data.frame(appo[,1:2], lower=c(fb$low.cx), upper=c(fb$up.cx), width=c(fb$up.cx-fb$low.cx))
-        av.rng[h,3] <- mean( c(fb$up.cx-fb$low.cx))
-#        av.rng[h,4] <- fb$uncertainty["overall"]
-
+        out.rng[[h]] <- data.frame(appo[, 1:2], lower = c(fb$low.cx), 
+                                   upper = c(fb$up.cx), width = c(fb$up.cx - fb$low.cx))
+
+        av.rng[h, 7] <- fb$uncertainty[2]
+        av.rng[h, 8] <- fb$uncertainty[2] / fb$uncertainty[1]
+#        av.rng[h, 9] <- fb$uncertainty[3]
+#        all.H[h, ] <- fb$H
+#        all.U[h, ] <- fb$U
     }
-    lab.list <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
+    lab.list <- paste("|", lapply(appo.var, paste, collapse = "+"), 
+                      sep = "")
     n.vars <- lapply(appo.var, length)
-#    av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2], 
-#                         av.width=av.rng[,3], ov.unc=av.rng[,4])
-    av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2], 
-                          av.width=av.rng[,3])
-row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
-
+    av.rng <- data.frame(x.vars = unlist(n.vars), 
+                         x.cells = av.rng[, 1], x.freq0 = av.rng[, 2], 
+                         xy.cells = av.rng[, 3], xy.freq0 = av.rng[, 4],
+                         xz.cells = av.rng[, 5], xz.freq0 = av.rng[, 6],
+                         av.width = av.rng[, 7], rel.av.width = av.rng[, 8])
+#                         delta.CMS=av.rng[, 9])
+    row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse = "*"), 
+                               sep = "")
+    av.rng.0 <- c(x.vars=0, x.cells=NA, x.freq0=NA, 
+                  xy.cells = NA, xy.freq0 = NA,
+                  xz.cells = NA, xz.freq0 = NA,
+                  av.width = fb$uncertainty[1], rel.av.width = 1)
+    av.rng <- rbind(unconditioned=av.rng.0, av.rng)
+#    colnames(all.H) <- names(fb$H)
+#    colnames(all.U) <- names(fb$U)
+#    row.names(all.H) <- rownames(all.U) <- paste("|", lapply(appo.var, paste, collapse = "+"), sep = "")
     aa <- n.x - av.rng$x.vars
-    ord.lab <- order(aa, av.rng$av.width, decreasing=TRUE)
-#    ord.all <- order(aa, av.rng$ov.unc, decreasing=TRUE)
-
-    out.rng[[(H+1)]] <- av.rng[ord.lab,]
-#    out.rng[[(H+2)]] <- av.rng[ord.all,]
-
+    ord.lab <- order(aa, av.rng$av.width, decreasing = TRUE)
+    av.rng <- av.rng[ord.lab, ]
+    if(compress.sum){
+        sp.av <- split(av.rng, av.rng$x.vars)
+        G <- length(sp.av)
+        sp.new <- as.list(G)
+        sp.new[[1]] <- sp.av[[1]]
+        sp.new[[2]] <- sp.av[[2]]
+        for(g in 3:G){
+            min.p <- min(sp.av[[(g-1)]][,"av.width"])
+            tst <- sp.av[[g]][,"av.width"] <= min.p
+            sp.new[[g]] <- sp.av[[g]][tst,]
+        }
+        av.rng <- do.call("rbind", sp.new)
+    }
+    out.rng[[(H + 1)]] <- av.rng
     names(out.rng) <- c(lab.list, "sum.unc")
+#    out.rng$all.H <- all.H[ord.lab,]
+#    out.rng$all.U <- all.U[ord.lab,]
+
     out.rng
-}
+}