Commit
version 1.2.4
marcellodo authored and cran-robot committed Jan 13, 2016
1 parent 25d3752 commit 8185a6a
Showing 20 changed files with 788 additions and 515 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,15 +1,15 @@
Package: StatMatch
Type: Package
Title: Statistical Matching
Version: 1.2.3
Date: 2015-01-28
Version: 1.2.4
Date: 2016-01-13
Author: Marcello D'Orazio
Maintainer: Marcello D'Orazio <madorazi@istat.it>
Depends: R (>= 2.7.0), proxy, clue, survey, RANN, lpSolve
Suggests: MASS, Hmisc
Description: Integration of two data sources referring to the same target population and sharing a number of common variables (aka data fusion). Some functions can also be used to impute missing values in data sets through hot deck imputation methods. Methods to perform statistical matching with data from complex sample surveys are also available.
License: GPL (>= 2)
Packaged: 2015-01-29 16:23:10 UTC; UTENTE
NeedsCompilation: no
Packaged: 2016-01-13 09:03:21 UTC; UTENTE
Repository: CRAN
Date/Publication: 2015-01-29 18:07:42
Date/Publication: 2016-01-13 13:54:07
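
As a quick illustration of the matching workflow the Description refers to, here is a minimal sketch (not part of the commit) that fuses a donor-only variable onto a recipient file with NND.hotdeck() and create.fused(). The two data frames and their variables (age, sex, income) are invented for the example.

library(StatMatch)

set.seed(3)
rec <- data.frame(age = round(runif(50, 18, 80)),
                  sex = factor(sample(c("M", "F"), 50, replace = TRUE)))
don <- data.frame(age = round(runif(80, 18, 80)),
                  sex = factor(sample(c("M", "F"), 80, replace = TRUE)),
                  income = round(rlnorm(80, meanlog = 7, sdlog = 0.5)))

## nearest-neighbour donor for each recipient: match on age within
## donation classes defined by sex (data are made up for illustration)
nnd <- NND.hotdeck(data.rec = rec, data.don = don,
                   match.vars = "age", don.class = "sex",
                   dist.fun = "Manhattan")

## attach the donors' income to the recipient records
fused <- create.fused(data.rec = rec, data.don = don,
                      mtc.ids = nnd$mtc.ids, z.vars = "income")
head(fused)
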
34 changes: 18 additions & 16 deletions MD5
@@ -1,11 +1,11 @@
4dc0c84c4be386fff3414f7dcc721d41 *DESCRIPTION
6b8eae1e7729573247b5561869415ec7 *NAMESPACE
ba233fd587cb6cba94d4865c6ab0f4ac *NEWS
bf898fdafb7c85897c400a9c103d1e99 *R/Fbwidths.by.x.R
5a25d8b618de1e94d3b2604a6b2fa18e *R/Frechet.bounds.cat.R
a79f162060899fd4960baa31d562ecd4 *DESCRIPTION
3b52596f6a8f3929b305b879271ed878 *NAMESPACE
bba93e7b0b2d970acdf0f037900d48f5 *NEWS
62bf417fe076625a0679e0c4940d9625 *R/Fbwidths.by.x.R
fdd76b66e9d4267ad479ea33882cc584 *R/Frechet.bounds.cat.2.R
e75519228cef04aaae11e563c9c91675 *R/NND.hotdeck.R
87f2a5149df358fdca93ba79cf9153c0 *R/RANDwNND.hotdeck.r
053f440cc50632e1d1338429423124d2 *R/comb.samples.R
87f2a5149df358fdca93ba79cf9153c0 *R/RANDwNND.hotdeck.R
39b227871e1960df068a5e3539091e5b *R/comb.samples.R
1e26a60b90775c9598037cddb8bd7dbf *R/comp.prop.R
9396068abb28c862c4574879b36b1845 *R/create.fused.R
f1b1eeff97c344cba58146a9e264b17e *R/fact2dummy.R
@@ -14,21 +14,22 @@ f1b1eeff97c344cba58146a9e264b17e *R/fact2dummy.R
67588f85e4361c767744277e9a89c633 *R/mahalanobis.dist.R
01ad43b31e3f870348a8ec9e6be7a607 *R/maximum.dist.R
c5c6f4bff82a516ed5c55dded9cc4489 *R/mixed.mtc.R
e67cbe52ae4305e1ea873fd6c7e0282a *R/pBayes.R
35475ec952695f9021ac36f58c30c69e *R/pw.assoc.R
c7fadea54abb8511cf9c17c97c9d8362 *R/rankNND_hotdeck.R
fc36cfa1f8d8fd953c24427f60eebac9 *build/vignette.rds
afe65c65bee50c4d98f942a115c8792b *build/vignette.rds
e324fe9c04c91f84845bc71d9bddc05e *data/samp.A.rda
5e4f59e4955cab46754444b8c07ad227 *data/samp.B.rda
b643d3bc6da0098b5080972f85e65555 *data/samp.C.rda
e3fb80e4c15929fdc60c1f140c9df895 *inst/doc/Statistical_Matching_with_StatMatch.R
f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *inst/doc/Statistical_Matching_with_StatMatch.Rnw
7d42f5878cb4f258ff5e64d57494541f *inst/doc/Statistical_Matching_with_StatMatch.pdf
e5478d8aee547e524fcd96e939622107 *man/Fbwidths.by.x.Rd
33fb9f3b482f5fd3f1284aebe3e173f0 *man/Frechet.bounds.cat.Rd
3e9d59eb953aa8f273fc24675a24d43a *inst/doc/Statistical_Matching_with_StatMatch.R
d86d64b343788ae7a39106f919d2e614 *inst/doc/Statistical_Matching_with_StatMatch.Rnw
7995bad2df674da03265e34b0bdbfc3f *inst/doc/Statistical_Matching_with_StatMatch.pdf
1abafa3ba2fd2f7041054089e92df8ca *man/Fbwidths.by.x.Rd
249b4447f2fbcd9ef7ac975f0f73babe *man/Frechet.bounds.cat.Rd
b3c7e4c79cda13bbd50c9aeb4e844313 *man/NND.hotdeck.Rd
ea5d75d4ad15ff04296ab26f46ad5c63 *man/RANDwNND.hotdeck.Rd
f458cf6f291bdc9e7684e5b7adf3b87f *man/StatMatch-package.Rd
4dd29d7330276d7b5f1fa6876ba61688 *man/comb.samples.Rd
8a962b6dc8bdc991327c94001e8b1e0a *man/comb.samples.Rd
d4e409e6af99a533b9ab07103d3ba1f7 *man/comp.prop.Rd
c65f897e1e12c0cb580bce8b6ccbeac2 *man/create.fused.Rd
4edfaac6036bc02732ec423381f1f97c *man/fact2dummy.Rd
@@ -37,9 +38,10 @@ fc02cd12885ba2fd96e3694c06032cd8 *man/harmonize.x.Rd
034fe85bbbb0151b0794dab5510abef9 *man/mahalanobis.dist.Rd
a963eb0f68b38f877157a8176a4931c6 *man/maximum.dist.Rd
4d98cb46543a87e9f0b41283486bf34f *man/mixed.mtc.Rd
3bc0329afe67f18213380eee75e566e3 *man/pw.assoc.Rd
7b00f9760dcd5759c8d00c7682dccba6 *man/pBayes.Rd
ff23c6880899ea4f08b68bf7cf8b8884 *man/pw.assoc.Rd
578253c8d5832bb53f37124b6ac70c6c *man/rankNND.hotdeck.Rd
c5b937de4c9e01611c17ca77bc1d080e *man/samp.A.Rd
a5ab43011b41de0702ed820aadde665e *man/samp.B.Rd
7138246146d10e37905fa723911005a7 *man/samp.C.Rd
f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *vignettes/Statistical_Matching_with_StatMatch.Rnw
d86d64b343788ae7a39106f919d2e614 *vignettes/Statistical_Matching_with_StatMatch.Rnw
5 changes: 5 additions & 0 deletions NAMESPACE
@@ -12,3 +12,8 @@ import(
RANN,
lpSolve
)
importFrom("stats", "as.formula", "chisq.test", "coefficients", "cor",
"cov2cor", "lm", "mahalanobis", "model.frame",
"model.matrix", "qchisq", "residuals", "rnorm", "runif",
"sd", "var", "weights", "xtabs", "ftable", "loglin")
importFrom("utils", "combn")
8 changes: 8 additions & 0 deletions NEWS
@@ -1,3 +1,11 @@
1.2.4 added the new function pBayes for applying the pseudo-Bayes estimator to sparse contingency tables

modified comb.samples to handle a continuous target variable (Y or Z)

Faster versions of Frechet.bounds.cat and Fbwidths.by.x.

Fbwidths.by.x now provides a richer output.

1.2.3 corrected a bug in RANDwNND.hotdeck. Thanks to Kirill Muller

1.2.2 added 3 data sets used in the function's help pages and in the vignette
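
The NEWS entry only names the new pBayes function; its arguments are not shown in this commit, so the sketch below illustrates the underlying pseudo-Bayes estimator by hand on a made-up sparse two-way table: observed cell proportions are shrunk towards an independence prior with a data-driven weight. Treat it as an illustration of the estimator, not of the function's interface.

set.seed(2)
tab <- table(sample(letters[1:6], 60, replace = TRUE),
             sample(1:5, 60, replace = TRUE))          # sparse 6 x 5 table, several zero cells

n     <- sum(tab)
p.hat <- tab / n                                       # observed cell proportions
gamma <- prop.table(margin.table(tab, 1)) %o%          # prior guess: independence
         prop.table(margin.table(tab, 2))              #   of the two margins
K.hat <- (1 - sum(p.hat^2)) / sum((gamma - p.hat)^2)   # data-driven prior weight
p.pB  <- (n * p.hat + K.hat * gamma) / (n + K.hat)     # pseudo-Bayes cell probabilities
round(n * p.pB, 2)                                     # smoothed counts: zero cells pulled off 0
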
121 changes: 80 additions & 41 deletions R/Fbwidths.by.x.R
@@ -1,75 +1,114 @@
'Fbwidths.by.x' <-
function(tab.x, tab.xy, tab.xz)
Fbwidths.by.x <-
function (tab.x, tab.xy, tab.xz, compress.sum=FALSE)
{
N <- sum(tab.xy) + sum(tab.xz)
prop.x <- prop.table(tab.x)
prop.xy <- prop.table(tab.xy)
prop.xz <- prop.table(tab.xz)

lab.x <- names(dimnames(tab.x))
if(all(nchar(lab.x)==0)) lab.x <- paste("x",1:length(lab.x), sep="")
if (all(nchar(lab.x) == 0))
lab.x <- paste("x", 1:length(lab.x), sep = "")
names(attr(tab.x, "dimnames")) <- lab.x

lab.xy <- names(dimnames(tab.xy))
if(all(nchar(lab.xy)==0)) lab.xy <- c(lab.x, "y")
if (all(nchar(lab.xy) == 0))
lab.xy <- c(lab.x, "y")
names(attr(tab.xy, "dimnames")) <- lab.xy

lab.y <- setdiff(lab.xy, lab.x)
p.y <- match(lab.y, lab.xy)

lab.xz <- names(dimnames(tab.xz))
if(all(nchar(lab.xz)==0)) lab.xz <- c(lab.x, "z")
if (all(nchar(lab.xz) == 0))
lab.xz <- c(lab.x, "z")
names(attr(tab.xz, "dimnames")) <- lab.xz

lab.z <- setdiff(lab.xz, lab.x)
p.z <- match(lab.z, lab.xz)

##
#
n.x <- length(lab.x)
appo.var <- as.list(lab.x)
for(k in 2:n.x){
for (k in 2:n.x) {
b <- combn(lab.x, k)
b <- data.frame(b, stringsAsFactors=FALSE)
b <- data.frame(b, stringsAsFactors = FALSE)
appo.var <- c(appo.var, as.list(b))
}

H <- length(appo.var)
out.rng <- as.list(as.numeric(H))
# av.rng <- matrix(NA, H, 4)
av.rng <- matrix(NA, H, 3)

for(h in 1:H){
av.rng <- matrix(NA, H, 8)
# av.rng <- matrix(NA, H, 9)
# all.H <- matrix(NA, H, 5)
# all.U <- matrix(NA, H, 2)

for (h in 1:H) {
lab <- appo.var[[h]]

p.x <- match(lab, lab.x)

xx <- margin.table(prop.x, p.x)
av.rng[h,1] <- length(xx)
av.rng[h,2] <- sum(xx==0)

p.xy <- match(c(lab,lab.y), lab.xy)
av.rng[h, 1] <- length(xx)
av.rng[h, 2] <- sum(xx == 0)
p.xy <- match(c(lab, lab.y), lab.xy)
xy <- margin.table(prop.xy, p.xy)

av.rng[h, 3] <- length(xy)
av.rng[h, 4] <- sum(xy == 0)
p.xz <- match(c(lab, lab.z), lab.xz)
xz <- margin.table(prop.xz, p.xz)
av.rng[h, 5] <- length(xz)
av.rng[h, 6] <- sum(xz == 0)

fb <- Frechet.bounds.cat(xx, xy, xz, print.f="tables")
fb <- Frechet.bounds.cat(xx, xy, xz, print.f = "tables")
appo <- data.frame(fb$low.cx)
out.rng[[h]] <- data.frame(appo[,1:2], lower=c(fb$low.cx), upper=c(fb$up.cx), width=c(fb$up.cx-fb$low.cx))
av.rng[h,3] <- mean( c(fb$up.cx-fb$low.cx))
# av.rng[h,4] <- fb$uncertainty["overall"]

out.rng[[h]] <- data.frame(appo[, 1:2], lower = c(fb$low.cx),
upper = c(fb$up.cx), width = c(fb$up.cx - fb$low.cx))

av.rng[h, 7] <- fb$uncertainty[2]
av.rng[h, 8] <- fb$uncertainty[2] / fb$uncertainty[1]
# av.rng[h, 9] <- fb$uncertainty[3]
# all.H[h, ] <- fb$H
# all.U[h, ] <- fb$U
}
lab.list <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
lab.list <- paste("|", lapply(appo.var, paste, collapse = "+"),
sep = "")
n.vars <- lapply(appo.var, length)
# av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2],
# av.width=av.rng[,3], ov.unc=av.rng[,4])
av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2],
av.width=av.rng[,3])
row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")

av.rng <- data.frame(x.vars = unlist(n.vars),
x.cells = av.rng[, 1], x.freq0 = av.rng[, 2],
xy.cells = av.rng[, 3], xy.freq0 = av.rng[, 4],
xz.cells = av.rng[, 5], xz.freq0 = av.rng[, 6],
av.width = av.rng[, 7], rel.av.width = av.rng[, 8])
# delta.CMS=av.rng[, 9])
row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse = "*"),
sep = "")
av.rng.0 <- c(x.vars=0, x.cells=NA, x.freq0=NA,
xy.cells = NA, xy.freq0 = NA,
xz.cells = NA, xz.freq0 = NA,
av.width = fb$uncertainty[1], rel.av.width = 1)
av.rng <- rbind(unconditioned=av.rng.0, av.rng)
# colnames(all.H) <- names(fb$H)
# colnames(all.U) <- names(fb$U)
# row.names(all.H) <- rownames(all.U) <- paste("|", lapply(appo.var, paste, collapse = "+"), sep = "")
aa <- n.x - av.rng$x.vars
ord.lab <- order(aa, av.rng$av.width, decreasing=TRUE)
# ord.all <- order(aa, av.rng$ov.unc, decreasing=TRUE)

out.rng[[(H+1)]] <- av.rng[ord.lab,]
# out.rng[[(H+2)]] <- av.rng[ord.all,]

ord.lab <- order(aa, av.rng$av.width, decreasing = TRUE)
av.rng <- av.rng[ord.lab, ]
if(compress.sum){
sp.av <- split(av.rng, av.rng$x.vars)
G <- length(sp.av)
sp.new <- as.list(G)
sp.new[[1]] <- sp.av[[1]]
sp.new[[2]] <- sp.av[[2]]
for(g in 3:G){
min.p <- min(sp.av[[(g-1)]][,"av.width"])
tst <- sp.av[[g]][,"av.width"] <= min.p
sp.new[[g]] <- sp.av[[g]][tst,]
}
av.rng <- do.call("rbind", sp.new)
}
out.rng[[(H + 1)]] <- av.rng
names(out.rng) <- c(lab.list, "sum.unc")
# out.rng$all.H <- all.H[ord.lab,]
# out.rng$all.U <- all.U[ord.lab,]

out.rng
}
}
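
A minimal usage sketch of the rewritten Fbwidths.by.x (not taken from the package examples): the three input tables are built from a single made-up data frame so that their X margins agree, and the new compress.sum argument is compared against the default. The variable names (sex, c.age, y, z) are invented for the illustration.

library(StatMatch)

set.seed(1)
pop <- data.frame(sex   = sample(c("M", "F"), 500, replace = TRUE),
                  c.age = sample(c("<35", "35-54", "55+"), 500, replace = TRUE),
                  y     = sample(c("low", "mid", "high"), 500, replace = TRUE),
                  z     = sample(1:4, 500, replace = TRUE))

tab.x  <- xtabs(~ sex + c.age, data = pop)        # X variables
tab.xy <- xtabs(~ sex + c.age + y, data = pop)    # X vs the target Y
tab.xz <- xtabs(~ sex + c.age + z, data = pop)    # X vs the target Z

## default: the "sum.unc" element now reports, for every combination of the
## X variables plus an "unconditioned" row, the cell counts, the empty cells
## and the average and relative average width of the Frechet bounds
out.all <- Fbwidths.by.x(tab.x, tab.xy, tab.xz)
out.all$sum.unc

## compress.sum = TRUE keeps, for each number of X variables, only the
## combinations whose average width does not exceed the smallest average
## width obtained with one variable less
out.small <- Fbwidths.by.x(tab.x, tab.xy, tab.xz, compress.sum = TRUE)
out.small$sum.unc
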
