version 1.2.3

cran · Jan 29, 2015 · 25d3752 · 25d3752
1 parent 4df1bb9
commit 25d3752
Show file tree

Hide file tree

Showing 20 changed files with 114 additions and 119 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,15 @@
 Package: StatMatch
 Type: Package
-Title: Statistical Matching (aka data fusion)
-Version: 1.2.2
-Date: 2014-06-19
+Title: Statistical Matching
+Version: 1.2.3
+Date: 2015-01-28
 Author: Marcello D'Orazio
 Maintainer: Marcello D'Orazio <madorazi@istat.it>
-Depends: R (>= 2.7.0), proxy, clue, survey, RANN
-Suggests: lpSolve, MASS, Hmisc
-Description: The package StatMatch provides some R functions to perform statistical matching (aka data fusion), i.e. the integration of two data sources referred to the same target population which share a number of common variables. Some functions can also be used to impute missing values in data sets through hot deck imputation methods. Methods to perform statistical matching when dealing  with data from complex sample surveys are available too.
+Depends: R (>= 2.7.0), proxy, clue, survey, RANN, lpSolve
+Suggests: MASS, Hmisc
+Description: Integration of two data sources that refer to the same target population and share a number of common variables (aka data fusion). Some functions can also be used to impute missing values in data sets through hot deck imputation methods. Methods to perform statistical matching when dealing with data from complex sample surveys are available too.
 License: GPL (>= 2)
-Packaged: 2014-06-19 06:00:30 UTC; UTENTE
+Packaged: 2015-01-29 16:23:10 UTC; UTENTE
 NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2014-06-19 08:29:26
+Date/Publication: 2015-01-29 18:07:42
diff --git a/MD5 b/MD5
@@ -1,10 +1,10 @@
-d07a87fe57e9f83177f25ac5eb89607b *DESCRIPTION
-25d05cbb2bb42f3a9a12101e3619edb0 *NAMESPACE
-43cb604202a98d4dc856d5b7c27e616b *NEWS
-a45307ced0376741735ee29a4d721a70 *R/Fbwidths.by.x.R
-c0f2d5c18a9cd47106d0d15371a09d42 *R/Frechet.bounds.cat.R
-a28cd116847c73c09cae8e8cb767fc78 *R/NND.hotdeck.R
-eefb4ce6e1e1ed34609fe264d1c9a859 *R/RANDwNND.hotdeck.r
+4dc0c84c4be386fff3414f7dcc721d41 *DESCRIPTION
+6b8eae1e7729573247b5561869415ec7 *NAMESPACE
+ba233fd587cb6cba94d4865c6ab0f4ac *NEWS
+bf898fdafb7c85897c400a9c103d1e99 *R/Fbwidths.by.x.R
+5a25d8b618de1e94d3b2604a6b2fa18e *R/Frechet.bounds.cat.R
+e75519228cef04aaae11e563c9c91675 *R/NND.hotdeck.R
+87f2a5149df358fdca93ba79cf9153c0 *R/RANDwNND.hotdeck.r
 053f440cc50632e1d1338429423124d2 *R/comb.samples.R
 1e26a60b90775c9598037cddb8bd7dbf *R/comp.prop.R
 9396068abb28c862c4574879b36b1845 *R/create.fused.R
@@ -13,21 +13,21 @@ f1b1eeff97c344cba58146a9e264b17e *R/fact2dummy.R
 5256c8a2a230ab4efddb2704dd24a07d *R/harmonize.x.R
 67588f85e4361c767744277e9a89c633 *R/mahalanobis.dist.R
 01ad43b31e3f870348a8ec9e6be7a607 *R/maximum.dist.R
-efc443f211ce6109ae7388886bea0ec3 *R/mixed.mtc.R
+c5c6f4bff82a516ed5c55dded9cc4489 *R/mixed.mtc.R
 35475ec952695f9021ac36f58c30c69e *R/pw.assoc.R
-5c677843ec0fc7d8d111035e6365cc95 *R/rankNND_hotdeck.R
-b4dc941125b695a27b1704258308b48f *build/vignette.rds
+c7fadea54abb8511cf9c17c97c9d8362 *R/rankNND_hotdeck.R
+fc36cfa1f8d8fd953c24427f60eebac9 *build/vignette.rds
 e324fe9c04c91f84845bc71d9bddc05e *data/samp.A.rda
 5e4f59e4955cab46754444b8c07ad227 *data/samp.B.rda
 b643d3bc6da0098b5080972f85e65555 *data/samp.C.rda
-4054b9a792d09f6ff65840aebe96a9cd *inst/doc/Statistical_Matching_with_StatMatch.R
-07822999b9f0c9e5d607e032fc91568a *inst/doc/Statistical_Matching_with_StatMatch.Rnw
-609a0d70d6c7ef684e69574228c9fba9 *inst/doc/Statistical_Matching_with_StatMatch.pdf
-1075b48f315be1c2907d30085eaf7c85 *man/Fbwidths.by.x.Rd
-d07fb9d5ee14072e0dd5f10c3c54d011 *man/Frechet.bounds.cat.Rd
-0f0337cc5a8231164bbdac331d5c9a4e *man/NND.hotdeck.Rd
+e3fb80e4c15929fdc60c1f140c9df895 *inst/doc/Statistical_Matching_with_StatMatch.R
+f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *inst/doc/Statistical_Matching_with_StatMatch.Rnw
+7d42f5878cb4f258ff5e64d57494541f *inst/doc/Statistical_Matching_with_StatMatch.pdf
+e5478d8aee547e524fcd96e939622107 *man/Fbwidths.by.x.Rd
+33fb9f3b482f5fd3f1284aebe3e173f0 *man/Frechet.bounds.cat.Rd
+b3c7e4c79cda13bbd50c9aeb4e844313 *man/NND.hotdeck.Rd
 ea5d75d4ad15ff04296ab26f46ad5c63 *man/RANDwNND.hotdeck.Rd
-cf558873a185433e12b7d51900b0513e *man/StatMatch-package.Rd
+f458cf6f291bdc9e7684e5b7adf3b87f *man/StatMatch-package.Rd
 4dd29d7330276d7b5f1fa6876ba61688 *man/comb.samples.Rd
 d4e409e6af99a533b9ab07103d3ba1f7 *man/comp.prop.Rd
 c65f897e1e12c0cb580bce8b6ccbeac2 *man/create.fused.Rd
@@ -36,10 +36,10 @@ ff8061a196834378ecc03a5540bb1332 *man/gower.dist.Rd
 fc02cd12885ba2fd96e3694c06032cd8 *man/harmonize.x.Rd
 034fe85bbbb0151b0794dab5510abef9 *man/mahalanobis.dist.Rd
 a963eb0f68b38f877157a8176a4931c6 *man/maximum.dist.Rd
-0483f2bb6635f118ce8a5677973d7bc5 *man/mixed.mtc.Rd
+4d98cb46543a87e9f0b41283486bf34f *man/mixed.mtc.Rd
 3bc0329afe67f18213380eee75e566e3 *man/pw.assoc.Rd
 578253c8d5832bb53f37124b6ac70c6c *man/rankNND.hotdeck.Rd
 c5b937de4c9e01611c17ca77bc1d080e *man/samp.A.Rd
 a5ab43011b41de0702ed820aadde665e *man/samp.B.Rd
 7138246146d10e37905fa723911005a7 *man/samp.C.Rd
-07822999b9f0c9e5d607e032fc91568a *vignettes/Statistical_Matching_with_StatMatch.Rnw
+f9bff2e4d5a00bd0f5d8c8cb1d9e58cc *vignettes/Statistical_Matching_with_StatMatch.Rnw
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,13 +1,14 @@
-# Default NAMESPACE created by R
 # Remove the previous line if you edit this file
 
 # Export all names
 exportPattern(".")
 
 # Import all packages listed as Imports or Depends
+#importFrom("lpSolve", lp.assign, lp.transport)
 import(
   proxy,
   clue,
   survey,
-  RANN
+  RANN,
+  lpSolve
 )
diff --git a/NEWS b/NEWS
@@ -1,3 +1,5 @@
+1.2.3 corrected a bug in RANDwNND.hotdeck: with dist.fun="RANN" the donor and recipient data were swapped in the call to nn2(). Thanks to Kirill Muller.
+
 1.2.2 added 3 data sets used in the function's help pages and in the vignette
 
 		modified the RANDwNND.hotdeck function to identify the subset of the donors by

diff --git a/R/Fbwidths.by.x.R b/R/Fbwidths.by.x.R
@@ -32,7 +32,9 @@ function(tab.x, tab.xy, tab.xz)
 
     H <- length(appo.var)
     out.rng <- as.list(as.numeric(H))
-    av.rng <- matrix(NA, H, 4)
+#    av.rng <- matrix(NA, H, 4)
+    av.rng <- matrix(NA, H, 3)
+
     for(h in 1:H){
         lab <- appo.var[[h]]
         p.x <- match(lab, lab.x)
@@ -50,17 +52,19 @@ function(tab.x, tab.xy, tab.xz)
         appo <- data.frame(fb$low.cx)
         out.rng[[h]] <- data.frame(appo[,1:2], lower=c(fb$low.cx), upper=c(fb$up.cx), width=c(fb$up.cx-fb$low.cx))
         av.rng[h,3] <- mean( c(fb$up.cx-fb$low.cx))
-        av.rng[h,4] <- fb$uncertainty["overall"]
+#        av.rng[h,4] <- fb$uncertainty["overall"]
 
     }
     lab.list <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
     n.vars <- lapply(appo.var, length)
+#    av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2], 
+#                         av.width=av.rng[,3], ov.unc=av.rng[,4])
     av.rng <- data.frame(x.vars=unlist(n.vars), x.cells=av.rng[,1], x.freq0=av.rng[,2], 
-                         av.width=av.rng[,3], ov.unc=av.rng[,4])
-    row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
+                          av.width=av.rng[,3])
+row.names(av.rng) <- paste("|", lapply(appo.var, paste, collapse="+"), sep="")
 
     aa <- n.x - av.rng$x.vars
-    ord.lab <- order(aa, av.rng$ov.unc, decreasing=TRUE)
+    ord.lab <- order(aa, av.rng$av.width, decreasing=TRUE)
 #    ord.all <- order(aa, av.rng$ov.unc, decreasing=TRUE)
 
     out.rng[[(H+1)]] <- av.rng[ord.lab,]

diff --git a/R/Frechet.bounds.cat.R b/R/Frechet.bounds.cat.R
@@ -87,7 +87,7 @@ fb.yz <-  function(y, z, prn="tables")
         warning("The marginal distr. of the X variables \n in tab.xy and in tab.xz are not equal")
 
 ########################################################
-# computes Fréchet bounds _without_ using X variables
+# computes Frechet bounds _without_ using X variables
 
     ll <- outer(p.y, p.z, FUN="+") - 1
     m0 <- matrix(0, nrow(ll), ncol(ll))
@@ -99,7 +99,7 @@ fb.yz <-  function(y, z, prn="tables")
     res.0 <- list(low.u=low, up.u=upper)
 
 #############################################
-# computes Fréchet bounds using X variables
+# computes Frechet bounds using X variables
 
     dm.x <- data.frame(p.x)
     sdm.x <- split(dm.x, dm.x[,lab.x])
@@ -114,7 +114,7 @@ fb.yz <-  function(y, z, prn="tables")
     out.CIA <- as.list(numeric(H))
     out.low <- as.list(numeric(H))
     out.up <- as.list(numeric(H))
-    unc <- as.list(numeric(H)) 
+#    unc <- as.list(numeric(H)) 
 
     for(h in 1:H){
         yy <- say.gx[[h]][,"Freq"]
@@ -131,7 +131,7 @@ fb.yz <-  function(y, z, prn="tables")
         uu <- outer(yy, zz, FUN="pmin")
         out.low[[h]] <- ll*xx
         out.up[[h]] <- uu*xx
-        unc[[h]] <- sum((uu-ll) * outer(yy, zz, FUN="*") * xx)
+ #       unc[[h]] <- sum((uu-ll) * outer(yy, zz, FUN="*") * xx)
     }
 
     aa.CIA <- array(unlist(out.CIA), dim=c(dim(out.CIA[[1]]),H) )
@@ -151,7 +151,8 @@ fb.yz <-  function(y, z, prn="tables")
     l.z <- l.z[-p.z]
     class(fine.CIA) <- class(fine.low) <- class(fine.up) <- "table"
     dimnames(fine.CIA) <- dimnames(fine.low) <-  dimnames(fine.up) <- c(l.y, l.z)
-    vet.unc <- c(av.u=mean(c(upper-low)), av.cx=mean(c(fine.up-fine.low)), overall=sum(unlist(unc)))
+#    vet.unc <- c(av.u=mean(c(upper-low)), av.cx=mean(c(fine.up-fine.low)), overall=sum(unlist(unc)))
+    vet.unc <- c(av.u=mean(c(upper-low)), av.cx=mean(c(fine.up-fine.low)))
     res.1 <- list(CIA=fine.CIA, low.cx=fine.low, up.cx=fine.up, uncertainty=vet.unc)
 
     if(print.f=="tables"){

diff --git a/R/NND.hotdeck.R b/R/NND.hotdeck.R
@@ -2,7 +2,7 @@
 function (data.rec, data.don, match.vars, don.class=NULL, dist.fun="Manhattan", constrained=FALSE, constr.alg="Hungarian", keep.t=FALSE, ...)
 {
 #    if(constrained && (constr.alg=="Hungarian" || constr.alg=="hungarian")) require(clue)
-    if(constrained && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
+    #if(constrained && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
 
 	p <- length(match.vars)
 	if(!is.null(dim(data.rec))){

diff --git a/R/RANDwNND.hotdeck.r b/R/RANDwNND.hotdeck.r
@@ -121,7 +121,7 @@ RANDwNND.hd <- function (rec, don, dfun="Manhattan", cut.don="rot", k=NULL, w.do
         if(cut.don=="min") k0 <- 10
         else if (cut.don=="k.dist") stop("When dist.fun='RANN' it is not possible to to set \n cut.don = 'k.dist' ")
         else k0 <- k
-        dd <- nn2(data=x.rec, query=x.don, k=k0, ...)
+        dd <- nn2(data=x.don, query=x.rec, k=k0, ...)
         mdist <- dd$nn.dists
     }
     else if(dfun=="difference" || dfun=="diff"){
@@ -359,7 +359,7 @@ pps.draw <- function(n, w){
                     pos <- pps.draw(n=nn.r[[lab.h]], w=l.don[[lab.h]][,weight.don])
                     don.lab <- l.d.lab[[lab.h]][pos]
                 }
-			      mtc.ids[[h]] <- cbind(rec.id=l.r.lab[[lab.h]], don.id=don.lab)
+			    mtc.ids[[h]] <- cbind(rec.id=l.r.lab[[lab.h]], don.id=don.lab)
     		    sum.dist[[h]] <- NA
 			    noad[[h]] <- rep(nn.d[[lab.h]], nn.r[[lab.h]])
 			}

diff --git a/R/mixed.mtc.R b/R/mixed.mtc.R
@@ -2,7 +2,7 @@
 function (data.rec, data.don, match.vars, y.rec, z.don, method="ML", rho.yz=NULL, micro=FALSE, constr.alg="Hungarian")
 {
  #   if(micro && (constr.alg=="Hungarian" || constr.alg=="hungarian")) require(clue)
-    if(micro && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
+#    if(micro && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
 
 	nA <- nrow(data.rec)
 	nB <- nrow(data.don)

diff --git a/R/rankNND_hotdeck.R b/R/rankNND_hotdeck.R
@@ -11,7 +11,7 @@ function (data.rec, data.don, var.rec, var.don=var.rec, don.class=NULL, weight.r
     }
 
  #   if(constrained && (constr.alg=="Hungarian" || constr.alg=="hungarian")) require(clue)
-    if(constrained && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
+ #   if(constrained && (constr.alg=="lpSolve" || constr.alg=="lpsolve")) require(lpSolve)
 
     if(!is.null(dim(data.rec))){
 		nr <- nrow(data.rec)

diff --git a/build/vignette.rds b/build/vignette.rds