Skip to content

Commit

Permalink
version 0.3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
ang-yu authored and cran-robot committed Jan 9, 2024
1 parent 56bd0bc commit 0940e2d
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 42 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: cdgd
Title: Causal Decomposition of Group Disparities
Version: 0.3.3
Version: 0.3.4
Authors@R:
person("Ang", "Yu", , "ang_yu@outlook.com", role = c("aut", "cre", "cph"),
comment = c(website = "<https://ang-yu.github.io/>", ORCID = "0000-0002-1828-0165"))
Expand All @@ -22,9 +22,9 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-12-09 20:01:24 UTC; Ang
Packaged: 2024-01-06 14:01:48 UTC; Ang
Author: Ang Yu [aut, cre, cph] (<https://ang-yu.github.io/>,
<https://orcid.org/0000-0002-1828-0165>)
Maintainer: Ang Yu <ang_yu@outlook.com>
Repository: CRAN
Date/Publication: 2023-12-09 20:10:02 UTC
Date/Publication: 2024-01-08 14:00:06 UTC
12 changes: 6 additions & 6 deletions MD5
@@ -1,20 +1,20 @@
178ef87c23c4040132eae4e196a85281 *DESCRIPTION
fc4ba4c7b9efcb2fb787a530fb6ddd5b *DESCRIPTION
d51c59f8ce5117b43778cd95fee34fef *LICENSE
9fa47bc528b89d6c3da57aea4f3e56f0 *NAMESPACE
d507f724fd6293c941925b81adc2229c *NEWS.md
a46c13c7aa4ab7b35c3a5d8a1490ddc3 *NEWS.md
ef69c0670a99158c51e3ba5d8f1858c9 *R/cdgd0_manual.R
bfaf6d7d1dae45d7a6c6161ca4c3d52f *R/cdgd0_ml.R
8da79b3526b03cfeb9cb2d6af0ac366f *R/cdgd0_pa.R
8d5e1f62da6eb3666a59c69c4685c035 *R/cdgd1_manual.R
085be5613f86b5f5d710bebf55009452 *R/cdgd1_ml.R
5a3ff852028be9e222bb9425909a1f82 *R/cdgd1_pa.R
88ed4ea8f1bb5daf1b2f9324d6667199 *R/cdgd1_manual.R
c77d76dd291a5d49dd0553b0fa8566d1 *R/cdgd1_ml.R
500ad430d564a90ab69808ec42604487 *R/cdgd1_pa.R
d3e698fca05ab4a2e7908d83424c2590 *R/exp_data.R
75a2ae38290583448144e83d4478959e *README.md
087c491f1ec5cb0b8e6cb173b276079d *data/exp_data.rda
43e24517ab758241cd9c3b043df4fb67 *man/cdgd0_manual.Rd
336980e8b243ffb3cf1d30843af7d5fe *man/cdgd0_ml.Rd
30d0c0717c1d13d0c64c7e9dda2b67dd *man/cdgd0_pa.Rd
abe1d6f1200c3b9ff89a0dcb0a66e9c8 *man/cdgd1_manual.Rd
8532960c2e75c6e893c126f7ea10ea31 *man/cdgd1_manual.Rd
30a5180ae09c793529699bb985c0b859 *man/cdgd1_ml.Rd
c8ae5db605881e76964ede4618498cab *man/cdgd1_pa.Rd
ea74744471a3d3fe351620f3df0d7e6b *man/exp_data.Rd
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@

# cdgd 0.3.4

* Corrected an error in the example of cdgd1_manual.

# cdgd 0.3.3

* Now allows for a binary outcome.
Expand Down
10 changes: 5 additions & 5 deletions R/cdgd1_manual.R
Expand Up @@ -219,9 +219,9 @@
#' data[,G] <- as.numeric(data[,G])-1
#'
#' GgivenQ.Pred <- rep(NA, nrow(data))
#' GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
#' GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
#' newdata = data[sample2,], type="prob")[,2]
#' GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
#' GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
#' newdata = data[sample1,], type="prob")[,2]
#'
#'
Expand All @@ -242,9 +242,9 @@


cdgd1_manual <- function(Y,D,G,
YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
data,alpha=0.05) {
YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
data,alpha=0.05) {

data <- as.data.frame(data)

Expand Down
34 changes: 17 additions & 17 deletions R/cdgd1_ml.R
Expand Up @@ -49,11 +49,11 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {

data <- as.data.frame(data)

### estimate the nuisance functions with cross-fitting
### estimate the nuisance functions with cross-fitting
sample1 <- sample(nrow(data), floor(nrow(data)/2), replace=FALSE)
sample2 <- setdiff(1:nrow(data), sample1)

### propensity score model
### propensity score model
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger

Expand Down Expand Up @@ -171,7 +171,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
data[,Y] <- as.numeric(data[,Y])-1
}

### outcome predictions
### outcome predictions
YgivenGXQ.Pred_D0 <- YgivenGXQ.Pred_D1 <- rep(NA, nrow(data))

pred_data <- data
Expand All @@ -196,7 +196,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
YgivenGXQ.Pred_D1[sample1] <- stats::predict(YgivenDGXQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]
}

### Estimate p_g(Q)=Pr(G=g | Q)
### Estimate p_g(Q)=Pr(G=g | Q)
data[,G] <- as.factor(data[,G])
levels(data[,G]) <- c("G0","G1") # necessary for caret implementation of ranger

Expand Down Expand Up @@ -256,7 +256,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
)
}

### Estimate E(Y_d | Q,g)
### Estimate E(Y_d | Q,g)
YgivenGXQ.Pred_D1_ncf <- YgivenGXQ.Pred_D0_ncf <- rep(NA, nrow(data)) # ncf stands for non-cross-fitted

pred_data <- data
Expand Down Expand Up @@ -336,14 +336,14 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
Y1givenQ.Pred_G0[sample2] <- stats::predict(Y1givenGQ.Model.sample1, newdata = pred_data[sample2,])
Y1givenQ.Pred_G0[sample1] <- stats::predict(Y1givenGQ.Model.sample2, newdata = pred_data[sample1,])

### The "IPO" (individual potential outcome) function
### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights

IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

### Estimate E(D | Q,g')
### Estimate E(D | Q,g')
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger

Expand All @@ -355,15 +355,15 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
}
if (algorithm=="ranger") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="ranger",
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="ranger",
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
}
if (algorithm=="gbm") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="gbm",
trControl=caret::trainControl(method="cv")) )
trControl=caret::trainControl(method="cv")) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="gbm",
trControl=caret::trainControl(method="cv")) )
trControl=caret::trainControl(method="cv")) )
}

data[,D] <- as.numeric(data[,D])-1
Expand All @@ -380,7 +380,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
DgivenQ.Pred_G1[sample2] <- stats::predict(DgivenGQ.Model.sample1, newdata = pred_data[sample2,], type="prob")[,2]
DgivenQ.Pred_G1[sample1] <- stats::predict(DgivenGQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]

### The one-step estimate of \xi_{dg}
### The one-step estimate of \xi_{dg}
psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 )
psi_01 <- mean( data[,G]/mean(data[,G])*IPO_D0 )
# Note that this is basically DML2. We could also use DML1:
Expand All @@ -391,7 +391,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
#psi_01_S2 <- mean( data[sample1,G]/mean(data[sample1,G])*IPO_D0[sample2] ) # sample 2 estimate
#psi_01 <- (1/2)*(psi_01_S1+psi_01_S2)

### The one-step estimate of \xi_{dgg'g''}
### The one-step estimate of \xi_{dgg'g''}
# There are 8 possible dgg'g'' combinations, so we define a function first
psi_dggg <- function(d,g1,g2,g3) {
if (d==0 & g1==0) {
Expand Down Expand Up @@ -517,10 +517,10 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
cond_effect_se <- se( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) )
Q_dist_se <- se( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) )
cond_selection_se <- se( data[,G]/mean(data[,G])*(data[,Y]-Y_G1) - (1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) -
( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))

cond_Jackson_reduction_se <- se( (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00)+EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-(1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) )

Expand Down
13 changes: 4 additions & 9 deletions R/cdgd1_pa.R
Expand Up @@ -111,9 +111,11 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
)
}

# IPOs for modelling E(Y_d | Q,g)
### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights
IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/(mean(data[,D]/DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

data_temp <- data[,c(G,Q)]
data_temp$IPO_D0 <- IPO_D0
Expand All @@ -134,13 +136,6 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
Y0givenQ.Pred_G0 <- stats::predict(Y0givenGQ.Model, newdata = pred_data)
Y1givenQ.Pred_G0 <- stats::predict(Y1givenGQ.Model, newdata = pred_data)

### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights

IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

### Estimate E(D | Q,g')
DgivenGQ.Model <- stats::glm(stats::as.formula(paste(D, paste(paste(G,Q,sep="*"),collapse="+"), sep="~")), data=data, family=stats::binomial(link="logit"))

Expand Down
4 changes: 2 additions & 2 deletions man/cdgd1_manual.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0940e2d

Please sign in to comment.