diff --git a/DESCRIPTION b/DESCRIPTION
index 80c0dfe..6960433 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: cdgd
Title: Causal Decomposition of Group Disparities
-Version: 0.3.3
+Version: 0.3.4
Authors@R:
person("Ang", "Yu", , "ang_yu@outlook.com", role = c("aut", "cre", "cph"),
comment = c(website = "", ORCID = "0000-0002-1828-0165"))
@@ -22,9 +22,9 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
NeedsCompilation: no
-Packaged: 2023-12-09 20:01:24 UTC; Ang
+Packaged: 2024-01-06 14:01:48 UTC; Ang
Author: Ang Yu [aut, cre, cph] (<https://orcid.org/0000-0002-1828-0165>)
Maintainer: Ang Yu <ang_yu@outlook.com>
Repository: CRAN
-Date/Publication: 2023-12-09 20:10:02 UTC
+Date/Publication: 2024-01-08 14:00:06 UTC
diff --git a/MD5 b/MD5
index 81cf135..fea597c 100644
--- a/MD5
+++ b/MD5
@@ -1,20 +1,20 @@
-178ef87c23c4040132eae4e196a85281 *DESCRIPTION
+fc4ba4c7b9efcb2fb787a530fb6ddd5b *DESCRIPTION
d51c59f8ce5117b43778cd95fee34fef *LICENSE
9fa47bc528b89d6c3da57aea4f3e56f0 *NAMESPACE
-d507f724fd6293c941925b81adc2229c *NEWS.md
+a46c13c7aa4ab7b35c3a5d8a1490ddc3 *NEWS.md
ef69c0670a99158c51e3ba5d8f1858c9 *R/cdgd0_manual.R
bfaf6d7d1dae45d7a6c6161ca4c3d52f *R/cdgd0_ml.R
8da79b3526b03cfeb9cb2d6af0ac366f *R/cdgd0_pa.R
-8d5e1f62da6eb3666a59c69c4685c035 *R/cdgd1_manual.R
-085be5613f86b5f5d710bebf55009452 *R/cdgd1_ml.R
-5a3ff852028be9e222bb9425909a1f82 *R/cdgd1_pa.R
+88ed4ea8f1bb5daf1b2f9324d6667199 *R/cdgd1_manual.R
+c77d76dd291a5d49dd0553b0fa8566d1 *R/cdgd1_ml.R
+500ad430d564a90ab69808ec42604487 *R/cdgd1_pa.R
d3e698fca05ab4a2e7908d83424c2590 *R/exp_data.R
75a2ae38290583448144e83d4478959e *README.md
087c491f1ec5cb0b8e6cb173b276079d *data/exp_data.rda
43e24517ab758241cd9c3b043df4fb67 *man/cdgd0_manual.Rd
336980e8b243ffb3cf1d30843af7d5fe *man/cdgd0_ml.Rd
30d0c0717c1d13d0c64c7e9dda2b67dd *man/cdgd0_pa.Rd
-abe1d6f1200c3b9ff89a0dcb0a66e9c8 *man/cdgd1_manual.Rd
+8532960c2e75c6e893c126f7ea10ea31 *man/cdgd1_manual.Rd
30a5180ae09c793529699bb985c0b859 *man/cdgd1_ml.Rd
c8ae5db605881e76964ede4618498cab *man/cdgd1_pa.Rd
ea74744471a3d3fe351620f3df0d7e6b *man/exp_data.Rd
diff --git a/NEWS.md b/NEWS.md
index 3ed08cf..9b71ead 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+
+# cdgd 0.3.4
+
+* Corrected an error in the example of cdgd1_manual: GgivenQ.Pred is now predicted from GgivenQ.Model.sample1/sample2 rather than from DgivenGQ.Model.sample1/sample2
+
# cdgd 0.3.3
* Now allows for a binary outcome.
diff --git a/R/cdgd1_manual.R b/R/cdgd1_manual.R
index cb88136..0a6ada3 100644
--- a/R/cdgd1_manual.R
+++ b/R/cdgd1_manual.R
@@ -219,9 +219,9 @@
#' data[,G] <- as.numeric(data[,G])-1
#'
#' GgivenQ.Pred <- rep(NA, nrow(data))
-#' GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
+#' GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
#' newdata = data[sample2,], type="prob")[,2]
-#' GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
+#' GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
#' newdata = data[sample1,], type="prob")[,2]
#'
#'
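For context, here is the corrected example as one self-contained sketch, assuming caret and ranger are installed and that G names a binary group column and Q a vector of covariate names, as elsewhere in the example. Each half-sample is scored by the model fit on the other half, so every propensity prediction is cross-fitted:

sample1 <- sample(nrow(data), floor(nrow(data)/2), replace=FALSE)
sample2 <- setdiff(1:nrow(data), sample1)

data[,G] <- as.factor(data[,G])
levels(data[,G]) <- c("G0","G1")  # caret's classProbs needs valid-name levels

GgivenQ.Model.sample1 <- caret::train(
  stats::as.formula(paste(G, paste(Q, collapse="+"), sep="~")),
  data=data[sample1,], method="ranger",
  trControl=caret::trainControl(method="cv", classProbs=TRUE))
GgivenQ.Model.sample2 <- caret::train(
  stats::as.formula(paste(G, paste(Q, collapse="+"), sep="~")),
  data=data[sample2,], method="ranger",
  trControl=caret::trainControl(method="cv", classProbs=TRUE))

data[,G] <- as.numeric(data[,G])-1

# the fix: predict from the GgivenQ models, not the DgivenGQ models, and
# score each half-sample with the model fit on the other half
GgivenQ.Pred <- rep(NA, nrow(data))
GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
                                        newdata = data[sample2,], type="prob")[,2]
GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
                                        newdata = data[sample1,], type="prob")[,2]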
@@ -242,9 +242,9 @@
cdgd1_manual <- function(Y,D,G,
- YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
- Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
- data,alpha=0.05) {
+ YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
+ Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
+ data,alpha=0.05) {
data <- as.data.frame(data)
diff --git a/R/cdgd1_ml.R b/R/cdgd1_ml.R
index 1bd72bd..c374acf 100644
--- a/R/cdgd1_ml.R
+++ b/R/cdgd1_ml.R
@@ -49,11 +49,11 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
data <- as.data.frame(data)
-### estimate the nuisance functions with cross-fitting
+ ### estimate the nuisance functions with cross-fitting
sample1 <- sample(nrow(data), floor(nrow(data)/2), replace=FALSE)
sample2 <- setdiff(1:nrow(data), sample1)
-### propensity score model
+ ### propensity score model
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger
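Two details in the hunk above are worth spelling out: the half-split drives all of the cross-fitting in this function, and the factor relabeling is required because caret with classProbs=TRUE rejects class levels that are not valid R variable names (such as "0" and "1"). A minimal sketch of the relabeling round trip:

data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1")    # valid-name levels, required by classProbs=TRUE
# ... fit the propensity models on data[sample1,] and data[sample2,] here ...
data[,D] <- as.numeric(data[,D])-1  # back to 0/1: "D0" -> 0, "D1" -> 1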
@@ -171,7 +171,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
data[,Y] <- as.numeric(data[,Y])-1
}
-### outcome predictions
+ ### outcome predictions
YgivenGXQ.Pred_D0 <- YgivenGXQ.Pred_D1 <- rep(NA, nrow(data))
pred_data <- data
@@ -196,7 +196,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
YgivenGXQ.Pred_D1[sample1] <- stats::predict(YgivenDGXQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]
}
-### Estimate p_g(Q)=Pr(G=g | Q)
+ ### Estimate p_g(Q)=Pr(G=g | Q)
data[,G] <- as.factor(data[,G])
levels(data[,G]) <- c("G0","G1") # necessary for caret implementation of ranger
@@ -256,7 +256,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
)
}
-### Estimate E(Y_d | Q,g)
+ ### Estimate E(Y_d | Q,g)
YgivenGXQ.Pred_D1_ncf <- YgivenGXQ.Pred_D0_ncf <- rep(NA, nrow(data)) # ncf stands for non-cross-fitted
pred_data <- data
@@ -336,14 +336,14 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
Y1givenQ.Pred_G0[sample2] <- stats::predict(Y1givenGQ.Model.sample1, newdata = pred_data[sample2,])
Y1givenQ.Pred_G0[sample1] <- stats::predict(Y1givenGQ.Model.sample2, newdata = pred_data[sample1,])
-### The "IPO" (individual potential outcome) function
+ ### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights
IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
-### Estimate E(D | Q,g')
+ ### Estimate E(D | Q,g')
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger
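The IPO_D0 and IPO_D1 lines above pack the stabilized augmented-IPW estimator into one expression each; an equivalent unpacked sketch, with w0 and w1 as hypothetical intermediate names:

w0 <- (1-data[,D])/(1-DgivenGXQ.Pred)   # inverse-probability weight, D=0 arm
w1 <- data[,D]/DgivenGXQ.Pred           # inverse-probability weight, D=1 arm
w0 <- w0/mean(w0)                       # stabilize: weights now average to 1
w1 <- w1/mean(w1)
# doubly robust individual potential outcome: weighted residual + model prediction
IPO_D0 <- w0*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- w1*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1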
@@ -355,15 +355,15 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
}
if (algorithm=="ranger") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="ranger",
- trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
+ trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="ranger",
- trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
+ trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
}
if (algorithm=="gbm") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="gbm",
- trControl=caret::trainControl(method="cv")) )
+ trControl=caret::trainControl(method="cv")) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="gbm",
- trControl=caret::trainControl(method="cv")) )
+ trControl=caret::trainControl(method="cv")) )
}
data[,D] <- as.numeric(data[,D])-1
@@ -380,7 +380,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
DgivenQ.Pred_G1[sample2] <- stats::predict(DgivenGQ.Model.sample1, newdata = pred_data[sample2,], type="prob")[,2]
DgivenQ.Pred_G1[sample1] <- stats::predict(DgivenGQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]
-### The one-step estimate of \xi_{dg}
+ ### The one-step estimate of \xi_{dg}
psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 )
psi_01 <- mean( data[,G]/mean(data[,G])*IPO_D0 )
# Note that this is basically DML2. We could also use DML1:
@@ -391,7 +391,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
#psi_01_S2 <- mean( data[sample2,G]/mean(data[sample2,G])*IPO_D0[sample2] ) # sample 2 estimate
#psi_01 <- (1/2)*(psi_01_S1+psi_01_S2)
-### The one-step estimate of \xi_{dgg'g''}
+ ### The one-step estimate of \xi_{dgg'g''}
# There are 8 possible dgg'g'' combinations, so we define a function first
psi_dggg <- function(d,g1,g2,g3) {
if (d==0 & g1==0) {
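On the DML2-versus-DML1 note in the two hunks above: DML2 takes a single average over the pooled sample, while DML1 computes a per-fold estimate on each half and averages the fold estimates. A sketch for psi_00, mirroring the commented-out code (psi_00_S1, psi_00_S2, and psi_00_DML1 are hypothetical names):

# DML2, what the function uses: one average over the pooled sample
psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 )
# DML1 alternative: estimate within each fold, then average the fold estimates
psi_00_S1 <- mean( (1-data[sample1,G])/(1-mean(data[sample1,G]))*IPO_D0[sample1] )
psi_00_S2 <- mean( (1-data[sample2,G])/(1-mean(data[sample2,G]))*IPO_D0[sample2] )
psi_00_DML1 <- (1/2)*(psi_00_S1+psi_00_S2)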
@@ -517,10 +517,10 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
cond_effect_se <- se( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) )
Q_dist_se <- se( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) )
cond_selection_se <- se( data[,G]/mean(data[,G])*(data[,Y]-Y_G1) - (1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) -
- ( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
- ( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
- ( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
- ( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
+ ( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
+ ( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
+ ( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
+ ( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
cond_Jackson_reduction_se <- se( (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00)+EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-(1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) )
diff --git a/R/cdgd1_pa.R b/R/cdgd1_pa.R
index 267337e..4c17418 100644
--- a/R/cdgd1_pa.R
+++ b/R/cdgd1_pa.R
@@ -111,9 +111,11 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
)
}
- # IPOs for modelling E(Y_d | Q,g)
+ ### The "IPO" (individual potential outcome) function
+ # For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
+ # We stabilize the weight by dividing the sample average of estimated weights
IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
- IPO_D1 <- data[,D]/DgivenGXQ.Pred/(mean(data[,D]/DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
+ IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
data_temp <- data[,c(G,Q)]
data_temp$IPO_D0 <- IPO_D0
@@ -134,13 +136,6 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
Y0givenQ.Pred_G0 <- stats::predict(Y0givenGQ.Model, newdata = pred_data)
Y1givenQ.Pred_G0 <- stats::predict(Y1givenGQ.Model, newdata = pred_data)
- ### The "IPO" (individual potential outcome) function
- # For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
- # We stabilize the weight by dividing the sample average of estimated weights
-
- IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
- IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
-
### Estimate E(D | Q,g')
DgivenGQ.Model <- stats::glm(stats::as.formula(paste(D, paste(paste(G,Q,sep="*"),collapse="+"), sep="~")), data=data, family=stats::binomial(link="logit"))
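The glm call above assembles its formula from strings; a small illustration of the expansion, with hypothetical column names D = "treat", G = "group", and Q = c("q1", "q2"):

D <- "treat"; G <- "group"; Q <- c("q1","q2")  # hypothetical names
stats::as.formula(paste(D, paste(paste(G,Q,sep="*"),collapse="+"), sep="~"))
#> treat ~ group * q1 + group * q2
# i.e., a logit for D with main effects of G and each Q, plus G-by-Q interactions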
diff --git a/man/cdgd1_manual.Rd b/man/cdgd1_manual.Rd
index 0c5f7f7..af0b072 100644
--- a/man/cdgd1_manual.Rd
+++ b/man/cdgd1_manual.Rd
@@ -255,9 +255,9 @@ message <- utils::capture.output( GgivenQ.Model.sample2 <-
data[,G] <- as.numeric(data[,G])-1
GgivenQ.Pred <- rep(NA, nrow(data))
-GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
+GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
newdata = data[sample2,], type="prob")[,2]
-GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
+GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
newdata = data[sample1,], type="prob")[,2]