diff --git a/DESCRIPTION b/DESCRIPTION
index 80c0dfe..6960433 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: cdgd
 Title: Causal Decomposition of Group Disparities
-Version: 0.3.3
+Version: 0.3.4
 Authors@R: 
     person("Ang", "Yu", , "ang_yu@outlook.com", role = c("aut", "cre", "cph"),
            comment = c(website = "", ORCID = "0000-0002-1828-0165"))
@@ -22,9 +22,9 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 NeedsCompilation: no
-Packaged: 2023-12-09 20:01:24 UTC; Ang
+Packaged: 2024-01-06 14:01:48 UTC; Ang
 Author: Ang Yu [aut, cre, cph] (, )
 Maintainer: Ang Yu 
 Repository: CRAN
-Date/Publication: 2023-12-09 20:10:02 UTC
+Date/Publication: 2024-01-08 14:00:06 UTC
diff --git a/MD5 b/MD5
index 81cf135..fea597c 100644
--- a/MD5
+++ b/MD5
@@ -1,20 +1,20 @@
-178ef87c23c4040132eae4e196a85281 *DESCRIPTION
+fc4ba4c7b9efcb2fb787a530fb6ddd5b *DESCRIPTION
 d51c59f8ce5117b43778cd95fee34fef *LICENSE
 9fa47bc528b89d6c3da57aea4f3e56f0 *NAMESPACE
-d507f724fd6293c941925b81adc2229c *NEWS.md
+a46c13c7aa4ab7b35c3a5d8a1490ddc3 *NEWS.md
 ef69c0670a99158c51e3ba5d8f1858c9 *R/cdgd0_manual.R
 bfaf6d7d1dae45d7a6c6161ca4c3d52f *R/cdgd0_ml.R
 8da79b3526b03cfeb9cb2d6af0ac366f *R/cdgd0_pa.R
-8d5e1f62da6eb3666a59c69c4685c035 *R/cdgd1_manual.R
-085be5613f86b5f5d710bebf55009452 *R/cdgd1_ml.R
-5a3ff852028be9e222bb9425909a1f82 *R/cdgd1_pa.R
+88ed4ea8f1bb5daf1b2f9324d6667199 *R/cdgd1_manual.R
+c77d76dd291a5d49dd0553b0fa8566d1 *R/cdgd1_ml.R
+500ad430d564a90ab69808ec42604487 *R/cdgd1_pa.R
 d3e698fca05ab4a2e7908d83424c2590 *R/exp_data.R
 75a2ae38290583448144e83d4478959e *README.md
 087c491f1ec5cb0b8e6cb173b276079d *data/exp_data.rda
 43e24517ab758241cd9c3b043df4fb67 *man/cdgd0_manual.Rd
 336980e8b243ffb3cf1d30843af7d5fe *man/cdgd0_ml.Rd
 30d0c0717c1d13d0c64c7e9dda2b67dd *man/cdgd0_pa.Rd
-abe1d6f1200c3b9ff89a0dcb0a66e9c8 *man/cdgd1_manual.Rd
+8532960c2e75c6e893c126f7ea10ea31 *man/cdgd1_manual.Rd
 30a5180ae09c793529699bb985c0b859 *man/cdgd1_ml.Rd
 c8ae5db605881e76964ede4618498cab *man/cdgd1_pa.Rd
 ea74744471a3d3fe351620f3df0d7e6b *man/exp_data.Rd
diff --git a/NEWS.md b/NEWS.md
index 3ed08cf..9b71ead 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+
+# cdgd 0.3.4
+
+* Corrected an error in the example of cdgd1_manual
+
 # cdgd 0.3.3
 
 * Now allows for a binary outcome.
diff --git a/R/cdgd1_manual.R b/R/cdgd1_manual.R
index cb88136..0a6ada3 100644
--- a/R/cdgd1_manual.R
+++ b/R/cdgd1_manual.R
@@ -219,9 +219,9 @@
 #' data[,G] <- as.numeric(data[,G])-1
 #'
 #' GgivenQ.Pred <- rep(NA, nrow(data))
-#' GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
+#' GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
 #'     newdata = data[sample2,], type="prob")[,2]
-#' GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
+#' GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
 #'     newdata = data[sample1,], type="prob")[,2]
 #'
 #'
@@ -242,9 +242,9 @@
 
 
 cdgd1_manual <- function(Y,D,G,
-                     YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
-                     Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
-                     data,alpha=0.05) {
+                         YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
+                         Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
+                         data,alpha=0.05) {
 
   data <- as.data.frame(data)
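
Note on the fix above: the example builds cross-fitted estimates of Pr(G=1|Q), so the predictions must come from the group-membership models (GgivenQ.Model.sample1, GgivenQ.Model.sample2) rather than the treatment models (DgivenGQ.Model.sample1, DgivenGQ.Model.sample2), with each half of the sample scored by the model fitted on the other half. Below is a minimal, self-contained sketch of that pattern; the toy data are purely illustrative and plain logistic regression stands in for the ranger/gbm learners used in the package example.

library(caret)

set.seed(1)
toy <- data.frame(Q = stats::rnorm(500))
toy$G <- factor(stats::rbinom(500, 1, stats::plogis(toy$Q)), labels = c("G0", "G1"))

# Split the sample into two halves for cross-fitting
sample1 <- sample(nrow(toy), floor(nrow(toy)/2), replace = FALSE)
sample2 <- setdiff(1:nrow(toy), sample1)

# Fit the group-membership model Pr(G = 1 | Q) separately on each half
GgivenQ.Model.sample1 <- caret::train(G ~ Q, data = toy[sample1, ], method = "glm",
                                      trControl = caret::trainControl(method = "cv", classProbs = TRUE))
GgivenQ.Model.sample2 <- caret::train(G ~ Q, data = toy[sample2, ], method = "glm",
                                      trControl = caret::trainControl(method = "cv", classProbs = TRUE))

# Cross-fitting: score each half with the model trained on the other half
GgivenQ.Pred <- rep(NA, nrow(toy))
GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
                                        newdata = toy[sample2, ], type = "prob")[, 2]
GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
                                        newdata = toy[sample1, ], type = "prob")[, 2]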
c("D0","D1") # necessary for caret implementation of ranger @@ -355,15 +355,15 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) { } if (algorithm=="ranger") { message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="ranger", - trControl=caret::trainControl(method="cv", classProbs=TRUE)) ) + trControl=caret::trainControl(method="cv", classProbs=TRUE)) ) message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="ranger", - trControl=caret::trainControl(method="cv", classProbs=TRUE)) ) + trControl=caret::trainControl(method="cv", classProbs=TRUE)) ) } if (algorithm=="gbm") { message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="gbm", - trControl=caret::trainControl(method="cv")) ) + trControl=caret::trainControl(method="cv")) ) message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="gbm", - trControl=caret::trainControl(method="cv")) ) + trControl=caret::trainControl(method="cv")) ) } data[,D] <- as.numeric(data[,D])-1 @@ -380,7 +380,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) { DgivenQ.Pred_G1[sample2] <- stats::predict(DgivenGQ.Model.sample1, newdata = pred_data[sample2,], type="prob")[,2] DgivenQ.Pred_G1[sample1] <- stats::predict(DgivenGQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2] -### The one-step estimate of \xi_{dg} + ### The one-step estimate of \xi_{dg} psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 ) psi_01 <- mean( data[,G]/mean(data[,G])*IPO_D0 ) # Note that this is basically DML2. 
@@ -355,15 +355,15 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
   }
   if (algorithm=="ranger") {
     message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="ranger",
-                                                                           trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
+                                                                             trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
     message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="ranger",
-                                                                           trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
+                                                                             trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
   }
   if (algorithm=="gbm") {
     message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="gbm",
-                                                                           trControl=caret::trainControl(method="cv")) )
+                                                                             trControl=caret::trainControl(method="cv")) )
     message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="gbm",
-                                                                           trControl=caret::trainControl(method="cv")) )
+                                                                             trControl=caret::trainControl(method="cv")) )
   }
 
   data[,D] <- as.numeric(data[,D])-1
@@ -380,7 +380,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
   DgivenQ.Pred_G1[sample2] <- stats::predict(DgivenGQ.Model.sample1, newdata = pred_data[sample2,], type="prob")[,2]
   DgivenQ.Pred_G1[sample1] <- stats::predict(DgivenGQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]
 
-### The one-step estimate of \xi_{dg}
+  ### The one-step estimate of \xi_{dg}
   psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 )
   psi_01 <- mean( data[,G]/mean(data[,G])*IPO_D0 )
   # Note that this is basically DML2. We could also use DML1:
@@ -391,7 +391,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
   #psi_01_S2 <- mean( data[sample1,G]/mean(data[sample1,G])*IPO_D0[sample2] )  # sample 2 estimate
   #psi_01 <- (1/2)*(psi_01_S1+psi_01_S2)
 
-### The one-step estimate of \xi_{dgg'g''}
+  ### The one-step estimate of \xi_{dgg'g''}
   # There are 8 possible dgg'g'' combinations, so we define a function first
   psi_dggg <- function(d,g1,g2,g3) {
     if (d==0 & g1==0) {
@@ -517,10 +517,10 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
   cond_effect_se <- se( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) )
   Q_dist_se <- se( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) )
   cond_selection_se <- se( data[,G]/mean(data[,G])*(data[,Y]-Y_G1) - (1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) -
-    ( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
-    ( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
-    ( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
-    ( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
+                           ( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
+                           ( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
+                           ( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
+                           ( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
 
   cond_Jackson_reduction_se <- se( (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00)+EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-(1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) )
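
Note on the DML2/DML1 remark in the \xi_{dg} hunk above: DML2 pools the cross-fitted pseudo-outcomes and takes a single sample average, whereas DML1 forms the estimate within each fold and then averages the fold-level estimates, as in the commented-out psi_01_S1/psi_01_S2 code. A generic sketch of the contrast, with illustrative names:

# DML2 vs DML1 for a mean-type functional of cross-fitted scores (illustrative)
dml2 <- function(scores) mean(scores)  # pool all folds, average once

dml1 <- function(scores, fold_id) {
  # estimate within each fold, then average the fold-level estimates
  mean(tapply(scores, fold_id, mean))
}

# With equal-sized folds the two coincide for a plain mean; they can differ
# when the functional involves fold-specific normalizations such as mean(data[,G]).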
diff --git a/R/cdgd1_pa.R b/R/cdgd1_pa.R
index 267337e..4c17418 100644
--- a/R/cdgd1_pa.R
+++ b/R/cdgd1_pa.R
@@ -111,9 +111,11 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
     )
   }
 
-  # IPOs for modelling E(Y_d | Q,g)
+  ### The "IPO" (individual potential outcome) function
+  # For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
+  # We stabilize the weight by dividing the sample average of estimated weights
   IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
-  IPO_D1 <- data[,D]/DgivenGXQ.Pred/(mean(data[,D]/DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
+  IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
 
   data_temp <- data[,c(G,Q)]
   data_temp$IPO_D0 <- IPO_D0
@@ -134,13 +136,6 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
   Y0givenQ.Pred_G0 <- stats::predict(Y0givenGQ.Model, newdata = pred_data)
   Y1givenQ.Pred_G0 <- stats::predict(Y1givenGQ.Model, newdata = pred_data)
 
-  ### The "IPO" (individual potential outcome) function
-  # For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
-  # We stabilize the weight by dividing the sample average of estimated weights
-
-  IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
-  IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
-
 ### Estimate E(D | Q,g')
   DgivenGQ.Model <- stats::glm(stats::as.formula(paste(D, paste(paste(G,Q,sep="*"),collapse="+"), sep="~")), data=data, family=stats::binomial(link="logit"))
 
diff --git a/man/cdgd1_manual.Rd b/man/cdgd1_manual.Rd
index 0c5f7f7..af0b072 100644
--- a/man/cdgd1_manual.Rd
+++ b/man/cdgd1_manual.Rd
@@ -255,9 +255,9 @@ message <- utils::capture.output( GgivenQ.Model.sample2 <-
 data[,G] <- as.numeric(data[,G])-1
 
 GgivenQ.Pred <- rep(NA, nrow(data))
-GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
+GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
     newdata = data[sample2,], type="prob")[,2]
-GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
+GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
    newdata = data[sample1,], type="prob")[,2]