Skip to content

Commit

Permalink
version 0.3.4
Browse files Browse the repository at this point in the history
  • Loading branch information
ang-yu authored and cran-robot committed Jan 9, 2024
1 parent 56bd0bc commit 0940e2d
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 42 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: cdgd
Title: Causal Decomposition of Group Disparities
Version: 0.3.3
Version: 0.3.4
Authors@R:
person("Ang", "Yu", , "ang_yu@outlook.com", role = c("aut", "cre", "cph"),
comment = c(website = "<https://ang-yu.github.io/>", ORCID = "0000-0002-1828-0165"))
Expand All @@ -22,9 +22,9 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
NeedsCompilation: no
Packaged: 2023-12-09 20:01:24 UTC; Ang
Packaged: 2024-01-06 14:01:48 UTC; Ang
Author: Ang Yu [aut, cre, cph] (<https://ang-yu.github.io/>,
<https://orcid.org/0000-0002-1828-0165>)
Maintainer: Ang Yu <ang_yu@outlook.com>
Repository: CRAN
Date/Publication: 2023-12-09 20:10:02 UTC
Date/Publication: 2024-01-08 14:00:06 UTC
12 changes: 6 additions & 6 deletions MD5
@@ -1,20 +1,20 @@
178ef87c23c4040132eae4e196a85281 *DESCRIPTION
fc4ba4c7b9efcb2fb787a530fb6ddd5b *DESCRIPTION
d51c59f8ce5117b43778cd95fee34fef *LICENSE
9fa47bc528b89d6c3da57aea4f3e56f0 *NAMESPACE
d507f724fd6293c941925b81adc2229c *NEWS.md
a46c13c7aa4ab7b35c3a5d8a1490ddc3 *NEWS.md
ef69c0670a99158c51e3ba5d8f1858c9 *R/cdgd0_manual.R
bfaf6d7d1dae45d7a6c6161ca4c3d52f *R/cdgd0_ml.R
8da79b3526b03cfeb9cb2d6af0ac366f *R/cdgd0_pa.R
8d5e1f62da6eb3666a59c69c4685c035 *R/cdgd1_manual.R
085be5613f86b5f5d710bebf55009452 *R/cdgd1_ml.R
5a3ff852028be9e222bb9425909a1f82 *R/cdgd1_pa.R
88ed4ea8f1bb5daf1b2f9324d6667199 *R/cdgd1_manual.R
c77d76dd291a5d49dd0553b0fa8566d1 *R/cdgd1_ml.R
500ad430d564a90ab69808ec42604487 *R/cdgd1_pa.R
d3e698fca05ab4a2e7908d83424c2590 *R/exp_data.R
75a2ae38290583448144e83d4478959e *README.md
087c491f1ec5cb0b8e6cb173b276079d *data/exp_data.rda
43e24517ab758241cd9c3b043df4fb67 *man/cdgd0_manual.Rd
336980e8b243ffb3cf1d30843af7d5fe *man/cdgd0_ml.Rd
30d0c0717c1d13d0c64c7e9dda2b67dd *man/cdgd0_pa.Rd
abe1d6f1200c3b9ff89a0dcb0a66e9c8 *man/cdgd1_manual.Rd
8532960c2e75c6e893c126f7ea10ea31 *man/cdgd1_manual.Rd
30a5180ae09c793529699bb985c0b859 *man/cdgd1_ml.Rd
c8ae5db605881e76964ede4618498cab *man/cdgd1_pa.Rd
ea74744471a3d3fe351620f3df0d7e6b *man/exp_data.Rd
5 changes: 5 additions & 0 deletions NEWS.md
@@ -1,3 +1,8 @@

# cdgd 0.3.4

* Corrected an error in the example of cdgd1_manual.

# cdgd 0.3.3

* Now allows for a binary outcome.
Expand Down
10 changes: 5 additions & 5 deletions R/cdgd1_manual.R
Expand Up @@ -219,9 +219,9 @@
#' data[,G] <- as.numeric(data[,G])-1
#'
#' GgivenQ.Pred <- rep(NA, nrow(data))
#' GgivenQ.Pred[sample2] <- stats::predict(DgivenGQ.Model.sample1,
#' GgivenQ.Pred[sample2] <- stats::predict(GgivenQ.Model.sample1,
#' newdata = data[sample2,], type="prob")[,2]
#' GgivenQ.Pred[sample1] <- stats::predict(DgivenGQ.Model.sample2,
#' GgivenQ.Pred[sample1] <- stats::predict(GgivenQ.Model.sample2,
#' newdata = data[sample1,], type="prob")[,2]
#'
#'
Expand All @@ -242,9 +242,9 @@


cdgd1_manual <- function(Y,D,G,
YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
data,alpha=0.05) {
YgivenGXQ.Pred_D0,YgivenGXQ.Pred_D1,DgivenGXQ.Pred,
Y0givenQ.Pred_G0,Y0givenQ.Pred_G1,Y1givenQ.Pred_G0,Y1givenQ.Pred_G1,DgivenQ.Pred_G0,DgivenQ.Pred_G1,GgivenQ.Pred,
data,alpha=0.05) {

data <- as.data.frame(data)

Expand Down
34 changes: 17 additions & 17 deletions R/cdgd1_ml.R
Expand Up @@ -49,11 +49,11 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {

data <- as.data.frame(data)

### estimate the nuisance functions with cross-fitting
### estimate the nuisance functions with cross-fitting
sample1 <- sample(nrow(data), floor(nrow(data)/2), replace=FALSE)
sample2 <- setdiff(1:nrow(data), sample1)

### propensity score model
### propensity score model
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger

Expand Down Expand Up @@ -171,7 +171,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
data[,Y] <- as.numeric(data[,Y])-1
}

### outcome predictions
### outcome predictions
YgivenGXQ.Pred_D0 <- YgivenGXQ.Pred_D1 <- rep(NA, nrow(data))

pred_data <- data
Expand All @@ -196,7 +196,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
YgivenGXQ.Pred_D1[sample1] <- stats::predict(YgivenDGXQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]
}

### Estimate p_g(Q)=Pr(G=g | Q)
### Estimate p_g(Q)=Pr(G=g | Q)
data[,G] <- as.factor(data[,G])
levels(data[,G]) <- c("G0","G1") # necessary for caret implementation of ranger

Expand Down Expand Up @@ -256,7 +256,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
)
}

### Estimate E(Y_d | Q,g)
### Estimate E(Y_d | Q,g)
YgivenGXQ.Pred_D1_ncf <- YgivenGXQ.Pred_D0_ncf <- rep(NA, nrow(data)) # ncf stands for non-cross-fitted

pred_data <- data
Expand Down Expand Up @@ -336,14 +336,14 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
Y1givenQ.Pred_G0[sample2] <- stats::predict(Y1givenGQ.Model.sample1, newdata = pred_data[sample2,])
Y1givenQ.Pred_G0[sample1] <- stats::predict(Y1givenGQ.Model.sample2, newdata = pred_data[sample1,])

### The "IPO" (individual potential outcome) function
### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights

IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

### Estimate E(D | Q,g')
### Estimate E(D | Q,g')
data[,D] <- as.factor(data[,D])
levels(data[,D]) <- c("D0","D1") # necessary for caret implementation of ranger

Expand All @@ -355,15 +355,15 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
}
if (algorithm=="ranger") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="ranger",
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="ranger",
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
trControl=caret::trainControl(method="cv", classProbs=TRUE)) )
}
if (algorithm=="gbm") {
message <- utils::capture.output( DgivenGQ.Model.sample1 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample1,], method="gbm",
trControl=caret::trainControl(method="cv")) )
trControl=caret::trainControl(method="cv")) )
message <- utils::capture.output( DgivenGQ.Model.sample2 <- caret::train(stats::as.formula(paste(D, paste(G,paste(Q,collapse="+"),sep="+"), sep="~")), data=data[sample2,], method="gbm",
trControl=caret::trainControl(method="cv")) )
trControl=caret::trainControl(method="cv")) )
}

data[,D] <- as.numeric(data[,D])-1
Expand All @@ -380,7 +380,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
DgivenQ.Pred_G1[sample2] <- stats::predict(DgivenGQ.Model.sample1, newdata = pred_data[sample2,], type="prob")[,2]
DgivenQ.Pred_G1[sample1] <- stats::predict(DgivenGQ.Model.sample2, newdata = pred_data[sample1,], type="prob")[,2]

### The one-step estimate of \xi_{dg}
### The one-step estimate of \xi_{dg}
psi_00 <- mean( (1-data[,G])/(1-mean(data[,G]))*IPO_D0 )
psi_01 <- mean( data[,G]/mean(data[,G])*IPO_D0 )
# Note that this is basically DML2. We could also use DML1:
Expand All @@ -391,7 +391,7 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
#psi_01_S2 <- mean( data[sample1,G]/mean(data[sample1,G])*IPO_D0[sample2] ) # sample 2 estimate
#psi_01 <- (1/2)*(psi_01_S1+psi_01_S2)

### The one-step estimate of \xi_{dgg'g''}
### The one-step estimate of \xi_{dgg'g''}
# There are 8 possible dgg'g'' combinations, so we define a function first
psi_dggg <- function(d,g1,g2,g3) {
if (d==0 & g1==0) {
Expand Down Expand Up @@ -517,10 +517,10 @@ cdgd1_ml <- function(Y,D,G,X,Q,data,algorithm,alpha=0.05,trim1=0,trim2=0) {
cond_effect_se <- se( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) )
Q_dist_se <- se( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) )
cond_selection_se <- se( data[,G]/mean(data[,G])*(data[,Y]-Y_G1) - (1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) -
( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))
( data[,G]/mean(data[,G])*(IPO_D0-psi_01) - (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00) ) -
( EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-EIF_dggg(1,0,0,0)+EIF_dggg(0,0,0,0) ) -
( EIF_dggg(1,1,1,1)-EIF_dggg(0,1,1,1)-EIF_dggg(1,0,1,1)+EIF_dggg(0,0,1,1) ) -
( EIF_dggg(1,0,1,1)-EIF_dggg(0,0,1,1)-EIF_dggg(1,0,1,0)+EIF_dggg(0,0,1,0) ))

cond_Jackson_reduction_se <- se( (1-data[,G])/(1-mean(data[,G]))*(IPO_D0-psi_00)+EIF_dggg(1,0,1,0)-EIF_dggg(0,0,1,0)-(1-data[,G])/(1-mean(data[,G]))*(data[,Y]-Y_G0) )

Expand Down
13 changes: 4 additions & 9 deletions R/cdgd1_pa.R
Expand Up @@ -111,9 +111,11 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
)
}

# IPOs for modelling E(Y_d | Q,g)
### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights
IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/(mean(data[,D]/DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

data_temp <- data[,c(G,Q)]
data_temp$IPO_D0 <- IPO_D0
Expand All @@ -134,13 +136,6 @@ cdgd1_pa <- function(Y,D,G,X,Q,data,alpha=0.05,trim1=0,trim2=0) {
Y0givenQ.Pred_G0 <- stats::predict(Y0givenGQ.Model, newdata = pred_data)
Y1givenQ.Pred_G0 <- stats::predict(Y1givenGQ.Model, newdata = pred_data)

### The "IPO" (individual potential outcome) function
# For each d and g value, we have IE(d,g)=\frac{\one(D=d)}{\pi(d,X,g)}[Y-\mu(d,X,g)]+\mu(d,X,g)
# We stabilize the weight by dividing the sample average of estimated weights

IPO_D0 <- (1-data[,D])/(1-DgivenGXQ.Pred)/mean((1-data[,D])/(1-DgivenGXQ.Pred))*(data[,Y]-YgivenGXQ.Pred_D0) + YgivenGXQ.Pred_D0
IPO_D1 <- data[,D]/DgivenGXQ.Pred/mean(data[,D]/DgivenGXQ.Pred)*(data[,Y]-YgivenGXQ.Pred_D1) + YgivenGXQ.Pred_D1

### Estimate E(D | Q,g')
DgivenGQ.Model <- stats::glm(stats::as.formula(paste(D, paste(paste(G,Q,sep="*"),collapse="+"), sep="~")), data=data, family=stats::binomial(link="logit"))

Expand Down
4 changes: 2 additions & 2 deletions man/cdgd1_manual.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0940e2d

Please sign in to comment.