# 1. Neyman Orthogonality Proof

## Given:
1. $ y = \alpha D + \beta'W + \epsilon $
2. $ \tilde{y} = y - X'W; \quad \tilde{D} = D - X_{n1.1}W $
3. $ \alpha = \arg \min_{\alpha \in \mathbb{R}} E\left[ (\tilde{y} - \alpha \tilde{D}^2) \right] = E[\tilde{D}^2]^{-1} E[\tilde{D} {y}] $

## The objective is to prove:
$ \frac{d\alpha}{d\eta} = -\left( \frac{\partial M}{\partial \alpha} (\alpha, \eta_0) \right)^{-1} \left( \frac{\partial M}{\partial \eta} (\alpha, \eta_0) \right) $

## Defining the Objective Function
$ M(\alpha, \eta) = E\left[ (\tilde{y} - \alpha \tilde{D})^2 \right] $

## Setting the First-Order Condition (FOC) for the minimization problem
$ \frac{\partial M}{\partial \alpha} = 2 E\left[ (\tilde{y} - \alpha \tilde{D})(-\tilde{D}) \right] = 0 $
This simplifies to:
$ E\left[ \tilde{D} (\tilde{y} - \alpha \tilde{D}) \right] = 0 $
Solving for $\alpha$:
$ \alpha = \frac{E[\tilde{D} \tilde{y}]}{E[\tilde{D}^2]} $

## Implicit Function Theorem Application
$ \frac{d\alpha}{d\eta} = -\left( \frac{\partial M}{\partial \alpha} \right)^{-1} \left( \frac{\partial M}{\partial \eta} \right) $

## Calculate Partial Derivatives

**Partial Derivative with Respect to $\alpha$**:
$ \frac{\partial M}{\partial \alpha} = 2 E\left[ \tilde{D}^2 \right] $
Given that $ E[\tilde{D} (\tilde{y} - \alpha \tilde{D})] = 0 $, it follows that:
$ \frac{\partial M}{\partial \alpha} = 2 E\left[ \tilde{D}^2 \right] $

**Partial Derivative with Respect to $\eta$**:
$ \frac{\partial M}{\partial \eta} = 2 E\left[ (\tilde{y} - \alpha \tilde{D})(-\frac{\partial \tilde{D}}{\partial \eta}) \right] $
This simplifies to:
$ \frac{\partial M}{\partial \eta} = -2 E\left[ \tilde{D} (\tilde{y} - \alpha \tilde{D}) \right] $

## Combine the Results
Using the implicit function theorem:
$ \frac{d\alpha}{d\eta} = -\left( \frac{\partial M}{\partial \alpha} \right)^{-1} \left( \frac{\partial M}{\partial \eta} \right) $
Substituting the partial derivatives:
$ \frac{d\alpha}{d\eta} = -\left( 2 E\left[ \tilde{D}^2 \right] \right)^{-1} \left( -2 E\left[ \tilde{D} (\tilde{y} - \alpha \tilde{D}) \right] \right) $
Simplifies to:
$ \frac{d\alpha}{d\eta} = \left( E\left[ \tilde{D}^2 \right] \right)^{-1} E\left[ \tilde{D} (\tilde{y} - \alpha \tilde{D}) \right] $

Since $ E\left[ \tilde{D} (\tilde{y} - \alpha \tilde{D}) \right] = 0 $, we conclude:
$ \frac{d\alpha}{d\eta} = 0 $

Thus, we have shown that $\frac{d\alpha}{d\eta} = 0$, which implies that $ S_1 = 0 $ and $ S_2 = 0 $. \( S_2 = 0 \)._2 = 0 $.





y defined as 0

# 2. Code Section

In [1]:
options(pkgType = "binary", repos = "https://cran-archive.r-project.org/")

In [2]:
install.packages("hdm")
install.packages("parallel")
install.packages("ggplot2")
install.packages("foreach")
install.packages("doParallel")
install.packages("dplyr")
install.packages("broom")

library(hdm)
library(parallel)
library(ggplot2)
library(foreach)
library(doParallel)
library(dplyr)
library(broom)

package 'hdm' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages


"package 'parallel' is a base package, and should not be updated"

package 'ggplot2' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages
package 'foreach' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages
package 'doParallel' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages
package 'dplyr' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages
package 'broom' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages


"package 'doParallel' was built under R version 3.6.3"Loading required package: iterators
"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

"package 'broom' was built under R version 3.6.3"

In [3]:
install.packages("glmnet")
library(glmnet)

package 'glmnet' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\user\AppData\Local\Temp\Rtmp4GjMHG\downloaded_packages


"package 'glmnet' was built under R version 3.6.3"Loading required package: Matrix
"package 'Matrix' was built under R version 3.6.3"Loaded glmnet 4.1-1


## 2.1 Orthogonal Learning

In [4]:
gen_data <- function(n, d, p, delta, base, beta) {
  X <- matrix(rnorm(n * d), nrow = n, ncol = d)
  D <- rbinom(n, 1, p)
  y <- 10 * D + beta * X[, 1] + rnorm(n)
  return(list(y = y, D = D, X = X))
}

In [5]:
# Two Means Estimator
twomeans <- function(y, D) {
  hat0 <- mean(y[D == 0])
  hat1 <- mean(y[D == 1])
  V0 <- var(y[D == 0]) / mean(1 - D)
  V1 <- var(y[D == 1]) / mean(D)
  hat <- hat1 - hat0
  stderr <- sqrt((V0 + V1) / length(y))
  return(list(hat = hat, stderr = stderr))
}

In [6]:
# Partialling out Estimator
partialling_out <- function(y, D, W) {
  yfit <- lm(y ~ W)
  yhat <- predict(yfit, as.data.frame(W))
  yres <- y - as.numeric(yhat)
  Dfit <- lm(D ~ W)
  Dhat <- predict(Dfit, as.data.frame(W))
  Dres <- D - as.numeric(Dhat)
  hat <- mean(yres * Dres) / mean(Dres^2)
  epsilon <- yres - hat * Dres
  V <- mean(epsilon^2 * Dres^2) / mean(Dres^2)^2
  stderr <- sqrt(V / length(y))
  return(list(hat = hat, stderr = stderr))
}

In [7]:
# Double Lasso Estimator

double_lasso <- function(y, D, W) {
  yfit.rlasso <- rlasso(W, y, post = FALSE)
  yhat.rlasso <- predict(yfit.rlasso, as.data.frame(W))
  yres <- y - as.numeric(yhat.rlasso)
  Dfit.rlasso <- rlasso(W, D, post = FALSE)
  Dhat.rlasso <- predict(Dfit.rlasso, as.data.frame(W))
  Dres <- D - as.numeric(Dhat.rlasso)
  hat <- mean(yres * Dres) / mean(Dres^2)
  epsilon <- yres - hat * Dres
  V <- mean(epsilon^2 * Dres^2) / mean(Dres^2)^2
  stderr <- sqrt(V / length(y))
  return(list(hat = hat, stderr = stderr))
}

In [8]:
# Parallel computing

run_simulation <- function(B, n, d, delta, base, beta) {
  n_experiments <- B
  cov <- numeric(n_experiments)
  hats <- numeric(n_experiments)
  stderrs <- numeric(n_experiments)
  
  cl <- makeCluster(detectCores() - 1)
  registerDoParallel(cl)
  
  clusterExport(cl, c("gen_data", "double_lasso", "n", "d", "delta", "base", "beta"))
  
  results <- foreach(i = 1:n_experiments, .combine = rbind, .packages = c("hdm", "stats")) %dopar% {
    data <- gen_data(n, d, 0.2, delta, base, beta)
    y <- data$y
    D <- data$D
    X <- data$X
    results <- double_lasso(y, D, cbind(D * X, X))
    hat <- results$hat
    stderr <- results$stderr
    ci_lower <- hat - 1.96 * stderr
    ci_upper <- hat + 1.96 * stderr
    coverage <- (ci_lower <= delta) & (delta <= ci_upper)
    return(c(hat, stderr, coverage))
  }
  
  stopCluster(cl)
  
  hats <- results[, 1]
  stderrs <- results[, 2]
  cov <- results[, 3]
  
  coverage_rate <- mean(cov)
  
  return(list(coverage_rate = coverage_rate, hats = hats, stderrs = stderrs))
}

In [9]:
# Simulation for different B values

B_values <- c(100, 1000, 10000)
n <- 100
d <- 100
delta <- 10
base <- 0.3
beta <- 0.5

In [22]:
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)

clusterExport(cl, c("run_simulation", "n", "d", "delta", "base", "beta"))  
results_list <- parLapply(cl, B_values, function(B) {
  system.time(run_simulation(B, n, d, delta, base, beta))
})

ERROR: Error in checkForRemoteErrors(val): 3 nodes produced errors; first error: no se pudo encontrar la función "makeCluster"


In [18]:
par(mfrow = c(3, 1))
for (i in 1:length(B_values)) {
  hist(results_list[[i]]$hats, 
       main = paste("Distribution of Estimates for B =", B_values[i]),
       xlab = "Estimate", col = "skyblue")
}

ERROR: Error in hist(results_list[[i]]$hats, main = paste("Distribution of Estimates for B =", : objeto 'results_list' no encontrado


## Answer: 
Parallel computing can lead to lower running times since it divides the workload into smaller chunks that can be processed independently. Each chunk is assigned to a different processing unit, enabling parallel execution. This division of tasks reduces the overall time required to complete the computation compared to processing the entire workload on a single processor.

## 2.2 Double Lasso - using school data

In [2]:
data <- read.csv("bruhn2016.csv")
data <- na.omit(data)
head(data, 10)

Unnamed: 0,outcome.test.score,treatment,school,is.female,mother.attended.secondary.school,father.attened.secondary.school,failed.at.least.one.school.year,family.receives.cash.transfer,has.computer.with.internet.at.home,is.unemployed,has.some.form.of.income,saves.money.for.future.purchases,intention.to.save.index,makes.list.of.expenses.every.month,negotiates.prices.or.payment.methods,financial.autonomy.index
3,56.67166,1,35002914,1,1,1,0,0,0,1,0,0,48,0,1,56
4,29.07938,0,35908915,1,0,0,0,0,0,0,0,0,42,0,0,27
5,49.56353,1,33047324,1,0,0,0,0,1,0,1,0,50,0,1,31
6,43.09943,0,53012542,1,0,0,0,1,1,0,1,0,93,0,1,77
7,71.3296,0,53006984,1,1,0,0,0,1,1,1,0,52,0,1,43
8,80.06344,0,33049475,1,1,1,0,0,0,0,1,0,50,0,0,31
9,88.75163,1,35925639,1,1,0,0,0,0,0,0,0,59,0,1,50
10,61.43614,1,35008758,1,0,0,0,0,1,1,1,0,44,0,1,14
12,66.01345,0,33024537,1,0,0,0,0,1,1,1,1,60,0,1,56
14,64.26411,1,53011520,1,0,0,1,0,0,0,0,0,47,0,1,46


In [3]:
names(data) <- gsub("\\.", "_", names(data))

x <- data[, !(names(data) %in% c("outcome_test_score"))]
y <- data$outcome_test_score

In [8]:
# Fit the OLS model
ols_model <- lm(y ~ ., data = data.frame(y, x))

# Get the main coefficient and its standard error
ols_coef <- coef(summary(ols_model))
treatment_ols_coef <- ols_coef["treatment", "Estimate"]
std_error <- ols_coef["treatment", "Std. Error"]
treatment_ols_ci_lower <- treatment_ols_coef - 1.96 * ols_coef["treatment", "Std. Error"]
treatment_ols_ci_upper <- treatment_ols_coef + 1.96 * ols_coef["treatment", "Std. Error"]

summary(ols_model)

ols_results <- list(coef = ols_coef, std_error = std_error, lower_ci = treatment_ols_ci_lower, upper_ci = treatment_ols_ci_upper)


Call:
lm(formula = y ~ ., data = data.frame(y, x))

Residuals:
    Min      1Q  Median      3Q     Max 
-41.331  -9.411   0.421   9.595  45.906 

Coefficients:
                                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)                           5.191e+01  7.872e-01  65.938  < 2e-16 ***
treatment                             4.291e+00  2.457e-01  17.459  < 2e-16 ***
school                               -5.430e-08  1.623e-08  -3.345 0.000826 ***
is_female                             2.665e+00  2.541e-01  10.488  < 2e-16 ***
mother_attended_secondary_school      6.849e-01  2.867e-01   2.389 0.016927 *  
father_attened_secondary_school       7.407e-01  2.898e-01   2.556 0.010601 *  
failed_at_least_one_school_year      -5.858e+00  2.827e-01 -20.725  < 2e-16 ***
family_receives_cash_transfer        -1.581e+00  2.761e-01  -5.725 1.06e-08 ***
has_computer_with_internet_at_home   -2.147e-01  2.747e-01  -0.782 0.434381    
is_unemployed                        -2

In [10]:
# Save the results
ols_results <- data.frame(
  Estimator = ols_coef,
  Std_Error = std_error,
  Lower_CI = treatment_ols_ci_lower,
  Upper_CI = treatment_ols_ci_upper
)

# Print the results
print(ols_results)

                                     Estimator.Estimate Estimator.Std..Error
(Intercept)                                5.190746e+01         7.872108e-01
treatment                                  4.290505e+00         2.457496e-01
school                                    -5.429764e-08         1.623340e-08
is_female                                  2.664799e+00         2.540707e-01
mother_attended_secondary_school           6.848575e-01         2.867163e-01
father_attened_secondary_school            7.406970e-01         2.897887e-01
failed_at_least_one_school_year           -5.858304e+00         2.826700e-01
family_receives_cash_transfer             -1.580694e+00         2.760917e-01
has_computer_with_internet_at_home        -2.147292e-01         2.746803e-01
is_unemployed                             -2.148296e+00         2.938031e-01
has_some_form_of_income                    2.235208e+00         2.825041e-01
saves_money_for_future_purchases          -1.104369e-01         3.652471e-01

In [19]:
# Perform cross-validation to find the best lambda
n_folds <- 5
n <- nrow(x)
folds <- sample(1:n_folds, n, replace = TRUE)

best_lambda <- 0
best_cv_score <- Inf

lambda_seq <- seq(0.1, 10, by = 0.1)  # Rango de valores lambda

for (lambda in lambda_seq) {
  cv_scores <- numeric(n_folds)
  for (fold in 1:n_folds) {
    train_idx <- which(folds != fold)
    test_idx <- which(folds == fold)
    
    lasso_model <- cv.glmnet(as.matrix(x[train_idx, ]), y[train_idx], alpha = 1, lambda = lambda)
    
    predictions <- predict(lasso_model, newx = as.matrix(x[test_idx, ]))
    cv_score <- mean((y[test_idx] - predictions)^2)
    cv_scores[fold] <- cv_score
  }
  
  mean_cv_score <- mean(cv_scores)
  if (mean_cv_score < best_cv_score) {
    best_cv_score <- mean_cv_score
    best_lambda <- lambda
  }
}

print(paste("Best Lambda:", best_lambda))

ERROR: Error in cv.glmnet(as.matrix(x[train_idx, ]), y[train_idx], alpha = 1, : Need more than one value of lambda for cv.glmnet


In [20]:
# Fit the LASSO model with the best lambda
lasso_model <- glmnet(as.matrix(x), y, alpha = 1, lambda = best_lambda)

# Get the main coefficient
lasso_coef <- coef(lasso_model)[2]  # Treatment coefficient

# Make predictions to calculate residuals
predictions <- predict(lasso_model, newx = as.matrix(x))

# Calculate residuals
lasso_residuals <- y - predictions
std_lasso <- sd(lasso_residuals) / sqrt(length(y))

# Get the 95% confidence interval
lasso_conf <- quantile(lasso_residuals, c(0.025, 0.975))

# Save the results
lasso_results <- data.frame(
  Method = "LASSO",
  Estimator = lasso_coef,
  Std_Error = std_lasso,
  Lower_CI = lasso_conf[1],
  Upper_CI = lasso_conf[2]
)

# Print the results
print(lasso_results)

     Method Estimator Std_Error  Lower_CI Upper_CI
2.5%  LASSO  4.290498 0.1225869 -26.36989 25.15332


In [25]:
# Using the Double LASSO with theoretical lambda
library(hdm)

dml <- rlassoEffect(as.matrix(x), y, as.matrix(x[, 1]), method = "double selection")
dml_res <- summary(dml)

if (!is.null(dml_res$coef) && !is.null(dml_res$se)) {
  # Extract the main coefficient and its standard error
  dml_coef <- dml_res$coef[2]
  dml_std <- dml_res$se[2]

# Calculate the 95% confidence interval
z <- 1.96  # 95% confidence level
dml_conf <- c(dml_coef - z * dml_std, dml_coef + z * dml_std)

# Save the results
dml_results <- data.frame(
  Method = "Double LASSO Theoretical Lambda",
  Estimator = dml_coef,
  Std_Error = dml_std,
  Lower_CI = dml_conf[1],
  Upper_CI = dml_conf[2]
)

# Print the results
print(dml_results)

ERROR: Error in parse(text = x, srcfile = src): <text>:24:0: unexpected end of input
22:   Upper_CI = dml_conf[2]
23: )
   ^


In [28]:
# Using the Double LASSO with partialling out
dml_partial <- rlassoEffect(as.matrix(x), y, as.matrix(x[, 1]), method = "partialling out")

if (!is.null(dml_partial$coef) && !is.null(dml_partial$se)) {
  # Extract the main coefficient and its standard error
  dml_partial_coef <- dml_partial$coef[2]
  dml_partial_std <- dml_partial$se[2]

# Calculate the 95% confidence interval
dml_partial_conf <- c(dml_partial_coef - z * dml_partial_std, dml_partial_coef + z * dml_partial_std)

# Save the results
dml_partial_results <- data.frame(
  Method = "Double LASSO Partialling Out",
  Estimator = dml_partial_coef,
  Std_Error = dml_partial_std,
  Lower_CI = dml_partial_conf[1],
  Upper_CI = dml_partial_conf[2]
)

# Print the results
print(dml_partial_results)

ERROR: Error in parse(text = x, srcfile = src): <text>:23:0: unexpected end of input
21: # Print the results
22: print(dml_partial_results)
   ^


In [34]:
# Plot the results
methods <- c("OLS", "Cross-Validation", "Theoretical Lambda", "Partialling Out")
coefficients <- c(ols_coef, lasso_coef, dml_coef, dml_partial_coef)
lower_CI <- c(Lower_CI, lasso_conf[1], dml_conf[1], dml_partial_conf[1])
upper_CI <- c(Upper_CI, lasso_conf[2], dml_conf[2], dml_partial_conf[2])

plot(1:length(methods), coefficients, ylim = c(min(lower_CI), max(upper_CI)), ylab = "Coefficient", xlab = "Method", 
     main = "Double LASSO Estimators and Confidence Intervals", xaxt = "n", type = "o")
axis(1, at = 1:length(methods), labels = methods)

segments(1:length(methods), lower_CI, 1:length(methods), upper_CI)

ERROR: Error in eval(expr, envir, enclos): objeto 'Lower_CI' no encontrado
