## WORK GROUP 5

### 1. Bootstraping

In [None]:
library(boot)
library(rpart)
library(randomForest)
library(nnet)
library(gbm)
library(rpart.plot)
library(keras)

In [49]:
## loading the data
Penn <- as.data.frame(read.table("../data/penn_jae.dat", header=T ))
n <- dim(Penn)[1]
p_1 <- dim(Penn)[2]
Penn<- subset(Penn, tg==4 | tg==0)
attach(Penn)

The following objects are masked from Penn (pos = 6):

    abdt, agegt54, agelt35, black, dep, durable, female, hispanic,
    husd, inuidur1, inuidur2, lusd, muld, nondurable, othrace, q1, q2,
    q3, q4, q5, q6, recall, tg




In [None]:
boot.fn <- function(data, index)
  coef(lm(log(inuidur1)~T4+ (female+black+othrace+factor(dep)+q2+q3+q4+q5+q6+agelt35+agegt54+durable+lusd+husd), data = data, subset = index)) 

In [None]:
boot.fn(data, 1:nrow(data))

In [None]:
set.seed(123)
boot.fn(data, sample(nrow(data), nrow(data), replace = T))

In [None]:
lineal = boot(data, boot.fn, 1000)
lineal

In [None]:
#t4
plot(lineal, index=2) 

In [None]:
#female
plot(lineal, index=3) 

In [None]:
summary(lm(log(inuidur1)~T4+ (female+black+othrace+factor(dep)+q2+q3+q4+q5+q6+agelt35+agegt54+durable+lusd+husd), data = data))$coef[2:4,]

### 2. Comparative models

In [1]:
library(lmtest)
library(sandwich)

"package 'lmtest' was built under R version 4.0.5"
Loading required package: zoo

"package 'zoo' was built under R version 4.0.5"

Attaching package: 'zoo'


The following objects are masked from 'package:base':

    as.Date, as.Date.numeric


"package 'sandwich' was built under R version 4.0.5"


In [2]:
load("../data/cps2012.RData")
dim(data)

#### OLS

In [3]:
set.seed(1234)
training <- sample(nrow(data), nrow(data)*(3/4), replace=FALSE)

data_train <- data[training,]
data_test <- data[-training,]

In [4]:
dim(data_test)
dim(data_train)

In [5]:
X_basic <-  "female + female:(widowed+divorced+separated+nevermarried+hsd08+hsd911+hsg+cg+ad+mw+so+we+exp1+exp2+exp3)"
X_flex <- "female + female:(widowed+divorced+separated+nevermarried+hsd08+hsd911+hsg+cg+ad+mw+so+we+exp1+exp2+exp3)+(widowed+divorced+separated+nevermarried+hsd08+hsd911+hsg+cg+ad+mw+so+we+exp1+exp2+exp3)^2"
formula_basic <- as.formula(paste("lnw", "~", X_basic))
formula_flex <- as.formula(paste("lnw", "~", X_flex))

print(class(formula_basic))

[1] "formula"


In [7]:
dim(model_X_basic_train)

In [13]:
dim(model_X_flex_train)

In [9]:
dim(model_X_basic_test)

In [12]:
dim(model_X_flex_test)

In [11]:
model_X_basic_train <- model.matrix(formula_basic,data_train)
model_X_basic_test <- model.matrix(formula_basic,data_test)
p_basic <- dim(model_X_basic_train)[2]

model_X_flex_train <- model.matrix(formula_flex,data_train)
model_X_flex_test <- model.matrix(formula_flex,data_test)
p_flex <- dim(model_X_flex_train)[2]

In [14]:
Y_train <- data_train$lnw
Y_test <- data_test$lnw

In [15]:
## OLS basic 
fit.lm.basic <- lm(formula_basic, data_train)
fit.lm.basic


Call:
lm(formula = formula_basic, data = data_train)

Coefficients:
        (Intercept)               female       female:widowed  
            2.91033             -0.71193             -0.10023  
    female:divorced     female:separated  female:nevermarried  
           -0.01400             -0.08382             -0.02270  
       female:hsd08        female:hsd911           female:hsg  
           -0.74701             -0.51944             -0.17263  
          female:cg            female:ad            female:mw  
            0.36295              0.60386             -0.10492  
          female:so            female:we          female:exp1  
           -0.05738             -0.01726              0.04753  
        female:exp2          female:exp3  
           -0.20025              0.02943  


In [16]:
# Compute the Out-Of-Sample Performance
yhat.lm.basic <- predict(fit.lm.basic, newdata=data_test)
cat("The mean squared error (MSE) using the basic model is equal to" , mean((Y_test-yhat.lm.basic)^2)) # MSE OLS (basic model)   

The mean squared error (MSE) using the basic model is equal to 0.3949444

In [17]:
MSE.lm.basic <- summary(lm((Y_test-yhat.lm.basic)^2~1))$coef[1:2]
MSE.lm.basic

In [18]:
R2.lm.basic <- 1-MSE.lm.basic[1]/var(Y_test)
cat("The R^2 using the basic model is equal to",R2.lm.basic) # MSE OLS (basic model) 

The R^2 using the basic model is equal to 0.1102755

In [None]:
## OLS flexible model

In [19]:
# ols (flexible model)
fit.lm.flex <- lm(formula_flex, data_train) 

# Compute the Out-Of-Sample Performance
options(warn=-1)
yhat.lm.flex <- predict(fit.lm.flex, newdata=data_test)
MSE.lm.flex <- summary(lm((Y_test-yhat.lm.flex)^2~1))$coef[1:2]

R2.lm.flex <- 1-MSE.lm.flex[1]/var(Y_test)
cat("The R^2 using the flexible model is equal to",R2.lm.flex) # MSE OLS (flexible model) 

The R^2 using the flexible model is equal to 0.2287174

#### LASSO (HDM) 

In [70]:
install.packages("hdm")

Installing package into 'D:/Kenia/Documents/R/win-library/4.0'
(as 'lib' is unspecified)



package 'hdm' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Kenia\AppData\Local\Temp\Rtmp2FdbpB\downloaded_packages


In [20]:
library(hdm) 
fit.rlasso  <- rlasso(formula_basic, data_train, post=FALSE)
fit.rlasso.post <- rlasso(formula_basic, data_train, post=TRUE)

yhat.rlasso   <- predict(fit.rlasso, newdata=data_test)
yhat.rlasso.post   <- predict(fit.rlasso.post, newdata=data_test)

MSE.lasso <- summary(lm((Y_test-yhat.rlasso)^2~1))$coef[1:2]
MSE.lasso.post <- summary(lm((Y_test-yhat.rlasso.post)^2~1))$coef[1:2]

R2.lasso <- 1-MSE.lasso[1]/var(Y_test)
R2.lasso.post <- 1-MSE.lasso.post[1]/var(Y_test)
cat("The R^2 using the basic model is equal to",R2.lasso,"for lasso and",R2.lasso.post,"for post-lasso") # R^2 lasso/post-lasso (basic model) 

The R^2 using the basic model is equal to 0.1062557 for lasso and 0.1042477 for post-lasso

In [21]:
fit.rlasso.flex  <- rlasso(formula_flex, data_train, post=FALSE)
fit.rlasso.post.flex <- rlasso(formula_flex, data_train, post=TRUE)
yhat.rlasso.flex   <- predict(fit.rlasso.flex, newdata=data_test)
yhat.rlasso.post.flex   <- predict(fit.rlasso.post.flex, newdata=data_test)

MSE.lasso.flex <- summary(lm((Y_test-yhat.rlasso.flex)^2~1))$coef[1:2]
MSE.lasso.post.flex <- summary(lm((Y_test-yhat.rlasso.post.flex)^2~1))$coef[1:2]

R2.lasso.flex <- 1-MSE.lasso.flex[1]/var(Y_test)
R2.lasso.post.flex <- 1-MSE.lasso.post.flex[1]/var(Y_test)
cat("The R^2 using the flexible model is equal to",R2.lasso.flex,"for lasso and",R2.lasso.post.flex,"for post-lasso") # R^2 lasso/post-lasso (flexible model) 

The R^2 using the flexible model is equal to 0.2271759 for lasso and 0.22455 for post-lasso

#### LASSO/ELASTIC NET/RIDGE LASSO CV

In [22]:
dim(model_X_basic_test)

In [23]:
library(glmnet)
fit.lasso.cv   <- cv.glmnet(model_X_basic_train, Y_train, family="gaussian", alpha=1)
fit.ridge   <- cv.glmnet(model_X_basic_train, Y_train, family="gaussian", alpha=0)
fit.elnet   <- cv.glmnet(model_X_basic_train, Y_train, family="gaussian", alpha=.5)

yhat.lasso.cv    <- predict(fit.lasso.cv, newx = model_X_basic_test)
yhat.ridge   <- predict(fit.ridge, newx = model_X_basic_test)
yhat.elnet   <- predict(fit.elnet, newx = model_X_basic_test)

MSE.lasso.cv <- summary(lm((Y_test-yhat.lasso.cv)^2~1))$coef[1:2]
MSE.ridge <- summary(lm((Y_test-yhat.ridge)^2~1))$coef[1:2]
MSE.elnet <- summary(lm((Y_test-yhat.elnet)^2~1))$coef[1:2]

R2.lasso.cv <- 1-MSE.lasso.cv[1]/var(Y_test)
R2.ridge <- 1-MSE.ridge[1]/var(Y_test)
R2.elnet <- 1-MSE.elnet[1]/var(Y_test)

# R^2 using cross-validation (basic model) 
cat("R^2 using cross-validation for lasso, ridge and elastic net in the basic model:",R2.lasso.cv,R2.ridge,R2.elnet)

Loading required package: Matrix

Loaded glmnet 4.1-3



R^2 using cross-validation for lasso, ridge and elastic net in the basic model: 0.08193926 0.09136083 0.08870976

In [24]:
dim(model_X_flex_test)

In [25]:
fit.lasso.cv.flex   <- cv.glmnet(model_X_flex_train, Y_train, family="gaussian", alpha=1)
fit.ridge.flex   <- cv.glmnet(model_X_flex_train, Y_train, family="gaussian", alpha=0)
fit.elnet.flex   <- cv.glmnet(model_X_flex_train, Y_train, family="gaussian", alpha=.5)

yhat.lasso.cv.flex    <- predict(fit.lasso.cv.flex , newx = model_X_flex_test)
yhat.ridge.flex    <- predict(fit.ridge.flex , newx = model_X_flex_test)
yhat.elnet.flex    <- predict(fit.elnet.flex , newx = model_X_flex_test)

MSE.lasso.cv.flex  <- summary(lm((Y_test-yhat.lasso.cv.flex )^2~1))$coef[1:2]
MSE.ridge.flex  <- summary(lm((Y_test-yhat.ridge.flex )^2~1))$coef[1:2]
MSE.elnet.flex  <- summary(lm((Y_test-yhat.elnet.flex )^2~1))$coef[1:2]

R2.lasso.cv.flex  <- 1-MSE.lasso.cv.flex [1]/var(Y_test)
R2.ridge.flex  <- 1-MSE.ridge.flex [1]/var(Y_test)
R2.elnet.flex  <- 1-MSE.elnet.flex [1]/var(Y_test)

# R^2 using cross-validation (flexible model) 
cat("R^2 using cross-validation for lasso, ridge and elastic net in the flexible model:",R2.lasso.cv.flex,R2.ridge.flex,R2.elnet.flex)

R^2 using cross-validation for lasso, ridge and elastic net in the flexible model: 0.2134812 0.2106494 0.2095135

In [26]:
MSE.lasso.cv.flex[1]
MSE.lasso.post.flex[1]

#### Non Linear Model 

In [42]:
install.packages("gbm")
install.packages("keras")

Installing package into 'D:/Kenia/Documents/R/win-library/4.0'
(as 'lib' is unspecified)

Installing package into 'D:/Kenia/Documents/R/win-library/4.0'
(as 'lib' is unspecified)



package 'gbm' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Kenia\AppData\Local\Temp\RtmpIruQP5\downloaded_packages


Installing package into 'D:/Kenia/Documents/R/win-library/4.0'
(as 'lib' is unspecified)

also installing the dependencies 'RcppTOML', 'here', 'config', 'tfautograph', 'reticulate', 'tensorflow', 'tfruns', 'zeallot'





  There are binary versions available but the source versions are later:
           binary source needs_compilation
reticulate   1.24   1.25              TRUE
tensorflow  2.8.0  2.9.0             FALSE
keras       2.8.0  2.9.0             FALSE

  Binaries will be installed
package 'RcppTOML' successfully unpacked and MD5 sums checked
package 'here' successfully unpacked and MD5 sums checked
package 'config' successfully unpacked and MD5 sums checked
package 'tfautograph' successfully unpacked and MD5 sums checked
package 'reticulate' successfully unpacked and MD5 sums checked
package 'tfruns' successfully unpacked and MD5 sums checked
package 'zeallot' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Kenia\AppData\Local\Temp\RtmpIruQP5\downloaded_packages


installing the source packages 'tensorflow', 'keras'




In [52]:
library(rpart)
library(nnet)
library(gbm)
library(rpart.plot)
library(keras)

Loaded gbm 2.1.8



ERROR: Error in library(rpart.plot): there is no package called 'rpart.plot'


In [27]:
library(xtable)
table<- matrix(0, 15, 3)
table[1,1:2]   <- MSE.lm.basic
table[2,1:2]   <- MSE.lm.flex
table[3,1:2]   <- MSE.lasso
table[4,1:2]   <- MSE.lasso.post
table[5,1:2]   <- MSE.lasso.flex
table[6,1:2]   <- MSE.lasso.post.flex
table[7,1:2]   <- MSE.lasso.cv
table[8,1:2]   <- MSE.ridge
table[9,1:2]   <- MSE.elnet
table[10,1:2]   <- MSE.lasso.cv.flex
table[11,1:2]  <- MSE.ridge.flex
table[12,1:2]  <- MSE.elnet.flex
table[13,1:2]  <- MSE.rf
table[14,1:2]  <- MSE.boost
table[15,1:2]  <- MSE.pt



table[1,3]   <- R2.lm.basic
table[2,3]   <- R2.lm.flex
table[3,3]   <- R2.lasso
table[4,3]   <- R2.lasso.post
table[5,3]   <- R2.lasso.flex
table[6,3]   <- R2.lasso.post.flex
table[7,3]   <- R2.lasso.cv
table[8,3]   <- R2.ridge
table[9,3]   <- R2.elnet
table[10,3]   <- R2.lasso.cv.flex
table[11,3]  <- R2.ridge.flex
table[12,3]  <- R2.elnet.flex
table[13,3]  <- R2.rf
table[14,3]  <- R2.boost
table[15,3]  <- R2.pt




colnames(table)<- c("MSE", "S.E. for MSE", "R-squared")
rownames(table)<- c("Least Squares (basic)","Least Squares (flexible)", "Lasso", "Post-Lasso","Lasso (flexible)","Post-Lasso (flexible)", 
                    "Cross-Validated lasso", "Cross-Validated ridge","Cross-Validated elnet","Cross-Validated lasso (flexible)","Cross-Validated ridge (flexible)","Cross-Validated elnet (flexible)",  
                    "Random Forest","Boosted Trees", "Pruned Tree")
tab <- xtable(table, digits =3)
print(tab,type="latex") # set type="latex" for printing table in LaTeX
tab

ERROR: Error in eval(expr, envir, enclos): objeto 'MSE.rf' no encontrado
