In [109]:
library(devtools)
install_github("gabrielrvsc/HDeconometrics")
library(HDeconometrics)

Skipping install of 'HDeconometrics' from a github remote, the SHA1 (38afe6ce) has not changed since last install.
  Use `force = TRUE` to force installation



In [133]:
library(glmnet)
options(warn=-1)
install.packages("mvtnorm")
library(mvtnorm)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [136]:
generate_data = function(n, p, rho, signal) {
  SIGMA = c()
  for (i in 1:p){
    for (j in 1:p){
      SIGMA = c(SIGMA, rho^abs(i-j))
    }
  }
  SIGMA = matrix(SIGMA, nrow = p)
  X = rmvnorm(n, mean = rep(0, p), sigma = SIGMA)
  J = 1:p
  if (signal == "sparse"){
    beta = 2 / sqrt(n) * as.numeric(J <= sqrt(p))
  }
  if (signal == "dense"){
    beta = 5 / (J * sqrt(n))
  }
  R2 = 0.8
  sigma2 = ((1-R2)/R2 * t(beta) %*% SIGMA %*% beta)
  epsilon = rnorm(n, 0, sqrt(sigma2))
  y = X %*% beta + epsilon
  return(cbind(data.frame(y=y), data.frame(X)))
}

In [119]:
n = 100
ps = c(10, 25, 50)
rhos = c(0, 0.25, 0.5)
lambdas <- 10^seq(-1, -3, by = -.1)

In [120]:
aic_bic = function(trainX, trainY, testX, testY, cc){
  ridge = ic.glmnet(trainX, trainY, crit = cc, alpha=0)
  first.step.coef = coef(ridge)[-1]
  penalty.factor = abs(first.step.coef+1/sqrt(nrow(x)))^(-1)
  adaridge = ic.glmnet(trainX, trainY, crit = "aic", penalty.factor=penalty.factor)
  pred = predict(adaridge,newdata=testX)
  MSE = mean((testY-pred)^2)
  return(MSE)          
}

In [114]:
loocv = function(trainX, trainY, testX, testY){
  cv_fit = cv.glmnet(trainX, as.vector(trainY), nfolds = 100, alpha = 0, lambda = lambdas)
  opt_lambda <- cv_fit$lambda.min
  fit <- cv_fit$glmnet.fit
  pred <- predict(fit, s = opt_lambda, newx = testX)
  MSE = mean((testY-pred)^2)
  return(MSE)
}

In [None]:
ridge.res.df = data.frame(type = "", p = "", rho = "", criterion = "", MSE = "")

In [None]:
for (tt in list("sparse", "dense")) {
  for (pp in ps){
    for (rr in rhos){
      aic.res.df = data.frame(type = "", p = "", rho = "", MSE = "")
      bic.res.df = data.frame(type = "", p = "", rho = "", MSE = "")
      loocv.res.df = data.frame(type = "", p = "", rho = "", MSE = "")
      
      for (i in 1:1000) {
        data = generate_data(120, pp, rr, tt)
        x = data[, -1]
        y = data[, 1]
        trainX = x[1:100, ]
        trainX = scale(trainX, center = TRUE, scale = TRUE)
        testX = x[101:120, ]
        testX = scale(testX, center = TRUE, scale = TRUE)

        y = read.csv(file = txtY, row.names=1)
        trainY = y[1:100, ]
        testY = y[101:120, ]
        trainY = scale(trainY, center = TRUE, scale = TRUE)
        testY = scale(testY, center = TRUE, scale = TRUE)

        MSE1 = aic_bic(trainX, trainY, testX, testY, "aic")
        aic.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, MSE1)
        
        MSE2 = aic_bic(trainX, trainY, testX, testY, "bic")
        bic.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, MSE2)
        
        MSE3 = loocv(trainX, trainY, testX, testY)
        loocv.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, MSE2)
      }   

      ridge.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, "aic", mean(as.numeric(aic.res.df$MSE)))
      ridge.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, "bic", mean(as.numeric(bic.res.df$MSE)))
      ridge.res.df[nrow(ridge.res.df)+1, ] <- c(tt, pp, rr, "loocv",mean(as.numeric(loocv.res.df$MSE)))
    }
  }
}




In [117]:
print(ridge.res.df)

     type  p  rho criterion                MSE
1                                             
2  sparse 10    0       aic   0.32301370500182
3  sparse 10    0       bic   0.32301370500182
4  sparse 10    0     loocv   0.32301370500182
5  sparse 10 0.25       aic  0.405402771474179
6  sparse 10 0.25       bic  0.405402771474179
7  sparse 10 0.25     loocv  0.405402771474179
8  sparse 10  0.5       aic  0.260524716411254
9  sparse 10  0.5       bic  0.260524716411254
10 sparse 10  0.5     loocv  0.260524716411254
11 sparse 25    0       aic  0.356156347540512
12 sparse 25    0       bic  0.358046184318616
13 sparse 25    0     loocv  0.358046184318616
14 sparse 25 0.25       aic  0.308197842107795
15 sparse 25 0.25       bic  0.308197842107795
16 sparse 25 0.25     loocv  0.308197842107795
17 sparse 25  0.5       aic  0.288351729634545
18 sparse 25  0.5       bic  0.288351729634545
19 sparse 25  0.5     loocv  0.288351729634545
20 sparse 50    0       aic  0.570767637753515
21 sparse 50 