# "Hello World" Example in R

In [7]:
library(glmnet)
library(rjson)

## Researcher Loads in Data

In [8]:
n <- 100
p <- 50
s <- 5
sigma <- 1.
signal <- 10.

In [9]:
X = read.table("X.csv", sep=',', header=FALSE)
y = read.table('y.csv')
length(y)
dim(X)

## Selection Algorithm (function)

In [40]:
selection_algorithm <- function(X, y) {
    n <- nrow(X)
    p <- ncol(X)
    
    min_success <- 6
    ntries <- 10
    
    make_lambda_grid <- function(X, y) {
        # Return a vector of lambda values where the corresponding lasso model
        # satisfies the following constraint:
        #   number of selected variables < sqrt(0.8 * p)
        p <- ncol(X)
        model <- cv.glmnet(X, y, alpha=1)$glmnet.fit
        lambdas <- model$lambda
        nselected <- model$df  # number of selected vars for each lambda
        return(lambdas[nselected < sqrt(0.8 * p)])
    }

    lambda_grid <- make_lambda_grid(X, y)
    success <- matrix(0, nrow=p, ncol=length(lambda_grid))
    
    for(i in 1:ntries) {
        subsample_indexes <- sample(1:n, floor(n/2), replace=FALSE)
        Xsub <- X[subsample_indexes,]
        noisy_y <- y[subsample_indexes] + rnorm(floor(n/2))
        model <- cv.glmnet(Xsub, noisy_y, alpha=1)
        
        lambdas <- model$lambda
        coefs <- coef(model, lambda_grid)
        success <- success + (coefs[-1,] != 0)
    }
    
    selection_fn <- function(x) {
        return(sum(x > min_success) > 0)
    }
    selected <- apply(X=success, MARGIN=2, FUN=selection_fn)
    vars <- which(selected != 0)  # indexes of selected lambdas
    return(as.numeric(vars))
}

selected_vars <- selection_algorithm(as.matrix(X), y[,1])
selected_vars

In [41]:
print(selected_vars)

 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 
 2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 


In [42]:
compute_sufficient_statistics <- function(X, y, fixed_selection) {
    fixed_selection <- fromJSON(fixed_selection)
    return(list(t(X) %*% y, sum(y^2)))
}

compute_estimators <- function(suff_stat, fixed_sel, set_sel) {
    fixed_selection <- fromJSON(fixed_sel)
    set_sel <- fromJSON(set_sel)
    return(ginv(t(X) %*% X %*% (t(X) %*% y))[set_sel['selected_vars']])
}

resample_data <- function(X, y, fixed_sel) {
    fixed_sel <- fromJSON(fixed_sel)
    n <- nrow(X)
    p <- ncol(X)
    resids <- y - X %*% ginv(t(X) %*% X %*% (t(X) %*% y))
    fitted <- y - resids
    
    resampled <- sample(1:n, n, replace=TRUE)
    y_rilde <- fitted + resids[resampled]
    
    return(list("X" = X, "y" = y_tilde))
}

In [43]:
library(IRdisplay)

In [44]:
IRdisplay::display_json('[2,3,4]')

In [45]:
IRdisplay:::display_raw('application/mytype', FALSE, toJSON(selected_vars), NULL)