# "Hello World" Example in R

In [1]:
library(glmnet)
library(rjson)
library(tidyverse)

Loading required package: Matrix
Loading required package: foreach
Loaded glmnet 2.0-16

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.2.1 ──
[32m✔[39m [34mggplot2[39m 3.0.0     [32m✔[39m [34mpurrr  [39m 0.2.5
[32m✔[39m [34mtibble [39m 1.4.2     [32m✔[39m [34mdplyr  [39m 0.7.6
[32m✔[39m [34mtidyr  [39m 0.8.1     [32m✔[39m [34mstringr[39m 1.3.1
[32m✔[39m [34mreadr  [39m 1.1.1     [32m✔[39m [34mforcats[39m 0.3.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mpurrr[39m::[32maccumulate()[39m masks [34mforeach[39m::accumulate()
[31m✖[39m [34mtidyr[39m::[32mexpand()[39m     masks [34mMatrix[39m::expand()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m     masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m        masks [34mstats[39m::lag()
[31m✖[39m [34mpurrr[39m::[32mwhen()[39m       masks [34mforeach[39m::when()


## Researcher Loads in Data

In [2]:
n <- 100
p <- 50
s <- 5
sigma <- 1.
signal <- 10.

In [4]:
X = read.table("../X.csv", sep=',', header=FALSE)
y = read.table('../y.csv')
length(y)
dim(X)

## Selection Algorithm (function)

In [5]:
selection_algorithm <- function(X, y) {
    n <- nrow(X)
    p <- ncol(X)
    
    min_success <- 6
    ntries <- 10
    
    make_lambda_grid <- function(X, y) {
        # Return a vector of lambda values where the corresponding lasso model
        # satisfies the following constraint:
        #   number of selected variables < sqrt(0.8 * p)
        p <- ncol(X)
        model <- cv.glmnet(X, y, alpha=1)$glmnet.fit
        lambdas <- model$lambda
        nselected <- model$df  # number of selected vars for each lambda
        return(lambdas[nselected < sqrt(0.8 * p)])
    }

    lambda_grid <- make_lambda_grid(X, y)
    success <- matrix(0, nrow=p, ncol=length(lambda_grid))
    
    for(i in 1:ntries) {
        subsample_indexes <- sample(1:n, floor(n/2), replace=FALSE)
        Xsub <- X[subsample_indexes,]
        noisy_y <- y[subsample_indexes] + rnorm(floor(n/2))
        model <- cv.glmnet(Xsub, noisy_y, alpha=1)
        
        lambdas <- model$lambda
        coefs <- coef(model, lambda_grid)
        success <- success + (coefs[-1,] != 0)
    }
    
    selection_fn <- function(x) {
        return(sum(x > min_success) > 0)
    }
    selected <- apply(X=success, MARGIN=2, FUN=selection_fn)
    vars <- which(selected != 0)  # indexes of selected lambdas
    return(as.numeric(vars))
}

selected_vars <- selection_algorithm(as.matrix(X), y[,1])
selected_vars <- data.frame(selection = selected_vars)
selected_vars

selection
<dbl>
1
2
3
4
5
6
7
8
9
10


In [97]:
# original
#compute_sufficient_statistics <- function(X, y, fixed_selection) {
#    fixed_selection <- fromJSON(fixed_selection)
#    return(list(t(X) %*% y, sum(y^2)))
#}

# fix attempt
compute_sufficient_statistics <- function(data, fixed_selection) {
    fixed_selection <- fromJSON(fixed_selection)
    X = as.matrix(data[["X"]])
    y = as.matrix(data[["y"]])
    suff_stat_1 <- t(X) %*% y
    suff_stat_2 <- sum(y^2)
    combined <- c(suff_stat_1, suff_stat_2)
    combined  <- data.frame(combined)
    return(combined)
}

compute_estimators <- function(suff_stat, fixed_sel, set_sel) {
    fixed_selection <- fromJSON(fixed_sel)
    set_sel <- fromJSON(set_sel)
    return(ginv(t(X) %*% X %*% (t(X) %*% y))[set_sel['selected_vars']])
}

resample_data <- function(X, y, fixed_sel) {
    fixed_sel <- fromJSON(fixed_sel)
    n <- nrow(X)
    p <- ncol(X)
    resids <- y - X %*% ginv(t(X) %*% X %*% (t(X) %*% y))
    fitted <- y - resids
    
    resampled <- sample(1:n, n, replace=TRUE)
    y_rilde <- fitted + resids[resampled]
    
    return(list("X" = X, "y" = y_tilde))
}

In [98]:
#as.matrix(t(X)) %*% as.matrix(y)

data <- list("X"=X, "y"=y)
suff_stat <- compute_sufficient_statistics(data, "{}")
is.data.frame(suff_stat)

In [None]:
library(IRdisplay)

In [None]:
IRdisplay::display_json('[2,3,4]')

In [None]:
IRdisplay:::display_raw('application/mytype', FALSE, toJSON(selected_vars), NULL)

In [None]:
IRdisplay:::display_raw('application/mytype', TRUE, as.raw(c(3,4,5,7,9)), NULL)

In [None]:
?raw