# Code for generating Figure 1 and the c-value histograms

In [1]:
# Read in the raw simulation results and summarise them for plotting.
#
# One results file is read per value in p_theta. For each file we record:
#   * theta_hat_risk / theta_hat_se     : mean and standard error of "MLE_Err"
#   * theta_star_risk / theta_star_se   : mean and standard error of "Bayes_Err"
#   * theta_star_prob[aix, ix]          : share of replicates with
#                                         c_value >= alpha_grid[aix]
#   * theta_tilde_risk / theta_tilde_se : mean and standard error of the loss
#                                         of the estimate that uses the Bayes
#                                         loss when c_value >= alpha and the
#                                         MLE loss otherwise
#   * calibration_curves[aix, ix]       : share of replicates with
#                                         alpha_grid[aix] > by_break
#   * wins, c_values                    : raw "Win" and "c_value" columns
# Files that do not exist are reported and their entries are left as NA.
alpha_grid <- seq(0.01, 0.99, by = 0.01)
N <- 50
n_reps <- 2500 # simulation replicates expected in every results file
p_theta <- c(0.00, 0.91, 1.82, 2.73, 3.64, 4.55, 5.46, 6.37, 7.27, 8.18, 9.09,
             10.00, 10.91, 11.82, 12.73, 13.64, 14.55, 15.46, 16.37, 17.28)
p_theta_scaled <- p_theta / sqrt(N)
# Two-decimal labels ("0.00", "0.91", ...) used in file names and dimnames;
# derived from p_theta so the two can never drift apart.
p_theta_char <- sprintf("%.2f", p_theta)

raw_data_list <- paste0("../../results/section3_simulation_study/tau=1.00_N=050_P_theta_norm=",
                        p_theta_char, "_frequentist_analysis.tsv")
setting_names <- paste0("p_theta_norm_", p_theta_char)

theta_hat_risk <- rep(NA, times = length(raw_data_list))
names(theta_hat_risk) <- setting_names
theta_hat_se <- theta_hat_risk

theta_star_risk <- theta_hat_risk
theta_star_se <- theta_hat_risk

# Prob. of picking theta_star as a function of alpha:
# rows index alpha values, columns index the p_theta settings
theta_star_prob <- matrix(nrow = length(alpha_grid), ncol = length(raw_data_list),
                          dimnames = list(c(), setting_names))
theta_tilde_risk <- theta_star_prob
theta_tilde_se <- theta_star_prob
calibration_curves <- theta_star_prob

# save wins and c-values (one row per replicate, one column per setting)
wins <- matrix(nrow = n_reps, ncol = length(raw_data_list),
               dimnames = list(c(), setting_names))
c_values <- wins

for (ix in seq_along(raw_data_list)) {
  if (!file.exists(raw_data_list[ix])) {
    print(paste("Missing", raw_data_list[ix]))
    next
  }

  tmp_results <- read.delim(raw_data_list[ix])
  n_rows <- nrow(tmp_results)
  # A row count that merely divides n_reps would otherwise be recycled
  # silently when filling the columns of wins / c_values below.
  if (n_rows != n_reps) {
    stop("Expected ", n_reps, " rows in ", raw_data_list[ix],
         " but found ", n_rows, call. = FALSE)
  }

  mle_err <- tmp_results[, "MLE_Err"]
  bayes_err <- tmp_results[, "Bayes_Err"]
  c_val <- tmp_results[, "c_value"]
  by_break <- tmp_results[, "by_break"]

  # Standard error = sd of the per-replicate loss / sqrt(number of replicates),
  # i.e. the variability of the risk estimate if the whole set of simulation
  # replicates were re-run.
  theta_hat_risk[ix] <- mean(mle_err)
  theta_hat_se[ix] <- sd(mle_err) / sqrt(n_rows)

  theta_star_risk[ix] <- mean(bayes_err)
  theta_star_se[ix] <- sd(bayes_err) / sqrt(n_rows)

  for (aix in seq_along(alpha_grid)) {
    alpha <- alpha_grid[aix]

    # proportion of replicates in which theta_star is picked,
    # i.e. in which c_value >= alpha
    theta_star_prob[aix, ix] <- mean(c_val >= alpha)

    # loss of theta_tilde: MLE loss when c_value < alpha, Bayes loss otherwise
    theta_tilde_loss <- mle_err * (c_val < alpha) + bayes_err * (c_val >= alpha)
    theta_tilde_risk[aix, ix] <- mean(theta_tilde_loss)
    theta_tilde_se[aix, ix] <- sd(theta_tilde_loss) / sqrt(n_rows)

    # calibration: if alpha > by_break, then we know W > b(y,alpha)
    calibration_curves[aix, ix] <- mean(alpha > by_break)
  }

  wins[, ix] <- tmp_results[, "Win"]
  c_values[, ix] <- c_val
}


In [2]:
# Shared layout settings for all figure panels.

# Panel size in inches: an 8.5in page minus two 1in margins, split over three
# panels; each panel is square.
width <- (8.5 - 2 * 1) / 3
height <- width

# Output resolution (DPI)
res <- 600

# par() settings: margins are (bottom, left, top, right); mgp holds the
# distances for (title, tick labels, axis line); tck is the tick size.
mar <- c(2.0, 2.0, 1, 0.5)
tck <- -0.02
mgp <- c(2.9, 0.05, 0.0)

# Text scaling factors
title_scale <- 0.8
label_scale <- 0.85
legend_scale <- 0.7
tick_label_size <- 0.7

# Space between the title and the top of the axis
title_line <- 0.2

In [3]:
# Figure 1, panel A: calibration.
# Draws columns 5, 10 and 15 of calibration_curves against alpha_grid, with the
# identity line (dashed) as the nominal reference, and writes the result to a
# PNG file.

panel_fn <- "../out/figure1_panel_A.png"
png(panel_fn, width = width, height = height, units = "in", res = res, type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Empty canvas on the unit square; axis titles are added separately so their
# distance from the axes can be controlled.
plot(1, type = "n", xlim = c(0, 1), ylim = c(0, 1), xaxs = "i", yaxs = "i",
     ylab = "", xlab = "", cex.axis = tick_label_size)
title(main = "Calibration", line = title_line, cex.main = title_scale)
title(xlab = expression(alpha), line = 0.6, cex.lab = label_scale)
title(ylab = "Coverage", line = 0.8, cex.lab = label_scale)

abline(a = 0, b = 1, lty = 2) # nominal: coverage equal to alpha
lines(alpha_grid, calibration_curves[, 5], col = rgb(1, 0, 0))
lines(alpha_grid, calibration_curves[, 10], col = rgb(0, 0, 1))
lines(alpha_grid, calibration_curves[, 15], col = rgb(0, 1, 0))

# NOTE(review): the hard-coded values 0.36 / 0.82 / 1.27 equal
# p_theta[c(5, 10, 15)] / 10, whereas p_theta / sqrt(N) with N = 50 gives
# roughly 0.51 / 1.16 / 1.80 -- confirm which scaling the labels should show.
# The last lty entry is 2 so the "Nominal" key matches the dashed abline above.
legend("bottomright", col = c(rgb(1, 0, 0), rgb(0, 0, 1), rgb(0, 1, 0), "black"),
       legend = expression("\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=0.36",
                           "\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=0.82",
                           "\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=1.27",
                           "Nominal"),
       horiz = FALSE, lty = c(1, 1, 1, 2), cex = legend_scale)
dev.off()

<p float="left">
<img src="../out/figure1_panel_A.png" width="250" /> 
</p>

In [4]:
# Figure 1, panel B: probability of selecting theta_star, P(c(y) >= alpha),
# plotted against p_theta / sqrt(N) for three values of alpha.

panel_fn <- "../out/figure1_panel_B.png"
png(panel_fn, width = width, height = height, units = "in", res = res, type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Empty canvas; titles and axis labels are placed individually below.
plot(1, type = "n", xlim = c(0, 2.0), ylim = c(0, 1), xaxs = "i", yaxs = "i",
     xlab = "", ylab = "", cex.axis = tick_label_size)
title(main = "Selection probability", line = title_line, cex.main = title_scale)
title(xlab = expression("\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)),
      line = 1.0, cex.lab = label_scale)
title(ylab = expression(P(c(y) >= alpha)), line = 0.5, cex.lab = label_scale)

# Rows 95, 50 and 10 of theta_star_prob are the alpha_grid entries
# 0.95, 0.50 and 0.10 respectively.
panel_b_rows <- c(95, 50, 10)
panel_b_cols <- c("green", "orange", "blue")
for (k in seq_along(panel_b_rows)) {
  lines(p_theta / sqrt(N), theta_star_prob[panel_b_rows[k], ],
        col = panel_b_cols[k], lwd = 2)
}

legend("bottomleft",
       legend = expression(alpha~"=0.95", alpha~"=0.5", alpha~"=0.1"),
       col = panel_b_cols, lwd = c(2, 2, 2), horiz = FALSE, cex = legend_scale)
dev.off()

<p float="left">
<img src="../out/figure1_panel_B.png" width="250" /> 
</p>

In [5]:
# Figure 1, panel C: risk (divided by 50) of each estimator against
# p_theta / sqrt(N). Solid black is theta_hat ("MLE"), dashed black is
# theta_star ("Bayes."), and the coloured curves are theta_tilde at
# alpha = 0.95, 0.5 and 0.1 (rows 95, 50 and 10 of theta_tilde_risk).

panel_fn <- "../out/figure1_panel_C.png"
# Use the shared resolution so this panel matches the other panels.
png(panel_fn, width = width, height = height, units = "in", res = res, type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)


plot(1, type = "n", xlim = c(0, 2.), ylim = c(0, 1.3), xaxs = "i", yaxs = "i",
     xlab = "", ylab = "", cex.axis = tick_label_size)
title(main = "Risk", line = title_line, cex.main = title_scale)
title(xlab = expression("\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)),
      line = 0.9, cex.lab = label_scale)
title(ylab = "Risk / N", line = 0.8, cex.lab = label_scale)
lines(p_theta / sqrt(N), theta_hat_risk / 50)
lines(p_theta / sqrt(N), theta_star_risk / 50, lty = 2)

# All curves share the x scaling p_theta / sqrt(N) named in the axis label;
# previously two of them were drawn against p_theta / 10 instead.
lines(p_theta / sqrt(N), theta_tilde_risk[95, ] / 50, col = "green")
lines(p_theta / sqrt(N), theta_tilde_risk[50, ] / 50, col = "orange")
lines(p_theta / sqrt(N), theta_tilde_risk[10, ] / 50, col = "blue")
# The last lty entry is 2 so the "Bayes." key matches the dashed line above.
legend("bottomright",
       legend = expression(alpha~"=0.95", alpha~"=0.5", alpha~"=0.1", "MLE", "Bayes."),
       col = c("green", "orange", "blue", "black", "black"), lwd = c(2, 2, 2),
       lty = c(1, 1, 1, 1, 2), horiz = FALSE, cex = legend_scale)
dev.off()

<p float="left">
    <img src="../out/figure1_panel_C.png" width="250" /> 
</p>

In [6]:
# Figure S2: overlaid density histograms of the c-values stored in columns
# 5, 10 and 15 of c_values, written to a PNG file.
panel_fn <- "../out/figureS2.png"
png(panel_fn, width = width, height = height, units = "in", res = res, type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Common bins and one semi-transparent fill per setting.
hist_breaks <- seq(0, 1, by = 0.025)
hist_settings <- c(5, 10, 15)
hist_fills <- c(rgb(1, 0, 0, 1/5), rgb(0, 0, 1, 1/5), rgb(0, 1, 0, 1/5))

# The first histogram sets up the axes; the remaining ones are drawn on top.
hist(c_values[, hist_settings[1]], breaks = hist_breaks, freq = FALSE,
     col = hist_fills[1], xlab = "", main = "", ylim = c(0, 10),
     cex.axis = label_scale, xaxs = "i", yaxs = "i")
for (k in 2:3) {
  hist(c_values[, hist_settings[k]], breaks = hist_breaks, freq = FALSE,
       col = hist_fills[k], add = TRUE)
}

title(main = "Histogram of c-values", line = title_line, cex.main = title_scale)
title(xlab = "c-value", line = 0.6, cex.lab = label_scale)
title(ylab = "Density", line = 0.8, cex.lab = label_scale)

# Alternative legend labels kept from an earlier version:
# legend = expression(hat(sigma)(theta)~"=0.36", hat(sigma)(theta)~"=0.82", hat(sigma)(theta)~"=1.27"),
legend("top",
       col = c(rgb(1, 0, 0, 1/3), rgb(0, 0, 1, 1/3), rgb(0, 1, 0, 1/3)),
       legend = expression("\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=0.36",
                           "\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=0.82",
                           "\u2016"*P[1]^symbol("\136")*theta*"\u2016"/sqrt(N)~"=1.27"),
       horiz = FALSE, cex = legend_scale, lty = 1)
box()
dev.off()


<p float="left">
<img src="../out/figureS2.png" width="250" /> 
</p>