# Code for generating Figure S2 on Logistic Regression

## Plot empirical rates of convergence

In [1]:
# Directory holding the logistic-regression result tables.
base_fn <- "../../results//logistic_regression/"

# Tab-separated table of approximation errors, one row per sample size.
fn_approximation_errors <- paste0(base_fn, "/lr_approximation_errors.tsv")
df_approx_errs <- read.delim(fn_approximation_errors)

In [2]:
# Shared geometry and text scaling for all panels.

# Panel width in inches: an 8.5 in page minus two 1 in margins, split across
# three panels. Panels are square, so the height equals the width.
width <- (8.5 - 2 * 1.) / 3.
height <- width

# Output resolution in DPI.
res <- 600

# Base-graphics parameters passed to par() in each panel.
mar <- c(2.0, 2.0, 1, 0.5)  # margins: bottom, left, top, right
tck <- -0.02                # tick size
mgp <- c(2.6, 0.05, 0.0)    # distances for axis title, tick labels, axis line

# Text scaling factors and title placement.
title_scale <- 0.8
label_scale <- 0.75
legend_scale <- 0.67
title_line <- 0.2  # space between title and top of axis

In [3]:
# Unpack the approximation-error table into one vector per plotted quantity.
# Every distance has a "*_means" column and a matching "*_sems" column
# (presumably standard errors of the mean -- confirm against the script that
# writes the table). The means are what the panel plots; the SEMs are kept
# under separate "_SEM" names so they never overwrite the means.
N <- df_approx_errs$N

MLE_to_Truth <- df_approx_errs$MLE_to_Truth_means
MLE_to_Truth_SEM <- df_approx_errs$MLE_to_Truth_sems

post_Mean_to_MLE <- df_approx_errs$Posterior_Mean_to_MLE_means
post_Mean_to_MLE_SEM <- df_approx_errs$Posterior_Mean_to_MLE_sems

MAP_to_post_Mean <- df_approx_errs$MAP_to_Posterior_Mean_means
MAP_to_post_Mean_SEM <- df_approx_errs$MAP_to_Posterior_Mean_sems

approx_MAP_to_MAP <- df_approx_errs$Approximation_to_MAP_means
approx_MAP_to_MAP_SEM <- df_approx_errs$Approximation_to_MAP_sems

In [4]:
# Panel A: the four approximation-error curves against N on log-log axes,
# written to a PNG file.
panel_fn <- "../out//figureS4_panel_A.png"
png(panel_fn, width = width, height = height, units = "in", res = res,
    type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Set up an empty log-log canvas with fixed limits; the curves are added
# afterwards with lines().
plot(N, type = "n", log = "xy",
     xlim = c(1, 1e6), ylim = c(3e-13, 800),
     xlab = "", ylab = "", cex.axis = 0.5)
title(main = "Approximation Errors", line = title_line, cex.main = title_scale)
title(xlab = expression("Sample size ("*M*")"), line = 0.8,
      cex.lab = label_scale)
title(ylab = "Distance", line = 0.8, cex.lab = label_scale)

# One colour per curve, shared by the lines and the legend so they stay in
# sync.
curve_cols <- c(rgb(1, 0, 0), rgb(0, 1, 0), rgb(0, 0, 1), rgb(0.4, 0.2, 0.2))

lines(N, MLE_to_Truth, col = curve_cols[1])
lines(N, post_Mean_to_MLE, col = curve_cols[2])
lines(N, MAP_to_post_Mean, col = curve_cols[3])
lines(N, approx_MAP_to_MAP, col = curve_cols[4])

# Legend entries are listed in the same order as the curves above.
legend("bottomleft", col = curve_cols,
       legend = expression("\u2016"*hat(theta)-theta*"\u2016",
                           "\u2016"*E * "[" * theta* "|X,Y]" - hat(theta) *"\u2016",
                           "\u2016"*theta^{"*"} - E * "[" * theta* "|X,Y]" *"\u2016",
                           "\u2016"*tilde(theta)^{"*"} - theta^{"*"}*"\u2016"),
       horiz = FALSE, lty = 1, cex = legend_scale)

dev.off()

<p>
    <img src="../out/figureS4_panel_A.png" width="400"/>
</p>

## Plot coverage and histogram of c-values

In [5]:
# Path to the tab-separated table of c-values and by_break values.
fn_cvalues_and_b_breaks <- paste0(
  base_fn, "/logistic_regression_cvals_and_calibration.tsv"
)

In [6]:
# Grid of alpha levels at which the calibration curve is evaluated.
alpha_grid <- seq(0.01, 0.99, by = 0.01)

df_cvals_and_b_breaks <- read.csv(fn_cvalues_and_b_breaks, sep = "\t")
by_breaks <- df_cvals_and_b_breaks$by_break

# For each alpha, the fraction of by_break values strictly below it.
# vapply returns a numeric vector of fixed length, instead of growing one
# with c() inside a 1:length() loop.
cal_curve <- vapply(
  alpha_grid,
  function(alpha) mean(alpha > by_breaks),
  numeric(1)
)

c_vals <- df_cvals_and_b_breaks$c_value

In [7]:
# Panel C: calibration curve (cal_curve against alpha_grid) with the identity
# line as reference, written to a PNG file.
panel_fn <- "../out/figureS4_panel_C.png"
png(panel_fn, width = width, height = height, units = "in", res = res,
    type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Empty unit-square canvas; xaxs/yaxs = "i" removes the default axis padding
# so the axes span exactly [0, 1].
plot(1, type = "n", xlim = c(0, 1), ylim = c(0, 1), xaxs = "i", yaxs = "i",
     ylab = "", xlab = "", cex.axis = 0.5)
title(main = "Calibration", line = title_line, cex.main = title_scale)
title(xlab = expression(alpha), line = 0.6, cex.lab = label_scale)
title(ylab = "Coverage", line = 0.8, cex.lab = label_scale)

# Line types are defined once so the legend keys always match the drawn
# lines. The legend previously showed lty = 3 for a line drawn with lty = 2.
observed_lty <- 1
nominal_lty <- 2

abline(a = 0, b = 1, lty = nominal_lty)
lines(alpha_grid, cal_curve, col = "black", lty = observed_lty)

legend("bottomright", col = "black",
       legend = expression("Observed", "Nominal"),
       horiz = FALSE, lty = c(observed_lty, nominal_lty), cex = legend_scale)
dev.off()

<p>
    <img src="../out/figureS4_panel_C.png" width="400"/>
</p>

In [8]:
# Panel B: density histogram of the c-values, written to a PNG file.
panel_fn <- "../out/figureS4_panel_B.png"
png(panel_fn, width = width, height = height, units = "in", res = res,
    type = "cairo")
par(mar = mar, mgp = mgp, tck = tck)

# Bins of width 0.025 covering [0, 1].
c_val_breaks <- seq(0, 1, by = 0.025)

# freq = FALSE puts densities rather than counts on the y axis.
hist(c_vals, breaks = c_val_breaks, freq = FALSE, col = "black",
     main = "", xlab = "", ylim = c(0, 5.5),
     cex.axis = label_scale, xaxs = "i", yaxs = "i")

title(main = "Histogram of c-values", line = title_line, cex.main = title_scale)
title(xlab = "c-value", line = 0.6, cex.lab = label_scale)
title(ylab = "Density", line = 0.8, cex.lab = label_scale)

# Frame the plotting region.
box()
dev.off()

<p>
    <img src="../out/figureS4_panel_B.png" width="400"/>
</p>