In [1]:
library(lme4)
library(dplyr)

# following / adapted from https://pdhoff.github.io/FABInference/articles/exampleFHmodel.html
load(url("http://www2.stat.duke.edu/~pdh10/Datasets/els.RData")) 

Loading required package: Matrix

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Below, y is the student-level reading score, and group is the school id for each student. That is, group[i] is the school of student with score y[i]. The matrix X is a matrix of school-level variables, and the matrix W is the same data except with the variable for each school replicated for each student in the school.

In [2]:
# Perform empirical Bayes step for minimum number of students = 2, 5, and 10
for (min_school_size in c(2,5,10)){
    # Filter to schools with at least some minimum number of students
    school_sizes <- (els %>% group_by(school) %>% summarise(n()))
    large_schools <- (school_sizes %>% filter(school_sizes["n()"] >= min_school_size))
    groups = sort(large_schools$school)

    in_groups_by_student = els$school %in% groups
    els_filt = els[in_groups_by_student,]    
    els_data <- els_filt

    # Separate out student-level characteristics from school-level characteristics:
    y<-els_data$rscore 
    group <- els_data$school 

    W<-as.matrix(els_data[,c("flp","urban","rural","private","catholic","rm","rs","rw","enrollment") ] )  
    X<-apply(W,2,function(x){ tapply(x,group,"mean") } ) 

    # Use LME4, following Hoff 2020 to estimate beta, tau and sigma
    #install.packages("lme4")
    fit<-lmer(y ~ W + (1|group) )
    beta<-fixef(fit)
    tau<-sqrt(unlist(VarCorr(fit)))
    sigma<-attr(VarCorr(fit), "sc") # within group standard deviation

    # Save out data needed for computing posterior means and c-values
    els_data_to_save <- els_data %>% group_by(school) %>% summarise(mean=mean(rscore), sigma_n=sigma/sqrt(n()))
    els_data_to_save['prior_mean'] <- beta[1] + X %*% beta[2:10]
    els_data_to_save['prior_sd'] <- tau
    write.csv(els_data_to_save, sprintf("../results/education/means_and_SEs_by_school_min_students=%02d.csv",
                                        min_school_size))
}

In [48]:
# Simulate and refit linear model 
for (min_school_size in c(2,5,10)){
    
    # Perform empirical Bayes step again
    school_sizes <- (els %>% group_by(school) %>% summarise(n()))
    large_schools <- (school_sizes %>% filter(school_sizes["n()"] >= min_school_size))
    groups = sort(large_schools$school)

    in_groups_by_student = els$school %in% groups
    els_filt = els[in_groups_by_student,]    
    els_data <- els_filt

    # Separate out student-level characteristics from school-level characteristics:
    y<-els_data$rscore 
    group <- els_data$school 

    W<-as.matrix(els_data[,c("flp","urban","rural","private","catholic","rm","rs","rw","enrollment") ] )  
    X<-apply(W,2,function(x){ tapply(x,group,"mean") } ) 

    # Use LME4, following Hoff 2020 to estimate beta, tau and sigma
    #install.packages("lme4")
    fit<-lmer(y ~ W + (1|group) )
    beta<-fixef(fit)
    tau<-sqrt(unlist(VarCorr(fit)))
    sigma<-attr(VarCorr(fit), "sc") # within group standard deviation

    # Save out data needed for computing posterior means and c-values
    els_data_to_save <- els_data %>% group_by(school) %>% summarise(mean=mean(rscore), sigma_n=sigma/sqrt(n()))
    els_data_to_save['prior_mean'] <- beta[1] + X %*% beta[2:10]
    els_data_to_save['prior_sd'] <- tau
    write.csv(els_data_to_save, sprintf("../results/education/means_and_SEs_by_school_min_students=%02d.csv",
                                        min_school_size))
    
    for (i in 1:5000) { 
        # simulate a dataset using parameters inferred on real data
        school_means_sim <- beta[1] + X %*% beta[2:10] + tau*rnorm(n = length(groups))
        student_scores_sim <- school_means_sim[paste(group),] + sigma*rnorm(n = length(group))

        # refit parameters (beta, sigma and tau)
        fit_sim <- refit(fit, student_scores_sim)
        beta_sim <-fixef(fit_sim)
        tau_sim <-sqrt(unlist(VarCorr(fit_sim)))
        sigma_sim <-attr(VarCorr(fit_sim), "sc") # within group standard deviation

        # save out summary necessary for refitting
        scores_df <- data.frame(cbind(student_scores_sim, names(student_scores_sim)))
        colnames(scores_df) <- c('scores', 'school')
        scores_df$scores <- as.numeric(as.character(scores_df$scores))
        els_data_to_save <- scores_df %>% group_by(school) %>% summarise(
            mean=mean(scores),sigma_n=sigma_sim/sqrt(n()))
        els_data_to_save['school_mean'] <- school_means_sim
        els_data_to_save['prior_mean'] <- beta_sim[1] + X %*% beta_sim[2:10]
        els_data_to_save['prior_sd'] <- tau_sim
        write.csv(els_data_to_save, sprintf("../results/education/simulation_min_students=%02d_%03d.csv",
                                            min_school_size, i))
    }
}

`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argum