-
Notifications
You must be signed in to change notification settings - Fork 16
/
css_stats.R
75 lines (64 loc) · 2.01 KB
/
css_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
library(lmtest)
library(Hmisc)
library(foreach)
library(sandwich)
# code by dean eckles and eytan bakshy
sandwich.lm <- function(fit) {
bread.value <- bread(fit)
# get the meat
psi <- as.matrix(estfun(fit))
weights <- fit$weights
m <- NROW(psi)
# psi is already multiplied times the weights
rval <- t(psi) %*% (psi / weights) / m^2
rownames(rval) <- colnames(rval) <- colnames(psi)
meat.value <- rval
bread.value %*% meat.value %*% bread.value
}
# weighted linear regression coeftest with sandwich standard errors
weighted.lm.coeftest <- function(formula, data, ...) {
m <- lm(formula, data=data, weights=.weights, ...)
coeftest(m, vcov=sandwich.lm)
}
## the bootstrap
r.double.or.nothing <- function(n) {
2 * rbinom(n, 1, .5)
}
clustered.bootstrap <- function(data, clusters, statistic, .R=250,
.verbose = FALSE,
.RNG = r.double.or.nothing,
.parallel = FALSE,
.combine=c,
...) {
if(length(clusters) == 0) {
return(iid.bootstrap(data, statistic, .R, .RNG, ...))
}
# lists containing information for each group
cluster.ids <- list()
num.clusters <- list()
for(i in clusters) {
groups.factor <- as.factor(data[[i]])
cluster.ids[[i]] <- as.numeric(groups.factor)
num.clusters[[i]] <- length(levels(groups.factor))
}
replicates <- foreach(r=1:.R, .combine=.combine) %do% {
# generate and multiply weight vectors
w <- foreach(i = clusters, .combine = `*`) %do%
{.RNG(num.clusters[[i]])[cluster.ids[[i]]]}
statistic(mutate(data, .weights=w, .r=r), ...)
}
replicates
}
iid.bootstrap <- function(data, statistic, .R=250,
.RNG = r.double.or.nothing, .combine=c, ...) {
replicates <- foreach(r=1:.R, .combine=.combine) %do% {
statistic(mutate(data, .weights=.RNG(nrow(data)), .r=r), ...)
}
replicates
}
norm.intervals <- function(x, alpha=0.05) {
data.frame(
lower=qnorm(alpha/2, mean(x), sd(x)),
mean=mean(x),
upper=qnorm(1-alpha/2, mean(x), sd(x)))
}