Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewpbray committed Feb 26, 2015
0 parents commit 3563b94
Show file tree
Hide file tree
Showing 10 changed files with 483 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
18 changes: 18 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
Package: lmmoptim
Title: Optimization routine for linear mixed models
Version: 0.0.0.9000
Authors@R: as.person(c(
"Andrew Bray <andrew.bray@gmail.com> [cre]",
"Michael Lavine <lavine@math.umass.edu> [aut]"
))
Description: An implementation of the optimization algorithm described in,
"Approximately Exact Calculations for Linear Mixed Models" by Lavine and
Hodges (2015). The algorithm finds arbitrarily precise bounds on the
global min/max of the restricted likelihood of linear mixed models with two
variances. Also included are functions to visualize the likelihood function
and the optimization process.
Depends: R (>= 3.1.2)
License: GPL-2
LazyData: true
Imports:
ggplot2
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exportPattern("^[^\\.]")
139 changes: 139 additions & 0 deletions R/boxfuncs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# When dividing the plane into boxes, a box should have the following attributes:
# xlims=sigsqelims, ylims=sigsqslims
# for each line, an indicator of whether the box is above, below, or straddling the line
# for each line, upper and lower bounds on RLL

makebox <- function ( lims.sigsqs=NA, lims.sigsqe=NA, status=NA, lines ) {

# sanity checks
if ( missing(lims.sigsqs) || missing(lims.sigsqe) )
print ( "please supply lims.sigsqs and lims.sigsqe" )
if ( missing(status) ) print ( "please supply status" )
if ( missing(lines) ) print ( "please supply lines" )

# If the box has a parent then it inherits the parent's status.
# But if the parent straddles a line, we must check whether
# the child also straddles the line.
strad <- which ( status == "straddle" )
status[strad] <- getstatus ( lims.sigsqe = lims.sigsqe,
lims.sigsqs = lims.sigsqs,
lines = lines[strad,]
)

# we could get some of the bounds from the parent,
# but it's just as easy to recalculate them
bounds <- getbounds ( lims.sigsqe = lims.sigsqe,
lims.sigsqs = lims.sigsqs,
status = status,
lines = lines
)

return ( list ( lims.sigsqe = lims.sigsqe,
lims.sigsqs = lims.sigsqs,
status = status,
bounds = bounds
)
)
} # end makebox

splitbox <- function ( box, lines ) {
# split a box into four children
NW <- with ( box, makebox ( lims.sigsqe = c ( lims.sigsqe[1], mean(lims.sigsqe) ),
lims.sigsqs = c ( mean(lims.sigsqs), lims.sigsqs[2] ),
status = status,
lines = lines
)
)
NE <- with ( box, makebox ( lims.sigsqe = c ( mean(lims.sigsqe), lims.sigsqe[2] ),
lims.sigsqs = c ( mean(lims.sigsqs), lims.sigsqs[2] ),
status = status,
lines = lines
)
)
SW <- with ( box, makebox ( lims.sigsqe = c ( lims.sigsqe[1], mean(lims.sigsqe) ),
lims.sigsqs = c ( lims.sigsqs[1], mean(lims.sigsqs) ),
status = status,
lines = lines
)
)
SE <- with ( box, makebox ( lims.sigsqe = c ( mean(lims.sigsqe), lims.sigsqe[2] ),
lims.sigsqs = c ( lims.sigsqs[1], mean(lims.sigsqs) ),
status = status,
lines = lines
)
)
return ( list ( NW, NE, SW, SE ) ) # list of four boxes
}

getstatus <- function ( lims.sigsqe, lims.sigsqs, lines ){
# Is a box above, below, or straddling the lines with these slopes and intercepts?

# value of the lines at the left side of the box
tmp1 <- with ( lines, ifelse ( is.finite(slope),
int.sigsqs + slope * lims.sigsqe[1],
NA
)
)
# value of the lines at the right side of the box
tmp2 <- with ( lines, ifelse ( is.finite(slope),
int.sigsqs + slope * lims.sigsqe[2],
NA
)
)
above <- with ( lines, ifelse ( slope > -Inf, lims.sigsqs[1] > tmp1, lims.sigsqe[1] > int.sigsqe ) )
below <- with ( lines, ifelse ( slope > -Inf, lims.sigsqs[2] < tmp2, lims.sigsqe[2] < int.sigsqe ) )
straddle <- !(above | below)

# sanity check
if ( any ( above & below ) ) print ( "a box can't be both above and below a line")

status <- rep ( NA, nrow(lines) )
status[above] <- "above"
status[below] <- "below"
status[straddle] <- "straddle"

return(status)
}

getbounds <- function ( lims.sigsqe, lims.sigsqs, lines, status ){
# small sanity check
if ( missing(lims.sigsqe) || missing(lims.sigsqs) )
print ( "Please supply lims.sigsqe and lims.sigsqs")
if ( missing(lines) ) print ( "Please supply lines" )
if ( missing(status) ) print ( "Please supply status")
if ( length(status) != nrow(lines) )
print ( "length(status) != nrow(lines)" )

# evaluate each line at the upper-right corner of the box
ur <- with ( lines, a * lims.sigsqs[2] + lims.sigsqe[2] )
eval.ur <- with ( lines, -.5 * ( multiplier.log*log(ur) + multiplier.inv/ur ) )
# evaluate each line at the lower-left corner of the box
ll <- with ( lines, a * lims.sigsqs[1] + lims.sigsqe[1] )
eval.ll <- ifelse ( ll == 0,
-Inf,
with ( lines, -.5 * ( multiplier.log*log(ll) + multiplier.inv/ll ) )
)

bounds <- data.frame (
lower = rep ( NA, length(status) ),
upper = rep ( NA, length(status) )
)
# The next two lines of code are for lines that are not straddled.
# "above" means the box is above the line
bounds$lower <- ifelse ( status == "above", eval.ur, eval.ll )
bounds$upper <- ifelse ( status == "above", eval.ll, eval.ur )
# now we'll take care of straddled lines
strad <- status == "straddle"
bounds$lower [ strad ] <- pmin ( eval.ur[strad], eval.ll[strad] )
# for the upper bound, we can evaluate anywhere on the line, so we might
# as well evaluate at (int.sigsqe,0)
bounds$upper [ strad ] <-
with ( lines[strad,],
ifelse ( is.na(int.sigsqe),
-.5 * ( multiplier.log * log(int.sigsqs) + multiplier.inv/int.sigsqs ), # horizontal line
-.5 * ( multiplier.log * log(int.sigsqe) + multiplier.inv/int.sigsqe ) # other lines
)
)

return ( bounds )
}
95 changes: 95 additions & 0 deletions R/linefuncs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Given X, Z, Y, find the constants in Hodges' reexpression

findlines <- function ( x, z, y, SigE, SigS ){
# Find the constants in Hodges' reexpression.
# Just those constants related to the model, not the prior.
# sanity checks
if ( !is.matrix(x) ) print ("x should be a matrix")
if ( !is.matrix(z) ) print ("z should be a matrix")
if ( !is.vector(y) ) print ("y should be a vector")

n <- length(y)
nx <- ncol(x)
nz <- ncol(z)

if ( nrow(x) != n ) print ("nrow(x) != length(y)")
if ( nrow(z) != n ) print ("nrow(z) != length(y)")

if ( !is.matrix(SigE) ) print ("SigE should be a matrix")
if ( !is.matrix(SigS) ) print ("SigS should be a matrix")
if ( nrow(SigE) != n || ncol(SigE) !=n )
print ("SigE should be a square matrix to match length(y)")
if ( nrow(SigS) != nz || ncol(SigS) != nz )
print ("SigS should be a square matrix to match ncol(z)")

# Is SigE the identity? If not, make it so.
if ( !identical (SigE, diag(n) ) ){
tmp <- inv.sqrt(SigE)
y <- tmp %*% y
x <- tmp %*% x
z <- tmp %*% z
SigE <- diag(n)
}

# Is SigS the identity? If not, make it so.
if ( !identical ( SigS, diag(nz) ) ){
tmp <- sqrt.m(SigS)
z <- z %*% tmp
SigS <- diag(nz)
}

# sx, sz, Gamma_x, Gamma_z, Gamma_c # Is Gamma_c really needed?
qrx <- qr (x, LAPACK=FALSE ) # use LINPACK to get rank(x)
sx <- qrx$rank
Gamx <- qr.Q(qrx)[,1:sx]
if ( sx == 1 ) Gamx <- matrix ( Gamx, ncol=1 )

tmp <- qr.resid ( qrx, z )
qrz <- qr (tmp, LAPACK=FALSE ) # use LINPACK to get rank(x)
sz <- qrz$rank
Gamz <- qr.Q(qrz)[,1:sz]
if ( sz == 1 ) Gamz <- matrix ( Gamz, ncol=1 )

M <- qr.solve ( cbind(Gamx,Gamz), cbind(x,z) )
M.zz <- M [ -(1:sx), -(1:ncol(x)), drop=FALSE ]
tmp <- svd(M.zz)
a <- tmp$d^2 # follows Eq (15.1)
v <- t(tmp$u) %*% t(Gamz) %*% y # follows Eq (15.4)
if ( length(a) != sz ) print ( "length(a) != sz" )
if ( length(v) != sz ) print ( "length(v) != sz" )
rss <- sum ( resid ( lm ( y ~ cbind(Gamx,Gamz) ) )^2 )

lines <- data.frame (
a = c ( a, 0 ),
v = c ( v, sqrt(rss) ),
int.sigsqs = c ( v^2/a, NA ),
int.sigsqe = c ( v^2, rss/(n-(sx+sz)) ),
slope = c ( -1/a, -Inf ),
multiplier.log = c ( rep(1,sz), n-(sx+sz) ),
multiplier.inv = c ( v^2, rss )
)
return(lines)
}


addprior <- function ( lines, a.E=0, b.E=0, a.S=0, b.S=0 ){
if ( (a.E != 0) || (b.E != 0) ) { # if sigesq ~ IG(a.E,b.E)
vert <- which ( lines$slope == -Inf )
lines$multiplier.log[vert] <- lines$multiplier.log[vert] + 2*(a.E+1)
lines$multiplier.inv[vert] <- lines$multiplier.inv[vert] + b.E
lines$v[vert] <- sqrt(lines$multiplier.inv[vert])
lines$int.sigsqe[vert] <- lines$multiplier.inv[vert] / lines$multiplier.log[vert]
}
if ( (a.S != 0) || (b.S != 0) ) { # if sigssq ~ IG(a.S,b.S)
horiz <- data.frame ( a = 1,
multiplier.inv = 2*b.S,
multiplier.log = 2*(a.S+1),
v = sqrt(2*b.S),
int.sigsqs = b.S / (a.S+1),
int.sigsqe = NA,
slope = 0
)
lines <- rbind ( lines, horiz )
}
return ( lines )
}
104 changes: 104 additions & 0 deletions R/mainfunc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
fitlmm <- function ( lines, startbox, eps=0, delE=0, delS=0, M=Inf, maxit=10, ratio=FALSE ) {
# five control settings
# don't subdivide boxes whose upper and lower bounds are within eps
# don't subdivide boxes whose sigsqE spans less than delE
# don't subdivide boxes whose sigsqS spans less than delS
# don't subdivide boxes whose max RLL is more than M below the MRLE
# don't do more than maxit iterations
# ratio determines whether the sigsqE and sigsqS conditions are for ratios or absolute differences

if ( missing ( lines ) ) {
print ( "please supply lines" ); return
}
if ( missing ( startbox ) ) {
startbox <- makebox ( lines = lines,
lims.sigsqs = c ( 0, max ( lines$int.sigsqs[is.finite(lines$int.sigsqs)] ) ),
lims.sigsqe = c ( 0, max ( lines$int.sigsqe[is.finite(lines$int.sigsqe)] ) ),
status = rep ( "straddle", nrow(lines) )
)
}
inactive <- list()
ninact <- 0
# check whether startbox is a box or a list of boxes
ifelse ( length(startbox)==4 &&
identical ( names(startbox),
c("lims.sigsqe", "lims.sigsqs", "status", "bounds")
),
active <- list(startbox),
active <- startbox
)
#active <- list()
#active[[1]] <- startbox
nact <- length(active)
lowbound <- -Inf
iter <- 0

# conditions under which a box becomes inactive
killfunc <- function ( box, lb, M, eps, delE, delS, ratio ) {
# lb is a lower bound on max logRL; it changes at each iteration
# M, eps, delE, delS stay constant throughout the iterations.
cond.low <- sum ( box$bounds$upper < lb - M )
cond.eps <- sum ( box$bounds$upper - box$bounds$lower ) < eps
cond.E <- ifelse ( ratio,
diff ( log(box$lims.sigsqe) ) < delE,
diff ( box$lims.sigsqe ) < delE
)
cond.S <- ifelse ( ratio,
diff ( log(box$lims.sigsqs) ) < delS,
diff ( box$lims.sigsqs ) < delS
)
return ( cond.low || cond.eps || cond.E || cond.S )
}

while ( nact > 0 && iter < maxit ) {
# Find the lower bound of each box and the maximum of the lower bounds.
# For each active box, either make it inactive or divide it.
low.act <- max ( vapply ( X = active,
FUN = function(box) { sum ( box$bounds$lower ) },
FUN.VALUE = 0.1
)
)
lowbound <- max ( lowbound, low.act )
kill <- vapply ( X = active,
FUN = killfunc,
FUN.VALUE = TRUE,
lb=lowbound, M=M,eps=eps, delE-delE, delS=delS, ratio=ratio
)
nkill <- sum(kill)
if ( nkill > 0 ) { inactive [ (ninact+1) : (ninact+nkill) ] <- active[kill] }
ninact <- length(inactive)
kids <- list()
nkids <- 0
for ( i in which(!kill) ) {
kids [ (nkids+1):(nkids+4) ] <- splitbox ( active[[i]], lines ) # boxes are split into 4 parts
nkids <- nkids + 4
}
active <- kids
nact <- length(active)

iter <- iter+1
write ( c ( "iteration", iter, "nact", nact, "ninact", ninact, "lowbound", lowbound ),
file="bigcode.out", ncolumns=8, append=TRUE
)
}


tmp <- t ( vapply ( X = c ( active, inactive ),
FUN = function(box) { c ( box$lims.sigsqs,
box$lims.sigsqe,
sum ( box$bounds$lower ),
sum ( box$bounds$upper )
)
},
FUN.VALUE = c ( sigsqs.lo = 0.1,
sigsqs.hi = 0.1,
sigsqe.lo = 0.1,
sigsqe.hi = 0.1,
rll.lower = 0.1,
rll.upper = 0.1
)
)
)
return ( data.frame ( tmp ) )
}

Loading

0 comments on commit 3563b94

Please sign in to comment.