# Biostat 257 HW2
## Caesar Z. Li
## UID: 704135662


## Question 1
Write down the log-likelihood of the $i$ -th datum, with $(\boldsymbol{y}_i, \boldsymbol{X}_i, \boldsymbol{Z}_i)$, given parameters, $(\boldsymbol{\beta}, \boldsymbol{\Sigma}, \sigma)$.    
<br/>
Answer: the marginal distribution of a linear mixed effect model is given by,
$$
\boldsymbol{y}_i \sim N(\boldsymbol{X}_i \boldsymbol{\beta}, \boldsymbol{Z}_i\boldsymbol{\Sigma}\boldsymbol{Z}^{T}_i + \sigma^2\boldsymbol{I}) 
$$

Thus the marginal likelihood is given by,
$$
-\frac{n_i}{2}log(2\pi)-\frac{1}{2}log(|\boldsymbol{D}|)-\frac{1}{2}(\boldsymbol{y}_i - \boldsymbol{X}_i \boldsymbol{\beta})^{T}\boldsymbol{D}^{-1}(\boldsymbol{y}_i - \boldsymbol{X}_i \boldsymbol{\beta})
$$

, where $\boldsymbol{D} = \boldsymbol{Z}_i\boldsymbol{\Sigma}\boldsymbol{Z}^{T}_i + \sigma^2\boldsymbol{I}$



## Question 2 Start-up Code



In [8]:
# define a type that holds LMM datum
struct LmmObs{T <: AbstractFloat}
    # data
    y :: Vector{T}
    X :: Matrix{T}
    Z :: Matrix{T}
    # working arrays
    # whatever intermediate arrays you may want to pre-allocate
    res        :: Vector{T}
    storage_q  :: Vector{T}
    ztz        :: Matrix{T}
    storage_qq :: Matrix{T}
end

# constructor
function LmmObs(
        y::Vector{T}, 
        X::Matrix{T}, 
        Z::Matrix{T}) where T <: AbstractFloat
    res        = similar(y)
    storage_q  = Vector{T}(undef, size(Z, 2))
    ztz        = transpose(Z) * Z
    storage_qq = similar(ztz)
    LmmObs(y, X, Z, res, storage_q, ztz, storage_qq)
end

LmmObs

In [215]:
function logl!(
        obs :: LmmObs{T}, 
        β   :: Vector{T}, 
        L   :: Matrix{T}, 
        σ²  :: T) where T <: AbstractFloat
    n, p, q = size(obs.X, 1), size(obs.X, 2), size(obs.Z, 2)    
    # TODO: compute and return the log-likelihood
    obs.storage_qq .= I + transpose(L) * obs.ztz * L / σ²
    #obs.storage_qq = 
    obs.res .= y - obs.X * β
    
    return -(n//2) * log(2π) - (1//2) * (n * log(σ²) + logdet(obs.storage_qq)) -
        (1//2) * transpose(obs.res) * ((1 / σ²) * I - (1 / σ²^2) * obs.Z * inv(inv(L * transpose(L)) + obs.ztz / σ²) * transpose(obs.Z)) * obs.res
    
    #sleep(1e-3) # wait 1 ms as if your code takes 1ms
    #return 0.0
end

logl! (generic function with 2 methods)

In [219]:
function logl!(
        obs :: LmmObs{T}, 
        β   :: Vector{T}, 
        L   :: Matrix{T}, 
        σ²  :: T) where T <: AbstractFloat
    n, p, q = size(obs.X, 1), size(obs.X, 2), size(obs.Z, 2)    
    # TODO: compute and return the log-likelihood
    obs.storage_qq .= I + transpose(L) * obs.ztz * L / σ²
    dtmt = logdet(obs.storage_qq)
    obs.storage_qq .= inv(L * transpose(L)) + obs.ztz / σ²
    obs.res .= y - obs.X * β
    obs.storage_q .= transpose(obs.Z) * obs.res
    
    return -(n//2) * log(2π) - (1//2) * (n * log(σ²) + dtmt) -
        (1//2) * (dot(obs.res, obs.res) * (1 / σ²) - (1 / σ²^2) * dot(obs.storage_q, obs.storage_qq \ obs.storage_q))
    
    #sleep(1e-3) # wait 1 ms as if your code takes 1ms
    #return 0.0
end

logl! (generic function with 2 methods)

## Question 3 Correctness (15 pts)

In [9]:
using BenchmarkTools, Distributions, LinearAlgebra, Random

Random.seed!(257)
# dimension
n, p, q = 2000, 5, 3
# predictors
X  = [ones(n) randn(n, p - 1)]
Z  = [ones(n) randn(n, q - 1)]
# parameter values
β  = [2.0; -1.0; rand(p - 2)]
σ² = 1.5
Σ  = fill(0.1, q, q) + 0.9I
# generate y
y  = X * β + Z * rand(MvNormal(Σ)) + sqrt(σ²) * randn(n)

# form an LmmObs object
obs = LmmObs(y, X, Z)

┌ Info: Precompiling BenchmarkTools [6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf]
└ @ Base loading.jl:1242
┌ Info: Recompiling stale cache file /u/home/c/caeserio/.julia/compiled/v1.2/Distributions/xILW0.ji for Distributions [31c24e10-a181-5473-b8eb-7969acd0382f]
└ @ Base loading.jl:1240


LmmObs{Float64}([5.739048710854997, 5.705395720270055, 2.7368899643050355, 1.4201223592870755, -0.2099433929180451, 3.5886971824690486, -1.3778538474575956, -0.08406026821055246, -2.208007878450787, 1.309558511583542  …  1.294787618017269, -1.9701265304395086, -2.040383092851745, -1.4590296825658675, 0.18616271231054726, 1.0681247149968018, 2.2292080864625254, 1.195238535460355, 1.1310626949609701, -0.43507816286713785], [1.0 -2.506566300781151 … 0.5863780184080776 1.1092991040518192; 1.0 -0.974090320735282 … 1.4143507320583761 0.45608259198567447; … ; 1.0 -1.0076371084863895 … -1.3241972696483915 1.4547609424344008; 1.0 0.38036793320364776 … -0.5857507269707397 1.796804266836504], [1.0 -0.6380567326757537 1.4738982136806946; 1.0 -2.0711110232845926 0.21422658785510312; … ; 1.0 0.5917731507133951 -0.9163364468263059; 1.0 0.9463732120394507 -0.325860403600768], [2.4996631e-316, 2.56859986e-316, 2.36931196e-316, 2.7970985e-316, NaN, 2.58664156e-316, 2.797557e-316, 2.7971072e-316, NaN, 2.

In [17]:
μ  = X * β
Ω  = Z * Σ * transpose(Z) +  σ² * I
mvn = MvNormal(μ, Symmetric(Ω)) # MVN(μ, Σ)
logpdf(mvn, y)

-3247.456858063827

Now check my answers with the answer from standard package above:

In [220]:
L = Matrix(cholesky(Σ).L)
logl!(obs, β, L, σ²)

-3247.4568580638243

In [221]:
@assert logl!(obs, β, Matrix(cholesky(Σ).L), σ²) ≈ logpdf(mvn, y)

## Question 4 Efficiency 
<br/>

Benchmarking your code and compare to the Distributions.jl function `logpdf`.

In [111]:
# benchmark the `logpdf` function in Distribution.jl
bm1 = @benchmark logpdf($mvn, $y)

BenchmarkTools.Trial: 
  memory estimate:  30.55 MiB
  allocs estimate:  4
  --------------
  minimum time:     42.359 ms (1.07% GC)
  median time:      43.844 ms (2.97% GC)
  mean time:        46.699 ms (2.78% GC)
  maximum time:     50.171 ms (2.55% GC)
  --------------
  samples:          107
  evals/sample:     1

In [222]:
# benchmark your implementation
L = Matrix(cholesky(Σ).L)
bm2 = @benchmark logl!($obs, $β, $L, $σ²)

BenchmarkTools.Trial: 
  memory estimate:  35.42 KiB
  allocs estimate:  24
  --------------
  minimum time:     32.243 μs (0.00% GC)
  median time:      39.740 μs (0.00% GC)
  mean time:        62.655 μs (7.12% GC)
  maximum time:     4.637 ms (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [224]:
# this is the points you'll get
clamp(median(bm1).time / median(bm2).time / 1000 * 30, 0, 30)

30.0

In [22]:
names(Main)[4:end]

16-element Array{Symbol,1}:
 :Dchol 
 :LmmObs
 :Main  
 :X     
 :Z     
 :mvn   
 :n     
 :obs   
 :p     
 :q     
 :y     
 :Σ     
 :Ω     
 :β     
 :μ     
 :σ²    

0.03

In [180]:
a = obs.Z * L * transpose(L) * transpose(obs.Z) + σ² * I
b = cholesky(Symmetric(a))
c = y - obs.X * β
-(n//2) * log(2π) - (1//2) * (dot(c, b \ c))

-2831.2102069941466

In [188]:
(1//2) * (n * log(σ²) + logdet(σ² * I + transpose(L) * obs.ztz * L))

416.85484873184157

In [189]:
-(n//2) * log(2π) - (1//2) * n * log(σ²) * (1 + logdet(I + transpose(L) * obs.ztz * L / σ²))

-10986.421139444486

In [203]:
I + transpose(L) * obs.ztz * L / σ²

3×3 Array{Float64,2}:
 1356.8     136.259   120.75 
  136.259  1317.38    139.492
  120.75    139.492  1332.48 