# Mixed models

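This repository demonstrates different algorithmic approaches to fitting generalized linear mixed models. The starting point is the linear mixed model with Gaussian response, written conditionally on the random effects $\mathbf{u}$: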

\begin{align}
\mathbf{y} \mid \mathbf{u} &\sim \mathcal{N}\left(\mathbf{X}\boldsymbol \beta + \mathbf{Z}\mathbf{u}, \sigma^2 \mathbf{I}\right)\\
\mathbf{u} & \sim  \mathcal{N}\left(\mathbf{0}, \mathbf{Q}\right)
\end{align}

Integrating out the random effects $\mathbf{u}$ gives the marginal distribution of $\mathbf{y}$:

\begin{align}
\mathbf{y} \sim \mathcal{N}\left(\mathbf{X}\boldsymbol \beta, \sigma^2 \mathbf{I} +   \mathbf{Z} \mathbf{Q}  \mathbf{Z}^T\right)
\end{align}

In [1]:
import statsmodels
from patsy import dmatrices, dmatrix, demo_data

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the sleep-study observations (columns used below: Reaction, Days, Subject).
tab = pd.read_csv("./data/sleepstudy.csv")

In [4]:
# Build the response Y (Reaction) and the fixed-effects design matrix X from the
# patsy formula; as the X[:10,] output further down shows, X holds a column of
# ones followed by the Days values.
Y, X = dmatrices("Reaction ~ Days", tab)

In [5]:
def _build_ranef_model_matrix(tab, factor, ranef):
    inter_tab = tab[[factor, ranef]].copy()    
    inter_tab['grp'] = LabelEncoder().fit_transform(tab.Subject)
    inter_tab = inter_tab[[factor, "grp"]].pivot(columns=factor).reindex()
    inter_tab.values[np.isfinite(inter_tab.values)] = 1
    inter_tab.values[np.isnan(inter_tab.values)] = 0
    
    slope_tab = tab[[factor, ranef]].copy().pivot(columns=factor).reindex()
    slope_tab.values[np.isnan(slope_tab.values)] = 0
    
    Z = pd.concat([inter_tab, slope_tab], axis=1, sort=True)
    Z = Z.reindex(sorted(Z.columns, key=lambda x: x[1]), axis=1)
    
    return Z
# Random-effects design matrix for the sleep study: for each Subject an
# intercept-indicator column and a Days (slope) column.
Z = _build_ranef_model_matrix(tab, "Subject", "Days")

In [21]:
import scipy.stats

In [7]:
# Draw 10 standard-normal values and arrange them as a 2 x 5 array.
# NOTE(review): no random_state/seed is set, so the values recorded below will
# differ on every run.
nu = scipy.stats.norm.rvs(size=10).reshape((2, 5))
nu

array([[-0.12901427, -1.41500198,  0.7590279 , -0.76963767, -0.10886077],
       [ 0.62217977, -0.51004901,  0.60579558, -0.13394496,  1.87665008]])

In [8]:
# Square zero matrix with one row/column per column of Z.
# np.zeros replaces scipy.zeros: the NumPy aliases in the top-level scipy
# namespace are deprecated and no longer available in current SciPy releases.
G = np.zeros(shape=(Z.shape[1], Z.shape[1]))

In [9]:
# First ten rows of the fixed-effects design matrix.
X[:10]

array([[1., 0.],
       [1., 1.],
       [1., 2.],
       [1., 3.],
       [1., 4.],
       [1., 5.],
       [1., 6.],
       [1., 7.],
       [1., 8.],
       [1., 9.]])

In [18]:
# First ten rows of the fixed-effects design matrix.
X[:10]

array([[1., 0.],
       [1., 1.],
       [1., 2.],
       [1., 3.],
       [1., 4.],
       [1., 5.],
       [1., 6.],
       [1., 7.],
       [1., 8.],
       [1., 9.]])

In [49]:
# Empty 2 x 2 matrix, plus the parameter vector whose entries nu[1:] are
# written into its lower triangle in the following cell.
G = np.zeros((2, 2))
nu = np.array([1.0, 1.0, 0.5, 1.0])

In [50]:
# Write nu[1:] into the lower triangle of G in row-major order, i.e. positions
# (0, 0), (1, 0), (1, 1); the upper triangle stays zero. G is used further down
# as a factor of the covariance G.dot(G.T).
G[np.tril_indices(2)] = nu[1:]

In [51]:
G

array([[1. , 0. ],
       [0.5, 1. ]])

In [20]:
# NOTE(review): the recorded output below ([[1, 2], [0, 3]]) is not the
# transpose of G as displayed above ([[1, 0], [0.5, 1]]); it appears to come
# from an earlier run with different values -- re-run to refresh.
G.T

array([[1., 2.],
       [0., 3.]])

In [38]:
# Block-diagonal matrix holding one copy of G for every pair of Z columns.
k = scipy.linalg.block_diag(*([G] * (Z.shape[1] // 2)))

In [39]:
k.shape

(36, 36)

In [28]:
# One draw from a zero-mean bivariate normal whose covariance is G G^T.
scipy.stats.multivariate_normal.rvs(mean=[0, 0], cov=G @ G.T, size=1)

array([ 2.06880208, -1.05955829])

In [41]:
# NOTE(review): the recorded output below (array([1, 2, 3])) does not match the
# four-element `nu` assigned earlier in this file; it appears to come from a
# different run -- re-run to refresh.
nu

array([1, 2, 3])

In [45]:
# Diagonal matrix with the entries of nu on its diagonal.
# np.diag replaces scipy.diag: the NumPy aliases in the top-level scipy
# namespace are deprecated and no longer available in current SciPy releases.
np.diag(nu)

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

In [48]:
# `sp` was never imported (this cell previously failed with a NameError);
# NumPy is already available as `np` and provides the same constructor.
nu0 = np.array([1, 1, 0.5, 1])