# Mixed models

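This repository demonstrates different algorithmic approaches to fitting generalized linear mixed models. In the linear (Gaussian) case the model is specified hierarchically, conditional on the random effects $\mathbf{u}$: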

\begin{align}
\mathbf{y} \mid \mathbf{u} &\sim \mathcal{N}\left(\mathbf{X}\boldsymbol \beta + \mathbf{Z}\mathbf{u}, \sigma^2 \mathbf{I}\right)\\
\mathbf{u} & \sim  \mathcal{N}\left(\mathbf{0}, \mathbf{Q}\right)
\end{align}

Integrating out the random effects $\mathbf{u}$ gives the marginal distribution of $\mathbf{y}$:

\begin{align}
\mathbf{y} \sim \mathcal{N}\left(\mathbf{X}\boldsymbol{\beta}, \sigma^2 \mathbf{I} + \mathbf{Z} \mathbf{Q} \mathbf{Z}^T\right)
\end{align}

In [1]:
import statsmodels
from patsy import dmatrices, dmatrix, demo_data

In [2]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the sleep-study table from the repository's data directory.
tab = pd.read_csv("./data/sleepstudy.csv")

In [4]:
# Build the response (Reaction) and fixed-effects (Days) design matrices
# from the formula.
Y, X = dmatrices("Reaction ~ Days", data=tab)

In [5]:
def _build_ranef_model_matrix(tab, factor, ranef):
    """Build the design matrix for a random intercept and random slope.

    Parameters
    ----------
    tab : pandas.DataFrame
        Data containing the columns named by ``factor`` and ``ranef``.
    factor : str
        Name of the grouping column; every distinct value gets its own
        pair of columns in the result.
    ranef : str
        Name of the numeric covariate that receives a per-group slope.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by the index of ``tab`` with two-level column labels.
        For every level ``g`` of ``factor`` there is an indicator column
        ``("grp", g)`` (1 where the row belongs to ``g``, 0 otherwise)
        immediately followed by a slope column ``(ranef, g)`` (the row's
        ``ranef`` value where it belongs to ``g``, 0 otherwise).

    Notes
    -----
    Missing ``ranef`` values are encoded as 0, exactly like rows that lie
    outside the group.
    """
    # Pivoting a constant marker on `factor` leaves NaN wherever a row is not
    # in a group, so "not NaN" is exactly the group-membership indicator.
    # The grouping column is taken from `factor`, not from a fixed column name.
    membership = tab[[factor]].assign(grp=1.0).pivot(columns=factor)
    intercepts = membership.notna().astype(float)

    # Same pivot for the covariate. fillna returns a new frame instead of
    # writing through `.values`, which pandas does not guarantee to be a
    # writable view of the underlying data.
    slopes = tab[[factor, ranef]].pivot(columns=factor).fillna(0)

    Z = pd.concat([intercepts, slopes], axis=1, sort=True)

    # sorted() is stable, so ordering by the group level alone keeps each
    # intercept column directly in front of its slope column.
    ordered = sorted(Z.columns, key=lambda label: label[1])
    return Z.reindex(ordered, axis=1)
# Random-effects design matrix: one intercept and one Days-slope column
# per Subject.
Z = _build_ranef_model_matrix(tab, factor="Subject", ranef="Days")

In [7]:
import scipy.stats

In [10]:
# Ten standard-normal draws arranged as a 2 x 5 array.
nu = scipy.stats.norm.rvs(size=(2, 5))
nu

array([[ 0.11810381,  2.38824922, -1.63684812, -0.17010394,  0.70563158],
       [-0.74328986,  1.35520054,  0.14254812, -0.19973519,  0.98262199]])

In [11]:
# Square zero matrix with one row and one column per column of Z.
# np.zeros is used because scipy.zeros is only a deprecated alias of it.
G = np.zeros((Z.shape[1], Z.shape[1]))

In [13]:
# Number of columns in the random-effects design matrix Z.
len(Z.columns)

36