version 0.1-1

cran · Feb 9, 2000 · 74783f7 · 74783f7
commit 74783f7
Show file tree

Hide file tree

Showing 77 changed files with 5,534 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,12 @@
+Package: SASmixed
+Version: 0.1-1
+Date: 2000/02/09
+Title: Data sets from "SAS System for Mixed Models"
+Author: Original by Littel, Milliken, Stroup, and Wolfinger
+  modifications by Douglas Bates <bates@stat.wisc.edu> and
+  Jose Pinheiro <jcp@bell-labs.com>
+Description: Data sets and sample lme analyses corresponding
+  to the examples in Littel, Milliken, Stroup and Wolfinger
+  (1996), "SAS System for Mixed Models", SAS Institute.
+Depends: nlme
+License: GPL version 2 or later
diff --git a/INDEX b/INDEX
@@ -0,0 +1,23 @@
+Animal                   Animal breeding experiment
+AvgDailyGain             Average daily weight gain of steers on different
+                         diets
+BIB                      Data from a balanced incomplete block design
+Bond                     Strengths of metal bonds
+Cultivation              Bacterial innoculation applied to grass
+                         cultivars
+Demand                   Per-capita demand deposits by state and year
+Genetics                 Heritability data
+HR                       Heart rates of patients on different drug
+                         treatments
+IncBlk                   An unbalanced incomplete block experiment
+Mississippi              Nitrogen concentrations in the Mississippi River
+Multilocation            A multilocation trial
+PBIB                     A partially balanced incomplete block experiment
+SIMS                     Second International Mathematics Study data
+Semi2                    Oxide layer thicknesses on semiconductors
+Semiconductor            Semiconductor split-plot experiment
+TeachingI                Teaching Methods I
+TeachingII               Teaching Methods II
+WWheat                   Winter wheat
+WaferTypes               Data on different types of silicon wafers
+Weights                  Data from a weight-lifting program
diff --git a/R/zzz.R b/R/zzz.R
@@ -0,0 +1,25 @@
+### $Id: zzz.R,v 1.1 2000/02/09 18:41:49 bates Exp $
+###
+###    Sample analyses of data from SAS System for Mixed Models
+###
+### Copyright 2000-2000  Douglas M. Bates <bates@stat.wisc.edu>
+###               
+###
+### This file is part of the SASmixed library for R and related languages.
+### It is made available under the terms of the GNU General Public
+### License, version 2, or at your option, any later version,
+### incorporated herein by reference.
+### 
+### This program is distributed in the hope that it will be
+### useful, but WITHOUT ANY WARRANTY; without even the implied
+### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+### PURPOSE.  See the GNU General Public License for more
+### details.
+### 
+### You should have received a copy of the GNU General Public
+### License along with this program; if not, write to the Free
+### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+### MA 02111-1307, USA
+
+### Force loading of the nlme package for the examples.
+require(nlme)
diff --git a/TITLE b/TITLE
@@ -0,0 +1 @@
+SASmixed        Data sets from "SAS System for Mixed Models"
diff --git a/Usinglme.bib b/Usinglme.bib
@@ -0,0 +1,21 @@
+@String{id   = "$Id: Usinglme.bib,v 1.1 1999/10/13 00:50:09 saikat Exp $"}
+
+@Article{	  lair:ware:1982,
+  author	= {Laird, Nan M. and Ware, James H.},
+  title		= {Random-effects Models for Longitudinal Data},
+  journal	= {Biometrics},
+  year		= 1982,
+  volume	= 38,
+  pages		= {963--974},
+  keywords	= {Variance components; Repeated measurements; Empirical
+		  Bayes; EM algorithm}
+}
+
+@Book{litt:mill:stro:wolf:1996,
+  author =	 {Ramon C. Littell and George A. Milliken and Walter
+                  W. Stroup and Russell D. Wolfinger},
+  title = 	 {SAS System for Mixed Models},
+  publisher = 	 {SAS Institute, Inc.},
+  year = 	 1996
+}
+
diff --git a/Usinglme.tex b/Usinglme.tex
@@ -0,0 +1,285 @@
+%%% $Id: Usinglme.tex,v 1.1 1999/10/13 00:50:09 saikat Exp $
+\documentclass[]{article}
+\NeedsTeXFormat{LaTeX2e}[1995/06/01]
+\usepackage[dcu,dcucite]{harvard}
+\usepackage{times}
+\usepackage{amsbsy}
+\usepackage{amsmath}
+\usepackage{graphicx}
+\usepackage{alltt}
+\graphicspath{{figs/}}
+\DeclareGraphicsExtensions{.epsi.gz,.epsi,.eps.gz,.eps,.ps,.ps.gz}
+\DeclareGraphicsRule{*}{eps}{*}{}
+\DeclareGraphicsRule{.epsi.gz}{eps}{.bb}{`zcat #1}
+\DeclareGraphicsRule{.eps.gz}{eps}{.bb}{`zcat #1}
+\DeclareGraphicsRule{.ps.gz}{eps}{.bb}{`zcat #1}
+\setkeys{Gin}{width=\textwidth}
+\input{defs}
+\newenvironment{Example}%
+{\begin{list}{}{\setlength{\leftmargin}{.5\parindent}}%
+  \item\begin{alltt}\small}%
+    {\end{alltt}\end{list}}
+\pagenumbering{arabic}
+\begin{document}
+\title{\textbf{\textsf{lme} for \textsf{SAS PROC MIXED} Users}}
+\author{Douglas M.~Bates\smallskip\\
+  \textsf{\normalsize Department of Statistics}\\[-0.3ex]
+  \textsf{\normalsize University of Wisconsin -- Madison}
+  \thanks{This research was supported by the National Science
+    Foundation through grant DMS-9704349.}
+  \and  Jos\'e C.~Pinheiro\smallskip\\
+  \textsf{\normalsize Bell Laboratories}\\[-0.3ex]
+  \textsf{\normalsize Lucent Technologies}}
+\date{}
+\maketitle
+
+\section{Introduction}
+\label{sec:intro}
+
+The \Co{lme} function from the \Co{nlme} library for \Splus{} or the
+\Co{lme} library for \R{} is used to fit linear mixed-effects models.
+It is similar in scope to the \textsf{SAS} procedure \Co{PROC MIXED}
+described in \citeasnoun{litt:mill:stro:wolf:1996}.
+
+A file on the SAS Institute web site (\textsf{http://www.sas.com})
+contains all the data sets in the book and all the SAS programs used
+in \citeasnoun{litt:mill:stro:wolf:1996}.  We have converted the data
+sets from the tabular representation used for SAS to the
+\Co{groupedData} objects used by \Co{lme}.  To help users familiar
+with \Co{SAS PROC MIXED} get up to speed with \Co{lme} more quickly,
+we provide transcripts of some \Co{lme} analyses paralleling the
+\Co{SAS PROC MIXED} analyses in \citeasnoun{litt:mill:stro:wolf:1996}.
+
+In this paper we highlight some of the similarities and differences of
+\Co{lme} analysis and \Co{SAS PROC MIXED} analysis.
+
+\section{Similarities between lme and SAS PROC MIXED}
+\label{sec:similarities}
+
+Both \Co{SAS PROC MIXED} and \Co{lme} can fit linear mixed-effects
+models expressed in the Laird-Ware formulation.  For a single level of
+grouping \citeasnoun{lair:ware:1982} write the $n_i\/$-dimensional
+response vector $\by_i$ for the $i\/$th experimental unit as
+\begin{gather}
+  \label{eqn:oneLevel}
+  \by_i = \bX_i \bbeta + \bZ_i \bb_i + \beps_i,\quad i=1,\dots,M\\
+  \bb_i\sim\gaus(\bzer,\bSigma),
+  \quad\beps_i\sim\gaus(\bzer,\sigma^2 \bI)\notag
+\end{gather}
+where $\bbeta$ is the $p$-dimensional vector of \emph{fixed effects},
+$\bb_i$ is the $q$-dimensional vector of \emph{random effects},
+$\bX_i$ (of size $n_i\times p$) and $\bZ_i$ (of size $n_i\times q$)
+are known fixed-effects and random-effects regressor matrices, and
+$\beps_i$ is the $n_i\/$-dimensional \emph{within-group error} vector
+with a spherical Gaussian distribution.  The assumption
+$\mathrm{Var}(\beps_i)=\sigma^2\bI$ can be relaxed using additional
+arguments in the model fitting.
+
+The basic specification of the model requires a linear model
+expression for the fixed effects and a linear model expression for the 
+random effects.  In \Co{SAS PROC MIXED} the fixed-effects part is
+specified in the \Co{model} statement and the random-effects
+part in the \Co{random} statement.  In \Co{lme} the
+arguments are called \Co{fixed} and \Co{random}.
+
+Both \Co{SAS PROC MIXED} and \Co{lme} allow a mixed-effects model to
+be fit by maximum likelihood (\Co{method = ml} in SAS) or by maximum
+residual likelihood, sometimes also called restricted maximum
+likelihood or \textsf{REML}.  This is the default criterion in \Co{SAS
+  PROC MIXED}.  The default criterion in \Co{lme} is maximum
+likelihood.  To get \textsf{REML} estimates in \Co{lme}, set the
+optional argument \Co{REML=TRUE}.
+
+
+\section{Important differences}
+\label{sec:differences}
+
+One of the most important differences has just been stated but is
+worth repeating.  SAS defaults to \textsf{REML} fits; \Co{lme}
+defaults to maximum likelihood fits.
+
+The output from \Co{PROC MIXED} typically includes values of the
+Akaike Information Criterion (\textsf{AIC}) and Schwartz's Bayesian
+Criterion (\textsf{SBC}).  These are used to compare different models
+fit to the same data.  The output of the \Co{summary} function applied
+to the object created by \Co{lme} also produces values of \textsf{AIC}
+and \textsf{BIC} but the definitions used in \Co{PROC MIXED} and in
+\Co{lme} are different.  In \Co{lme} the definitions are such that
+``smaller is better''.  In \Co{PROC MIXED} the definitions are such
+that ``bigger is better''.
+
+When models are fit by \textsf{REML}, the values of \textsf{AIC},
+\textsf{SBC} (or \textsf{BIC}) and the log-likelihood can only be
+compared between models with exactly the same fixed-effects structure.
+When models are fit by maximum likelihood these criteria can be
+compared between any models fit to the same data.  That is, these
+quality-of-fit criteria can be used to evaluate different
+fixed-effects specifications or different random-effects
+specifications or different specifications of both fixed effects and
+random effects.  The greater flexibility of model comparisons when
+using maximum likelihood is the reason that this is the default
+criterion in \Co{lme}.
+
+We encourage developing and testing the model using likelihood ratio
+tests or the \textsf{AIC} and \textsf{BIC} criteria.  Once a form
+for both the random effects and the fixed effects has been determined,
+the model can be refit with \Co{REML = TRUE} if the restricted
+estimates of the variance components are desired.
+
+\section{Data manipulation}
+\label{sec:data}
+
+Both \Co{PROC MIXED} and \Co{lme} work with data in a tabular form
+with one row per observation.  There are, however, important
+differences in the internal representations of variables in the data.
+
+In \textsf{SAS} a qualitative factor can be stored either as numerical
+values or alphanumeric labels.  When a factor stored as numerical
+values is used in \Co{PROC MIXED} it is listed in the \Co{class}
+statement to indicate that it is a factor.  In \s{} this information
+is stored with the data itself by converting the variable to a factor
+when it is first stored.  If the factor represents an ordered set of
+levels, it should be converted to an \Co{ordered} factor.
+
+For example the SAS code
+\begin{Example}
+data animal;
+ input trait animal y;
+ datalines;
+1 1 6
+1 2 8
+1 3 7
+2 1 9
+2 2 5
+2 3 .
+;
+\end{Example}
+would require that the \Co{trait} and \Co{animal} variables be
+specified in a class statement in any model that is fit.
+
+In \s{} these data could be read from a file, say \texttt{animal.dat},
+and converted to factors by
+\begin{Example}
+S> animal <- read.table( "animal.dat", header = TRUE )
+S> class( animal )
+[1] "data.frame"
+S> animal$trait <- as.factor( animal$trait )
+S> animal$animal <- as.factor( animal$animal )
+\end{Example}
+In general it is a good idea to check the types of variables in a data 
+frame before working with it.  One way of doing this is to apply
+the function \textsf{data.class} to each variable in turn using the
+\Co{sapply} function.
+\begin{Example}
+S> sapply( animal, data.class )
+    trait    animal         y 
+ "factor"  "factor" "numeric" 
+\end{Example}
+
+To make specification of models in \Co{lme} easier and to make graphic
+presentations more informative, we recommend converting from a
+\Co{data.frame} object to a \Co{groupedData} object.  This class of
+objects contains a formula specifying the response, the primary
+covariate (if there is one) and the grouping factor or factors.  The
+data sets from \citeasnoun{litt:mill:stro:wolf:1996} have been
+converted to \Co{groupedData} objects in this directory.
+
+\subsection{Unique levels of factors}
+\label{sec:nested}
+
+Designs with nested grouping factors are indicated differently in the
+two languages.  An example of such an experimental design is the
+semiconductor experiment described in section 2.2 of
+\citeasnoun{litt:mill:stro:wolf:1996} where twelve wafers are 
+assigned to four experimental treatments with three wafers per
+treatment.  The levels for the wafer factor are \Co{1}, \Co{2}, and
+\Co{3} but the wafer factor is only meaningful within the same level
+of the treatment factor, \Co{et}.  There is nothing associating wafer
+\Co{1} of the third treatment group with wafer \Co{1} of the first
+treatment group.
+
+In \Co{SAS} this nesting of factors is denoted by \Co{wafer(et)}.  In
+\s{} the nesting is written with \Co{~ ET/Wafer} and read ``wafer
+within ET''.  If both levels of nested factors are to be associated
+with random effects then this is all you need to know.  You would use
+an expression with a \Co{"/"} in the grouping factor part of the
+formula for the \Co{groupedData} object.  Then the random effects
+could be specified as
+\begin{Example}
+  random = list( ET = ~ 1, Wafer = ~ 1 )
+\end{Example}
+or, equivalently
+\begin{Example}
+  random = ~ 1 | ET/Wafer
+\end{Example}
+
+In this case, however, there would not usually be any random effects
+associated with the ``experimental treatment'' or \Co{ET} factor.  The 
+only random effects are at the \Co{Wafer} level.  It is necessary to
+create a factor that will have unique levels for each \Co{Wafer}
+within each level of \Co{ET}.  One way to do this is to assign
+\begin{Example}
+S> Semiconductor$Grp <-
++  getGroups( Semiconductor, form = ~ ET / Wafer, level = 2 )
+S> levels( Semiconductor$Grp )  # check on the distinct levels
+ [1] "1/1" "1/2" "1/3" "2/1" "2/2" "2/3" "3/1" "3/2" "3/3" "4/1"
+[11] "4/2" "4/3"
+\end{Example}
+after which we could specify \Co{random = ~ 1 | Grp}.
+
+\subsection{General approach}
+\label{sec:generalApproach}
+
+As a general approach to importing data into \s{} for mixed-effects
+analysis you should:
+\begin{itemize}
+\item Create a \Co{data.frame} with one row per observation and one
+  column per variable.
+\item Use \Co{ordered} or \Co{as.ordered} to explicitly convert any
+  ordered factors to class \Co{ordered}.
+\item Use \Co{ordered} or \Co{as.ordered} to explicitly convert any
+  ordered factors to class \Co{ordered}.
+\item If necessary, use \Co{getGroups} to create a factor with unique
+  levels from inner nested factors.
+\item Specify the formula for the response, the primary covariate and
+  the grouping structure to create a \Co{groupedData} object from the
+  data frame.  Labels and units for the response and the primary
+  covariate can also be specified at this time as can \Co{outer} and
+  \Co{inner} factor expressions.
+\item Plot the data.  Plot it several ways.  The use of trellis
+  graphics is closely integrated with the \Co{nlme} library.  The
+  trellis plots can provide invaluable insight into the structure of
+  the data.  Use them.
+\end{itemize}
+
+\section{Contrasts}
+\label{sec:contrasts}
+
+When comparing estimates produced by \Co{SAS PROC MIXED} and by
+\Co{lme} one must be careful to consider the contrasts that are
+used to define the effects of factors.  In \textsf{SAS} a model with
+an intercept and a qualitative factor is defined in terms of the
+intercept and the indicator variables for all but the last level of
+the factor.  The default behaviour in \s{} is to use the Helmert
+contrasts for the factor.  On a balanced factor these provide a set of 
+orthogonal contrasts.  In \R{} the default is the ``treatment''
+contrasts which are almost the same as the SAS parameterization except 
+that they drop the indicator of the first level, not the last level.
+
+When in doubt, check which contrasts are being used with the
+\textsf{contrasts} function.
+
+To make comparisons easier, you may find it worthwhile to declare
+\begin{Example}
+S> options(contrasts = c(factor = "contr.SAS",
++                        ordered = "contr.poly"))
+\end{Example}
+at the beginning of your session.
+
+\bibliography{Usinglme}
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
diff --git a/data/00Index b/data/00Index
@@ -0,0 +1,23 @@
+Animal                   Animal breeding experiment
+AvgDailyGain             Average daily weight gain of steers on different
+                         diets
+BIB                      Data from a balanced incomplete block design
+Bond                     Strengths of metal bonds
+Cultivation              Bacterial innoculation applied to grass
+                         cultivars
+Demand                   Per-capita demand deposits by state and year
+Genetics                 Heritability data
+HR                       Heart rates of patients on different drug
+                         treatments
+IncBlk                   An unbalanced incomplete block experiment
+Mississippi              Nitrogen concentrations in the Mississippi River
+Multilocation            A multilocation trial
+PBIB                     A partially balanced incomplete block experiment
+SIMS                     Second International Mathematics Study data
+Semi2                    Oxide layer thicknesses on semiconductors
+Semiconductor            Semiconductor split-plot experiment
+TeachingI                Teaching Methods I
+TeachingII               Teaching Methods II
+WWheat                   Winter wheat
+WaferTypes               Data on different types of silicon wafers
+Weights                  Data from a weight-lifting program