R/wordfish.R

#' Estimate a Wordfish Model
#' 
#' Estimates a Wordfish model using Conditional Maximum Likelihood.
#' 
#' Fits a Wordfish model with document ideal points constrained to mean zero
#' and unit standard deviation.
#' 
#' The \code{control} list specifies options for the estimation process.
#' \code{conv.check} is either 'll' which stops when the difference
#' in log likelihood between iterations is less than \code{tol}, or 'cor'
#' which stops when one minus the correlation between the \code{theta}s
#' from the current and the previous iterations is less  
#' than \code{tol}. \code{sigma} is the standard deviation for the beta 
#' prior in poisson form. \code{startparams} is a list of starting values
#' (\code{theta}, \code{beta}, \code{psi} and \code{alpha}) or a 
#' previously fitted Wordfish model for the same data.
#' \code{verbose} generates a running commentary during estimation
#' 
#' The model has two equivalent forms: a poisson model with two sets of
#' document and two sets of word parameters, and a multinomial with two sets of
#' word parameters and document ideal points.  The first form is used for
#' estimation, the second is available for alternative summaries, prediction, 
#' and profile standard error calculations.
#' 
#' The model is regularized by assuming a prior on beta with mean zero and
#' standard deviation sigma (in poisson form).  If you don't want to
#' regularize, set beta to a large number.
#' 
#' @param wfm a word frequency matrix
#' @param dir set global identification by forcing \code{theta[dir[1]]} <
#' \code{theta[dir[2]]} (defaults to first and last document)
#' @param control list of estimation options
#' @param verbose produce a running commentary
#' @return An object of class wordfish.  This is a list containing:
#' 
#' \item{dir}{global identification of the dimension} 
#' \item{theta}{document
#' positions} 
#' \item{alpha}{document fixed effects} 
#' \item{beta}{word slope
#' parameters} 
#' \item{psi}{word fixed effects} 
#' \item{docs}{names of the documents} 
#' \item{words}{names of words} 
#' \item{sigma}{regularization parameter for betas in poisson form}
#' \item{ll}{final log likelihood} 
#' \item{se.theta}{standard errors for document position} 
#' \item{data}{the original data}
#' 
#' @author Will Lowe
#' @importFrom numDeriv hessian
#' @seealso \code{\link{plot.wordfish}}, \code{\link{summary.wordfish}},
#' \code{\link{coef.wordfish}}, \code{\link{fitted.wordfish}},
#' \code{\link{predict.wordfish}}, \code{\link{sim.wordfish}}
#' @references Slapin and Proksch (2008) 'A Scaling Model for Estimating
#' Time-Series Party Positions from Texts.' American Journal of Political
#' Science 52(3):705-772.
#' @examples
#' 
#' dd <- sim.wordfish()
#' wf <- wordfish(dd$Y)
#' summary(wf)
#' 
#' @importFrom methods is
#' @importFrom stats dmultinom cor median optim optimize
#' @importFrom utils flush.console
#' @export
wordfish <- function(wfm,
                     dir=c(1, length(docs(wfm))),
                     control=list(tol=1e-06, sigma=3,
                                  startparams=NULL,
                                  conv.check=c('ll', 'cor')), 
                     verbose=FALSE){
  thecall <- match.call()
  dir <- dir
  
  if (!is.wfm(wfm))
    stop("First argument must be an object of type wfm")
  
  ## strip out any words that for whatever reason do not occur in any document
  wm <- wordmargin(wfm)
  origV <- length(words(wfm))
  uninformative <- apply(wfm, wm, sum) == 0
  if (sum(uninformative)>0 & verbose)
    cat('Ejecting words that never occur: ', paste((1:origV)[uninformative], sep=','), '\n')
  
  good.words <- words(wfm)[!uninformative] ## not indices, words
  if (wm==1)
    wfm <- wfm[good.words,]
  else
    wfm <- wfm[,good.words]
  
  #Y <- as.worddoc(wfm)
  tY <- as.docword(wfm)
  D  <- NROW(tY)
  V  <- NCOL(tY)
  wfm <- NULL   ## one version is quite enough
  
  ## enforce control defaults
  tol <- ifelse(is.null(control$tol), 1e-06, control$tol)
  sigma <- ifelse(is.null(control$sigma), 3, control$sigma)
  conv.check <- ifelse(is.null(control$conv.check), 'll', control$conv.check)
  
  optimfail <- "Warning: optim failed to converge"
  
  ## the actual likelihood function
  LL <- function(params, tY){
    logexpected <- outer(params$theta, params$beta) +
      outer(params$alpha, params$psi, "+")
    sum(sum(tY * logexpected - exp(logexpected)))  - sum(0.5*(params$beta^2/sigma^2))
  }
  
  ## Objective functions are _minus_ log likelihoods (for optim)
  
  ## estimate all psi and beta
  LL.psi.beta <- function(p, y, theta, alpha, sigma) {
    beta <- p[1]
    psi  <- p[2]
    eta  <- psi + beta*theta + alpha
    ll <- sum(eta*y - exp(eta)) - 0.5*(beta^2/sigma^2)
    return(-ll)
  }
  
  DLL.psi.beta <- function(p, y, theta, alpha, sigma){
    beta <- p[1]
    psi <- p[2]
    mu <- exp(psi + beta*theta + alpha)
    dll <- c(sum(theta * (y-mu)) - beta/sigma^2,
             sum(y-mu))
    return(-dll)
  }
  
  ## estimate theta[1]
  LL.first.theta <- function(p, y, beta, psi) {
    theta <- p[1]
    eta <- psi + beta*theta   # alpha=0
    ll <- sum(eta*y - exp(eta))
    return(-ll)
  }
  
  ## estimate remaining thetas and alphas
  LL.alpha.theta <- function(p, y, beta, psi) {
    theta <- p[1]
    alpha <- p[2]
    eta <- psi + beta*theta + alpha
    ll <- sum(eta*y - exp(eta))
    return(-ll)
  }
  
  DLL.alpha.theta <- function(p, y, beta, psi){
    theta <- p[1]
    alpha <- p[2]
    mu <- exp(psi + beta*theta + alpha)
    dll <- c(sum(beta * (y-mu)),
             sum(y-mu))
    return(-dll)
  }
  
  if (!is.null(control$startparams)) {
    inc <- control$startparams
    if (is(inc, 'wordfish')){
      pars <- coef.wordfish(inc, form='poisson')
      ## fill in known word parameters if they're available
      inter <- intersect(good.words, rownames(pars$words)) 
      med.beta <- median(pars$words$beta)
      med.psi <- median(pars$words$psi)
      newpars <- matrix(rep(c(med.beta, med.psi), each=length(good.words)), 
                        ncol=2, dimnames=list(good.words, c('beta', 'psi')))
      newpars[inter,'beta'] <- pars$words[inter,'beta']
      newpars[inter,'psi'] <- pars$words[inter,'psi']
      params <- list(beta=newpars[,'beta'],
                     psi=newpars[,'psi'],
                     alpha=pars$docs$alpha,
                     theta=inc$theta)
    } else {
      ## a plain old list came in
      params <- list(beta=inc$beta,
                     psi=inc$psi,
                     alpha=inc$alpha,
                     theta=inc$theta)
      stopifnot((length(params$beta) == ncol(tY)) &&
                (length(params$psi) == ncol(tY)) &&
                (length(params$alpha) == nrow(tY)) && 
                (length(params$theta) == nrow(tY))) 
    }
  } else {
    params <- initialize.urfish(tY)
  }
  
  
  if (conv.check=='ll'){
    ll <- LL(params, tY)
    if (verbose) cat("0\tLL:", ll, "\n")
  }
  
  iter <- 1
  diff <- Inf ## the quantity we're checking is less than tol
  while (diff > tol) {
    if (conv.check=='cor') { old.theta <- params$theta }
    else { old.ll <- ll }
    
    if (verbose) cat(iter, "\t")
    
    ## Estimate first theta (alpha = 0)
    resa <- optim(par=c(params$theta[1]),
                  fn=LL.first.theta,
                  gr=NULL,
                  y=as.numeric(tY[1,]),
                  beta=params$beta,
                  psi=params$psi,
                  method=c("BFGS")
    )
    params$theta[1] <- resa$par[1]
    params$alpha[1] <- 0  ## just in case
    if (resa$convergence!=0)
      cat(optimfail, "while estimating theta[ 1 ]\n")
    
    for (i in 2:D) {
      resa <- optim(par=c(params$theta[i], params$alpha[i]),
                    fn=LL.alpha.theta,
                    gr=DLL.alpha.theta,
                    y=as.numeric(tY[i,]),
                    beta=params$beta,
                    psi=params$psi,
                    method=c('BFGS')
      )
      params$theta[i] <- resa$par[1]
      params$alpha[i] <- resa$par[2]
      if (resa$convergence!=0)
        cat(optimfail, "while estimating theta[", i, "] and alpha[", i, "]\n")
      
    }
    
    ## Z-score transformation of estimates for theta
    thetabar     <- mean(params$theta)
    params$theta <- (params$theta - thetabar) / sd(params$theta)
    
    ## oldbeta      <- params$beta
    ## params$beta  <- params$beta * sd(params$theta)
    ## params$psi   <- params$psi + oldbeta*thetabar
    
    ## global identification
    if (params$theta[dir[1]] > params$theta[dir[2]])
      params$theta <- params$theta*(-1)
    
    for (j in 1:V) {
      resa <- optim(par=c(params$beta[j], params$psi[j]),
                    fn=LL.psi.beta,
                    gr=DLL.psi.beta,
                    y=tY[,j],
                    theta=params$theta,
                    alpha=params$alpha,
                    sigma=sigma,
                    method=c('BFGS')
      )
      params$beta[j] <- resa$par[1]
      params$psi[j] <- resa$par[2]
      if (resa$convergence!=0)
        cat(optimfail, "while estimating beta[", j, "] and psi[", j, "]\n")
    }
    
    ll <- LL(params, tY)
    if (conv.check=='ll'){
      diff <- (ll - old.ll)/abs(old.ll)
      if (verbose) {
        cat("LL:", ll, "\n")
        flush.console()
      }
    } else {
      diff <- 1-cor(params$theta, old.theta) 
      if (verbose) {
        cat("1-cor:", diff, "\n")
        flush.console()
      }
    }
    
    iter <- iter + 1
  }
  
  if (verbose){
    ll <- LL(params, tY)
    cat("Final LL:", ll, "\n")
  }
  
  model <- list(dir=dir,
                theta=params$theta,
                alpha=params$alpha,
                beta=params$beta,
                psi=params$psi,
                docs=docs(tY),
                words=words(tY),
                sigma=sigma,
                ll=ll,
                data=t(tY),
                call=thecall)
  
  ## asymptotic standard errors (in multinomial form)
  mnll <- function(tt, b, p, y){
    linpred <- c(0, tt*b + p)
    dmultinom(y, size=sum(y), exp(linpred), log=TRUE)
  }
  model$se.theta <- rep(NA, D)
  for (i in 1:D){
    ## one pass maximum posterior calculation
    new.beta <- (model$beta[2:V] - model$beta[1])
    new.psi <- (model$psi[2:V] - model$psi[1])
    model$theta[i] <- optimize(mnll, interval=c(-6,6), new.beta, new.psi,
                               tY[i,], maximum=TRUE)$maximum
    ## numerical hessian
    neghess <- -hessian(mnll, model$theta[i], b=new.beta, p=new.psi, y=tY[i,])
    invneghess <- solve(neghess)
    model$se.theta[i] <- sqrt(invneghess)
  }
  
  class(model) <- c("wordfish", class(model))
  return(model)
}

##' @method print wordfish
##' @export
print.wordfish <- function(x,
                           digits=max(3,getOption('digits')-3),
                           ...){
  cat("Call:\n\t")
  print(x$call)
  cat("\nDocument Positions:\n")
  pp <- predict.wordfish(x, se.fit=TRUE, interval='confidence')
  colnames(pp) <- c("Estimate", "Std. Error", "Lower", "Upper")
  print(pp, digits=digits)
}

#' Extract Word Parameters
#' 
#' Extract word parameters beta and psi in an appropriate model
#' parameterization
#' 
#' Slope parameters and intercepts are labelled beta and psi respectively.  In
#' multinomial form the coefficient names reflect the fact that the
#' first-listed word is taken as the reference category.  In poisson form, the
#' coefficients are labeled by the words the correspond to.
#' 
#' Note that in both forms there will be beta and psi parameters, so make sure
#' they are the ones you want.
#' 
#' @param object an object of class wordfish
#' @param form which parameterization of the model to return parameters for
#' @param ... extra arguments
#' @return A data.frame of word parameters from a wordfish model in one or
#' other parameterization.
#' @author Will Lowe
#' @export
#' @method coef wordfish
#' @seealso \code{\link{wordfish}}
coef.wordfish <- function(object, form=c('poisson', 'multinomial'), ...){
  m <- object
  fm <- match.arg(form)
  if (fm == 'multinomial'){
    V <- length(m$beta)
    word.params <- data.frame(beta=(m$beta[2:V] - m$beta[1]),
                              psi=(m$psi[2:V] - m$psi[1]))
    rownames(word.params) <- paste(m$words[2:V], "/", m$words[1], sep='')
    d <- list(words=word.params)
  } else {
    word.params <- data.frame(beta=m$beta, psi=m$psi)
    rownames(word.params) <- m$words
    docs <- data.frame(alpha=m$alpha)
    rownames(docs) <- m$docs
    d <- list(words=word.params, docs=docs)
  }
  
  class(d) <- c('coef.wordfish', class(d))
  return(d)
}

print.coef.wordfish <- function(x,
                                digits=max(3,getOption('digits')-3),
                                ...){
  print(x$words, digits=digits)
  if (!is.null(x$docs))
    print(x$docs, digits=digits)
}

#' Plot the Word Parameters From a Wordfish Model
#' 
#' Plots sorted beta and optionally also psi parameters from a Wordfish model
#' 
#' 
#' @param x a fitted Wordfish model
#' @param pch Default is to use small dots to plot positions
#' @param psi whether to plot word fixed effects
#' @param ... Any extra graphics parameters to pass in
#' @return A plot of sorted beta and optionally psi parameters.
#' @author Will Lowe
#' @seealso \code{\link{wordfish}}
#' @importFrom methods is
#' @importFrom graphics dotchart text plot
#' @export
#' @method plot coef.wordfish
plot.coef.wordfish <- function(x, pch=20, psi=TRUE, ...){
  
  if (!is(x, "coef.wordfish"))
    stop("First argument must be coefficients from a Wordfish model")
  
  if (is.null(x$docs))
    stop(paste("Plotting word parameters in the multinomial parameterization",
               "probably won't\n  be very informative.  Try plotting the value",
               "of coef(model, form='poisson')"))
  
  ord <- order(x$words$beta)
  
  if (!psi){
    dotchart(x$words$beta[ord],
             labels=row.names(x$words)[ord],
             pch=pch, ...)
  } else {
    plot(x$words$beta[ord],
         x$words$psi[ord],
         type='n',
         xlab="Beta",
         ylab="Psi", ...)
    text(x$words$beta[ord],
         x$words$psi[ord],
         row.names(x$words)[ord])
  }
}

#' Summarize a Wordfish Model
#' 
#' Summarises estimated document positions from a fitted Wordfish model
#' 
#' if `level' is passed to the function, e.g. 0.95 for 95 percent confidence,
#' this generates the appropriate width intervals.
#' 
#' @param object fitted wordfish model
#' @param level confidence interval coverage
#' @param ... extra arguments, e.g. level
#' @return A data.frame containing estimated document position with standard
#' errors and confidence intervals.
#' @author Will Lowe
#' @seealso \code{\link{wordfish}}
#' @export
#' @method summary wordfish
summary.wordfish <- function(object, level=0.95, ...){
  m <- object
  pp <- predict.wordfish(m, se.fit=TRUE, interval='confidence')
  colnames(pp) <- c("Estimate", "Std. Error", "Lower", "Upper")
  rownames(pp) <- m$docs
  ret <- list(model=m, scores=pp)
  class(ret) <- c('summary.wordfish', class(ret))
  return(ret)
}

#' @method print summary.wordfish
#' @export
print.summary.wordfish <- function(x,
                                   digits=max(3,getOption('digits')-3),
                                   ...){
  cat("Call:\n\t")
  print(x$model$call)
  cat("\nDocument Positions:\n")
  print(x$scores, digits=digits)
}

#' Predict Method for Wordfish
#' 
#' Predicts positions of new documents using a fitted Wordfish model
#' 
#' Standard errors for document positions are generated by numerically
#' inverting the relevant Hessians from the profile likelihood of the
#' multinomial form of the model.
#' 
#' @param object A fitted wordfish model
#' @param newdata An optional data frame or object of class wfm in which to
#' look for word counts to predict document ideal points which to predict.  If
#' omitted, the fitted values are used.
#' @param se.fit A switch indicating if standard errors are required.
#' @param interval Type of interval calculation
#' @param level Tolerance/confidence level
#' @param ... further arguments passed to or from other methods.
#' @return \code{predict.wordfish} produces a vector of predictions or a matrix
#' of predictions and bounds with column names `fit' and `se.fit', and with
#' `lwr', and `upr' if `interval' is also set.
#' @author Will Lowe
#' @importFrom numDeriv hessian
#' @seealso \code{\link{wordfish}}
#' @export
#' @importFrom stats dmultinom optimize qnorm
#' @method predict wordfish
predict.wordfish <- function(object,
                             newdata=NULL,
                             se.fit=FALSE,
                             interval=c("none", "confidence"),
                             level=0.95,
                             ...){
  
  m <- object
  if (is.null(newdata)){
    ## use the original data stored in the m
    newd <- as.docword(m$data)
  } else {
    if (is.wfm(newdata))
      newd <- as.docword(newdata)
    else
      stop("Use as.wfm to convert newdata into a suitable format")
  }
  
  interval <- match.arg(interval)
  if (interval != "none")
    se.fit=TRUE
  
  ## asymptotic standard errors
  mnll <- function(tt, b, p, y){
    linpred <- c(0, tt*b + p)
    dmultinom(y, size=sum(y), exp(linpred), log=TRUE)
  }
  
  V <- length(m$beta)
  new.beta <- (m$beta[2:V] - m$beta[1])
  new.psi <- (m$psi[2:V] - m$psi[1])
  
  preds <- rep(NA, NROW(newd))
  preds.se <- rep(NA, NROW(newd))
  for (i in 1:NROW(newd)){
    preds[i] <- optimize(mnll, interval=c(-6,6), new.beta, new.psi, newd[i,], maximum=TRUE)$maximum
    if (se.fit){
      neghess <- -hessian(mnll, preds[i], b=new.beta, p=new.psi, y=newd[i,])
      invneghess <- solve(neghess)
      preds.se[i] <- sqrt(invneghess)
    }
  }
  
  if (se.fit){
    res <- data.frame(fit=preds, se.fit=preds.se)
    rownames(res) <- rownames(newd)
    if (interval == "confidence"){
      z <- qnorm(1-(1-level)/2)
      res$lwr <- preds - z * preds.se
      res$upr <- preds + z * preds.se
    }
  } else {
    res <- preds
    names(res) <- rownames(newd) ## we can at least label it
  }
  return(res)
}

#' Plot a Wordfish Model
#' 
#' Plots a fitted Wordfish model with confidence intervals
#' 
#' 
#' @param x a fitted Wordfish model
#' @param truevals True document positions if known
#' @param level Intended coverage of confidence intervals
#' @param pch Default is to use small dots to plot positions
#' @param ... Any extra graphics parameters to pass in
#' @return A plot of sorted estimated document positions, with confidence
#' intervals and true document positions, if these are available.
#' @author Will Lowe
#' @export
#' @importFrom methods is
#' @importFrom stats cor
#' @importFrom graphics dotchart segments points text title
#' @importFrom grDevices rgb
#' @seealso \code{\link{wordfish}}
#' @method plot wordfish
plot.wordfish <- function(x,
                          truevals=NULL,
                          level=0.95,
                          pch=20,
                          ...){
  
  if (!is(x, "wordfish"))
    stop("First argument must be a wordfish model")
  
  vv       <- as.data.frame(predict.wordfish(x, se.fit=TRUE, 
                                             interval='conf', level=level))
  ord      <- order(vv$fit)
  theta    <- vv$fit[ord]
  upper    <- vv$upr[ord]
  lower    <- vv$lwr[ord]
  
  name.theta <- x$docs[ord]
  
  if (!is.null(truevals)){
    truevals <- truevals[ord]
    lims <- c(min(c(theta, truevals, lower)), max(c(theta, truevals, upper)))
    dotchart(theta, labels=name.theta, xlim=lims, pch=pch, ...)
    segments(lower, 1:length(theta), upper, 1:length(theta))
    points(truevals, 1:length(theta), col=rgb(139/255,0,0,0.75), pch=pch)
    title(paste('r =', format(cor(truevals, x$theta), digits=4)))
  } else {
    lims <- c(min(c(theta, lower)), max(c(theta, upper)))
    dotchart(theta, labels=name.theta, xlim=lims, pch=pch, ...)
    segments(lower, 1:length(theta), upper, 1:length(theta))
  }
}

##if (grp){
##    group.med <- aggregate(x$theta, by=list(groups), median)
##    groups <- reorder(groups, order(group.med))
##}
##if (scale){
##    sdt <- sd(theta)
##    theta <- (theta - mean(theta)) / sdt
##    if (se){ se.theta <- se.theta / sdt }
##}


#' Simulate data and parameters for a Wordfish model
#' 
#' Simulates data and returns parameter values using Wordfish model
#' assumptions: Counts are sampled under the assumption of independent Poisson
#' draws with log expected means linearly related to a lattice of document
#' positions.
#' 
#' This function draws `docs' document positions from a Normal distribution, or
#' regularly spaced between 1/`docs' and 1.
#' 
#' `vocab'/2 word slopes are 1, the rest -1.  All word intercepts are 0.
#' `doclen' words are then sampled from a multinomial with these parameters.
#' 
#' Document position (theta) is sorted in increasing size across the documents.
#' If `scaled' is true it is normalized to mean zero, unit standard deviation.
#' This is most helpful when dist=normal.
#' 
#' @param docs How many `documents' should be generated
#' @param vocab How many `word' types should be generated
#' @param doclen A scalar `document' length or vector of lengths
#' @param dist the distribution of `document' positions
#' @param scaled whether the document positions should be mean 0, unit sd
#' @return \item{Y}{A sample word-document matrix} \item{theta}{The `document'
#' positions} \item{doclen}{The `document' lengths} \item{beta}{`Word'
#' intercepts} \item{psi}{`Word' slopes}
#' @author Will Lowe
#' @importFrom stats rnorm rmultinom
#' @export sim.wordfish
sim.wordfish <- function(docs=10,
                         vocab=20,
                         doclen=500,
                         dist=c("spaced", "normal"),
                         scaled=TRUE){
  
  if ((vocab %% 4)>0)
    stop("This function assumes you have vocab divisible by 4")
  
  tdist <- match.arg(dist)
  
  if (tdist=="spaced")
    theta <- seq(1/docs, 1, 1/docs)
  else if (tdist=="normal")
    theta <- rnorm(docs)
  else
    stop("Unrecognized dist parameter")
  
  if (scaled){ theta <- scale(theta) }
  
  theta <- theta[order(theta)]  ## sorted for convenience
  
  if (length(doclen)==1)
    doclen <- rep(doclen, docs)
  
  b0 <- c(0, rep(0, vocab/2-1),
          rep(1, vocab/2))
  b1 <- c(0, rep(0, vocab/4-1),
          rep(1, vocab/4),
          rep(0, vocab/4),
          rep(1, vocab/4))
  
  data <- matrix(data=0, nrow=vocab, ncol=docs)
  mu <- matrix(data=0, nrow=vocab, ncol=docs)
  for (d in 1:docs){
    mu[,d] <- exp(b1 * theta[d] + b0)
    data[,d] <- rmultinom(1, doclen[d], mu[,d]) # unnormalized probs
  }
  
  zfill <- function(vals){
    ## assumes vals is an int vector
    v <- as.character(vals)
    wid <- max(apply(as.matrix(v) , 1, nchar))
    formatstr <- paste("%", wid, ".f", sep='')
    newv <- gsub(' ', '0', sprintf(formatstr, vals))
    return(newv)
  }
  
  rownames(data) <- paste("W", zfill(1:vocab), sep="")
  colnames(data) <- paste("D", zfill(1:docs), sep="")
  Y <- wfm(data, word.margin=1)
  
  val <- list(Y=Y,
              theta=theta,
              doclen=colSums(Y),
              psi=b0,
              beta=b1)
  class(val) <- c('wordfish.simdata', class(val))
  
  return(val)
}

#' Get Fitted Values from a Wordfish Model
#' 
#' Extracts the estimated word rates from a fitted Wordfish model
#' 
#' 
#' @param object a fitted Wordfish model
#' @param ... Unused
#' @return Expected counts in the word frequency matrix
#' @author Will Lowe
#' @export
#' @method fitted wordfish
fitted.wordfish <- function(object, ...){
  m <- object
  n <- as.numeric(colSums(m$data))
  eta <- outer(m$theta, m$beta) + outer(m$alpha, m$psi, "+")
  mm <- apply(exp(eta), 1, function(x){ x / sum(x) })
  mm <- mm * outer(rep(1,length(m$beta)), n)
  colnames(mm) <- m$docs
  rownames(mm) <- m$words
  return(t(mm))
}

#' initialize.urfish
#' 
#' Get cheap starting values for a Wordfish model
#' 
#' This function is only called by model fitting routines and does therefore
#' not take a wfm classes.  tY is assumed to be in document by term form.
#' 
#' In the poisson form of the model incidental parameters (alpha) are set to
#' log(rowmeans/rowmeans[1]).  intercept (psi) values are set to log(colmeans)
#' These are subtracted from a the data matrix, which is logged and decomposed
#' by SVD.  Word slope (beta) and document position (theta) are estimated by
#' rescaling SVD output.
#' 
#' @param tY a document by word matrix of counts
#' @return List with elements: \item{alpha}{starting values of alpha
#' parameters} \item{psi}{starting values of psi parameters}
#' \item{beta}{starting values of beta parameters} \item{theta}{starting values
#' for document positions}
#' @author Will Lowe
#' @references This is substantially the method used by Slapin and Proksch's
#' original code.
#' @export initialize.urfish
initialize.urfish <- function(tY){
  ## mostly the initialization routine from S&P
  
  D             <- nrow(tY)
  V             <- ncol(tY)
  numword       <- rep(1:V, each=D)
  numdoc        <- rep(1:D, V)
  
  dat           <- matrix(1, nrow=V*D, ncol=3)
  dat[,1]       <- as.vector(as.matrix(tY))
  dat[,2]       <- as.vector(numword)
  dat[,3]       <- as.vector(numdoc)
  dat           <- data.frame(dat)
  
  colnames(dat) <- c("y", "word", "doc")
  dat$word      <- factor(dat$word)
  dat$doc       <- factor(dat$doc)
  
  b0            <- log(colMeans(tY))
  
  rmeans        <- rowMeans(tY)
  alpha         <- log(rmeans/rmeans[1])
  
  ystar         <- log(dat$y+0.1) - alpha[dat$doc] - b0[dat$word]
  ystarm        <- matrix(ystar, D, V, byrow=FALSE)
  res           <- svd(ystarm, nu=1)
  b             <- as.numeric(res$v[,1]*res$d[1])
  
  ## normalize
  th            <- as.vector(res$u)-res$u[1,1]
  theta         <- th/sd(th)
  b1            <- b*sd(th)
  
  params        <- list(alpha=as.numeric(alpha),
                        psi=as.numeric(b0),
                        beta=b1,
                        theta=theta)
  return(params)
}

#' Compute Bootstrap Standard Errors
#' 
#' Computes bootstrap standard errors for document positions from a fitted
#' Wordfish model
#' 
#' This function computes a parametric bootstrap by resampling counts from the
#' fitted word counts, refitting the model, and storing the document positions.
#' The standard deviations for each resampled document position are returned.
#' 
#' @param object a fitted Wordfish model
#' @param L how many replications
#' @param verbose Give progress updates
#' @param ... Unused
#' @return Standard errors for document positions
#' @author Will Lowe
#' @importFrom stats rpois sd
#' @importFrom methods is
#' @export bootstrap.se
bootstrap.se <- function(object, L=50, verbose=FALSE, ...){
  if (is(object, 'wordfish')){
    m <- object
    b <- m$beta
    p <- m$psi
    a <- m$alpha
    th <- m$theta
    
    D <- length(th)
    V <- length(b)
    
    mtheta <- matrix(0, nrow=D, ncol=L)
    logexpected <- outer(th, b) + outer(a, p, "+")
    lambda <- exp(logexpected)
    for (l in 1:L){
      if (verbose)
        cat(paste("iter:", l, "\n"))
      
      mat <- matrix(rpois(rep(1, D*V), as.vector(lambda)), nrow=D)
      rownames(mat) <- m$docs
      colnames(mat) <- m$words
      newY <- wfm(mat, word.margin=2)
      newparams <- wordfish(newY, dir=m$dir,
                            control=list(sigma=m$sigma,
                                         startparams=m,
                                         tol=m$tol),
                            verbose=FALSE)
      mtheta[,l] <- newparams$theta
    }
  } else {
    stop("Parametric bootstrap is only implemented for wordfish models")
  }
  
  se <- data.frame(boot.se=apply(mtheta, 1, sd))
  return(se)
}