1.2.1

Wenchao-Ma · Feb 12, 2017 · 935dc80 · 935dc80
1 parent 527c793
commit 935dc80
Show file tree

Hide file tree

Showing 10 changed files with 223 additions and 172 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,15 @@
 Package: GDINA
 Type: Package
 Title: The Generalized DINA Model Framework
-Version: 1.2.0
-Date: 2017-1-28
+Version: 1.2.1
+Date: 2017-2-12
 Authors@R: c(person(given = "Wenchao",family = "Ma", role = c("aut", "cre"),email = "wenchao.ma@rutgers.edu"),person(given = "Jimmy", family = "de la Torre", role = "aut"))
-Description: A set of psychometric tools for cognitive diagnostic analyses for
-    both dichotomous and polytomous responses. Various cognitive diagnosis models
-    can be estimated, include the generalized deterministic inputs, noisy
+Description: A set of psychometric tools for cognitive diagnostic analyses
+    for both dichotomous and polytomous responses. Various cognitive diagnosis
+    models can be estimated, including the generalized deterministic inputs, noisy
     and gate (G-DINA) model by de la Torre (2011) <DOI:10.1007/s11336-011-9207-7>,
-    the sequential G-DINA model by Ma and de la Torre (2016) <DOI:10.1111/bmsp.12070>, 
-    and many other models they subsume. Joint attribute distribution can
+    the sequential G-DINA model by Ma and de la Torre (2016) <DOI:10.1111/bmsp.12070>,
+    and many other models they subsume. Joint attribute distribution can
     be saturated, higher-order or structured. Q-matrix validation, item and model
     fit statistics, model comparison at test and item level and differential item
     functioning can also be conducted. A graphical user interface is also provided.
@@ -37,8 +37,8 @@ Suggests:
 LinkingTo: Rcpp, RcppArmadillo
 URL: https://github.com/Wenchao-Ma/GDINA
 BugReports: https://github.com/Wenchao-Ma/GDINA/issues
-RoxygenNote: 5.0.1
-Collate:
+RoxygenNote: 6.0.1
+Collate: 
     'CR.R'
     'GDI.R'
     'GDINA-package.R'

diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,22 @@
+# GDINA 1.2.1
+* Fixed     - bugs in model estimation with user specified structures
+* Fixed     - bugs in `summary.GDINA` function for multiple group estimation
+* Changed   - include the pseudo q-vector 0 for the mesa plot
+* Changed   - prior distribution is not re-calculated after likelihood calculation in `GDINA` function
+* Changed   - output for `att.prior` in `extract` function
+* Changed   - print number of groups for `GDINA` function
+* Changed   - documentation for `simGDINA` and `GDINA`
+* Changed   - examples in `GDINA`
+* Added     - extract `ngroup` using `extract.GDINA` function
+
 # GDINA 1.2.0
 * Fixed   - infinite value issue during M-step optimization
 * Fixed   - `itemfit` function for missing data
 * Fixed   - adding monotonic constraints for the sequential models
 * Fixed   - the maximum likelihood estimation of person attribute through `personparm`
 * Changed - package imports, and suggests
 * Changed - slsqp as the first optimizer used for monotonic G-DINA 
+* Changed - Non zero prior probabilities
 
 # GDINA 1.0.0
 

diff --git a/R/GDINA.R b/R/GDINA.R
diff --git a/R/dif.R b/R/dif.R
@@ -7,10 +7,10 @@
 #' @param parm The type of parameters associated with the Wald test for the DIF detection. It can be either \code{"itemprob"}
 #'  or \code{"delta"} for item probabilities and delta parameters, respectively.
 #' @param difitem Items for the DIF detection. By default, all items will be examined.
-#' @param SE.type Type of standard error estimation methods for Wald test.
+#' @param SE.type Type of standard error estimation methods for the Wald test.
 #' @inheritParams GDINA
-#' @param ... arguments passed to GDINA function for model calibration
-#' @return a data frame giving the Wald statistics and associated p-values.
+#' @param ... Other arguments passed to GDINA function for model calibration
+#' @return A data frame giving the Wald statistics and associated p-values.
 #'
 #' @author {Wenchao Ma, Rutgers University, \email{wenchao.ma@@rutgers.edu} \cr Jimmy de la Torre, The University of Hong Kong}
 #' @seealso \code{\link{GDINA}}
@@ -58,10 +58,7 @@ dif <- function(dat, Q, group, method = "wald", difitem = "all", parm = "delta",
   gr.label <- unique(gr)
   J <- nrow(Q)
   if(difitem == "all") difitem <- 1:J
-gr1dat <- gr2dat <- dat
-gr1dat[gr==gr.label[2],] <- NA
-gr2dat[gr==gr.label[1],] <- NA
-est <- GDINA::GDINA(cbind(gr1dat,gr2dat), rbind(Q,Q), group = gr,...)
+est <- GDINA::GDINA(bdiag(list(dat[gr==gr.label[1],],dat[gr==gr.label[2],]),NA), rbind(Q,Q), group = gr,...)
 
   if(method=="wald"){
 

diff --git a/R/extract.GDINA.R b/R/extract.GDINA.R
@@ -32,14 +32,16 @@
 #'   \item{ncat}{number of categories excluding category zero}
 #'   \item{natt}{number of attributes}
 #'   \item{nitr}{number of iterations}
+#'   \item{ngroup}{number of groups}
 #'   \item{discrim}{GDINA discrimination index}
+#'   \item{posterior.prob}{posterior weights for each latent class}
+#'   \item{att.prior}{attribute prior weights for calculating marginalized likelihood in the last iteration}
 #'   \item{time}{time used}
 #'   \item{start.time}{starting time}
 #'   \item{end.time}{end time}
 #'   \item{dat}{item responses analyzed}
 #'   \item{Q}{Q-matrix}
 #'   \item{Qc}{Qc-matrix}
-#'   \item{posterior.prob}{posterior weights for each latent class}
 #'   \item{prevalence}{prevalence of each attribute}
 #' \item{itemprob.cov}{variance-covariance matrix of item endorsement probabilities for all items}
 #' \item{itemprob.covindex}{index of the variance-covariance matrix of item endorsement probabilities for all items}
@@ -55,7 +57,6 @@
 #' \item{SE}{argument SE}
 #' \item{SE.type}{argument SE.type}
 #' \item{empirical}{argument empirical}
-#' \item{att.prior}{argument att.prior}
 #' \item{att.str}{argument att.str}
 #' \item{nstarts}{argument nstarts}
 #' \item{conv.crit}{argument conv.crit}
@@ -123,6 +124,7 @@ extract.GDINA <- function(object,what,digits=4,...){
                 nitr = object$options$itr,
                 nobs = nrow(object$options$dat),
                 nitem = ncol(object$options$dat),
+                ngroup = object$options$no.group,
                 ncat = nrow(object$options$Q),
                 natt = ifelse(object$options$sequential,ncol(object$options$Q)-2,ncol(object$options$Q)),
                 AIC = object$testfit$AIC,

diff --git a/R/internalExtract.R b/R/internalExtract.R
@@ -36,7 +36,7 @@ internalextract <- function(object, what, ...) {
         names(p) <- paste("Item",1:internalextract(object,"nitem"))
         p
       }else{
-        object$itemprob.parm
+        object$catprob.parm
       }
 
     },

diff --git a/R/plotPVAF.Qval.R b/R/plotPVAF.Qval.R
@@ -70,9 +70,11 @@ mesaplot.Qval <-
 
       }
     }else if(tolower(type)=="best"){
-      Kj <- rowSums(alpha(K)[-1,])
+      fullPVAF <- rbind(0,fullPVAF)
+      Kj <- rowSums(alpha(K))
       bestPVAF <- aggregate(fullPVAF,by=list(Kj),max)[,-1]
-      label.bestPVAF <- rownames(fullPVAF)
+      # bestPVAF <- rbind(0,bestPVAF) # add 0s
+      label.bestPVAF <- apply(alpha(K),1,paste0,collapse = "")
       for(j in item){
         bestloc <- match(bestPVAF[,j],fullPVAF[,j])
         if (auto.ylim) ylim = c(max(0,round(min(bestPVAF[,j])-0.1,1)),1) else ylim=c(0,1)
@@ -82,7 +84,7 @@ mesaplot.Qval <-
         yloc <- bestPVAF[,j]-diff(ylim)/15
         yloc[yloc<=ylim[1]] <- yloc[yloc<=ylim[1]] + 2 * diff(ylim)/15
         if (data.label) text(c(1:nrow(bestPVAF)),yloc,bestPVAF[,j])
-        locy0 <- which(apply(alpha(K)[-1,],1,function(x){
+        locy0 <- which(apply(alpha(K),1,function(x){
           all(x==Q[j,])}))
         if(locy0%in%bestloc) points(which(bestloc==locy0),fullPVAF[locy0,j],col="red",pch=19)
         if (original.q.label) text(K-1,ylim[1]+diff(ylim)/6,paste("original q-vector:\n",names(fullPVAF[,j])[locy0]))

diff --git a/R/print.GDINA.R b/R/print.GDINA.R
@@ -11,6 +11,7 @@ print.GDINA <-
     cat("\nNumber of items       =", extract.GDINA(x,"nitem"), "\n")
     cat("Number of individuals =", extract.GDINA(x,"nobs"), "\n")
     cat("Number of attributes  =", extract.GDINA(x,"natt"), "\n")
+    cat("Number of groups      =", extract.GDINA(x,"ngroup"), "\n")
     M <- c("GDINA", "DINA", "DINO", "ACDM", "LLM", "RRUM")
     cat("Number of iterations  =", extract.GDINA(x,"nitr"), "\n")
     cat("Fitted model(s)       =", unique(extract.GDINA(x,"models")), "\n")

diff --git a/R/simGDINA.R b/R/simGDINA.R
@@ -1,42 +1,43 @@
-#' @title Simulate responses based on the (sequential) G-DINA model, DINA, DINO, ACDM, LLM or RRUM
+#' @title Simulate responses based on the (sequential) G-DINA models
 #'
 #' @description
 #'    Simulate responses based on the G-DINA model (de la Torre, 2011) and sequential G-DINA model
-#'    (Ma & de la Torre, 2016), or CDMs subsumed by them, including DINA, DINO, ACDM,
-#'    LLM and R-RUM. Attributes can be simulated from uniform, higher order or multivariate normal
-#'    distributions or supplied by users. See \code{Examples} and \code{Details} for
-#'    how item parameters should be specified. See the help page of \code{\link{GDINA}}
-#'    function to understand the model parameterizations.
+#'    (Ma & de la Torre, 2016), or CDMs subsumed by them, including the DINA model, DINO model, ACDM,
+#'    LLM and R-RUM. Attributes can be simulated from uniform, higher-order or multivariate normal
+#'    distributions, or be supplied by users. See \code{Examples} and \code{Details} for
+#'    how item parameters should be specified. See the help page of \code{\link{GDINA}}
+#'    for model parameterizations.
 #'
 #' @details
 #' Item parameter specifications in \code{simGDINA}:
 #'
-#' Item parameters can be specified in one of three different ways. The easiest way is to specify the
-#' \code{gs.param}, which gives \eqn{P(\bm{\alpha}_{lj}^*=0)} and \eqn{1-P(\bm{\alpha}_{lj}^*=1)}
-#' for all items for dichotomous items and \eqn{S(\bm{\alpha}_{lj}^*=0)} and \eqn{1-S(\bm{\alpha}_{lj}^*=1)}
-#' for all items for polytomous items. Note that \eqn{1-P(\bm{\alpha}_{lj}^*=0)-P(\bm{\alpha}_{lj}^*=1)} or
+#' Item parameters can be specified in one of three different ways.
+#'
+#' The first and probably the easiest way is to specify the guessing and slip parameters for each item or nonzero category using
+#' \code{gs.parm}, which is a matrix or data frame for \eqn{P(\bm{\alpha}_{lj}^*=0)} and \eqn{1-P(\bm{\alpha}_{lj}^*=1)}
+#' for all items for dichotomous items and \eqn{S(\bm{\alpha}_{ljh}^*=0)} and \eqn{1-S(\bm{\alpha}_{ljh}^*=1)}
+#' for all nonzero categories for polytomous items. Note that \eqn{P(\bm{\alpha}_{lj}^*=1)-P(\bm{\alpha}_{lj}^*=0)} or
 #' \eqn{S(\bm{\alpha}_{ljh}^*=1)-S(\bm{\alpha}_{ljh}^*=0)} must be greater than 0 (i.e., the guessing and slip parameters must sum to less than one).
-#' This does not need to be satisfied if item parameters are specified using \code{catprob.parm}.
 #' For generating ACDM, LLM, and RRUM, delta parameters are generated randomly if \code{type="random"},
 #' or in a way that each required attribute contributes equally, as in
 #'  Ma, Iaconangelo, & de la Torre (2016) if \code{type="equal"}. For ACDM, LLM and RRUM, generated
 #'  delta parameters are always positive, which implies that monotonicity constraints are always satisfied.
 #'  If the generating model is the G-DINA model, \code{mono.constraint} can be used to specify whether monotonicity
 #'  constraints should be satisfied.
 #'
-#' The second way to simulate responses is to specify item or category success probabilities (i.e., \eqn{P(\bm{\alpha}_{lj}^*)}
-#' and \eqn{S(\bm{\alpha}_{lj}^*)}) for all latent groups for each item/category directly
+#' The second way of simulating responses is to specify success probabilities (i.e., \eqn{P(\bm{\alpha}_{lj}^*)}
+#' or \eqn{S(\bm{\alpha}_{ljh}^*)}) for each nonzero category of each item directly
 #' using the argument \code{catprob.parm}. If an item or category requires \eqn{K_j^*} attributes, \eqn{2^{K_j^*}} success probabilities
-#' need to be provided. \code{catprob.parm} must be a list, where each element gives the success probabilities of each item or category.
+#' need to be provided. \code{catprob.parm} must be a list, where each element gives the success probabilities for each nonzero category of each item.
 #' Note that success probabilities cannot be negative or greater than one.
 #'
-#' The third way is to specify delta parameters for data simulation. For DINA and DINO model, each item/category requires two
-#' delta parameters. For ACDM, LLM and RRUM, if an item/category requires \eqn{K_j^*} attributes, \eqn{K_j^*+1} delta parameters
-#' need to be specified. For the G-DINA model, an item/category requires \eqn{K_j^*} attributes has \eqn{2^{K_j^*}} delta parameters.
-#' It should be noted that specifying delta parameters needs to make sure that the calculated success probabilities are within the \eqn{[0,1]} interval.
+#' The third way is to specify delta parameters for data simulation. For the DINA and DINO models, each nonzero category requires two
+#' delta parameters. For ACDM, LLM and RRUM, if a nonzero category requires \eqn{K_j^*} attributes, \eqn{K_j^*+1} delta parameters
+#' need to be specified. For the G-DINA model, a nonzero category requiring \eqn{K_j^*} attributes has \eqn{2^{K_j^*}} delta parameters.
+#' Note that the delta parameters must be specified such that the derived success probabilities are within the \eqn{[0,1]} interval.
 #'
-#' Please note that you need to specify item parameters in ONLY one of these three ways. If \code{gs.parm} is specified, it will be used no matter
-#' whether \code{catprob.parm} and \code{delta.parm} are specified or not. If \code{gs.parm} is not specified, \code{GDINA.sim} will check
+#' Please note that you need to specify item parameters in ONLY one of these three ways. If \code{gs.parm} is specified, it will be used regardless of
+#' the inputs in \code{catprob.parm} and \code{delta.parm}. If \code{gs.parm} is not specified, \code{simGDINA} will check
 #' if \code{delta.parm} is specified; if yes, it will be used for data generation. If neither \code{gs.parm} nor \code{delta.parm} is specified,
 #' \code{catprob.parm} is used for data generation.
 #'
@@ -63,24 +64,26 @@
 #'    include \code{"GDINA"},\code{"DINA"},\code{"DINO"},\code{"ACDM"},\code{"LLM"}, and \code{"RRUM"}.
 #'    If \code{model} is a scalar, the specified model is fitted to all items. Different
 #'    models can be assigned to different items or categories.
-#' @param sequential logical; whether a sequential model is fitted for polytomous responses?
+#' @param sequential logical; \code{TRUE} if the sequential model is used for polytomous responses simulation, and \code{FALSE}
+#'    if there are no polytomously scored items.
 #' @param mono.constraint A vector for each item/category or a scalar which will be used for all
 #'    items/categories to specify whether monotonicity constraints should be satisfied if the generating model is the G-DINA model. Note that
 #'    this is applicable only for the G-DINA model when \code{gs.parm} is used. For ACDM, LLM and RRUM, monotonicity constraints
 #'    are always satisfied and therefore this argument is ignored.
 #' @param catprob.parm A list of success probabilities for each latent group for each non-zero category of each item. See \code{Examples} and
 #'    \code{Details} for more information.
 #' @param delta.parm A list of delta parameters for each latent group for each item or category.
-#' @param item.names A vector giving the name of items. If NULL (default), items are named as "Item 1", "Item 2", etc.
-#' @param attribute person attributes. If this is not supplied, it is simulated
+#' @param item.names A vector giving the name of items or categories. If it is \code{NULL} (default), items are named as "Item 1", "Item 2", etc.
+#' @param attribute optional user-specified person attributes. It is a \eqn{N\times K} matrix or data frame. If this is not supplied, attributes are simulated
 #'    from a distribution specified in \code{att.dist}.
-#' @param att.dist the attribute distribution. It can be \code{"uniform"}, \code{"higher.order"} or
-#'    \code{"mvnorm"} for uniform, higher order and multivariate normal distribution, respectively.
-#'    The default is the uniform distribution.
-#' @param higher.order.parm A list specifying parameters for higher order distribution for attributes
-#'    if in \code{att.dist=higher.order}. Particularly, \code{theta} is a
-#'    vector of length \eqn{N} representing the higher order ability
-#'    for each examinee. and \code{lambda} is a \eqn{K \times 2} matrix. Column 1 gives slopes of higher-order
+#' @param att.dist A string indicating the distribution for attribute simulation. It can be \code{"uniform"}, \code{"higher.order"} or
+#'    \code{"mvnorm"} for uniform, higher-order and multivariate normal distribution, respectively.
+#'    The default is the uniform distribution. To specify structural parameters for the higher-order
+#'    and multivariate normal distributions, see \code{higher.order.parm} and \code{mvnorm.parm}, respectively.
+#' @param higher.order.parm A list specifying parameters for higher-order distribution for attributes
+#'    if \code{att.dist = "higher.order"}. In particular, \code{theta} is a
+#'    vector of length \eqn{N} representing the higher-order ability
+#'    for each examinee, and \code{lambda} is a \eqn{K \times 2} matrix. Column 1 gives the slopes for the higher-order
 #'    model and column 2 gives the intercepts. See \code{\link{GDINA}} for the formulations of the higher-order
 #'    models.
 #' @param mvnorm.parm a list of parameters for multivariate normal attribute distribution. \code{mean} is a vector of length \eqn{K}
@@ -406,7 +409,7 @@
 #'J <- nrow(Qc)
 #'gs <- data.frame(guess=rep(0.1,J),slip=rep(0.1,J))
 #'# simulate sequential DINA model
-#'simseq <- simGDINA(N,Qc,sequential = TRUE,gs.parm = gs,model = "DINA")
+#'simseq <- simGDINA(N, Qc, sequential = TRUE, gs.parm = gs, model = "DINA")
 #'
 #'# True item success probabilities
 #'extract(simseq,what = "catprob.parm")
@@ -511,10 +514,8 @@ if (!is.null(gs.parm)) {
       att.group <- sample(1:L, N, replace = T)  #uniform distribution
     } else if (tolower(att.dist) == "higher.order")
     {
-      if (max(Q) > 1)
-      {
-        return(warning("Higher order structure is not allowed currently when attributes are polytomous."))
-      }
+      if (max(Q) > 1) stop("Higher order structure is not allowed currently when attributes are polytomous.",call. = FALSE)
+
       if (is.null(higher.order.parm$theta)||is.null(higher.order.parm$lambda))
       {
         stop("Higher-order parameters must be provided.",call. = FALSE)

diff --git a/R/summary.GDINA.R b/R/summary.GDINA.R
@@ -15,8 +15,11 @@ summary.GDINA <-
     cat("BIC penalty   =",format(round(log(extract.GDINA(object,"nobs"))*extract.GDINA(object,"npar"),2), nsmall = 2),"\n")
     cat("  BIC penalty due to item parameters        =", format(round(log(extract.GDINA(object,"nobs"))*extract.GDINA(object,"npar.item"),2), nsmall = 2), "\n")
     cat("  BIC penalty due to population parameters  =", format(round(log(extract.GDINA(object,"nobs"))*extract.GDINA(object,"npar.att"),2), nsmall = 2), "\n")
-    cat("\nAttribute Prevalence\n\n")
-    print(round(extract.GDINA(object,"prevalence"),extract.GDINA(object,"digits")))
+    if(extract.GDINA(object,"ngroup")==1){
+      cat("\nAttribute Prevalence\n\n")
+      print(round(extract.GDINA(object,"prevalence"),extract.GDINA(object,"digits")))
+    }
+
     if( extract.GDINA(object,"natt")<4){
     cat("\nPosterior Weights\n\n")
     print(extract.GDINA(object,"posterior.prob"))