-
Notifications
You must be signed in to change notification settings - Fork 0
/
zibellreg.R
executable file
·152 lines (132 loc) · 5.15 KB
/
zibellreg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#---------------------------------------------
#' ZiBell regression model
#' @aliases zibellreg
#' @export
#' @description Fits the Bell regression model to overdispersed count data.
#' @param formula an object of class "formula" (or one that can be coerced to that class): a symbolic description of the model to be fitted.
#' @param data an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which ypbp is called.
#' @param approach approach to be used to fit the model (mle: maximum likelihood; bayes: Bayesian approach).
#' @param link1 assumed link function for degenerate distribution (logit, probit, cloglog, cauchy); default is logit.
#' @param link2 assumed link function for count distribution (log, sqrt or identiy); default is log.
#' @param hessian hessian logical; If TRUE (default), the hessian matrix is returned when approach="mle".
#' @param hyperpars a list containing the hyperparameters associated with the prior distribution of the regression coefficients; if not specified then default choice is hyperpars = c(mu_psi = 0, sigma_psi = 10, mu_beta = 0, sigma_beta = 10).
#' @param ... further arguments passed to either `rstan::optimizing` or `rstan::sampling`.
#' @return zibellreg returns an object of class "zibellreg" containing the fitted model.
#'
#' @examples
#' \donttest{
#' # ML approach:
#' data(cells)
#' mle <- zibellreg(cells ~ smoker+gender|smoker+gender, data = cells, approach = "mle")
#' summary(mle)
#'
#' # Bayesian approach:
#' bayes <- zibellreg(cells ~ 1|smoker+gender, data = cells, approach = "bayes", refresh = FALSE)
#' summary(bayes)
#' }
#'
zibellreg<- function(formula, data, approach = c("mle", "bayes"), hessian = TRUE,
link1 = c("logit", "probit", "cloglog", "cauchy"), link2 = c("log", "sqrt", "identity"),
hyperpars = list(mu_psi=0, sigma_psi=10, mu_beta=0, sigma_beta=10), ...){
approach <- match.arg(approach)
link1 <- match.arg(link1)
link2 <- match.arg(link2)
formula <- Formula::Formula(formula)
mf <- stats::model.frame(formula=formula, data=data)
Terms <- stats::terms(mf)
Z <- stats::model.matrix(formula, data = mf, rhs = 1)
X <- stats::model.matrix(formula, data = mf, rhs = 2)
Xlabels <- colnames(X)
Zlabels <- colnames(Z)
y <- stats::model.response(mf)
n <- nrow(X)
p <- ncol(X)
q <- ncol(Z)
if(p>1){
if(match("(Intercept)", Xlabels)==1){
X_std <- scale(X[,-1])
x_mean <- array(c(0, attr(X_std, "scaled:center")))
x_sd <- array(c(1, attr(X_std, "scaled:scale")))
X_std <- cbind(1, X_std)
Delta_x <- diag(1/x_sd)
Delta_x[1,] <- Delta_x[1,] - x_mean/x_sd
}else{
X_std <- scale(X)
x_mean <- array(attr(X_std, "scaled:center"))
x_sd <- array(attr(X_std, "scaled:scale"))
Delta_x <- diag(1/x_sd)
}
}else{
X_std <- X
x_mean <- array(0)
x_sd <- array(1)
Delta_x <- diag(n)
}
if(q>1){
if(match("(Intercept)", Zlabels)==1){
Z_std <- scale(Z[,-1])
z_mean <- array(c(0, attr(Z_std, "scaled:center")))
z_sd <- array(c(1, attr(Z_std, "scaled:scale")))
Z_std <- cbind(1, Z_std)
Delta_z <- diag(1/z_sd)
Delta_z[1,] <- Delta_z[1,] - z_mean/z_sd
}else{
Z_std <- scale(Z)
z_mean <- array(attr(Z_std, "scaled:center"))
z_sd <- array(attr(Z_std, "scaled:scale"))
Delta_z <- diag(1/z_sd)
}
}else{
Z_std <- Z
z_mean <- array(0)
z_sd <- array(1)
Delta_z <- diag(n)
}
Link1 <- switch(link1,
"logit" = 1,
"probit" = 2,
"cloglog" = 3,
"cauchy" = 4
)
Link2 <- switch(link2,
"log" = 1,
"sqrt" = 2,
"identity" = 3
)
stan_data <- list(y=y, X=X_std, Z=Z_std, n=n, p=p, q=q, x_mean=x_mean, x_sd=x_sd, z_mean=z_mean, z_sd=z_sd,
mu_beta = hyperpars$mu_beta, sigma_beta=hyperpars$sigma_beta,
mu_psi = hyperpars$mu_psi, sigma_psi=hyperpars$sigma_psi,
approach=0, link1 = Link1, link2 = Link2)
if(approach=="mle"){
fit <- rstan::optimizing(stanmodels$zibellreg, hessian=hessian,
data=stan_data, verbose=FALSE, ...)
if(hessian==TRUE){
fit$hessian <- - fit$hessian
}
fit$par <- fit$theta_tilde[-(1:(p+q))]
AIC <- -2*fit$value + 2*(p+q)
fit <- list(fit=fit, logLik = fit$value, AIC = AIC, Delta = magic::adiag(Delta_z, Delta_x))
}else{
stan_data$approach <- 1
fit <- rstan::sampling(stanmodels$zibellreg, data=stan_data, verbose=FALSE, ...)
fit <- list(fit=fit)
}
fit$n <- n
fit$p <- p
fit$q <- q
# fit$x_mean <- x_mean
# fit$x_sd <- x_sd
# fit$z_mean <- z_mean
# fit$z_sd <- z_sd
# fit$v_sd <- c(z_sd, x_sd)
fit$call <- match.call()
fit$formula <- stats::formula(Terms)
fit$terms <- stats::terms.formula(formula)
fit$labels1 <- Zlabels
fit$labels2 <- Xlabels
fit$approach <- approach
fit$link1 <- link1
fit$link2 <- link2
class(fit) <- "zibellreg"
return(fit)
}