Skip to content

Commit

Permalink
version 1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
statapps authored and cran-robot committed Oct 12, 2016
0 parents commit 8d05105
Show file tree
Hide file tree
Showing 14 changed files with 892 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,16 @@
Package: bhm
Type: Package
Title: Biomarker Threshold Models
Version: 1.1
Date: 2016-10-11
Author: Bingshu E. Chen
Maintainer: Bingshu E. Chen <bingshu.chen@queensu.ca>
Depends: coda, survival
Imports: methods
Description: Biomarker threshold models are tools to fit both predictive and prognostic biomarker effects. Both generalized linear models and Cox proportional hazards models can be fitted using either Bayesian method or profile likelihood method.
License: GPL (>= 2)
LazyLoad: yes
NeedsCompilation: no
Packaged: 2016-10-11 19:03:25 UTC; chenbe
Repository: CRAN
Date/Publication: 2016-10-12 00:37:28
13 changes: 13 additions & 0 deletions MD5
@@ -0,0 +1,13 @@
3a76e2492f25bb6d44969c40c169120d *DESCRIPTION
eb0dfd2b461c84a03a31385a49ddf922 *NAMESPACE
8256307d3e2844112cc72bed4f9b7f53 *R/bhm.R
526ba7bd877a171893e8ab1a52d105ed *R/bhm_fit.R
bbbabbd6283f8a861619e707e2cb8b55 *R/bhm_lib.R
77eeedd47dd663cf58bf62999b93cc0a *R/prolik.R
c84d68567329ac77192b098af2c2c943 *data/data.rda
0d5c5a7edc77697af1e36b45d78a2fb1 *inst/CITATION
bfba3eda8867001cffbfd075fe8d4a3a *man/bhm-package.Rd
5d22753181624211dc45567a45f9c3c7 *man/bhm.Rd
bed635491c04690a15ee10610dd15156 *man/control.Rd
b0cc661060af77834ef1c1a0ba09d0ba *man/data.Rd
5ee5937f65d07e4435547acb3f9fba1a *man/print.Rd
12 changes: 12 additions & 0 deletions NAMESPACE
@@ -0,0 +1,12 @@
exportPattern("^[[:alpha:]]+")
import(coda)
import(survival)
importFrom("methods", "is")
importFrom("stats", "cov", "dbeta", "glm", "logLik", "model.frame",
"model.matrix", "model.response", "printCoefmat",
"quantile", "rbinom", "rexp", "rgamma", "rnorm", "runif", "vcov")
S3method(print, bhm)
S3method(bhm, default)
S3method(bhm, formula)
S3method(summary, bhm)
S3method(print, summary.bhm)
179 changes: 179 additions & 0 deletions R/bhm.R
@@ -0,0 +1,179 @@
bhm = function(x, ...) UseMethod("bhm")

bhm.default = function(x, y, family, control, ...) {
cat("bhm: Biomarker thershold models\n")
x = as.matrix(x)
method = control$method

if(method == 'Bayes')
fit = bhmFit(x, y, family, control)
else
fit = prolikFit(x, y, family, control)

fit$family = family
fit$call = match.call()
class(fit) = "bhm"
return(fit)
}

bhm.formula = function(formula, family, data=list(...), control=list(...), ...){
mf = model.frame(formula=formula, data=data)

x = model.matrix(attr(mf, "terms"), data = mf)
y = model.response(mf)

if (class(y) == "Surv") {
family = "surv";
st = sort(y[, 1], decreasing = TRUE, index.return = TRUE)
idx = st$ix
y = y[idx, ]
x = x[idx, ]
}

control = do.call("bhmControl", control)
n.col = ncol(x)

# inverse cdf transformation of biomarker
w = x[, 2]
if((min(w) < 0) | (max(w) > 1)) {
x[, 2] = x.cdf(x[, 2])
transform = TRUE
} else transform = FALSE


#Fit a prognostic model with biomarker term only
if(n.col == 2) {
control$interaction = FALSE
}

# covariate name for interaction term
if(control$interaction) {
int_names = paste(colnames(x)[2], ":", colnames(x)[3], sep="")

# Check if there is a main effect for biomarker variable
if(control$biomarker.main){
var_names = c(colnames(x)[1:3], int_names)
x1 = cbind(x[, 1:3], x[, 2]*x[, 3])
if(n.col>3) {
var_names = c(var_names, colnames(x)[4:n.col])
x1 = cbind(x1, x[4:n.col, ])
}
x = x1
colnames(x) = var_names
} else {
colnames(x)[2] = int_names
}
}
#print(x[1:5, ])

n.col = length(x[1, ])
if(family == "surv"){
n.col = n.col - 1
}

if (length(control$sigma0.inv) == 1 & n.col > 1)
control$sigma0.inv = diag(rep(control$sigma0.inv, n.col))

if (length(control$beta0) == 1)
control$beta0 = rep(control$beta0, n.col)

fit = bhm.default(x, y, family, control, ...)

# transoform the value of biomarker back to original value
if(transform) {
fit$c.max = quantile(w, fit$c.max)
qtlName = rownames(fit$cqtl)
fit$cqtl = matrix(quantile(w, fit$cqtl), nrow = 2)
rownames(fit$cqtl) = qtlName
}

fit$call = match.call()
fit$formula = formula
return(fit)
}

bhmControl=function(method = 'Bayes', interaction = TRUE, biomarker.main = TRUE, alpha = 0.05, B=50, R=100, thin = 1, epsilon = 0.01, c.n = 1, beta0=0, sigma0 = 10000) {

if(method != 'profile' && method != 'Bayes')
stop("Please use either 'Bayes' or 'profile' method for model fit")
if (!is.numeric(B) || B <= 0)
stop("value of 'B' must be > 0")
if ((!is.numeric(R) || R <= 0)&&method == 'Bayes')
stop("number of replication 'R' must be > 0")

if (!is.logical(biomarker.main))
stop("'biomarker.main' must be a logical value: TURE/FALSE")
if (!is.logical(interaction))
stop("'interaction' must be a logical value: TURE/FALSE")
if (interaction == FALSE && biomarker.main == FALSE) {
cat('Interaction = FALSE, biomarker.main effect reset to TRUE\n')
biomarker.main = TRUE
}

if (!is.numeric(alpha) || alpha <= 0||alpha>=1)
stop("number of replication 'alpha' must be between 0 and 1")
if (c.n > 2 || c.n < 1)
stop("number of cutpoints 'c.n' must be either 1 or 2")
if (!is.numeric(sigma0) || sigma0 <= 0)
stop("value of 'sigma' [varince for beta prior] must be > 0")
sigma0.inv = solve(sigma0)

return(list(method = method, interaction = interaction, biomarker.main = biomarker.main, B=B, R=R, thin = thin, epsilon = epsilon, alpha = alpha, c.n=c.n, beta0 = beta0, sigma0.inv = sigma0.inv))
}

summary.bhm=function(object,...){
x = object
family = x$family
c.max = x$c.max

if (family == "binomial") {
TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl))
rownames(TAB1) = x$var_names
TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl))

#threshold parameter
if (length(TAB2[, 1]) == 2) {
rownames(TAB2)=c("lower","higer")
} else {
rownames(TAB2)=c("cut point")
}
}

if (family == "surv") {
TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl))
TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl))
}
results = list(call=x$call,TAB1=TAB1, TAB2=TAB2, c.fit=x$c.fit, c.max = x$c.max, var_names = x$var_names)

class(results)<-"summary.bhm"
return(results)
}

print.summary.bhm<-function(x,...){
cat("Call:\n")
print(x$call)
cat("\nRegression coefficients:\n")


printCoefmat(x$TAB1,digits=4,P.values=FALSE)

bname=x$var_names[1]
cat('\n', bname, 'biomarker threshold:\n')
printCoefmat(x$TAB2,digits=4,P.values=FALSE)
c.max = round(x$c.max*10000)/10000
cat('\nConditional regression coefficients given', bname, 'biomarker = ', c.max, '\n')
print(summary(x$c.fit))
}

print.bhm = function(x,...) {
c.max = round(x$c.max*10000)/10000
bname = x$var_names[1]
cat("Call:\n")
print(x$call)
cat("\nCoefficients:\n")
print(x$coefficients)
cat("\n", bname, "Thresholds:\n")
print(c.max)
cat('\nConditional regression coefficient given ', bname, 'biomarker = ', c.max, '\n')
print(x$c.fit)
}
121 changes: 121 additions & 0 deletions R/bhm_fit.R
@@ -0,0 +1,121 @@
#MCMC for Gibbs sampling
bhmGibbs<-function(x, y, family, beta, q, cx, control){
c.n = control$c.n

#generate cut point cx
lik = thm.lik(x, y, family, beta, q, cx, control)
d0 = 0.025
if(c.n==2){
rpt = TRUE
D = min(d0, (cx[2]-cx[1])/3)
D1 = min(d0, cx[1]/2)
D2 = min(d0, (1-cx[2])/2)

while (rpt) {
cu = c(runif(1, cx[1]-D1, cx[1]+D), runif(1, cx[2]-D, cx[2]+D2))
if(cu[2] - cu[1] < 0.05) rpt = TRUE
else {
fit = thm.fit(x, y, family, cu)
rpt = !(fit$converged)
}
}
} else {
D = min(d0, cx/2, (1-cx)/2)
cu = runif(1, max(0.05, cx-D), min(cx+D, 0.95))
}
lik1 = thm.lik(x, y, family, beta, q, cu, control)
alpha1 = exp(lik1 - lik)
if(runif(1, 0, 1) < alpha1) {
cx = cu
lik = lik1
}

#generate beta value using Metropolis-Hasting algorithm.
#Candidate value is obtained from the fitted model with
#current iteraction of cut points.
fit = thm.fit(x, y, family, cx)
#fixed the error that candidate shall be phi(betai^*|beta^k), can not use beta_hat
#bhat = fit$coefficient
vb = vcov(fit)
A = chol(vb)
b1 = beta + t(A)%*%rnorm(length(beta), 0, 1)
lik1 = thm.lik(x, y, family, b1, q, cx, control)
# Note that prior of beta ~ phi(beta|beta0) was calculated in thm.lik()
alpha2 = exp(lik1 - lik)
if(runif(1, 0, 1) < alpha2) {
beta = b1
lik = lik1
}

#generate hyper parameter q
if (c.n == 2) {
sc1 = log(cx[2]/(cx[2]-cx[1]))
sc2 = -log(1-cx[2])
q = c(1+rgamma(1,shape=1,scale=sc1), 1+rgamma(1,shape=1,scale=sc2))
} else {
scale = -log(1-cx)
q = 1+rgamma(1, 2, scale = scale)
}
return(list(cx=cx, beta=beta, q=q, lik=lik))
}

# fit the main threshold model
bhmFit = function(x, y, family, control){
R = control$R
c.n = control$c.n
tn = control$thin
var_names = colnames(x)
if(c.n==1) c_names = c('c') else c_names=c('c1', 'c2')

# use profile likelihood method to get initial value of cut-points
pfit = pro.fit(x, y, family, control=list(R = 0, epsilon=0.02, c.n = c.n))

# generate initial values for parameters
g = list(cx = pfit$c.max, beta = pfit$coefficient, q = rep(2, c.n))

#replication from 1 to B (burn-in)
for (i in 1:control$B) g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control)

#replications from B+1 to R(total length of Markov Chain is B+R)
R1 = R*tn
bg = matrix(NaN, R, length(g$beta))
cg = matrix(NaN, R, c.n)
qg = matrix(NaN, R, c.n)
i = 1
for (j in 1:R1){
g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control)

if(j%%tn == 0){
qg[i, ] = g$q
cg[i, ] = g$cx
bg[i, ] = g$beta
i = i + 1
}
}

#estimates and credible interval for the thresholds
c.max= apply(cg,2,mean)
c.fit= thm.fit(x, y, family, c.max)
alpha = control$alpha/2
ptl = c(alpha, 1-alpha)
cqtl = apply(cg, 2, quantile, ptl)

#estimates for the regression coefficients
coef= apply(bg,2,mean)
coefqtl<-apply(bg,2,quantile,ptl)

vcov<-cov(bg)
if (family == "surv") var_names = var_names[-1]
colnames(cg) = c_names
colnames(bg) = var_names
colnames(vcov) = var_names
rownames(vcov) = var_names
se<-sqrt(diag(vcov))

cg = mcmc(cg)
bg = mcmc(bg)
coefqtl = t(HPDinterval(bg))

fit = list(cg=cg,bg=bg,qg=qg,c.max=c.max,cqtl=cqtl,coefficients=coef,coefqtl=coefqtl,vcov=vcov,StdErr=se,var_names=var_names, c.fit = c.fit)
return(fit)
}

0 comments on commit 8d05105

Please sign in to comment.