Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8d05105
Showing
14 changed files
with
892 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Package: bhm | ||
Type: Package | ||
Title: Biomarker Threshold Models | ||
Version: 1.1 | ||
Date: 2016-10-11 | ||
Author: Bingshu E. Chen | ||
Maintainer: Bingshu E. Chen <bingshu.chen@queensu.ca> | ||
Depends: coda, survival | ||
Imports: methods | ||
Description: Biomarker threshold models are tools to fit both predictive and prognostic biomarker effects. Both generalized linear models and Cox proportional hazards models can be fitted using either Bayesian method or profile likelihood method. | ||
License: GPL (>= 2) | ||
LazyLoad: yes | ||
NeedsCompilation: no | ||
Packaged: 2016-10-11 19:03:25 UTC; chenbe | ||
Repository: CRAN | ||
Date/Publication: 2016-10-12 00:37:28 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
3a76e2492f25bb6d44969c40c169120d *DESCRIPTION | ||
eb0dfd2b461c84a03a31385a49ddf922 *NAMESPACE | ||
8256307d3e2844112cc72bed4f9b7f53 *R/bhm.R | ||
526ba7bd877a171893e8ab1a52d105ed *R/bhm_fit.R | ||
bbbabbd6283f8a861619e707e2cb8b55 *R/bhm_lib.R | ||
77eeedd47dd663cf58bf62999b93cc0a *R/prolik.R | ||
c84d68567329ac77192b098af2c2c943 *data/data.rda | ||
0d5c5a7edc77697af1e36b45d78a2fb1 *inst/CITATION | ||
bfba3eda8867001cffbfd075fe8d4a3a *man/bhm-package.Rd | ||
5d22753181624211dc45567a45f9c3c7 *man/bhm.Rd | ||
bed635491c04690a15ee10610dd15156 *man/control.Rd | ||
b0cc661060af77834ef1c1a0ba09d0ba *man/data.Rd | ||
5ee5937f65d07e4435547acb3f9fba1a *man/print.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
import(coda) | ||
import(survival) | ||
importFrom("methods", "is") | ||
importFrom("stats", "cov", "dbeta", "glm", "logLik", "model.frame", | ||
"model.matrix", "model.response", "printCoefmat", | ||
"quantile", "rbinom", "rexp", "rgamma", "rnorm", "runif", "vcov") | ||
S3method(print, bhm) | ||
S3method(bhm, default) | ||
S3method(bhm, formula) | ||
S3method(summary, bhm) | ||
S3method(print, summary.bhm) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
bhm = function(x, ...) UseMethod("bhm") | ||
|
||
bhm.default = function(x, y, family, control, ...) { | ||
cat("bhm: Biomarker thershold models\n") | ||
x = as.matrix(x) | ||
method = control$method | ||
|
||
if(method == 'Bayes') | ||
fit = bhmFit(x, y, family, control) | ||
else | ||
fit = prolikFit(x, y, family, control) | ||
|
||
fit$family = family | ||
fit$call = match.call() | ||
class(fit) = "bhm" | ||
return(fit) | ||
} | ||
|
||
bhm.formula = function(formula, family, data=list(...), control=list(...), ...){ | ||
mf = model.frame(formula=formula, data=data) | ||
|
||
x = model.matrix(attr(mf, "terms"), data = mf) | ||
y = model.response(mf) | ||
|
||
if (class(y) == "Surv") { | ||
family = "surv"; | ||
st = sort(y[, 1], decreasing = TRUE, index.return = TRUE) | ||
idx = st$ix | ||
y = y[idx, ] | ||
x = x[idx, ] | ||
} | ||
|
||
control = do.call("bhmControl", control) | ||
n.col = ncol(x) | ||
|
||
# inverse cdf transformation of biomarker | ||
w = x[, 2] | ||
if((min(w) < 0) | (max(w) > 1)) { | ||
x[, 2] = x.cdf(x[, 2]) | ||
transform = TRUE | ||
} else transform = FALSE | ||
|
||
|
||
#Fit a prognostic model with biomarker term only | ||
if(n.col == 2) { | ||
control$interaction = FALSE | ||
} | ||
|
||
# covariate name for interaction term | ||
if(control$interaction) { | ||
int_names = paste(colnames(x)[2], ":", colnames(x)[3], sep="") | ||
|
||
# Check if there is a main effect for biomarker variable | ||
if(control$biomarker.main){ | ||
var_names = c(colnames(x)[1:3], int_names) | ||
x1 = cbind(x[, 1:3], x[, 2]*x[, 3]) | ||
if(n.col>3) { | ||
var_names = c(var_names, colnames(x)[4:n.col]) | ||
x1 = cbind(x1, x[4:n.col, ]) | ||
} | ||
x = x1 | ||
colnames(x) = var_names | ||
} else { | ||
colnames(x)[2] = int_names | ||
} | ||
} | ||
#print(x[1:5, ]) | ||
|
||
n.col = length(x[1, ]) | ||
if(family == "surv"){ | ||
n.col = n.col - 1 | ||
} | ||
|
||
if (length(control$sigma0.inv) == 1 & n.col > 1) | ||
control$sigma0.inv = diag(rep(control$sigma0.inv, n.col)) | ||
|
||
if (length(control$beta0) == 1) | ||
control$beta0 = rep(control$beta0, n.col) | ||
|
||
fit = bhm.default(x, y, family, control, ...) | ||
|
||
# transoform the value of biomarker back to original value | ||
if(transform) { | ||
fit$c.max = quantile(w, fit$c.max) | ||
qtlName = rownames(fit$cqtl) | ||
fit$cqtl = matrix(quantile(w, fit$cqtl), nrow = 2) | ||
rownames(fit$cqtl) = qtlName | ||
} | ||
|
||
fit$call = match.call() | ||
fit$formula = formula | ||
return(fit) | ||
} | ||
|
||
bhmControl=function(method = 'Bayes', interaction = TRUE, biomarker.main = TRUE, alpha = 0.05, B=50, R=100, thin = 1, epsilon = 0.01, c.n = 1, beta0=0, sigma0 = 10000) { | ||
|
||
if(method != 'profile' && method != 'Bayes') | ||
stop("Please use either 'Bayes' or 'profile' method for model fit") | ||
if (!is.numeric(B) || B <= 0) | ||
stop("value of 'B' must be > 0") | ||
if ((!is.numeric(R) || R <= 0)&&method == 'Bayes') | ||
stop("number of replication 'R' must be > 0") | ||
|
||
if (!is.logical(biomarker.main)) | ||
stop("'biomarker.main' must be a logical value: TURE/FALSE") | ||
if (!is.logical(interaction)) | ||
stop("'interaction' must be a logical value: TURE/FALSE") | ||
if (interaction == FALSE && biomarker.main == FALSE) { | ||
cat('Interaction = FALSE, biomarker.main effect reset to TRUE\n') | ||
biomarker.main = TRUE | ||
} | ||
|
||
if (!is.numeric(alpha) || alpha <= 0||alpha>=1) | ||
stop("number of replication 'alpha' must be between 0 and 1") | ||
if (c.n > 2 || c.n < 1) | ||
stop("number of cutpoints 'c.n' must be either 1 or 2") | ||
if (!is.numeric(sigma0) || sigma0 <= 0) | ||
stop("value of 'sigma' [varince for beta prior] must be > 0") | ||
sigma0.inv = solve(sigma0) | ||
|
||
return(list(method = method, interaction = interaction, biomarker.main = biomarker.main, B=B, R=R, thin = thin, epsilon = epsilon, alpha = alpha, c.n=c.n, beta0 = beta0, sigma0.inv = sigma0.inv)) | ||
} | ||
|
||
summary.bhm=function(object,...){ | ||
x = object | ||
family = x$family | ||
c.max = x$c.max | ||
|
||
if (family == "binomial") { | ||
TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl)) | ||
rownames(TAB1) = x$var_names | ||
TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl)) | ||
|
||
#threshold parameter | ||
if (length(TAB2[, 1]) == 2) { | ||
rownames(TAB2)=c("lower","higer") | ||
} else { | ||
rownames(TAB2)=c("cut point") | ||
} | ||
} | ||
|
||
if (family == "surv") { | ||
TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl)) | ||
TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl)) | ||
} | ||
results = list(call=x$call,TAB1=TAB1, TAB2=TAB2, c.fit=x$c.fit, c.max = x$c.max, var_names = x$var_names) | ||
|
||
class(results)<-"summary.bhm" | ||
return(results) | ||
} | ||
|
||
print.summary.bhm<-function(x,...){ | ||
cat("Call:\n") | ||
print(x$call) | ||
cat("\nRegression coefficients:\n") | ||
|
||
|
||
printCoefmat(x$TAB1,digits=4,P.values=FALSE) | ||
|
||
bname=x$var_names[1] | ||
cat('\n', bname, 'biomarker threshold:\n') | ||
printCoefmat(x$TAB2,digits=4,P.values=FALSE) | ||
c.max = round(x$c.max*10000)/10000 | ||
cat('\nConditional regression coefficients given', bname, 'biomarker = ', c.max, '\n') | ||
print(summary(x$c.fit)) | ||
} | ||
|
||
print.bhm = function(x,...) { | ||
c.max = round(x$c.max*10000)/10000 | ||
bname = x$var_names[1] | ||
cat("Call:\n") | ||
print(x$call) | ||
cat("\nCoefficients:\n") | ||
print(x$coefficients) | ||
cat("\n", bname, "Thresholds:\n") | ||
print(c.max) | ||
cat('\nConditional regression coefficient given ', bname, 'biomarker = ', c.max, '\n') | ||
print(x$c.fit) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#MCMC for Gibbs sampling | ||
bhmGibbs<-function(x, y, family, beta, q, cx, control){ | ||
c.n = control$c.n | ||
|
||
#generate cut point cx | ||
lik = thm.lik(x, y, family, beta, q, cx, control) | ||
d0 = 0.025 | ||
if(c.n==2){ | ||
rpt = TRUE | ||
D = min(d0, (cx[2]-cx[1])/3) | ||
D1 = min(d0, cx[1]/2) | ||
D2 = min(d0, (1-cx[2])/2) | ||
|
||
while (rpt) { | ||
cu = c(runif(1, cx[1]-D1, cx[1]+D), runif(1, cx[2]-D, cx[2]+D2)) | ||
if(cu[2] - cu[1] < 0.05) rpt = TRUE | ||
else { | ||
fit = thm.fit(x, y, family, cu) | ||
rpt = !(fit$converged) | ||
} | ||
} | ||
} else { | ||
D = min(d0, cx/2, (1-cx)/2) | ||
cu = runif(1, max(0.05, cx-D), min(cx+D, 0.95)) | ||
} | ||
lik1 = thm.lik(x, y, family, beta, q, cu, control) | ||
alpha1 = exp(lik1 - lik) | ||
if(runif(1, 0, 1) < alpha1) { | ||
cx = cu | ||
lik = lik1 | ||
} | ||
|
||
#generate beta value using Metropolis-Hasting algorithm. | ||
#Candidate value is obtained from the fitted model with | ||
#current iteraction of cut points. | ||
fit = thm.fit(x, y, family, cx) | ||
#fixed the error that candidate shall be phi(betai^*|beta^k), can not use beta_hat | ||
#bhat = fit$coefficient | ||
vb = vcov(fit) | ||
A = chol(vb) | ||
b1 = beta + t(A)%*%rnorm(length(beta), 0, 1) | ||
lik1 = thm.lik(x, y, family, b1, q, cx, control) | ||
# Note that prior of beta ~ phi(beta|beta0) was calculated in thm.lik() | ||
alpha2 = exp(lik1 - lik) | ||
if(runif(1, 0, 1) < alpha2) { | ||
beta = b1 | ||
lik = lik1 | ||
} | ||
|
||
#generate hyper parameter q | ||
if (c.n == 2) { | ||
sc1 = log(cx[2]/(cx[2]-cx[1])) | ||
sc2 = -log(1-cx[2]) | ||
q = c(1+rgamma(1,shape=1,scale=sc1), 1+rgamma(1,shape=1,scale=sc2)) | ||
} else { | ||
scale = -log(1-cx) | ||
q = 1+rgamma(1, 2, scale = scale) | ||
} | ||
return(list(cx=cx, beta=beta, q=q, lik=lik)) | ||
} | ||
|
||
# fit the main threshold model | ||
bhmFit = function(x, y, family, control){ | ||
R = control$R | ||
c.n = control$c.n | ||
tn = control$thin | ||
var_names = colnames(x) | ||
if(c.n==1) c_names = c('c') else c_names=c('c1', 'c2') | ||
|
||
# use profile likelihood method to get initial value of cut-points | ||
pfit = pro.fit(x, y, family, control=list(R = 0, epsilon=0.02, c.n = c.n)) | ||
|
||
# generate initial values for parameters | ||
g = list(cx = pfit$c.max, beta = pfit$coefficient, q = rep(2, c.n)) | ||
|
||
#replication from 1 to B (burn-in) | ||
for (i in 1:control$B) g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control) | ||
|
||
#replications from B+1 to R(total length of Markov Chain is B+R) | ||
R1 = R*tn | ||
bg = matrix(NaN, R, length(g$beta)) | ||
cg = matrix(NaN, R, c.n) | ||
qg = matrix(NaN, R, c.n) | ||
i = 1 | ||
for (j in 1:R1){ | ||
g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control) | ||
|
||
if(j%%tn == 0){ | ||
qg[i, ] = g$q | ||
cg[i, ] = g$cx | ||
bg[i, ] = g$beta | ||
i = i + 1 | ||
} | ||
} | ||
|
||
#estimates and credible interval for the thresholds | ||
c.max= apply(cg,2,mean) | ||
c.fit= thm.fit(x, y, family, c.max) | ||
alpha = control$alpha/2 | ||
ptl = c(alpha, 1-alpha) | ||
cqtl = apply(cg, 2, quantile, ptl) | ||
|
||
#estimates for the regression coefficients | ||
coef= apply(bg,2,mean) | ||
coefqtl<-apply(bg,2,quantile,ptl) | ||
|
||
vcov<-cov(bg) | ||
if (family == "surv") var_names = var_names[-1] | ||
colnames(cg) = c_names | ||
colnames(bg) = var_names | ||
colnames(vcov) = var_names | ||
rownames(vcov) = var_names | ||
se<-sqrt(diag(vcov)) | ||
|
||
cg = mcmc(cg) | ||
bg = mcmc(bg) | ||
coefqtl = t(HPDinterval(bg)) | ||
|
||
fit = list(cg=cg,bg=bg,qg=qg,c.max=c.max,cqtl=cqtl,coefficients=coef,coefqtl=coefqtl,vcov=vcov,StdErr=se,var_names=var_names, c.fit = c.fit) | ||
return(fit) | ||
} |
Oops, something went wrong.