version 1.1

cran · Oct 12, 2016 · 8d05105 · 8d05105
commit 8d05105
Show file tree

Hide file tree

Showing 14 changed files with 892 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: bhm
+Type: Package
+Title: Biomarker Threshold Models
+Version: 1.1
+Date: 2016-10-11
+Author: Bingshu E. Chen
+Maintainer: Bingshu E. Chen <bingshu.chen@queensu.ca>
+Depends: coda, survival
+Imports: methods
+Description: Biomarker threshold models are tools to fit both predictive and prognostic biomarker effects. Both generalized linear models and Cox proportional hazards models can be fitted using either Bayesian method or profile likelihood method. 
+License: GPL (>= 2)
+LazyLoad: yes
+NeedsCompilation: no
+Packaged: 2016-10-11 19:03:25 UTC; chenbe
+Repository: CRAN
+Date/Publication: 2016-10-12 00:37:28
diff --git a/MD5 b/MD5
@@ -0,0 +1,13 @@
+3a76e2492f25bb6d44969c40c169120d *DESCRIPTION
+eb0dfd2b461c84a03a31385a49ddf922 *NAMESPACE
+8256307d3e2844112cc72bed4f9b7f53 *R/bhm.R
+526ba7bd877a171893e8ab1a52d105ed *R/bhm_fit.R
+bbbabbd6283f8a861619e707e2cb8b55 *R/bhm_lib.R
+77eeedd47dd663cf58bf62999b93cc0a *R/prolik.R
+c84d68567329ac77192b098af2c2c943 *data/data.rda
+0d5c5a7edc77697af1e36b45d78a2fb1 *inst/CITATION
+bfba3eda8867001cffbfd075fe8d4a3a *man/bhm-package.Rd
+5d22753181624211dc45567a45f9c3c7 *man/bhm.Rd
+bed635491c04690a15ee10610dd15156 *man/control.Rd
+b0cc661060af77834ef1c1a0ba09d0ba *man/data.Rd
+5ee5937f65d07e4435547acb3f9fba1a *man/print.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,12 @@
+exportPattern("^[[:alpha:]]+")
+import(coda)
+import(survival)
+importFrom("methods", "is")
+importFrom("stats", "cov", "dbeta", "glm", "logLik", "model.frame",
+           "model.matrix", "model.response", "printCoefmat",
+           "quantile", "rbinom", "rexp", "rgamma", "rnorm", "runif", "vcov")
+S3method(print, bhm)
+S3method(bhm, default)
+S3method(bhm, formula)
+S3method(summary, bhm)
+S3method(print, summary.bhm)
diff --git a/R/bhm.R b/R/bhm.R
@@ -0,0 +1,179 @@
+bhm = function(x, ...) UseMethod("bhm")
+
+bhm.default = function(x, y, family, control, ...) {
+  cat("bhm: Biomarker thershold models\n")
+  x = as.matrix(x)
+  method = control$method
+
+  if(method == 'Bayes')
+    fit = bhmFit(x, y, family, control)
+  else
+    fit = prolikFit(x, y, family, control)
+
+  fit$family = family
+  fit$call = match.call()
+  class(fit) = "bhm"
+  return(fit)
+}
+
+bhm.formula = function(formula, family, data=list(...), control=list(...), ...){
+  mf = model.frame(formula=formula, data=data)
+
+  x = model.matrix(attr(mf, "terms"), data = mf)
+  y = model.response(mf)
+
+  if (class(y) == "Surv") {
+    family = "surv";
+    st = sort(y[, 1], decreasing = TRUE, index.return = TRUE)
+    idx = st$ix
+    y = y[idx, ]
+    x = x[idx, ]
+  }
+
+  control = do.call("bhmControl", control)
+  n.col  = ncol(x)
+
+  # inverse cdf transformation of biomarker
+  w = x[, 2]
+  if((min(w) < 0) | (max(w) > 1)) {
+    x[, 2] = x.cdf(x[, 2])
+    transform = TRUE
+  } else transform = FALSE
+
+
+  #Fit a prognostic model with biomarker term only
+  if(n.col == 2) {
+    control$interaction = FALSE
+  }
+
+  # covariate name for interaction term
+  if(control$interaction) {
+    int_names = paste(colnames(x)[2], ":", colnames(x)[3], sep="")
+
+    # Check if there is a main effect for biomarker variable
+    if(control$biomarker.main){
+      var_names =  c(colnames(x)[1:3], int_names)
+      x1 = cbind(x[, 1:3], x[, 2]*x[, 3])
+      if(n.col>3) {
+        var_names = c(var_names, colnames(x)[4:n.col])
+        x1 = cbind(x1, x[4:n.col, ])
+      }
+      x = x1
+      colnames(x) = var_names
+    } else {
+      colnames(x)[2] = int_names
+    }
+  }
+  #print(x[1:5, ])
+
+  n.col = length(x[1, ])
+  if(family == "surv"){
+    n.col = n.col - 1
+  }
+
+  if (length(control$sigma0.inv) == 1 & n.col > 1)
+    control$sigma0.inv = diag(rep(control$sigma0.inv, n.col))
+
+  if (length(control$beta0) == 1)
+    control$beta0 = rep(control$beta0, n.col)
+
+  fit = bhm.default(x, y, family, control, ...)
+
+  # transoform the value of biomarker back to original value
+  if(transform) {
+    fit$c.max = quantile(w, fit$c.max)
+    qtlName = rownames(fit$cqtl)
+    fit$cqtl = matrix(quantile(w, fit$cqtl), nrow = 2)
+    rownames(fit$cqtl) = qtlName
+  }
+
+  fit$call = match.call()
+  fit$formula = formula
+  return(fit)
+}
+
+bhmControl=function(method = 'Bayes', interaction = TRUE, biomarker.main = TRUE, alpha = 0.05, B=50, R=100, thin = 1, epsilon = 0.01, c.n = 1, beta0=0, sigma0 = 10000) {
+
+  if(method != 'profile' && method != 'Bayes')
+    stop("Please use either 'Bayes' or 'profile' method for model fit")
+  if (!is.numeric(B) || B <= 0) 
+    stop("value of 'B' must be > 0")
+  if ((!is.numeric(R) || R <= 0)&&method == 'Bayes')
+    stop("number of replication 'R' must be > 0")
+
+  if (!is.logical(biomarker.main))
+    stop("'biomarker.main' must be a logical value: TURE/FALSE") 
+  if (!is.logical(interaction))
+    stop("'interaction' must be a logical value: TURE/FALSE")
+  if (interaction == FALSE && biomarker.main == FALSE) {
+    cat('Interaction = FALSE, biomarker.main effect reset to TRUE\n')
+    biomarker.main = TRUE
+  }
+
+  if (!is.numeric(alpha) || alpha <= 0||alpha>=1) 
+    stop("number of replication 'alpha' must be between 0 and 1")
+  if (c.n > 2 || c.n < 1)
+    stop("number of cutpoints 'c.n' must be either 1 or 2")
+  if (!is.numeric(sigma0) || sigma0 <= 0) 
+    stop("value of 'sigma' [varince for beta prior] must be > 0")
+  sigma0.inv = solve(sigma0)
+
+  return(list(method = method,  interaction = interaction, biomarker.main = biomarker.main, B=B, R=R, thin = thin, epsilon = epsilon, alpha = alpha, c.n=c.n, beta0 = beta0, sigma0.inv = sigma0.inv))
+}
+
+summary.bhm=function(object,...){
+  x = object
+  family = x$family
+  c.max = x$c.max
+
+  if (family == "binomial") {
+    TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl))
+    rownames(TAB1) = x$var_names
+    TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl))
+
+    #threshold parameter
+    if (length(TAB2[, 1]) == 2) {
+      rownames(TAB2)=c("lower","higer")
+    } else {
+      rownames(TAB2)=c("cut point")
+    }
+  }
+
+  if (family == "surv") {
+    TAB1<-t(rbind(Estimate=x$coefficients,StdErr=x$StdErr,CredibleInterval=x$coefqtl))
+    TAB2<-t(rbind(Estimate=x$c.max,CredibleInterval=x$cqtl))
+  }
+  results = list(call=x$call,TAB1=TAB1, TAB2=TAB2, c.fit=x$c.fit, c.max = x$c.max, var_names = x$var_names)
+
+  class(results)<-"summary.bhm"
+  return(results)
+}
+
+print.summary.bhm<-function(x,...){
+  cat("Call:\n")
+  print(x$call)
+  cat("\nRegression coefficients:\n")
+
+
+  printCoefmat(x$TAB1,digits=4,P.values=FALSE)
+
+  bname=x$var_names[1]
+  cat('\n', bname, 'biomarker threshold:\n')
+  printCoefmat(x$TAB2,digits=4,P.values=FALSE)
+  c.max = round(x$c.max*10000)/10000
+  cat('\nConditional regression coefficients given', bname, 'biomarker = ', c.max, '\n')
+  print(summary(x$c.fit))
+}
+
+print.bhm = function(x,...) {
+   c.max = round(x$c.max*10000)/10000
+   bname = x$var_names[1]
+   cat("Call:\n")
+   print(x$call)
+   cat("\nCoefficients:\n")
+   print(x$coefficients)
+   cat("\n", bname, "Thresholds:\n")
+   print(c.max)
+   cat('\nConditional regression coefficient given ', bname, 'biomarker = ', c.max, '\n')  
+   print(x$c.fit)
+}
diff --git a/R/bhm_fit.R b/R/bhm_fit.R
@@ -0,0 +1,121 @@
+#MCMC for Gibbs sampling
+bhmGibbs<-function(x, y, family, beta, q, cx, control){
+  c.n = control$c.n
+
+  #generate cut point cx
+  lik = thm.lik(x, y, family, beta, q, cx, control)
+  d0 = 0.025
+  if(c.n==2){
+    rpt = TRUE
+    D   = min(d0, (cx[2]-cx[1])/3)
+    D1  = min(d0, cx[1]/2)
+    D2  = min(d0, (1-cx[2])/2)
+
+    while (rpt) {
+      cu = c(runif(1, cx[1]-D1, cx[1]+D), runif(1, cx[2]-D, cx[2]+D2))
+      if(cu[2] - cu[1] < 0.05) rpt = TRUE
+      else {
+        fit = thm.fit(x, y, family, cu)
+        rpt = !(fit$converged)
+      }
+    }
+  } else {
+    D = min(d0, cx/2, (1-cx)/2)
+    cu = runif(1, max(0.05, cx-D), min(cx+D, 0.95))
+  }
+  lik1 = thm.lik(x, y, family, beta, q, cu, control)
+  alpha1 = exp(lik1 - lik)
+  if(runif(1, 0, 1) < alpha1) {
+    cx = cu
+    lik = lik1
+  }
+
+#generate beta value using Metropolis-Hasting algorithm. 
+#Candidate value is obtained from the fitted model with 
+#current iteraction of cut points.
+  fit = thm.fit(x, y, family, cx)
+  #fixed the error that candidate shall be phi(betai^*|beta^k), can not use beta_hat
+  #bhat = fit$coefficient
+  vb = vcov(fit)
+  A = chol(vb)
+  b1 = beta + t(A)%*%rnorm(length(beta), 0, 1)
+  lik1 = thm.lik(x, y, family, b1, q, cx, control)
+  # Note that prior of beta ~ phi(beta|beta0) was calculated in thm.lik()
+  alpha2 = exp(lik1 - lik)
+  if(runif(1, 0, 1) < alpha2) {
+    beta = b1
+    lik = lik1
+  }
+
+#generate hyper parameter q
+  if (c.n == 2) {
+    sc1 =  log(cx[2]/(cx[2]-cx[1]))
+    sc2 = -log(1-cx[2])
+    q = c(1+rgamma(1,shape=1,scale=sc1), 1+rgamma(1,shape=1,scale=sc2))   
+  } else {
+    scale = -log(1-cx)
+    q = 1+rgamma(1, 2, scale = scale)
+  }
+  return(list(cx=cx, beta=beta, q=q, lik=lik))
+}
+
+# fit the main threshold model
+bhmFit = function(x, y, family, control){
+  R   = control$R
+  c.n = control$c.n
+  tn  = control$thin
+  var_names = colnames(x)
+  if(c.n==1) c_names = c('c') else c_names=c('c1', 'c2')
+
+# use profile likelihood method to get initial value of cut-points
+  pfit = pro.fit(x, y, family, control=list(R = 0, epsilon=0.02, c.n = c.n))
+
+# generate initial values for parameters
+  g = list(cx = pfit$c.max, beta = pfit$coefficient, q = rep(2, c.n))
+
+#replication from 1 to B (burn-in)
+  for (i in 1:control$B) g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control)
+
+#replications from B+1 to R(total length of Markov Chain is B+R)
+  R1 = R*tn
+  bg = matrix(NaN, R, length(g$beta))
+  cg = matrix(NaN, R, c.n)
+  qg = matrix(NaN, R, c.n)
+  i = 1
+  for (j in 1:R1){
+    g = bhmGibbs(x, y, family, g$beta, g$q, g$cx, control)
+
+    if(j%%tn == 0){ 
+      qg[i, ] = g$q
+      cg[i, ] = g$cx
+      bg[i, ] = g$beta
+      i = i + 1
+    }
+  }
+
+#estimates and credible interval for the thresholds
+  c.max= apply(cg,2,mean)
+  c.fit= thm.fit(x, y, family, c.max)
+  alpha = control$alpha/2
+  ptl  = c(alpha, 1-alpha)
+  cqtl = apply(cg, 2, quantile, ptl)
+
+#estimates for the regression coefficients
+  coef= apply(bg,2,mean)
+  coefqtl<-apply(bg,2,quantile,ptl)
+
+  vcov<-cov(bg)
+  if (family == "surv") var_names = var_names[-1]
+  colnames(cg)   =   c_names
+  colnames(bg)   = var_names
+  colnames(vcov) = var_names
+  rownames(vcov) = var_names
+  se<-sqrt(diag(vcov))
+
+  cg = mcmc(cg)
+  bg = mcmc(bg)
+  coefqtl = t(HPDinterval(bg))
+
+  fit = list(cg=cg,bg=bg,qg=qg,c.max=c.max,cqtl=cqtl,coefficients=coef,coefqtl=coefqtl,vcov=vcov,StdErr=se,var_names=var_names, c.fit = c.fit)
+  return(fit)
+}