Permalink
Cannot retrieve contributors at this time
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1329 lines (1171 sloc)
61.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| make.swt <- function(I=NULL,J=NULL,H=NULL,K,design="cross-sec",mu=NULL,b.trt, | |
| b.time=NULL,sigma.y=NULL,sigma.e=NULL,rho,sigma.a=NULL,rho.ind=NULL, | |
| sigma.v=NULL,X=NULL,family="gaussian",natural.scale=TRUE) { | |
| ## Normal outcome | |
| ## inputs: | |
| # I = number of clusters | |
| # J = number of randomisation time points (excluding baseline) | |
| # K = average sample size in each cluster | |
| # H = number of units randomised at each time point | |
| # design = type of design. Can be "cross-sec" (default) or "cohort" (repeated measurements) | |
| # mu = baseline mean | |
| # b.trt = intervention effect | |
| # b.time = (optional) time effect (on linear time trend scale!) | |
| # sigma.y = *total* individual variability | |
| # sigma.e = *residual* individual variability | |
| # rho = ICC at the cluster level | |
| # sigma.a = the sd of the cluster random effect (default at NULL) | |
| # rho.ind = ICC at the individual level (for cohort design, default at NULL) | |
| # sigma.v = the sd of the treatment random effect (default at NULL) | |
| # X = SW design matrix (default at NULL and will be computed automatically) | |
| # family = type of outcome to be simulated (options are "gaussian", "binomial" and "poisson") | |
| # natural.scale = whether the input values for the trt effect etc are passed on the natural scale or not | |
| # | |
| # GB + Rosalind Leach (November 2015) | |
| # CHECK ON VALID FAMILY TYPE | |
| flag=1 | |
| if(family=="gaussian" | family=="binomial" | family=="poisson"){flag=0} | |
| if(flag==1){ stop("Available options for the argument 'family' are: gaussian, binomial or poisson")} | |
| # CREATE DESIGN MATRIX FROM sw.design.mat IF X HAS NOT BEEN PROVIDED | |
| if(is.null(X)) {X <- sw.design.mat(I=I,J=J,H=NULL)} else {row.names(X) <- sample(1:I,I)} | |
| if(family == "gaussian"){ | |
| # SETS DEFAULT FOR mu AND b.time IF THEY HAVE NOT BEEN PORVIDED | |
| if(is.null(mu)) {mu=0} # Baseline (set to 0 if not specified) | |
| if(is.null(b.time)) { # Time effect (if not specified, set to) | |
| b.time <- .5*b.trt } # some pre-defined proportion of the | |
| # SIMULATES VARIABLES FROM THE INPUTS GIVEN | |
| treatment <- rep(t(X),each=K,1) # Treatment allocation at each time point | |
| time <- rep(seq(0,J),each=K,I) # K measurements during the trial | |
| person <- seq(1,K) # individuals IDs | |
| cluster.order <- rep(sample(1:I)) # order at which clusters switch | |
| cluster <- rep(cluster.order, each=(J+1)*K) # cluster IDs (randomised) | |
| id <- NULL | |
| # Calculate the hyperparameters | |
| mu.a <- 0 # Within cluster (random intercept) mean | |
| # CROSS-SECTIONAL DESIGN ------------------------------------------------- | |
| if(design=="cross-sec") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.e, sigma.y, sigma.a, rho HAS BEEN GIVEN | |
| if((sum(sapply(list(sigma.y,sigma.e),is.null)) !=1) & (sum(sapply(list(sigma.a,rho),is.null)) !=1)) { # Need to pass 1 of 'sigma.y' or 'sigma.e' and 1 of 'sigma.a' or 'rho | |
| stop("Please provide either 'sigma.y' (*total* individual variability) or 'sigma.e' (*residual* variability) | |
| and either 'sigma.a' (*within cluster* variability) or 'rho' (ICC)")} | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.e) & is.null(sigma.a)) { | |
| sigma.a <- rho*sigma.y | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2) | |
| } | |
| if(is.null(sigma.e) & is.null(rho)) { | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2) | |
| rho <- sigma.a^2/sigma.y^2 | |
| } | |
| if(is.null(sigma.y) & is.null(sigma.a)) { | |
| sigma.a <- sqrt(sigma.e^2*rho/(1-rho)) | |
| sigma.y <- sqrt(sigma.a^2+sigma.e^2) | |
| } | |
| if(is.null(sigma.y) & is.null(rho)) { | |
| sigma.y <- sqrt(sigma.e^2+sigma.a^2) | |
| rho <- sigma.a^2/sigma.y^2 | |
| } | |
| # SIMULATE REMAINING VARIABLES | |
| a <- rnorm(I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR | |
| linpred <- mu + a[cluster] + b.trt*treatment + b.time*time # Calculates the linear predictor | |
| } | |
| # COHORT DESIGN ------------------------------------------------- | |
| if (design=="cohort") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.e, sigma.y, sigma.a, rho HAS BEEN GIVEN | |
| if((sum(sapply(list(sigma.y,sigma.e),is.null)) !=1) || (sum(sapply(list(sigma.a,rho),is.null)) !=1) || | |
| (sum(sapply(list(sigma.v,rho.ind),is.null)) !=1)) { | |
| stop("Please provide either 'sigma.y' (*total* individual variability) or 'sigma.e' (*residual* variability) | |
| AND either 'sigma.a' (*within cluster* variability) or 'rho' (cluster-level ICC) | |
| AND either 'sigma.v' (*within cluster-individual* variability) or 'rho.ind (individual-level ICC)") } | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.y) & is.null(sigma.a) & is.null(sigma.v)) { | |
| r1 <- rho.ind/(1-rho.ind) | |
| r2 <- rho/(1-rho) | |
| sigma.v <- sqrt((r1*(sigma.e^2/(1-rho)))/(1-r1*r2)) | |
| sigma.a <- sqrt(r2*(sigma.e^2+sigma.v^2)) | |
| sigma.y <- sqrt(sigma.e^2+sigma.a^2+sigma.v^2) | |
| } | |
| if(is.null(sigma.y) & is.null(rho) & is.null(sigma.v)) { | |
| r <- rho.ind/(1-rho.ind) | |
| sigma.v <- sqrt(r*(sigma.a^2+sigma.e^2)) | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| sigma.y <- sqrt(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| if(is.null(sigma.y) & is.null(sigma.a) & is.null(rho.ind)) { | |
| r <- rho/(1-rho) | |
| sigma.a <- sqrt(r*(sigma.v^2+sigma.e^2)) | |
| sigma.y <- sqrt(sigma.a^2+sigma.e^2+sigma.v^2) | |
| rho.ind <- sigma.v^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| if(is.null(sigma.y) & is.null(rho) & is.null(rho.ind)) { | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| rho.ind <- sigma.v^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| sigma.y <- sqrt(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| if(is.null(sigma.e) & is.null(sigma.a) & is.null(sigma.v)) { | |
| sigma.a <- sqrt(sigma.y^2*rho) | |
| sigma.v <- sqrt(sigma.y^2*rho.ind) | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2-sigma.v^2) | |
| } | |
| if(is.null(sigma.e) & is.null(rho) & is.null(sigma.v)) { | |
| rho <- sigma.a^2/sigma.y^2 | |
| sigma.v <- rho.ind/sigma.y^2 | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2-sigma.v^2) | |
| } | |
| if(is.null(sigma.e) & is.null(sigma.a) & is.null(rho.ind)) { | |
| sigma.a <- sqrt(rho*sigma.y^2) | |
| rho.ind <- sigma.v^2/sigma.y^2 | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2-sigma.v^2) | |
| } | |
| if(is.null(sigma.e) & is.null(rho) & is.null(rho.ind)) { | |
| rho <- sigma.a^2/sigma.y^2 | |
| rho.ind <- sigma.v^2/sigma.y^2 | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2-sigma.v^2) | |
| } | |
| # SIMULATE REMAINING VARIABLES | |
| start <- seq(1,I*K,by=K) | |
| id <- numeric # Individual ID (for repeated measurements) | |
| id <- rep(seq(start[cluster.order[1]],start[cluster.order[1]]+K-1),(J+1)) | |
| for (i in 2:I) { | |
| id <- c(id,rep(seq(start[cluster.order[i]],start[cluster.order[i]]+K-1),(J+1))) | |
| } | |
| mu.v <- 0 # within individuals (random intercept) mean | |
| v <- rnorm(K*I,mu.v,sigma.v) # individual-level intercept | |
| sigma.a <- sqrt(sigma.y^2*rho) # Within cluster (random intercept) sd | |
| a <- rnorm (I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR | |
| linpred <- mu + a[cluster] + v[id] + b.trt*treatment + b.time*time | |
| } | |
| # SIMULATES DATA | |
| n <- I*(J+1)*K | |
| y <- rnorm(I*(J+1)*K, linpred, sigma.e) | |
| } | |
| if(family=="binomial"){ | |
| # SETS DEFAULT FOR mu AND b.time IF THEY HAVE NOT BEEN PORVIDED | |
| if(is.null(mu)) { | |
| if (natural.scale==TRUE) { | |
| mu=.5 } # Default baseline probability on [0-1] | |
| else if (natural.scale==FALSE) { | |
| mu=log(.5/.5) } # Default baseline probability on logit scale | |
| } | |
| # SIMULATES VARIABLES FROM THE INPUTS GIVEN | |
| treatment <- rep(t(X),each=K,1) # Treatment allocation at each time point | |
| time <- rep(seq(0,J),each=K,I) # K measurements during the trial | |
| person <- seq(1,K) # individuals IDs | |
| cluster.order <- rep(sample(1:I)) # order at which clusters switch | |
| cluster <- rep(cluster.order, each=(J+1)*K) # cluster IDs (randomised) | |
| id <- NULL | |
| # If natural.scale = T, then assume that the user is giving values for p0 (mu) and OR directly (b.trt) | |
| if (natural.scale==TRUE) { | |
| OR <- b.trt # for simplicity defines the OR | |
| b.trt <- log(b.trt) # logOR to be used in the linear predictor | |
| p0 <- mu # for simplicity defines the baseline probability | |
| mu <- log(mu/(1-mu)) # Converts baseline probability to log odds to be used in the linear predictor | |
| } | |
| # But if natural.scale = F, then the user has passed data on the logit scale for p0 (mu) and on the logOR (b.trt) | |
| if (natural.scale==FALSE) { | |
| OR <- exp(b.trt) # Defines the OR on the natural scale | |
| p0 <- exp(mu)/(1+exp(mu)) # Rescales the baseline to the [0-1] range | |
| } | |
| # Now defines p1 and sigma.e | |
| p1 <- OR*(p0/(1-p0))/(1+OR*(p0/(1-p0))) # Estimates the outcome probability for intervention | |
| sigma.e <- sqrt(((p0*(1-p0))+(p1*(1-p1)))/2) # Pooled estimate of the within cluster sd | |
| if(is.null(b.time)) { # Time effect (if not specified, set to) | |
| b.time <- .5*b.trt } # some pre-defined proportion of the treatment effect | |
| # CALCULATE THE HYPERPARAMETERS | |
| mu.a <- 0 # Within cluster (random intercept) mean | |
| # CROSS-SECTIONAL DESIGN ------------------------------------------------- | |
| if(design=="cross-sec") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.a, rho HAS BEEN GIVEN | |
| if (sum(sapply(list(rho,sigma.a),is.null)) !=1) { #cannot provide both sigma.a and rho | |
| stop("exactly one of 'rho' and 'sigma.a' must be null") } | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.a)){ # if sigma.a is not given, calculate it as a function of rho | |
| sigma.a <- sqrt(sigma.e^2*rho/(1-rho)) } # Within cluster (random intercept) sd | |
| if(is.null(rho)){ # if sigma.a is not given, calculate it as a function of rho | |
| rho <- (sigma.a^2/(sigma.e^2 + sigma.a^2)) } # if rho is not given, calculate it as a function of sigma.a | |
| # SIMULATE REMAINING VARIABLES | |
| a <- rnorm(I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR ON THE LOGIT SCALE! | |
| linpred <- mu + a[cluster] + b.trt*treatment + b.time*time | |
| } | |
| # COHORT DESIGN ------------------------------------------------- | |
| if (design=="cohort") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.a, sigma.v, rho, rho.ind HAS BEEN GIVEN | |
| if((sum(sapply(list(rho, sigma.a),is.null)) !=1) || (sum(sapply(list(sigma.a,rho),is.null)) !=1) || | |
| (sum(sapply(list(sigma.v,rho.ind),is.null)) !=1)) { | |
| stop("Please provide either 'sigma.a' (*within cluster* variability) or 'rho' (cluster-level ICC) | |
| AND either 'sigma.v' (*within cluster-individual* variability) or 'rho.ind (individual-level ICC)") } | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.v) & is.null(sigma.a)) { | |
| r1 <- rho.ind/(1-rho.ind) | |
| r2 <- rho/(1-rho) | |
| sigma.v <- sqrt((r1*(sigma.e^2/(1-rho)))/(1-r1*r2)) | |
| sigma.a <- sqrt(r2*(sigma.e^2+sigma.v^2)) | |
| } | |
| if(is.null(rho.ind) & is.null(sigma.a)) { | |
| r <- rho/(1-rho) | |
| sigma.a <- sqrt(r*(sigma.v^2+sigma.e^2)) | |
| #rho.ind <- | |
| } | |
| if(is.null(rho.ind) & is.null(rho)) { | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| rho.ind <- sigma.v^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| if(is.null(rho) & is.null(sigma.v)) { | |
| r <- rho.ind/(1-rho.ind) | |
| sigma.v <- sqrt(r*(sigma.a^2+sigma.e^2)) | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| # SIMULATE REMAINING VARIABLES | |
| start <- seq(1,I*K,by=K) | |
| id <- numeric # Individual ID (for repeated measurements) | |
| id <- rep(seq(start[cluster.order[1]],start[cluster.order[1]]+K-1),(J+1)) | |
| for (i in 2:I) { | |
| id <- c(id,rep(seq(start[cluster.order[i]],start[cluster.order[i]]+K-1),(J+1))) | |
| } | |
| mu.v <- 0 # within individuals (random intercept) mean | |
| v <- rnorm(K*I,mu.v,sigma.v) # individual-level intercept | |
| ### sigma.a <- sqrt(sigma.y^2*rho) # Within cluster (random intercept) sd | |
| a <- rnorm (I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR ON THE LOGIT SCALE! | |
| linpred <- mu + a[cluster] + v[id] + b.trt*treatment + b.time*time | |
| } | |
| # SIMULATES DATA | |
| n <- I*(J+1)*K | |
| # Converts log odds back to probability | |
| p = (exp(linpred))/(1+exp(linpred)) | |
| y <- rbinom(n, 1, p) | |
| } | |
| if(family == "poisson"){ | |
| # SETS DEFAULT FOR mu AND b.time IF THEY HAVE NOT BEEN PROVIDED | |
| if(is.null(mu)) { | |
| if (natural.scale==TRUE) { | |
| mu=1} # Default baseline rate set to 1 | |
| else if (natural.scale==FALSE) { | |
| mu=log(1)} # Default baseline rate on log scale | |
| } | |
| # SIMULATES VARIABLES FROM THE INPUTS GIVEN | |
| treatment <- rep(t(X),each=K,1) # Treatment allocation at each time point | |
| time <- rep(seq(0,J),each=K,I) # K measurements during the trial | |
| person <- seq(1,K) # individuals IDs | |
| cluster.order <- rep(sample(1:I)) # order at which clusters switch | |
| cluster <- rep(cluster.order, each=(J+1)*K) # cluster IDs (randomised) | |
| id <- NULL # initialise ID vector | |
| # If natural.scale = T, then assume that the user is giving values for p0 and OR directly | |
| if (natural.scale==TRUE) { | |
| RR <- b.trt # For convenience defines the RR | |
| b.trt <- log(b.trt) # logRR to be used in the linear predictor | |
| lambda0 <- mu # For convenience defines the baseline rate | |
| mu <- log(mu) # Converts baseline probability to log rate to be used in the linear predictor | |
| } | |
| # But if natural.scale = F, then the user has passed data on the logit scale for p0 and on the logOR | |
| if (natural.scale==FALSE) { | |
| RR <- exp(b.trt) # Defines the RR on the natural scale | |
| lambda0 <- exp(mu) # Rescales the baseline rate to the [0-\infty] range | |
| } | |
| lambda1 <- lambda0*RR # rate for intervention | |
| sigma.e <- sqrt((lambda0+lambda1)/2) # Within cluster sd (estimated using pooled value) | |
| if(is.null(b.time)) { # Time effect (if not specified, set to) | |
| b.time <- .5*b.trt } # some pre-defined proportion of the | |
| # CALCULATE THE HYPERPARAMETERS | |
| mu.a <- 0 # Within cluster (random intercept) mean | |
| # CROSS-SECTIONAL DESIGN ------------------------------------------------- | |
| if(design=="cross-sec") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.a, rho HAS BEEN GIVEN | |
| if (sum(sapply(list(rho,sigma.a),is.null)) !=1) { #cannot provide both sigma.a and rho | |
| stop("exactly one of 'rho' and 'sigma.a' must be null") } | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.a)){ # if sigma.a is not given, calculate it as a function of rho | |
| sigma.a <- sqrt(sigma.e^2*rho/(1-rho)) } # Within cluster (random intercept) sd | |
| if(is.null(rho)){ # if sigma.a is not given, calculate it as a function of rho | |
| rho <- (sigma.a^2/(sigma.e^2 + sigma.a^2)) } # if rho is not given, calculate it as a function of sigma.a | |
| # SIMULATE REMAINING VARIABLES | |
| a <- rnorm(I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR ON THE LOG SCALE | |
| linpred <- mu + a[cluster] + b.trt*treatment + b.time*time | |
| } | |
| # COHORT DESIGN ------------------------------------------------- | |
| if (design=="cohort") { | |
| # CHECK THE RIGHT COMBINATION OF sigma.a, rho, rho.ind HAS BEEN GIVEN | |
| if((sum(sapply(list(rho, sigma.a),is.null)) !=1) || (sum(sapply(list(sigma.a,rho),is.null)) !=1) || | |
| (sum(sapply(list(sigma.v,rho.ind),is.null)) !=1)) { | |
| stop("Please provide either 'sigma.a' (*within cluster* variability) or 'rho' (cluster-level ICC) | |
| AND either 'sigma.v' (*within cluster-individual* variability) or 'rho.ind (individual-level ICC)") } | |
| # IF STATEMENTS TO CALCULATE ALL RELEVANT PARAMETERS FROM THE INPUTS GIVEN | |
| if(is.null(sigma.v) & is.null(sigma.a)) { | |
| r1 <- rho.ind/(1-rho.ind) | |
| r2 <- rho/(1-rho) | |
| sigma.v <- sqrt((r1*(sigma.e^2/(1-rho)))/(1-r1*r2)) | |
| sigma.a <- sqrt(r2*(sigma.e^2+sigma.v^2)) | |
| } | |
| if(is.null(rho.ind) & is.null(sigma.a)) { | |
| r <- rho/(1-rho) | |
| sigma.a <- sqrt(r*(sigma.v^2+sigma.e^2)) | |
| #rho.ind <- | |
| } | |
| if(is.null(rho.ind) & is.null(rho)) { | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| rho.ind <- sigma.v^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| if(is.null(rho) & is.null(sigma.v)) { | |
| r <- rho.ind/(1-rho.ind) | |
| sigma.v <- sqrt(r*(sigma.a^2+sigma.e^2)) | |
| rho <- sigma.a^2/(sigma.a^2+sigma.e^2+sigma.v^2) | |
| } | |
| # SIMULATE REMAINING VARIABLES | |
| start <- seq(1,I*K,by=K) | |
| id <- numeric # Individual ID (for repeated measurements) | |
| id <- rep(seq(start[cluster.order[1]],start[cluster.order[1]]+K-1),(J+1)) | |
| for (i in 2:I) { | |
| id <- c(id,rep(seq(start[cluster.order[i]],start[cluster.order[i]]+K-1),(J+1))) | |
| } | |
| mu.v <- 0 # within individuals (random intercept) mean | |
| v <- rnorm(K*I,mu.v,sigma.v) # individual-level intercept | |
| ### sigma.a <- sqrt(sigma.y^2*rho) # Within cluster (random intercept) sd | |
| a <- rnorm (I, mu.a, sigma.a) # cluster-level intercept | |
| # CALCULATE LINEAR PREDICTOR | |
| linpred <- mu + a[cluster] + v[id] + b.trt*treatment + b.time*time | |
| } | |
| # SIMULATES DATA | |
| n <- I*(J+1)*K | |
| y <- rpois(I*(J+1)*K, lambda=exp(linpred)) | |
| } | |
| # RETURNS DATA FRAME | |
| data <- data.frame(y, person, time, cluster, treatment, linpred, b.trt) | |
| if (design=="cohort") {data <- data.frame(data,id)} | |
| return(data) | |
| } | |
| sim.power <- function (I,J,H=NULL,K,design="cross-sec",mu=0,b.trt,b.time=NULL, | |
| sigma.y=NULL,sigma.e=NULL,rho=NULL,sigma.a=NULL, | |
| rho.ind=NULL,sigma.v=NULL,n.sims=1000,formula=NULL, | |
| family="gaussian",natural.scale=TRUE,sig.level=0.05,n.cores=NULL, | |
| method="lme",plot=FALSE,...) { | |
| ## Power analysis for repeated cohort design (cross-sectional) - continuous outcome | |
| ## The optional arguments include the specification of a user-defined function data | |
| ## which is used to simulate the data, according to any design the user has. | |
| ## So including data=data, where data=function(){...} specifies the simulation | |
| ## of a suitable dataset allows the user to select *any* kind of model for data | |
| ## generation. If data has inputs, then these must be specified as a list | |
| ## inpts, to be passed as an extra argument to sim.sw.cont. | |
| ## In addition to this, the user should then select a suitable formula | |
| ## which is in line with the model used for the simulation of the virtual trial. | |
| ## Also, the name of the "treatment" variable can be specified as a string in the | |
| ## extra argument treat.name. If not present then it is assumed that the name of the | |
| ## treatment variable is, unsurprisingly, "treatment" | |
| ## method is default at 'lme' indicating that a linear mixed model using lmer will be | |
| ## used for the analysis. The user can also specify 'INLA' or 'inla' which performs | |
| ## the analysis in Bayesian framework with INLA | |
| exArgs <- list(...) | |
| requireNamespace("lme4",quietly=TRUE) | |
| requireNamespace("foreach",quietly=TRUE) | |
| requireNamespace("doParallel",quietly=TRUE) | |
| requireNamespace("parallel",quietly=TRUE) | |
| requireNamespace("iterators",quietly=TRUE) | |
| # If not specified, uses all cores available to the machine | |
| if(is.null(n.cores)) {n.cores <- parallel::detectCores()} | |
| # Usually a good idea to leave 1 core free for the OS to use | |
| doParallel::registerDoParallel(cores=n.cores-1) | |
| # Defines basic formulation for the model | |
| if(is.null(formula)){ | |
| formula=y~treatment+factor(time)+(1|cluster) | |
| if(design=="cohort") {formula <- update.formula(formula,.~.+(1|id))} | |
| # Now updates the formula so it's given in INLA notation if method==INLA | |
| if (method=="INLA" || method=="inla") { | |
| # Checks the the package INLA is installed | |
| if (!isTRUE(requireNamespace("INLA", quietly = TRUE))) { | |
| stop("You need to install the packages 'INLA'. Please run in your R terminal:\n install.packages('INLA', repos='http://www.math.ntnu.no/inla/R/stable')") | |
| } else { | |
| if (!is.element("INLA", (.packages()))) { | |
| attachNamespace("INLA") | |
| } | |
| } | |
| # This substitutes the notation '(1 | var)' to the notation 'f(var)'. Notice that INLA will assume that | |
| # this actually means 'f(var,model="iid")' --- if specific prior are required, they need to be passed | |
| # directly within the formula (which can't be left as NULL), using INLA notation! | |
| formula <- as.formula(gsub("\\(1 \\| ","f(",deparse(formula))) | |
| } | |
| } | |
| # Defines the name of the treatment variable (for which the main analysis is performed) | |
| if(exists("treat.name",where=exArgs)) {treatment <- exArgs$treat.name} else {treatment <- "treatment"} | |
| res <- list() | |
| tic <- proc.time() | |
| # For user-specified data generating processes | |
| if(exists("data",where=exArgs)) { | |
| func <- exArgs$data | |
| if(exists("inpts",where=exArgs)) {inpts=exArgs$inpts} else {inpts=list()} | |
| if (method=="INLA" || method=="inla") { | |
| # INLA-specific options --- can be used to set priors & stuff | |
| # 'control.fixed' can be used to set the priors for the fixed effects in the model | |
| if(exists("control.fixed",where=exArgs)) { | |
| control.fixed <- exArgs$control.fixed | |
| } else { | |
| control.fixed <- INLA::inla.set.control.fixed.default() | |
| # prior mean = 0 for *all* fixed effects | |
| # prior var = 1000 for *all* fixed effects | |
| # prior mean = 0 for the intercept | |
| # prior prec -> 0 for the intercept | |
| } | |
| # 'offset' can be used with Poisson model. This needs to be a string with the name of the relevant variable | |
| if(exists("offset",where=exArgs)) {offset <- exArgs$offset} else {offset <- NULL} | |
| # 'Ntrials' is the Binomial denominator. This needs to be a string with the name of the relevant variable | |
| if(exists("Ntrials",where=exArgs)) {Ntrials <- exArgs$Ntrials} else {Ntrials <- NULL} | |
| res <- foreach::foreach(iterators::icount(n.sims), .combine=cbind, .packages="INLA", | |
| .export=c("make.swt","sw.design.mat")) %dopar% { | |
| fake.data <- do.call(what=func,args=inpts) | |
| inla.args <- list(formula,data=fake.data,family=family,control.fixed=control.fixed) | |
| if (!is.null(offset)) {inla.args$offset <- fake.data[[offset]]} | |
| if (!is.null(Ntrials)) {inla.args$Ntrials <- fake.data[[Ntrials]]} | |
| m <- do.call(INLA::inla,args=inla.args) | |
| theta <- m$summary.fixed[treatment,1] | |
| sd.theta <- m$summary.fixed[treatment,2] | |
| ## This assumes that alpha=0.05 and so can consider the 95% CrI | |
| ci.fixed <- m$summary.fixed[treatment,c(3,5)] | |
| lower.lim.check <- ci.fixed[,1]>0 | |
| upper.lim.check <- ci.fixed[,2]<0 | |
| ## Needs to simulate values from the posterior of theta and then get the quantiles if alpha neq 0.05 | |
| signif <- as.numeric(((sign(theta)==1 & lower.lim.check==TRUE) | (sign(theta)==-1 & upper.lim.check==TRUE))) | |
| tval <- theta / sd.theta | |
| pvalue <- 2*pnorm(abs(tval), lower.tail = FALSE) | |
| # Random effects sds | |
| mat <- INLA::inla.contrib.sd(m)$hyper | |
| rnd.eff.sd <- as.matrix(mat[,1]) | |
| names(rnd.eff.sd) <- rownames(mat) | |
| list(power=signif,theta=theta,sd.theta=sd.theta, | |
| rnd.eff.sd=rnd.eff.sd,method=method,pvalue=pvalue) | |
| } | |
| } | |
| if (method=="lme") { | |
| # Each 10% (default) of the iterations are stored in the results list, labelled res_1, res_2, etc. | |
| res <- foreach::foreach(iterators::icount(n.sims), .combine=cbind, .packages="lme4", | |
| .export=c("make.swt","sw.design.mat")) %dopar% { | |
| fake.data <- do.call(what=func,args=inpts) | |
| # If the formula contains random effects, run lmer | |
| check.random.effect <- !is.null(findbars(formula)) | |
| if(check.random.effect==TRUE) { | |
| if(family=="gaussian") { | |
| m <- lme4::lmer(formula, data=fake.data) | |
| } else { | |
| m <- lme4::glmer(formula, data=fake.data,family=family) | |
| } | |
| which.treat <- which(names(fixef(m))==treatment) | |
| theta <- lme4::fixef(m)[which.treat] | |
| sd.theta <- summary(m)$coefficients[which.treat,2] | |
| Vcov <- vcov(m, useScale = FALSE) | |
| se <- sqrt(diag(Vcov)) | |
| betas <- lme4::fixef(m) | |
| tval <- betas / se | |
| ### NB: issue with p-values in random effect models (cannot determine df easily) | |
| ### An alternative (as recommended by Bates et al) is to use CIs instead of p-values | |
| ci.fixed <- confint(m,names(fixef(m)),method="Wald",level=1-sig.level) | |
| lower.lim.check <- ci.fixed[,1]>0 | |
| upper.lim.check <- ci.fixed[,2]<0 | |
| signif <- as.numeric(((sign(betas)==1 & lower.lim.check==TRUE) | (sign(betas)==-1 & upper.lim.check==TRUE))[2]) | |
| ### This estimates the p-value based on Normal approximation --- may not be too correct... | |
| ### See: http://mindingthebrain.blogspot.co.uk/2014/02/three-ways-to-get-parameter-specific-p.html | |
| pval <- 2*pnorm(abs(tval), lower.tail = FALSE) | |
| pvalue <- pval[which.treat] | |
| ###signif <- pvalue < alpha | |
| VC <- as.data.frame(lme4::VarCorr(m)) | |
| rnd.eff.sd <- VC[,which(colnames(VC)=="sdcor")] | |
| # Finds the rows that needs to be reported | |
| to.report <- c(which(is.na(VC[,3]==T), which(VC[,1]=="Residual"))) | |
| rnd.eff.sd <- VC[to.report,which(colnames(VC)=="sdcor")] | |
| names(rnd.eff.sd)[which(is.na(VC[to.report,3])==T)] <- paste(VC[to.report,"grp"],VC[to.report,"var1"]) | |
| names(rnd.eff.sd) <- gsub(" NA","",names(rnd.eff.sd)) | |
| if(family=="gaussian") {method <- "lmer"} else {method <- "glmer"} | |
| } | |
| # If it doesn't then do lm | |
| if (!check.random.effect) { | |
| if (family=="gaussian") { | |
| m <- lm(formula,data=fake.data) | |
| } else { | |
| m <- glm(formula, data=fake.data,family=family) | |
| } | |
| which.treat <- which(names(m$coefficients)==treatment) | |
| theta <- m$coefficients[which.treat] | |
| sd.theta <- summary(m)$coefficients[which.treat,2] | |
| se <- summary(m)$coefficients[,2] | |
| betas <- summary(m)$coefficients[,1] | |
| tval <- summary(m)$coefficients[,3] | |
| df <- summary(m)$df[2] | |
| pval <- summary(m)$coefficients[,4] | |
| pvalue <- pval[which.treat] | |
| signif <- pvalue < sig.level | |
| if(family=="gaussian") {method <- "lm"} else {method <- "glm"} | |
| rnd.eff.sd=NULL | |
| } | |
| list(power=signif,theta=theta,sd.theta=sd.theta, | |
| rnd.eff.sd=rnd.eff.sd,method=method,pvalue=pvalue) | |
| } | |
| } | |
| } | |
| # For standard SWT data generating processes (uses make.swt) | |
| if(!exists("data",where=exArgs)) { | |
| if(exists("X",where=exArgs)) { | |
| X=exArgs$X | |
| row.names(X) <- sample(1:I,I) | |
| colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| } else { | |
| X=NULL | |
| } | |
| if (method=="INLA" || method=="inla") { | |
| # INLA-specific options --- can be used to set priors & stuff | |
| # 'control.fixed' can be used to set the priors for the fixed effects in the model | |
| if(exists("control.fixed",where=exArgs)) { | |
| control.fixed <- exArgs$control.fixed | |
| } else { | |
| control.fixed <- INLA::inla.set.control.fixed.default() | |
| # prior mean = 0 for *all* fixed effects | |
| # prior var = 1000 for *all* fixed effects | |
| # prior mean = 0 for the intercept | |
| # prior prec -> 0 for the intercept | |
| } | |
| # 'offset' can be used with Poisson model. This needs to be a string with the name of the relevant variable | |
| if(exists("offset",where=exArgs)) {offset <- exArgs$offset} else {offset <- NULL} | |
| # 'Ntrials' is the Binomial denominator. This needs to be a string with the name of the relevant variable | |
| if(exists("Ntrials",where=exArgs)) {Ntrials <- exArgs$Ntrials} else {Ntrials <- NULL} | |
| res <- foreach::foreach(iterators::icount(n.sims), .combine=cbind, .packages="INLA", | |
| .export=c("make.swt","sw.design.mat")) %dopar% { | |
| fake.data <- fake.data <- make.swt(I=I,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt, | |
| b.time=b.time,sigma.y=sigma.y,sigma.e=sigma.e, | |
| rho=rho,sigma.a=sigma.a,rho.ind=rho.ind,sigma.v=sigma.v, | |
| X=X,family=family,natural.scale=natural.scale) | |
| inla.args <- list(formula,data=fake.data,family=family,control.fixed=control.fixed) | |
| if (!is.null(offset)) {inla.args$offset <- fake.data[[offset]]} | |
| if (!is.null(Ntrials)) {inla.args$Ntrials <- fake.data[[Ntrials]]} | |
| m <- do.call(INLA::inla,args=inla.args) | |
| theta <- m$summary.fixed[treatment,1] | |
| sd.theta <- m$summary.fixed[treatment,2] | |
| ## This assumes that alpha=0.05 and so can consider the 95% CrI | |
| ci.fixed <- m$summary.fixed[treatment,c(3,5)] | |
| lower.lim.check <- ci.fixed[,1]>0 | |
| upper.lim.check <- ci.fixed[,2]<0 | |
| ## Needs to simulate values from the posterior of theta and then get the quantiles if alpha neq 0.05 | |
| signif <- as.numeric(((sign(theta)==1 & lower.lim.check==TRUE) | (sign(theta)==-1 & upper.lim.check==TRUE))) | |
| tval <- theta / sd.theta | |
| pvalue <- 2*pnorm(abs(tval), lower.tail = FALSE) | |
| # Random effects sds | |
| mat <- INLA::inla.contrib.sd(m)$hyper | |
| rnd.eff.sd <- as.matrix(mat[,1]) | |
| names(rnd.eff.sd) <- rownames(mat) | |
| list(power=signif,theta=theta,sd.theta=sd.theta, | |
| rnd.eff.sd=rnd.eff.sd,method=method,pvalue=pvalue) | |
| } | |
| } | |
| if (method=="lme") { | |
| res <- foreach::foreach(iterators::icount(n.sims), .combine=cbind, .packages="lme4", | |
| .export=c("make.swt","sw.design.mat")) %dopar% { | |
| fake.data <- make.swt(I=I,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt, | |
| b.time=b.time,sigma.y=sigma.y,sigma.e=sigma.e, | |
| rho=rho,sigma.a=sigma.a,rho.ind=rho.ind,sigma.v=sigma.v, | |
| X=X,family=family,natural.scale=natural.scale) | |
| # If the formula contains random effects, run lmer | |
| check.random.effect <- !is.null(findbars(formula)) | |
| if(check.random.effect==TRUE) { | |
| if(family=="gaussian") { | |
| m <- lme4::lmer(formula, data=fake.data) | |
| } else { | |
| m <- lme4::glmer(formula, data=fake.data,family=family) | |
| } | |
| which.treat <- which(names(fixef(m))==treatment) | |
| theta <- lme4::fixef(m)[which.treat] | |
| sd.theta <- summary(m)$coefficients[which.treat,2] | |
| Vcov <- vcov(m, useScale = FALSE) | |
| se <- sqrt(diag(Vcov)) | |
| betas <- lme4::fixef(m) | |
| tval <- betas / se | |
| ### NB: issue with p-values in random effect models (cannot determine df easily) | |
| ### An alternative (as recommended by Bates et al) is to use CIs instead of p-values | |
| ci.fixed <- confint(m,names(fixef(m)),method="Wald",level=1-sig.level) | |
| lower.lim.check <- ci.fixed[,1]>0 | |
| upper.lim.check <- ci.fixed[,2]<0 | |
| signif <- as.numeric(((sign(betas)==1 & lower.lim.check==TRUE) | (sign(betas)==-1 & upper.lim.check==TRUE))[2]) | |
| ### This estimates the p-value based on Normal approximation --- may not be too correct... | |
| ### See: http://mindingthebrain.blogspot.co.uk/2014/02/three-ways-to-get-parameter-specific-p.html | |
| pval <- 2*pnorm(abs(tval), lower.tail = FALSE) | |
| pvalue <- pval[which.treat] | |
| ###signif <- pvalue < alpha | |
| VC <- as.data.frame(lme4::VarCorr(m)) | |
| rnd.eff.sd <- VC[,which(colnames(VC)=="sdcor")] | |
| # Finds the rows that needs to be reported | |
| to.report <- c(which(is.na(VC[,3]==T), which(VC[,1]=="Residual"))) | |
| rnd.eff.sd <- VC[to.report,which(colnames(VC)=="sdcor")] | |
| names(rnd.eff.sd)[which(is.na(VC[to.report,3])==T)] <- paste(VC[to.report,"grp"],VC[to.report,"var1"]) | |
| names(rnd.eff.sd) <- gsub(" NA","",names(rnd.eff.sd)) | |
| if(family=="gaussian") {method <- "lmer"} else {method <- "glmer"} | |
| } | |
| # If it doesn't then do lm | |
| if (!check.random.effect) { | |
| if (family=="gaussian") { | |
| m <- lm(formula,data=fake.data) | |
| } else { | |
| m <- glm(formula, data=fake.data,family=family) | |
| } | |
| which.treat <- which(names(m$coefficients)==treatment) | |
| theta <- m$coefficients[which.treat] | |
| sd.theta <- summary(m)$coefficients[which.treat,2] | |
| se <- summary(m)$coefficients[,2] | |
| betas <- summary(m)$coefficients[,1] | |
| tval <- summary(m)$coefficients[,3] | |
| df <- summary(m)$df[2] | |
| pval <- summary(m)$coefficients[,4] | |
| pvalue <- pval[which.treat] | |
| signif <- pvalue < sig.level | |
| if(family=="gaussian") {method <- "lm"} else {method <- "glm"} | |
| rnd.eff.sd=NULL | |
| } | |
| list(power=signif,theta=theta,sd.theta=sd.theta, | |
| rnd.eff.sd=rnd.eff.sd,method=method,pvalue=pvalue) | |
| } | |
| } | |
| } | |
| toc <- proc.time(); time2run <- (toc-tic)[3]; names(time2run) <- "Time to run (secs)" | |
| theta=mean(unlist(res[2,]),na.rm=T); names(theta)=NULL | |
| sd.theta <- mean(unlist(res[3,]),na.rm=T); names(sd.theta)=NULL | |
| theta=c(theta,sd.theta); names(theta)=c("Estimate","Standard Error") | |
| pvalue=unlist(res[6,]); names(pvalue)=NULL | |
| method=unlist(res[5,1]); names(method)=NULL | |
| power=mean(unlist(res[1,]),na.rm=T) | |
| ci <- power+c(-qnorm(1-sig.level/2),qnorm(1-sig.level/2))*sqrt(var(pvalue<sig.level,na.rm=T)/n.sims) | |
| ci <- power+c(-qnorm(1-sig.level/2),qnorm(1-sig.level/2))*sqrt(var(unlist(res[1,]),na.rm=T)/n.sims) | |
| if(ci[1]<0) {ci[1] <- 0}; if(ci[2]>1) {ci[2] <- 1} # Constrains ci in [0;1] | |
| if (!exists("data",where=exArgs)) { | |
| setting <- list(n.clusters=I,n.time.points=J,avg.cluster.size=K, | |
| design=design,formula=formula,method=method,family=family) | |
| } else {setting=list(formula=formula,method=method,family=family)} | |
| closeAllConnections() | |
| # If the option 'plot=T' creates a moving average plot of the power | |
| if (plot==TRUE) { | |
| # Defines some cutoff points in the simulations to plot the moving average | |
| percs <- seq(.1,1,by=.1) | |
| # If the first %tile of the simulations is 0, then sets to 1 | |
| steps <- pmax(round(n.sims*percs),1) | |
| # Computes the power moving average | |
| mov_av <- numeric() | |
| for (s in 1:length(steps)) { | |
| mov_av[s] <- mean(unlist(res[1,c(1:steps[s])])) | |
| } | |
| # plot the moving average of power in a graph | |
| plot(mov_av, type="o", ylim=c(0,1), ylab="Power", xlab="Percentage of Iterations", xaxt = 'n', main="Moving Average of Power") | |
| axis(1, at=1:10, labels = c(paste((1:10)*10, "%", sep=""))) | |
| #### NEED TO CHECK THIS | |
| abline(h=power,col="red",lwd=2) | |
| abline(h=ci[1],col="red",lwd=2,lty=2) | |
| abline(h=ci[2],col="red",lwd=2,lty=2) | |
| } | |
| if (!is.null(unlist(res[4,]))) { | |
| rnd.eff.sd=apply(do.call(cbind.data.frame,res[4,]),1,mean,na.rm=T) | |
| } else { | |
| rnd.eff.sd=NULL | |
| } | |
| list(power=power,time2run=time2run,ci.power=ci,theta=theta, | |
| rnd.eff.sd=rnd.eff.sd,setting=setting) # pvalue=pvalue, | |
| } | |
| # | |
| ## OLD VERSION WHICH DOESN'T WORK ON WINDOWS | |
| # sim.swt <- function (I,J,H=NULL,K,design="cross-sec",mu=0,b.trt,b.time=NULL, | |
| # sigma.y=NULL,sigma.e=NULL,rho=NULL,sigma.a=NULL, | |
| # rho.ind=NULL,sigma.v=NULL,n.sims=1000,formula=NULL, | |
| # alpha=0.05,n.cores=NULL,...) { | |
| # | |
| # ## Power analysis for repeated cohort design (cross-sectional) - continuous outcome | |
| # ## The optional arguments include the specification of a user-defined function data | |
| # ## which is used to simulate the data, according to any design the user has. | |
| # ## So including data=data, where data=function(){...} specifies the simulation | |
| # ## of a suitable dataset allows the user to select *any* kind of model for data | |
| # ## generation. If data has inputs, then these must be specified as a list | |
| # ## inpts, to be passed as an extra argument to sim.sw.cont. | |
| # ## In addition to this, the user should then select a suitable formula | |
| # ## which is in line with the model used for the simulation of the virtual trial. | |
| # ## Also, the name of the "treatment" variable can be specified as a string in the | |
| # ## extra argument treat.name. If not present then it is assumed that the name of the | |
| # ## treatment variable is, unsurprisingly, "treatment" | |
| # | |
| # exArgs <- list(...) | |
| # | |
| # ## Uses parallel computation to speed up task | |
| # # required <- c("lme4","foreach","doParallel","parallel","iterators") | |
| # # for (i in 1:length(required)) { | |
| # # txt1 <- paste0("if(!isTRUE(requireNamespace('",required[i],"',quietly=TRUE))) {") | |
| # # txt2 <- paste0("stop('You need to install the package ",required[i],". Please run in your R terminal: | |
| # # install.packages(",required[i],")')") | |
| # # txt3 <- paste0("}") | |
| # # txt <- paste(txt1,txt2,txt3,sep="\n",collapse=" ") | |
| # # eval(parse(text=txt)) | |
| # # } | |
| # requireNamespace("lme4",quietly=TRUE) | |
| # requireNamespace("foreach",quietly=TRUE) | |
| # requireNamespace("doParallel",quietly=TRUE) | |
| # requireNamespace("parallel",quietly=TRUE) | |
| # requireNamespace("iterators",quietly=TRUE) | |
| # | |
| # # If not specified, uses all cores available to the machine | |
| # if(is.null(n.cores)) {n.cores <- parallel::detectCores()} | |
| # # Usually a good idea to leave 1 core free for the OS to use | |
| # doParallel::registerDoParallel(cores=n.cores-1) | |
| # | |
| # # Defines basic formulation for the model | |
| # if(is.null(formula)){ | |
| # formula=y~treatment+factor(time)+(1|cluster) | |
| # if(design=="cohort") {formula <- update.formula(formula,.~.+(1|id))} | |
| # } | |
| # # Defines the name of the treatment variable (for which the main analysis is performed) | |
| # if(exists("treat.name",where=exArgs)) {treatment <- exArgs$treat.name} else {treatment <- "treatment"} | |
| # | |
| # # Simulates the datasets & analysis | |
| # tic <- proc.time() | |
| # ### NB Needs to use the .export argument to the function foreach, to include | |
| # ### variables that are passed outside of the scope (eg b.trt) --- need to figure | |
| # ### this out a bit better. But basically, linux/mac will make stuff available, | |
| # ### while windows won't and so throw an error because some variable that is input | |
| # ### the function in foreach is not loaded in memory... | |
| # res <- foreach::foreach(iterators::icount(n.sims), .combine=cbind, .packages="lme4", | |
| # .export=c("make.swt","sw.design.mat")) %dopar% { | |
| # if(exists("data",where=exArgs)) { | |
| # func <- exArgs$data | |
| # if(exists("inpts",where=exArgs)) {inpts=exArgs$inpts} else {inpts=list()} | |
| # fake.data <- do.call(what=func,args=inpts) | |
| # } | |
| # if(!exists("data",where=exArgs)) { | |
| # if(exists("X",where=exArgs)) { | |
| # X=exArgs$X | |
| # row.names(X) <- sample(1:I,I) | |
| # colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| # } else { | |
| # X=NULL | |
| # } | |
| # fake.data <- make.swt(I=I,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt, | |
| # b.time=b.time,sigma.y=sigma.y,sigma.e=sigma.e, | |
| # rho=rho,sigma.a=sigma.a,rho.ind=rho.ind, | |
| # sigma.v=sigma.v,X=X) | |
| # } | |
| # | |
| # # If the formula contains random effects, run lmer | |
| # check.random.effect <- !is.null(findbars(formula)) | |
| # if(check.random.effect==TRUE) { | |
| # m <- lme4::lmer(formula, data=fake.data) | |
| # which.treat <- which(names(fixef(m))==treatment) | |
| # theta <- lme4::fixef(m)[which.treat] | |
| # sd.theta <- summary(m)$coefficients[which.treat,2] | |
| # Vcov <- vcov(m, useScale = FALSE) | |
| # se <- sqrt(diag(Vcov)) | |
| # betas <- lme4::fixef(m) | |
| # tval <- betas / se | |
| # ### NB: issue with p-values in random effect models (cannot determine df easily) | |
| # ### An alternative (as recommended by Bates et al) is to use CIs instead of p-values | |
| # ci.fixed <- confint(m,names(fixef(m)),method="Wald",level=1-alpha) | |
| # lower.lim.check <- ci.fixed[,1]>0 | |
| # upper.lim.check <- ci.fixed[,2]<0 | |
| # signif <- as.numeric(((sign(betas)==1 & lower.lim.check==TRUE) | (sign(betas)==-1 & upper.lim.check==TRUE))[2]) | |
| # ### This estimates the p-value based on Normal approximation --- may not be too correct... | |
| # ### See: http://mindingthebrain.blogspot.co.uk/2014/02/three-ways-to-get-parameter-specific-p.html | |
| # pval <- 2*pnorm(abs(tval), lower.tail = FALSE) | |
| # pvalue <- pval[which.treat] | |
| # ###signif <- pvalue < alpha | |
| # VC <- as.data.frame(lme4::VarCorr(m)) | |
| # rnd.eff.sd <- VC[,which(colnames(VC)=="sdcor")] | |
| # # Finds the rows that needs to be reported | |
| # to.report <- c(which(is.na(VC[,3]==T), which(VC[,1]=="Residual"))) | |
| # rnd.eff.sd <- VC[to.report,which(colnames(VC)=="sdcor")] | |
| # names(rnd.eff.sd)[which(is.na(VC[to.report,3])==T)] <- paste(VC[to.report,"grp"],VC[to.report,"var1"]) | |
| # names(rnd.eff.sd) <- gsub(" NA","",names(rnd.eff.sd)) | |
| # method <- "lmer" | |
| # } | |
| # # If it doesn't then do lm | |
| # if (!check.random.effect) { | |
| # m <- lm(formula,data=fake.data) | |
| # which.treat <- which(names(m$coefficients)==treatment) | |
| # theta <- m$coefficients[which.treat] | |
| # sd.theta <- summary(m)$coefficients[which.treat,2] | |
| # se <- summary(m)$coefficients[,2] | |
| # betas <- summary(m)$coefficients[,1] | |
| # tval <- summary(m)$coefficients[,3] | |
| # df <- summary(m)$df[2] | |
| # ## pval <- 2*pt(abs(tval),df=df,lower.tail = FALSE) | |
| # pval <- summary(m)$coefficients[,4] | |
| # pvalue <- pval[which.treat] | |
| # signif <- pvalue < alpha | |
| # method <- "lm" | |
| # rnd.eff.sd=NULL | |
| # } | |
| # list(power=signif,theta=theta,sd.theta=sd.theta,rnd.eff.sd=rnd.eff.sd,method=method,pvalue=pvalue) | |
| # } | |
| # toc <- proc.time(); time2run <- (toc-tic)[3]; names(time2run) <- "Time to run (secs)" | |
| # theta=mean(unlist(res[2,]),na.rm=T); names(theta)=NULL | |
| # sd.theta <- mean(unlist(res[3,]),na.rm=T); names(sd.theta)=NULL | |
| # theta=c(theta,sd.theta); names(theta)=c("Estimate","Standard Error") | |
| # pvalue=unlist(res[6,]); names(pvalue)=NULL | |
| # power=mean(unlist(res[1,]),na.rm=T) | |
| # ci <- power+c(-qnorm(1-alpha/2),qnorm(1-alpha/2))*sqrt(var(pvalue<alpha,na.rm=T)/n.sims) | |
| # if(ci[1]<0) {ci[1] <- 0}; if(ci[2]>1) {ci[2] <- 1} # Constrains ci in [0;1] | |
| # if (!exists("data",where=exArgs)) { | |
| # setting <- list(n.clusters=I,n.time.points=J,avg.cluster.size=K, | |
| # design=design,formula=formula) | |
| # } else {setting=list(formula=formula)} | |
| # | |
| # list(power=power,time2run=time2run,ci.power=ci,theta=theta, | |
| # rnd.eff.sd=unlist(res[4,1]),setting=setting) # pvalue=pvalue, | |
| # } | |
| HH.binary <- function(p1,OR,I,J,K,rho=0,sig.level=0.05,which.var="within",X=NULL) { | |
| # HH sample size calculations for binary outcome | |
| if(is.null(X)) { | |
| X <- sw.design.mat(I=I,J=J,H=NULL) | |
| } else { | |
| row.names(X) <- sample(1:I,I) | |
| colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| } | |
| U <- sum(X) | |
| W <- sum(apply(X,2,sum)^2) | |
| V <- sum(apply(X,1,sum)^2) | |
| # Data | |
| p2 <- OR*(p1/(1-p1))/(1+OR*(p1/(1-p1))) | |
| theta=abs(p1-p2) | |
| if (which.var=="within") { | |
| sigma.e=sqrt((p1*(1-p1)+p2*(1-p2))/2) | |
| ####sigma.e=sqrt((p1*(1-p1))) # Consistent with Hemming - stata | |
| sigma.a <- sqrt(rho*sigma.e^2/(1-rho)) | |
| sigma.y <- sqrt(sigma.e^2+sigma.a^2) | |
| } | |
| if (which.var=="total") { | |
| sigma.y=sqrt((p1*(1-p1)+p2*(1-p2))/2) | |
| ####sigma.y=sqrt((p1*(1-p1))) | |
| sigma.a <- sqrt(sigma.y^2*rho) | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2) | |
| } | |
| sigma <- sqrt(sigma.e^2/K) | |
| # Power calculations | |
| v <- (I*sigma^2*(sigma^2+((J+1)*sigma.a^2)))/((I*U-W)*sigma^2+(U^2+I*(J+1)*U-(J+1)*W-I*V)*sigma.a^2) | |
| power <- pnorm(theta/sqrt(v)-qnorm(1-sig.level/2)) | |
| setting <- list(n.clusters=I,n.time.points=J,avg.cluster.size=K,design.matrix=X) | |
| list(power=power,p1=p1,p2=p2,sigma.y=sigma.y,sigma.e=sigma.e,sigma.a=sigma.a,setting=setting) | |
| } | |
| HH.normal <- function(mu,b.trt,sigma,I,J,K,rho=0,sig.level=0.05,which.var="within",X=NULL) { | |
| # HH sample size calculations for continuous (normal) outcome | |
| # Stepped wedge design matrix | |
| if(is.null(X)) { | |
| X <- sw.design.mat(I=I,J=J,H=NULL) | |
| } else { | |
| row.names(X) <- sample(1:I,I) | |
| colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| } | |
| U <- sum(X) | |
| W <- sum(apply(X,2,sum)^2) | |
| V <- sum(apply(X,1,sum)^2) | |
| # Data | |
| mu1 <- mu+b.trt | |
| theta <- abs(mu1-mu) | |
| # Assumes that the input sigma.y is in fact the within cluster sd | |
| if (which.var=="within") { | |
| sigma.e <- sigma | |
| sigma.a <- sqrt(rho*sigma.e^2/(1-rho)) | |
| sigma.y <- sqrt(sigma.e^2+sigma.a^2) | |
| } | |
| # Assumes that the input sigma.y is total sd | |
| if (which.var=="total") { | |
| sigma.a <- sqrt(sigma^2*rho) | |
| sigma.e <- sqrt(sigma^2-sigma.a^2) | |
| sigma.y <- sigma | |
| } | |
| sigma <- sqrt(sigma.e^2/K) | |
| # Power calculations | |
| v <- (I*sigma^2*(sigma^2+((J+1)*sigma.a^2)))/((I*U-W)*sigma^2+(U^2+I*(J+1)*U-(J+1)*W-I*V)*sigma.a^2) | |
| power <- pnorm(theta/sqrt(v)-qnorm(1-sig.level/2)) | |
| setting <- list(n.clusters=I,n.time.points=J,avg.cluster.size=K,design.matrix=X) | |
| list(power=power,sigma.y=sigma.y,sigma.e=sigma.e,sigma.a=sigma.a,setting=setting) | |
| } | |
| HH.count <- function(lambda1,RR,I,J,K,rho=0,sig.level=0.05,which.var="within",X=NULL) { | |
| # HH sample size calculations for continuous (normal) outcome | |
| # Stepped wedge design matrix | |
| if(is.null(X)) { | |
| X <- sw.design.mat(I=I,J=J,H=NULL) | |
| } else { | |
| row.names(X) <- sample(1:I,I) | |
| colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| } | |
| U <- sum(X) | |
| W <- sum(apply(X,2,sum)^2) | |
| V <- sum(apply(X,1,sum)^2) | |
| # Data | |
| lambda2 <- RR*lambda1 | |
| theta <- abs(lambda1-lambda2) | |
| if (which.var=="within") { | |
| sigma.e=(sqrt(lambda1)+sqrt(lambda2))/2 # Consistent with Hemming - stata | |
| sigma.a <- sqrt(rho*sigma.e^2/(1-rho)) | |
| sigma.y <- sqrt(sigma.e^2+sigma.a^2) | |
| } | |
| if (which.var=="total") { | |
| sigma.y <- (sqrt(lambda1)+sqrt(lambda2))/2 # Consisten with Hemming - stata | |
| sigma.a <- sqrt(sigma.y^2*rho) | |
| sigma.e <- sqrt(sigma.y^2-sigma.a^2) | |
| } | |
| sigma <- sqrt(sigma.e^2/K) | |
| # Power calculations | |
| v <- (I*sigma^2*(sigma^2+((J+1)*sigma.a^2)))/((I*U-W)*sigma^2+(U^2+I*(J+1)*U-(J+1)*W-I*V)*sigma.a^2) | |
| power <- pnorm(theta/sqrt(v)-qnorm(1-sig.level/2)) | |
| setting <- list(n.clusters=I,n.time.points=J,avg.cluster.size=K,design.matrix=X) | |
| list(power=power,lambda1=lambda1,lambda2=lambda2,sigma.y=sigma.y,sigma.e=sigma.e,sigma.a=sigma.a,setting=setting) | |
| } | |
| sw.design.mat <- function(I,J,H=NULL) { | |
| ## Creates the design matrix for a stepped wedge design | |
| ## Checks to see that data are consistent with SW design | |
| if(sum(sapply(list(I,J,H), is.null)) != 1) { | |
| warning("exactly one of 'I', 'J' and 'H' must be NULL") | |
| } | |
| if (is.null(I)) { | |
| I <- H*J | |
| } | |
| if (is.null(J)) { | |
| J <- I/H | |
| } | |
| if (is.null(H)) { | |
| H <- I/J | |
| } | |
| # Stepped wedge design matrix | |
| X <- matrix(0,I,(J+1)) | |
| for (i in 2:(J+1)) { | |
| X[1:((i-1)*H),i] <- 1 | |
| } | |
| row.names(X) <- sample(1:I,I) | |
| colnames(X) <- c("Baseline",paste0("Time ",1:J)) | |
| return(X) | |
| } | |
| DE.woert <- function(outcome="cont",input,K,J,B=1,T=1,rho,sig.level=0.05,power=.8) { | |
| # Computes the power for a SWT using the corrected form of the Woertman Design Effect | |
| # outcome = a string (default "cont", possible values are "bin" or "count") | |
| # input = a list containing the arguments | |
| # - continuous outcome: | |
| # 1) delta (treatment effect) | |
| # 2) sd (standard deviation) | |
| # - binary outcome: | |
| # 1) p1 (baseline probability of outcome) | |
| # 2) either p2 (treatment probability of outcome), or OR (treatment effect as OR) | |
| # - count outcome: | |
| # 1) r1 (baseline rate of outcome) | |
| # 2) either r2 (treatment rate of outcome), or RR (treatment effect as RR) | |
| # K = average cluster size | |
| # J = number of time points (excluding baseline) | |
| # B = number of baseline measurement times | |
| # T = number of measurement times during each crossover | |
| # rho = ICC | |
| # alpha = significance level (default = 0.05) | |
| # beta = type 2 error (default = 0.2, implying power = 0.8) | |
| if(outcome=="cont") { | |
| n.rct <- 2*ceiling(power.t.test(delta=input$delta,sd=input$sd, | |
| sig.level=sig.level,power=power)$n) | |
| } | |
| if(outcome=="bin") { | |
| if(!is.null(input$OR) & is.null(input$p2)) { | |
| p1 <- input$p1 | |
| p2 <- input$OR*(input$p1/(1-input$p1))/(1+input$OR*(input$p1/(1-input$p1))) | |
| OR <- input$OR | |
| } | |
| n.rct <- 2*ceiling(power.prop.test(p1=p1,p2=p2,sig.level=sig.level,power=power)$n) | |
| } | |
| if(outcome=="count") { | |
| if(!is.null(input$RR) & is.null(input$r2)) { | |
| r1 <- input$r1 | |
| RR <- input$RR | |
| r2 <- input$r1*input$RR | |
| } | |
| if(is.null(input$RR) & !is.null(input$r2)) { | |
| r1 <- input$r1 | |
| r2 <- input$r2 | |
| RR <- input$r2/input$r1 | |
| } | |
| n.rct <- 2*ceiling((((r1*(1+RR))*(qnorm((1-sig.level/2))+(qnorm(power)))^2)/(r1-r1*RR)^2)) | |
| } | |
| DE.crt <- 1+(K*(J+1)-1)*rho | |
| n.cls <- ceiling((n.rct*DE.crt)/(K*(J+1))) | |
| CF <- (1+rho*(J*T*K+B*K-1))/(1+rho*(.5*J*T*K+B*K-1))*(3*(1-rho))/(2*T*(J-(1/J))) | |
| DE.woert <- (B+J*T)*CF | |
| n.woert <- n.rct*DE.woert | |
| n.cls.swt <- ceiling(n.woert/(K*(J+1))) | |
| list(n.cls.swt=n.cls.swt,n.pts.swt=n.woert,DE.woert=DE.woert,CF=CF,n.rct=n.rct) | |
| } | |
| cluster.search <- function(target.power=NULL, I=NULL, J=NULL ,H=NULL,K,design="cross-sec",mu=0,b.trt,b.time=NULL, | |
| sigma.y=NULL,sigma.e=NULL,rho=NULL,sigma.a=NULL, | |
| rho.ind=NULL,sigma.v=NULL,n.sims=1000,formula=NULL, | |
| family="gaussian",natural.scale=TRUE,sig.level=0.05,n.cores=NULL,...){ | |
| # Find the optimum I or J from a given range | |
| # Enter a range, e.g. I=c(1,10), to optimise on that parameter | |
| #start clock | |
| tic <- proc.time() | |
| # Determine which parameter needs to be optimised | |
| if(length(I)==2 & length(J)==1){print("Optimising on I ...") | |
| I.lower <- I[1] | |
| I.upper <- I[2] | |
| # Check they haven't entered 0 as a possible cluster size | |
| if(I.upper==0 | I.lower==0){stop("Error: I must be greater than 0.")} | |
| power.storage <- matrix(c(rep(x=0, times=(I.upper - I.lower)+1))) | |
| rownames(power.storage) <- c(I.lower:I.upper) | |
| # Initially check upper and lower limit | |
| test.lower <- sim.power(I=I.lower,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores) | |
| power.lower <- test.lower$power | |
| # Store the lower power limit in the storeage list | |
| power.storage[I.lower - (I.lower-1)] <- power.lower | |
| if(power.lower >= target.power){print("Lower bound of range already above power threshold. Try a smaller lower bound.") | |
| return(I.lower) | |
| break} | |
| test.upper <- sim.power(I=I.upper,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores) | |
| power.upper <- test.upper$power | |
| # Store the upper power limit in the storeage list | |
| power.storage[I.upper - (I.lower-1)] <- power.upper | |
| if(power.upper <= target.power){print("Range does not include optimum. Try a larger upper bound.") | |
| return(I.upper) | |
| break} | |
| # Loop that successively narrows range by half based on power of the midpoint | |
| while(TRUE){ | |
| range <- c(I.lower:I.upper) # Range of I to be checked | |
| midpoint <- ceiling(median(range)) # Middle value - must be an integer | |
| if(length(range) != 2){ | |
| # If power has already been calculated (i.e. is stored in power.storage), use that | |
| if(power.storage[midpoint - (I[1]-1)]!=0){midpoint.power <- power.storage[midpoint+I[1]] | |
| cat("used store value \n") | |
| cat("midpoint is:", midpoint, "range is:", range, "power is:", midpoint.power, "\n") | |
| cat("powers are", power.storage, "\n")} | |
| # Otherwise calculate the power using sim.power | |
| else if(power.storage[midpoint - (I[1]-1)]==0){ | |
| # If the range has more than 2 elements, use the midpoint of the range to discard one half of the range based on the power | |
| # Calculate power for SWT based on midpoint | |
| midpoint.power <- sim.power(I=midpoint,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| power.storage[midpoint - (I[1]-1)] <- midpoint.power | |
| } | |
| # If the power of the midpoint is too low, discard the lower half of the range | |
| if (midpoint.power < target.power) {I.upper <- I.upper | |
| I.lower <- midpoint} | |
| # If the power of the midpoint is high enough, discard the upper half of the range | |
| if (midpoint.power >= target.power) {I.upper <- midpoint | |
| I.lower < I.lower} | |
| } | |
| # If the range has only 2 elements, compare them directly | |
| if(length(range) == 2){ | |
| # Calculate power for SWT based on the smaller value | |
| if(power.storage[I.lower - (I[1]-1)]!=0){lower.power <- power.storage[I.lower - (I[1]-1)]} # Utilise stored value if appropriate | |
| else{lower.power <- sim.power(I=I.lower,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| } | |
| # Calculate power for SWT based on the larger value | |
| if(power.storage[I.upper - (I[1]-1)]!=0){upper.power <- power.storage[I.upper - (I[1]-1)]} # Utilise stored value if appropriate | |
| else{upper.power <- sim.power(I=I.upper,J=J,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| } | |
| #end time counter | |
| toc <- proc.time(); time2run <- (toc-tic)[3]; names(time2run) <- "Time to run (secs)" | |
| # If either has power < target.power, discard | |
| if(lower.power < target.power && upper.power < target.power){ return(0)} | |
| # If upper limit has enough power, but lower limit does not, return the upper limit | |
| if(lower.power < target.power && upper.power >= target.power){ return(list(Optimum_I=I.upper, power=upper.power, time2run=time2run))} | |
| # If both lmits have sufficient power, return the lower limit | |
| if(lower.power >= target.power && upper.power >= target.power){ return(list(Optimum_I=I.lower, power=lower.power, time2run=time2run))} | |
| # Other situations should not be possible | |
| else{return(0) | |
| # Optimum has been found, so break the loop and terminate the function | |
| break} | |
| } | |
| } | |
| } | |
| # Determine which paramteter needs to be optimised | |
| if(length(J)==2 & length(I)==1){print("Optimising on J ...")} | |
| J.lower <- J[1] | |
| J.upper <- J[2] | |
| # Check they haven't entered 0 as a possible number of time points | |
| if(J.upper==0 | J.lower==0){stop("Error: J must be greater than 0.")} | |
| power.storage <- matrix(c(rep(x=0, times=(J.upper - J.lower)+1))) | |
| rownames(power.storage) <- c(J.lower:J.upper) | |
| # Initially check upper and lower limit | |
| test.lower <- sim.power(J=J.lower,I=I,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores) | |
| power.lower <- test.lower$power | |
| # Store the lower power limit in the storeage list | |
| power.storage[J.lower - (J.lower-1)] <- power.lower | |
| if(power.lower >= target.power){print("Lower bound of range already above power threshold. Try a smaller lower bound.") | |
| return(J.lower) | |
| break} | |
| test.upper <- sim.power(J=J.upper,I=I,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores) | |
| power.upper <- test.upper$power | |
| # Store the upper power limit in the storeage list | |
| power.storage[J.upper - (J.lower-1)] <- power.upper | |
| if(power.upper <= target.power){print("Range does not include optimum. Try a larger upper bound.") | |
| return(J.upper) | |
| break} | |
| # Loop that successively narrows range by half based on power of the midpoint | |
| while(TRUE){ | |
| range <- c(J.lower:J.upper) # Range of I to be checked | |
| midpoint <- ceiling(median(range)) # Middle value - must be an integer | |
| if(length(range) != 2){ | |
| # If power has already been calculated (i.e. is stored in power.storage), use that | |
| if(power.storage[midpoint - (J[1]-1)]!=0){midpoint.power <- power.storage[midpoint+J[1]] | |
| cat("used store value \n") | |
| cat("midpoint is:", midpoint, "range is:", range, "power is:", midpoint.power, "\n") | |
| cat("powers are", power.storage, "\n")} | |
| # Otherwise calculate the power using sim.power | |
| else if(power.storage[midpoint - (J[1]-1)]==0){ | |
| # If the range has more than 2 elements, use the midpoint of the range to discard one half of the range based on the power | |
| # Calculate power for SWT based on midpoint | |
| midpoint.power <- sim.power(J=midpoint,I=I,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| power.storage[midpoint - (J[1]-1)] <- midpoint.power | |
| } | |
| # If the power of the midpoint is too low, discard the lower half of the range | |
| if (midpoint.power < target.power) {J.upper <- J.upper | |
| J.lower <- midpoint} | |
| # If the power of the midpoint is high enough, discard the upper half of the range | |
| if (midpoint.power >= target.power) {J.upper <- midpoint | |
| J.lower < J.lower} | |
| } | |
| # If the range has only 2 elements, compare them directly | |
| if(length(range) == 2){ | |
| # Calculate power for SWT based on the smaller value | |
| if(power.storage[J.lower - (J[1]-1)]!=0){lower.power <- power.storage[J.lower - (J[1]-1)]} # Utilise stored value if appropriate | |
| else{lower.power <- sim.power(J=J.lower,I=I,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| } | |
| # Calculate power for SWT based on the larger value | |
| if(power.storage[J.upper - (J[1]-1)]!=0){upper.power <- power.storage[J.upper - (J[1]-1)]} # Utilise stored value if appropriate | |
| else{upper.power <- sim.power(J=J.upper,I=I,H=H,K=K,design=design,mu=mu,b.trt=b.trt,b.time=b.time, | |
| sigma.y=sigma.y,sigma.e=sigma.e,rho=rho,sigma.a=sigma.a, | |
| rho.ind=rho.ind,sigma.v=sigma.v,n.sims=n.sims,formula=formula, | |
| family=family,natural.scale=natural.scale,sig.level=sig.level,n.cores=n.cores)$power | |
| } | |
| #end timer | |
| toc <- proc.time(); time2run <- (toc-tic)[3]; names(time2run) <- "Time to run (secs)" | |
| # If either has power < target.power, discard | |
| if(lower.power < target.power && upper.power < target.power){ return(0)} | |
| # If upper limit has enough power, but lower limit does not, return the upper limit | |
| if(lower.power < target.power && upper.power >= target.power){ return(list(Optimum_J=J.upper, power=upper.power, time2run=time2run))} | |
| # If both lmits have sufficient power, return the lower limit | |
| if(lower.power >= target.power && upper.power >= target.power){ return(list(Optimum_J=J.lower, power=lower.power, time2run=time2run))} | |
| # Other situations should not be possible | |
| else{return(0) | |
| # Optimum has been found, so break the loop and terminate the function | |
| break} | |
| } | |
| } | |
| if(length(I) != 2 & length(J) != 2){stop("Error: exactly one of I or J must be a vector of length 2.")} | |
| #stop clock | |
| proc.time() - tic | |
| } |