From ce49d5a257461dace9b9933d449c72bf14cba859 Mon Sep 17 00:00:00 2001 From: Amr ElRafey Date: Fri, 22 Jul 2016 14:31:47 +0000 Subject: [PATCH] version 0.2.0 --- DESCRIPTION | 10 +- MD5 | 40 ++++--- R/Covariate.R | 278 +++++++++++++++++++++++++++++++++++++++++--- man/clean.Rd | 3 +- man/dispcont.Rd | 3 +- man/dispdisc.Rd | 3 +- man/gerom.Rd | 3 +- man/makeit.Rd | 3 +- man/oddscont.Rd | 3 +- man/oddsdisc.Rd | 3 +- man/senscont.Rd | 54 +++++++++ man/sensdisc.Rd | 53 +++++++++ man/stratacont.Rd | 40 +++---- man/stratadisc.Rd | 37 +++--- man/stratifycont.Rd | 3 +- man/stratifydisc.Rd | 3 +- man/summarycont.Rd | 3 +- man/summarydisc.Rd | 3 +- man/un1.Rd | 3 +- man/un3.Rd | 3 +- man/weightcont.Rd | 3 +- man/weightdisc.Rd | 2 +- 22 files changed, 444 insertions(+), 112 deletions(-) create mode 100644 man/senscont.Rd create mode 100644 man/sensdisc.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 10324e7..cd69d89 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,14 +2,14 @@ Package: StratifiedBalancing Type: Package Title: Performs Stratified Covariate Balancing for Data with Discrete and Continuous Outcome Variables -Version: 0.1.0 -Author: Amr ElRafey +Version: 0.2.0 +Author: Farrokh Alemi Phd , Amr ElRafey Maintainer: Amr ElRafey -Description: Stratified covariate balancing through naturally occurring strata to adjust for confounding and interaction effects. Contains 2 primary functions which perform stratification and return adjusted odds along with naturally occurring strata. +Description: Stratified covariate balancing through naturally occurring strata to adjust for confounding and interaction effects. Contains 4 primary functions which perform stratification, sensitivity analysis and return adjusted odds along with naturally occurring strata. 
License: GPL-2 LazyData: TRUE Depends: plyr NeedsCompilation: no -Packaged: 2016-07-07 22:32:51 UTC; HAP +Packaged: 2016-07-22 00:16:50 UTC; HAP Repository: CRAN -Date/Publication: 2016-07-08 11:48:05 +Date/Publication: 2016-07-22 14:31:47 diff --git a/MD5 b/MD5 index a63dd31..884edac 100644 --- a/MD5 +++ b/MD5 @@ -1,20 +1,22 @@ -ab62446abaac0d16e8186ef857619925 *DESCRIPTION +f5d058e439ae10d62a210519d5947799 *DESCRIPTION 7dc72b6620082056c3fbf1313bc2c8e6 *NAMESPACE -a8fa0967f78d024952d2b6d0c573fd55 *R/Covariate.R -b227f91f1da12a02b617bbe81fcf4feb *man/clean.Rd -a83d1760dd0a77fe486e360811315f16 *man/dispcont.Rd -06ee11aea81713a414ffb88876c5670c *man/dispdisc.Rd -c4c0a145dd5ed79744c0e68bd58e3a5b *man/gerom.Rd -b3847217f272eda54b1db0d57b8b02ba *man/makeit.Rd -dad895121e77b96ea5818a2167b36711 *man/oddscont.Rd -7ddf3d0c600ad805f014059618e75cae *man/oddsdisc.Rd -028c307535a96f62c108b2566e1e2707 *man/stratacont.Rd -21054bb9d3692f96cd1bb1943ae602c9 *man/stratadisc.Rd -ce23c871728dbdd0624b7baaf83dc75f *man/stratifycont.Rd -8400ff495e4fe62c76190d7ea3257a93 *man/stratifydisc.Rd -ee17e043c3da3beea0ad59503034b010 *man/summarycont.Rd -903fa9b54c8a97999be50155b2c0c8f3 *man/summarydisc.Rd -04857c9b962cc8b3613dde27f6ad2fb6 *man/un1.Rd -187aee33d5644a310f6b12dff2e92fb2 *man/un3.Rd -38e2f8c12e2e17488a05ba3bdbafd0de *man/weightcont.Rd -e83c967c2f71ad6a51f19ccac64e9dff *man/weightdisc.Rd +8edf8104f7f663ca90f77cbbb6c22cc0 *R/Covariate.R +b378c7f3506782f6c84a14a427649918 *man/clean.Rd +10eef60e34dad1c4587d7ab167f93065 *man/dispcont.Rd +1fccf96c8fa07e0e2485ff11a32ca943 *man/dispdisc.Rd +258df6a7de1416b1c16cdf1c391619c6 *man/gerom.Rd +04254845cbebaefc10e72868bb4cd8fb *man/makeit.Rd +42b8f59821a04911384356d690b05c66 *man/oddscont.Rd +0d50e86f2d23c45be93fc066f1717681 *man/oddsdisc.Rd +11fcb72b7e9b28fb6b80ee526f7858e2 *man/senscont.Rd +d0bb1a8bf44fe316db3537232b288d8d *man/sensdisc.Rd +9bc1582671da238efa37bdaf82f2a294 *man/stratacont.Rd +cab6eeed599ab130c5ce908a675f57d1 *man/stratadisc.Rd 
+960bd2d12a1d3aa6f7cbc27c0a483131 *man/stratifycont.Rd +e46a3a1d879f9e4957265a3e725a608e *man/stratifydisc.Rd +0bebb2e7594e08cd9c21762156350ad0 *man/summarycont.Rd +6904a81e69a10c7416b6dfdcef36a8ef *man/summarydisc.Rd +560b3beabc4857a304f85d58c44cf0ea *man/un1.Rd +e78890680baba8905e9c4d89a7bb8f6e *man/un3.Rd +663b9c97d4c798b25766de3b8f57181c *man/weightcont.Rd +b3a576a871bb1225fd807c1ca2207d02 *man/weightdisc.Rd diff --git a/R/Covariate.R b/R/Covariate.R index 7b2b0e3..cac4fc8 100644 --- a/R/Covariate.R +++ b/R/Covariate.R @@ -1,6 +1,7 @@ stratadisc=function(Treatment,Outcome,Matrix){ + if(ncol(Matrix) == 3)stop("Warning: Data matrix must contain at least 3 covariates and one outcome variable.") check=as.matrix(unique(Matrix[,Outcome])) if(nrow(check) > 2 & sum(check[,1]) != 1)stop("Warning: Output variable must be discrete for this function. For continuous outputs, please use stratacont().") a=ncol(Matrix)-1 @@ -9,20 +10,42 @@ stratadisc=function(Treatment,Outcome,Matrix){ if(nrow(h) > 5)message("We advise using input variables with at most 5 categories.") if(nrow(h) > 5)dd=sprintf("Column %s contains more than 5 categories" , i) if(nrow(h) > 5)message(dd) - } - g=stratifydisc(Treatment,Outcome,Matrix) - l=weightdisc(g) + if(nrow(h) == 1)message("One or more variables is constant. This may cause problems in stratification.") + } + g=try(stratifydisc(Treatment,Outcome,Matrix),TRUE) + if(class(g) == "try-error")stop("Stratification could not be completed. Strata are too sparsely populated or insufficient strata found. Please use function sensdisc() to perfrom a sensitivity analysis.") + if(class(g) != "try-error"){ + if( nrow(g) <=1)message("Only one or less strata found in data. Function stratadisc() will only return the strata without calculating odds. We strongly recommend performing a snesitivity analysis by applying function sensdisc() before proceeding.") + if(nrow(g) <= 1){ + return(g) + message("Stratification complete. 
We advise performing a sensitivity analysis using the sensdisc() function.") + } + if ( nrow(g) > 1) { + a=ncol(Matrix)-2 + mmm=g[,-(1:a)] + t=sum(as.numeric(mmm[,5])) + t2=sum(as.numeric(Matrix[,Treatment])) + t=t/t2 + if(t < 0.1){ + message("Less than 10% percent of cases were matched. Function stratadisc() will only return the strata without calculating odds. We strongly recommend performing a snesitivity analysis by applying function sensdisc() before proceeding.") + return(g) + } + if ( t >= 0.1){ + l=weightdisc(g) l3=summarydisc(g) dispdisc(g) k=l3 - message("Stratification complete.") + message("Stratification complete. We advise performing a sensitivity analysis using the sensdisc() function.") print(l) return(k) - + } + } + } } stratacont=function(Treatment,Outcome,Matrix){ + if(ncol(Matrix) == 3)stop("Warning: Data matrix must contain at least 3 covariates and one outcome variable.") check=as.matrix(unique(Matrix[,Outcome])) if(nrow(check) <= 2 )stop("Warning: Output variable must be continuous for this function. For discrete outputs, please use stratadisc().") a=ncol(Matrix)-1 @@ -31,15 +54,37 @@ stratacont=function(Treatment,Outcome,Matrix){ if(nrow(h) > 5)message("We advise using input variables with at most 5 categories.") if(nrow(h) > 5)dd=sprintf("Column %s contains more than 5 categories" , i) if(nrow(h) > 5)message(dd) + if(nrow(h) == 1)message("One or more variables is constant. This may cause problems in stratification.") + } - g=stratifycont(Treatment,Outcome,Matrix) + g=try(stratifycont(Treatment,Outcome,Matrix),TRUE) + if(class(g) == "try-error")stop("Stratification could not be completed. Strata are too sparsely populated or insufficient strata found. Please use function senscont() to perfrom a sensitivity analysis.") + else{ + if( nrow(g) <=1)message("Only one or less strata found in data. Function stratacont() will only return the strata without calculating odds. 
We strongly recommend performing a snesitivity analysis by applying function senscont() before proceeding.") + if(nrow(g) <= 1){ + return(g) + message("Stratification complete. We advise performing a sensitivity analysis using the senscont() function.") + } + if(nrow(g) > 1){ + a=Treatment + 3 + t=sum(as.numeric(g[,a])) + t2=sum(as.numeric(Matrix[,Treatment])) + t=t/t2 + if(t <= 0.1){ + message("Less than 10% percent of cases were matched. Function stratacont() will only return the strata without calculating odds. We strongly recommend performing a snesitivity analysis by applying function senscont() before proceeding.") + return(g) + } + if(t > 0.1){ l=weightcont(g) l3=summarycont(g) dispcont(g) k=l3 - message("Stratification complete.") + message("Stratification complete. We advise performing a sensitivity analysis using the senscont() function.") print(l) return(k) + } + } + } } @@ -132,9 +177,13 @@ stratifydisc=function(Treatment,Outcome,Matrix){ weightdisc = function(mat){ + if(nrow(mat) == 1)message("Only one strata matched in data, odds ratio may be meaningless on this case.") + cc=0 + if(nrow(mat) == 1)cc=1 cola=ncol(mat) colb=ncol(mat)-7 mat=as.matrix(mat[,(colb:cola)]) + if(cc == 1)mat=t(mat) sss=sum(mat[,5]) l=matrix(nrow=nrow(mat),ncol=ncol(mat)) for(i in 1:ncol(l)){ @@ -194,6 +243,7 @@ weightdisc = function(mat){ oddsdisc=function(mat){ + if(nrow(mat) == 1)stop("Only one strata matched in data, cannot display odds before and after startification.") cola=ncol(mat) colb=ncol(mat)-7 @@ -321,18 +371,29 @@ q=c(1:w) w=length(f) q[(1:w)]=f colnames(gg)=q -gg=clean(gg) -return(gg) +tt=try(clean(gg),TRUE) +if(class(tt) == "try-error") { + return(gg) +} +else { + return(tt) +} } weightcont=function(Matrix){ + if(nrow(Matrix) == 1)message("Only one strata matched in data, adjusted regression coefficient may be meaningless in this case.") + cc=0 + if(nrow(Matrix) == 1)cc=1 resd=matrix(nrow=5) a=ncol(Matrix)-5 b=ncol(Matrix) r=Matrix[,(a:b)] + if(cc == 1){ + 
r=t(as.matrix(r)) + } f=matrix(nrow=nrow(r)) r=cbind(r,f) t=matrix(nrow=nrow(r),ncol=ncol(r)) @@ -348,12 +409,18 @@ weightcont=function(Matrix){ r[,5]=r[,5]*r[,4] res=sum(r[,2])-sum(r[,5]) a=r[,2]-r[,5] - f=t.test(a,mu=0) + f=try(t.test(a,mu=0),TRUE) + if(class(f) == "try-error") { + resd[4,1]=NA + resd[5,1]=NA + } + else { + resd[4,1]=f$conf.int[2]*nrow(Matrix) + resd[5,1]=f$conf.int[1]*nrow(Matrix) + } resd[1,1]=res resd[2,1]=as.numeric(f[3]) resd[3,1]=round(sum(r[,1]),1) - resd[4,1]=f$conf.int[2]*resd[3,1] - resd[5,1]=f$conf.int[1]*resd[3,1] rownames(resd)=c("Average Of Cases - Average Of Controls" , "t-test p-value", "No. Of Cases Matched", "95% C.I Upper Bound", "95% C.I Lower Bound") return(resd) @@ -361,6 +428,7 @@ weightcont=function(Matrix){ oddscont=function(mat){ + if(nrow(mat) == 1)stop("Only one strata matched in data, cannot display odds before and after startification.") cola=ncol(mat) colb=ncol(mat)-5 @@ -515,20 +583,21 @@ makeit=function(d){ z=z-2 h=z+1 t=ddply(d,(1:z),nrow) - if(nrow(t) < 100){ + pop=mean(as.numeric(t[,h])) + if(nrow(t) < 50 ){ a=matrix(nrow=1,ncol=2) a[1,1]=1 a[1,2]=nrow(t) return(a) } else { - m=trunc(nrow(t)/50) + 1 + m=trunc(nrow(t)/20) + 1 a=matrix(nrow=m,ncol=2) a[1,1]=1 - a[1,2]=50 + a[1,2]=20 for(i in 2:nrow(a)){ - a[i,1]=a[i-1,1]+50 - a[i,2]=a[i-1,2]+50 + a[i,1]=a[i-1,1]+20 + a[i,2]=a[i-1,2]+20 } c=nrow(a) a[c,2]=(nrow(t)) @@ -555,13 +624,21 @@ un3=function(a,d){ z=ncol(d) z=z-2 h=z+1 + bb=nrow(d) tt=ddply(d,(1:z),nrow) gg=matrix(nrow=1,ncol=(z+6)) for(i in 1:nrow(a)){ rer=tt[(a[i,1]:a[i,2]),] ly=gerom(d,rer) gg=rbind(gg,ly) + ll=nrow(a) + c=i + if( c < ll){ + rer=tt[(a[i+1,1]:a[ll,2]),] + z=ncol(d)-2 + d=merge(d, rer[,(1:z)], all.y=TRUE) + } } gg=na.omit(gg) return(gg) @@ -574,9 +651,14 @@ un3=function(a,d){ clean=function(g){ + if(nrow(g)==1) { r=ncol(g)-5 r1=ncol(g) - f=as.matrix(g[,(r:r1)]) + colnames(g)[(r:r1)]=c("Cases","Avg.","Sd(NA)","Controls","Avg.","Sd(NA)") + return(g) + } + else{ + f=as.matrix(g[,(r:r1)]) 
for(i in 1:nrow(f)){ if(f[i,2] == "NaN")g[i,1]=NA if(f[i,5]=="NaN")g[i,5]=NA @@ -584,6 +666,7 @@ clean=function(g){ g=na.omit(g) colnames(g)[(r:r1)]=c("Cases","Avg.","Sd(NA)","Controls","Avg.","Sd(NA)") return(g) + } } @@ -768,4 +851,163 @@ summarycont=function(mat){ } +sensdisc=function(Treatment,Outcome,Matrix){ + if(ncol(Matrix) > 10)message("Sensitivity analysis may require some time, given the large number of covariates.") + if(nrow(Matrix) > 100000)message("Sensitivity analysis may require some time, given the large number of observations.") + + if(ncol(Matrix) == 3)stop("Warning: Data matrix must contain at least 3 covariates and one outcome variable.") + check=as.matrix(unique(Matrix[,Outcome])) + if(nrow(check) > 2 & sum(check[,1]) != 1)stop("Warning: Output variable must be discrete for this function. For continuous outputs, please use stratacont().") + a=ncol(Matrix)-1 + for(i in 1:a){ + h=as.matrix(unique(Matrix[,i])) + if(nrow(h) > 5)message("We advise using input variables with at most 5 categories.") + if(nrow(h) > 5)dd=sprintf("Column %s contains more than 5 categories" , i) + if(nrow(h) > 5)message(dd) + if(nrow(h) == 1)message("One or more variables is constant. This may cause problems in stratification.") + + } + Matrix=as.data.frame(Matrix) + l=as.data.frame(cbind(Matrix[,Treatment],Matrix[,Outcome])) + r=c(colnames(Matrix)) + p=c(r[Treatment],r[Outcome]) + colnames(l)=p + if(Treatment < Outcome){ + Matrix=Matrix[,-Outcome] + Matrix=Matrix[,-Treatment] + } + if(Outcome < Treatment){ + Matrix=Matrix[,-Treatment] + Matrix=Matrix[,-Outcome] + } + Matrix=cbind(Matrix,l) + for (i in 1:ncol(Matrix)){ + if(colnames(Matrix)[1] == "V1")colnames(Matrix)[1]="Var1" + } + c=ncol(Matrix)-4 + if(c == 0)stop("Data matrix only contains 3 covariates. 
For sensitivity analysis please use a matrix with at least 4 covariates.") + if(c != 0){ + res2=matrix(nrow=1,ncol=3) + a=ncol(Matrix) + b=a-1 + g=try(stratifydisc(b,a,Matrix),TRUE) + if(class(g) == "try-error")res2[1,2]=0 + + res2[1,1]="None" + ff=b+4 + l=try(weightdisc(g),TRUE) + if ( class(l) == "try-error" )res2[1,3]=0 + if ( class(l) != "try-error" )res2[1,3]=log(l[1,1]) + + + if(class(g) != "try-error")res2[1,2]=sum(as.numeric(g[,ff])) + + message("Sensitivity analysis in progress.") + + res=matrix(nrow=c,ncol=3) + for(i in 1:c){ + nam=colnames(Matrix)[1] + Matrix=Matrix[,-1] + a=ncol(Matrix) + b=a-1 + ff=b+4 + g=try(stratifydisc(b,a,Matrix),TRUE) + if(class(g) == "try-error")bb=0 + if(class(g) != "try-error")bb=sum(as.numeric(g[,ff])) + l=try(weightdisc(g),TRUE) + if(class(l) == "try-error")res[i,3]=0 + + + res[i,2]=bb + if(class(l) != "try-error")res[i,3]=log(l[1,1]) + res[i,1]=nam + message("Another variable has been dropped, sensitivity analysis in progress.") + } + res=rbind(res2,res) + colnames(res)=c("Variable Dropped" , "Number of Cases Matched" , "Adjusted Odds") + message("Sensitivity analysis complete.") + return(res)} +} + + + + + +senscont=function(Treatment,Outcome,Matrix){ + if(ncol(Matrix) > 10)message("Sensitivity analysis may require some time, given the large number of covariates.") + if(nrow(Matrix) > 100000)message("Sensitivity analysis may require some time, given the large number of observations.") + + if(ncol(Matrix) == 3)stop("Warning: Data matrix must contain at least 3 covariates and one outcome variable.") + check=as.matrix(unique(Matrix[,Outcome])) + if(nrow(check) <= 2 )stop("Warning: Output variable must be continuous for this function. 
For discrete outputs, please use stratadisc().") + a=ncol(Matrix)-1 + for(i in 1:a){ + h=as.matrix(unique(Matrix[,i])) + if(nrow(h) > 5)message("We advise using input variables with at most 5 categories.") + if(nrow(h) > 5)dd=sprintf("Column %s contains more than 5 categories" , i) + if(nrow(h) > 5)message(dd) + if(nrow(h) == 1)message("One or more variables is constant. This may cause problems in stratification.") + + } + Matrix=as.data.frame(Matrix) + l=as.data.frame(cbind(Matrix[,Treatment],Matrix[,Outcome])) + r=c(colnames(Matrix)) + p=c(r[Treatment],r[Outcome]) + colnames(l)=p + if(Treatment < Outcome){ + Matrix=Matrix[,-Outcome] + Matrix=Matrix[,-Treatment] + } + if(Outcome < Treatment){ + Matrix=Matrix[,-Treatment] + Matrix=Matrix[,-Outcome] + } + Matrix=cbind(Matrix,l) + for (i in 1:ncol(Matrix)){ + if(colnames(Matrix)[1] == "V1")colnames(Matrix)[1]="Var1" + } + c=ncol(Matrix)-4 + if(c == 0)stop("Data matrix only contains 3 covariates. For sensitivity analysis please use a matrix with at least 4 covariates.") + if(c != 0){ + res2=matrix(nrow=1,ncol=3) + a=ncol(Matrix) + b=a-1 + ff=b + g=try(stratifycont(b,a,Matrix),TRUE) + if ( class(g) == "try-error" )res2[1,2]=0 + if ( class(g) != "try-error" )res2[1,2]=sum(as.numeric(g[,ff])) + res2[1,1]="None" + + l=try(weightcont(g),TRUE) + if ( class(l) == "try-error" )res2[1,3]=0 + if ( class(l) != "try-error" )res2[1,3]=(l[1,1]) + + message("Sensitivity analysis in progress.") + + res=matrix(nrow=c,ncol=3) + for(i in 1:c){ + nam=colnames(Matrix)[1] + Matrix=Matrix[,-1] + a=ncol(Matrix) + b=a-1 + ff=b + g=try(stratifycont(b,a,Matrix),TRUE) + if(class(g) == "try-error")bb=0 + if(class(g) != "try-error")bb=sum(as.numeric(g[,ff])) + l=try(weightcont(g),TRUE) + if ( class(l) == "try-error")res[i,3]=0 + if ( class(l) != "try-error")res[i,3]=l[1,1] + + res[i,2]=bb + + res[i,1]=nam + message("Another variable has been dropped, sensitivity analysis in progress.") + } + res=rbind(res2,res) + colnames(res)=c("Variable Dropped" 
, "Number of Cases Matched" , "Avg.Cases - Avg. Controls") + message("Sensitivity analysis complete.") + return(res)} +} + + diff --git a/man/clean.Rd b/man/clean.Rd index fb9169f..8f1994e 100644 --- a/man/clean.Rd +++ b/man/clean.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ clean(g) } diff --git a/man/dispcont.Rd b/man/dispcont.Rd index 74701a3..959d511 100644 --- a/man/dispcont.Rd +++ b/man/dispcont.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ dispcont(mat) } diff --git a/man/dispdisc.Rd b/man/dispdisc.Rd index 8ec255a..de805b6 100644 --- a/man/dispdisc.Rd +++ b/man/dispdisc.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ dispdisc(mat) } diff --git a/man/gerom.Rd b/man/gerom.Rd index 9887378..7fa2ac6 100644 --- a/man/gerom.Rd +++ b/man/gerom.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). 
-} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ gerom(d,rer) } diff --git a/man/makeit.Rd b/man/makeit.Rd index 7758de7..146f9cd 100644 --- a/man/makeit.Rd +++ b/man/makeit.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ makeit(d) } diff --git a/man/oddscont.Rd b/man/oddscont.Rd index f8fbd06..3d5430f 100644 --- a/man/oddscont.Rd +++ b/man/oddscont.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ oddscont(mat) } diff --git a/man/oddsdisc.Rd b/man/oddsdisc.Rd index dfb2f5b..7861f23 100644 --- a/man/oddsdisc.Rd +++ b/man/oddsdisc.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ oddsdisc(mat) } diff --git a/man/senscont.Rd b/man/senscont.Rd new file mode 100644 index 0000000..9564c15 --- /dev/null +++ b/man/senscont.Rd @@ -0,0 +1,54 @@ +\name{senscont} +\alias{senscont} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +senscont() + +} +\description{ +%% ~~ A concise (1-5 lines) description of what the function does. 
~~ +This function performs sensitivity analysis on continuous variable outcomes +by dropping variables one at a time, in order to determine which variables can be removed without altering the results of stratified covariate balancing. +} +\usage{ +senscont(Treatment,Outcome,Matrix) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{Treatment}{ +%% ~~Describe \code{x} here~~ +Column number of variable to be used as treatment. +} +\item{Outcome}{ +%% ~~Describe \code{x} here~~ +Column number of variable to be used as outcome. +} +\item{Matrix}{ +%% ~~Describe \code{x} here~~ +Name of matrix or data.frame where data is stored. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +This function performs sensitivity analysis by dropping variables one at a time, in order to determine which variables can be removed without altering the results of stratified covariate balancing. +} +\examples{ +## In this example we will generate a matrix with a large number of +## covariates and a small number of observations. No model will be +## built into the data, our goal here is to demonstrate how sensitivity +## analysis would be performed. + +## Firstly a matrix with 10 columns and 1000 observations will be created +m=matrix(nrow=1000,ncol=10) +for(i in 1:ncol(m)){ +m[,i]=rbinom(1000,1,0.5) +} + +## We will populate the 10th column randomly from the normal distribution +m[,10]=rnorm(1000,0,1) +## Next we will run function senscont() on the data. +g=senscont(9,10,m) + +} +\keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/man/sensdisc.Rd b/man/sensdisc.Rd new file mode 100644 index 0000000..03dc6dc --- /dev/null +++ b/man/sensdisc.Rd @@ -0,0 +1,53 @@ +\name{sensdisc} +\alias{sensdisc} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +sensdisc() + +} +\description{ +%% ~~ A concise (1-5 lines) description of what the function does.
~~ +This function performs sensitivity analysis on discrete outcome variables +by dropping variables one at a time, in order to determine which variables can be removed without altering the results of stratified covariate balancing. +} +\usage{ +sensdisc(Treatment,Outcome,Matrix) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{Treatment}{ +%% ~~Describe \code{x} here~~ +Column number of variable to be used as treatment. +} +\item{Outcome}{ +%% ~~Describe \code{x} here~~ +Column number of variable to be used as outcome. +} +\item{Matrix}{ +%% ~~Describe \code{x} here~~ +Name of matrix or data.frame where data is stored. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +This function performs sensitivity analysis by dropping variables one at a time, in order to determine which variables can be removed without altering the results of stratified covariate balancing. +} +\examples{ +## In this example we will generate a matrix with a large number of +## covariates and a small number of observations. No model will be +## built into the data, our goal here is to demonstrate how sensitivity +## analysis would be performed. + +## Firstly a matrix with 10 columns and 1000 observations will be created +m=matrix(nrow=1000,ncol=10) +for(i in 1:ncol(m)){ +m[,i]=rbinom(1000,1,0.5) +} + +## Next we will run function sensdisc() on the data.
+g=sensdisc(9,10,m) + +} + +\keyword{ ~kwd1 }% use one of RShowDoc("KEYWORDS") +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/man/stratacont.Rd b/man/stratacont.Rd index a0ddd15..0015cd7 100644 --- a/man/stratacont.Rd +++ b/man/stratacont.Rd @@ -33,38 +33,38 @@ This is the primary stratification function for continuous outcome variables.It } \examples{ -## We will first begin by simulating data in 11 covariates and a continuous outcome -## with significant interaction terms and correlations amongst covariates (to simulate a -## non-randomized experiment with a strongly non-linear underlying model). +## We will first begin by simulating data in 5 covariates and a continuous outcome +## with significant interaction terms and correlations amongst covariates (to simulate an +## experiment with a strongly non-linear underlying model). ## First, we will create a matrix with the input variables. The inout variables will all be ## categorical variables. -m=matrix(nrow=1000,ncol=12) -for ( i in 1:10){ -m[,i]=round(runif(1000,min=1,max=3)) +m=matrix(nrow=5000,ncol=6) +for ( i in 1:ncol(m)){ +m[,i]=rbinom(5000,1,0.5) } -m[,11]=rbinom(1000,1,0.5) + ## Next, we will simulate the output variable and include interaction terms for(i in 1:nrow(m)){ -a=(2*m[i,11] + 0.5*m[i,1] - 4*m[i,2] + 2.3*m[i,3] + -0.8*m[i,4] -0.7*m[i,5] - 4*m[i,6] + 3.6*m[i,7] + -1.2*m[i,8] - 11*m[i,9] - 2.1*m[i,10] + 2.3*m[i,3]*m[i,4] --3.5*m[i,5]*m[i,6]*m[i,7] + 8*m[i,1]*m[i,2]*m[i,9] -+ 2.1*m[i,2]*m[i,6]*m[i,8] + 5*m[i,4]*m[i,7]*m[i,9] -+ 8*m[i,3]*m[i,10]*m[i,6] + 11*m[i,7]*m[i,8]*m[i,5] + 8*m[i,3]*m[i,9]*m[i,2]) -m[i,12]=rnorm(1,a,1) +a=(2*m[i,5] + 0.5*m[i,1] + 4*m[i,2] + 2.3*m[i,3] + 5*m[i,4] + +2.3*m[i,3]*m[i,2] +3.5*m[i,1]*m[i,2] + 2.1*m[i,1]*m[i,3] + +5*m[i,1]*m[i,2]*m[i,3] + 6*m[i,1]*m[i,4] +3*m[i,2]*m[i,4] + +2*m[i,3]*m[i,4] + 3.4*m[i,1]*m[i,2]*m[i,3]*m[i,4] + +5*m[i,1]*m[i,2]*m[i,4] + 4*m[i,2]*m[i,3]*m[i,4]) +m[i,6]=rnorm(1,a,1) } -## We are interested in determining the coefficient 
of covariate 11 which is 2. +## We are interested in determining the coefficient of covariate 5 which is 2. ## Tmost straightforward ## way of doing this is to use simple linear regression as follows m=as.data.frame(m) -k=lm(m[,12]~.,data=m[,(1:11)]) -## The value of the coefficient of variable 11 found by the regression can be retrieved using -k$coeff[12] +k=lm(m[,6]~.,data=m[,(1:5)]) +## The value of the coefficient of variable 5 found by the regression can be retrieved using +k$coeff[6] ## We can now use the stratacont() function to find a more accurate estimation of the coefficient -g=stratacont(11,12,m) - +g=stratacont(5,6,m) +## Note that as the model includes more covariates, the accuracy of the stratification +## techniques is far superior. } % Add one or more standard keywords, see file 'KEYWORDS' in the diff --git a/man/stratadisc.Rd b/man/stratadisc.Rd index e81f5fd..fb63478 100644 --- a/man/stratadisc.Rd +++ b/man/stratadisc.Rd @@ -34,46 +34,41 @@ This is the primary stratification function for discrete outcome variables.It lo } \examples{ -## We will first begin by simulating data in 11 covariates and a discrete outcome, with +## We will first begin by simulating data in 4 covariates and a discrete outcome, with ## significant interaction terms and correlations amongst covariates (to simulate a ## non-randomized experiment with a strongly non-linear underlying model). 
## First, we will create a matrix with the input variables -m=matrix(nrow=1000,ncol=12) +m=matrix(nrow=1000,ncol=5) for ( i in 1:ncol(m)){ m[,i]=rbinom(1000,1,0.5) } -## Next, we will create correlations amongst covariates 2,4 and 11 +## Next, we will create correlations amongst covariates 2,1 and 3 for( i in 1:nrow(m)){ -if(m[i,11] == 1)m[i,2]=rbinom(1,1,0.8) -if(m[i,11] == 0)m[i,2]=rbinom(1,1,0.2) -if(m[i,11] == 1)m[i,4]=rbinom(1,1,0.8) -if(m[i,11] == 0)m[i,4]=rbinom(1,1,0.2) +if(m[i,3] == 1)m[i,2]=rbinom(1,1,0.8) +if(m[i,3] == 0)m[i,2]=rbinom(1,1,0.2) +if(m[i,3] == 1)m[i,1]=rbinom(1,1,0.8) +if(m[i,3] == 0)m[i,1]=rbinom(1,1,0.2) } ## Next, we will simulate the output variable and include interaction terms for(i in 1:nrow(m)){ -a=exp(2*m[i,11] + 0.5*m[i,1] - 4*m[i,2] + 2.3*m[i,3] + -0.8*m[i,4] -0.7*m[i,5] - 4*m[i,6] + 3.6*m[i,7] + -1.2*m[i,8] - 11*m[i,9] - 2.1*m[i,10] + 2.3*m[i,3]*m[i,4] --3.5*m[i,5]*m[i,6]*m[i,7] + 8*m[i,1]*m[i,2]*m[i,9] + -2.1*m[i,2]*m[i,6]*m[i,8]) / (1 + exp(2*m[i,11] + 0.5*m[i,1] -- 4*m[i,2] + 2.3*m[i,3] + 0.8*m[i,4] -0.7*m[i,5] - 4*m[i,6] -+ 3.6*m[i,7] + 1.2*m[i,8] - 11*m[i,9] - 2.1*m[i,10] + -2.3*m[i,3]*m[i,4] -3.5*m[i,5]*m[i,6]*m[i,7] + -8*m[i,1]*m[i,2]*m[i,9] + 2.1*m[i,2]*m[i,6]*m[i,8])) -m[i,12]=rbinom(1,1,a) +a=exp(2*m[i,4] + 0.5*m[i,1] - 4*m[i,2] + 2.3*m[i,3] + 2.3*m[i,3]*m[i,2] ++ 8*m[i,1]*m[i,2] + 2.1*m[i,2]*m[i,3] + 9*m[i,3]*m[i,1])/ (1 +exp(2*m[i,4] ++ 0.5*m[i,1] - 4*m[i,2] + 2.3*m[i,3] + +2.3*m[i,3]*m[i,2] + 8*m[i,1]*m[i,2] + 2.1*m[i,2]*m[i,3] + 9*m[i,3]*m[i,1]) ) +m[i,5]=rbinom(1,1,a) } -## We are interested in determining the coefficient of covariate 11 which is 2. +## We are interested in determining the coefficient of covariate 4 which is 2. 
##The most straightforward way of doing this ##is to use logistic regression as follows m=as.data.frame(m) -k=glm(m[,12]~.,data=m[,(1:11)],family=binomial) +k=glm(m[,5]~.,data=m[,(1:4)],family=binomial) ## The value of the coefficient of variable 11 found by the ##logistic regression can be retrieved using -k$coeff[12] +k$coeff[5] ## We can now use the stratadisc() function to find a ##more accurate estimation of the coefficient -g=stratadisc(11,12,m) +g=stratadisc(4,5,m) ## We need to take the log() of the first ##number returned "Odds Ratio of Impact Of Treatment On Outcome" ## and the estimated value of the parameter should be more accurate diff --git a/man/stratifycont.Rd b/man/stratifycont.Rd index c0ea36a..014c933 100644 --- a/man/stratifycont.Rd +++ b/man/stratifycont.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ stratifycont(Treatment, Outcome , Matrix) } diff --git a/man/stratifydisc.Rd b/man/stratifydisc.Rd index b69003e..fcc4d96 100644 --- a/man/stratifydisc.Rd +++ b/man/stratifydisc.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ stratifydisc(Treatment, Outcome , Matrix) } diff --git a/man/summarycont.Rd b/man/summarycont.Rd index c513de8..1834a54 100644 --- a/man/summarycont.Rd +++ b/man/summarycont.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). 
-} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ summarycont(mat) } diff --git a/man/summarydisc.Rd b/man/summarydisc.Rd index 1a5235f..83a33e6 100644 --- a/man/summarydisc.Rd +++ b/man/summarydisc.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ summarydisc(mat) } diff --git a/man/un1.Rd b/man/un1.Rd index 659c20a..f65fbd3 100644 --- a/man/un1.Rd +++ b/man/un1.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ un1(d) } diff --git a/man/un3.Rd b/man/un3.Rd index 51c6632..7c3b17a 100644 --- a/man/un3.Rd +++ b/man/un3.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). -} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ un3(a,d) } diff --git a/man/weightcont.Rd b/man/weightcont.Rd index 0609845..24956f5 100644 --- a/man/weightcont.Rd +++ b/man/weightcont.Rd @@ -6,8 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). 
-} +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont().} \usage{ weightcont(Matrix) } diff --git a/man/weightdisc.Rd b/man/weightdisc.Rd index e062c46..454f264 100644 --- a/man/weightdisc.Rd +++ b/man/weightdisc.Rd @@ -6,7 +6,7 @@ } \description{ %% ~~ A concise (1-5 lines) description of what the function does. ~~ -Function for internal use please refer to functions stratadisc() and stratacont(). +Function for internal use please refer to functions stratadisc(), stratacont() , sensdisc() and senscont(). } \usage{ weightdisc(mat)