diff --git a/inst/rdd-simulation/ddsandwich.R b/inst/rdd-simulation/ddsandwich.R new file mode 100644 index 00000000..59ede3fd --- /dev/null +++ b/inst/rdd-simulation/ddsandwich.R @@ -0,0 +1,228 @@ +##' Extract bread matrix from an lmrob fit +##' +##' This is part of a workaround for an issue in the robustbase code +##' affecting sandwich covariance estimation. +##' The issue in question is issue #6471, robustbase project on R-Forge. +##' +##' @title Bread method for lmrob objects +##' @param x an lmrob object produced using an MM/SM estimator chain +##' @param ... +##' @return k by (k+1) matrix, with first column for scale estimate and rows, remaining cols for coefficients +##' +##' @author lrd author 2 +##' @export +bread.lmrob <- function(x, ...) +{ + stopifnot(is.list(ctrl <- x$control)) + if (!(!is.null(ctrl$method) && nchar(ctrl$method)<=2 && + substr(ctrl$method, nchar(ctrl$method),nchar(ctrl$method))=="M") ) + stop("bread.lmrob() supports only SM or MM estimates") + + psi <- chi <- ctrl$psi + if (is.null(psi)) + stop("parameter psi is not defined") + stopifnot(is.numeric(c.chi <- ctrl$tuning.chi), is.numeric(c.psi <- ctrl$tuning.psi)) + r0 <- x$init$resid + r <- resid(x) + scale <- x$scale + xmat <- model.matrix(x) + bb <- 1/2 + n <- length(r) + stopifnot(n == length(r0), is.matrix(xmat), n == nrow(xmat)) + p <- ncol(xmat) + r.s <- r/scale + r0.s <- r0/scale + w <- robustbase::Mpsi(r.s, cc = c.psi, psi = psi, deriv = 1) + w0 <- robustbase::Mchi(r0.s, cc = c.chi, psi = chi, deriv = 1) + x.wx <- crossprod(xmat, xmat * w) + if (inherits(A <- tryCatch(solve(x.wx) * scale, error = function(e) e), + "error")) { + A <- tryCatch(solve(x.wx, tol = 0) * scale, error = function(e) e) + if (inherits(A, "error")) + { stop("X'WX is singular.") } else warning("X'WX is almost singular.") + } + ## At this point A has no sample size scaling, as in robustbase:::.vcov.avar1 + ## The lack of scaling there precisely compensates for the lack of scaling of the crossproduct + a <- A %*% (crossprod(xmat, w * r.s)/mean(w0 * r0.s)) + colnames(a) <- "sigma" + ## Now we restore sample size scaling to A + A <- n * A + + cbind(a, A) +} +##' +##' +##' Only SM or MM estimates supported +##' +##' @title Estfun method for lmrob objects +##' @param x a fitted lmrob +##' @param ... +##' @return an estfun object, as in the sandwich package +##' @author lrd author 2 +##' @export +estfun.lmrob <- function(x, ...) +{ + stopifnot(is.list(ctrl <- x$control)) + if (!(!is.null(ctrl$method) && nchar(ctrl$method)<=2 && + substr(ctrl$method, nchar(ctrl$method),nchar(ctrl$method))=="M") ) + stop("estfun.lmrob() supports only SM or MM estimates") + + xmat <- model.matrix(x) + xmat <- naresid(x$na.action, xmat) + psi <- chi <- ctrl$psi + if (is.null(psi)) + stop("parameter psi is not defined") + stopifnot(is.numeric(c.chi <- ctrl$tuning.chi), is.numeric(c.psi <- ctrl$tuning.psi)) + r0 <- x$init$resid + r <- resid(x) + scale <- x$scale + + n <- length(r) + stopifnot(n == length(r0), is.matrix(xmat), n == nrow(xmat)) + p <- ncol(xmat) + r0.s <- r0/scale + w0 <- robustbase::Mchi(r0.s, cc = c.chi, psi = chi) + Usigma <- scale(w0, center=TRUE, scale=FALSE) + + r.s <- r/scale + w <- robustbase::Mpsi(r.s, cc = c.psi, psi = psi) + + Ubeta <- w * xmat + rval <- cbind("sigma"=Usigma, Ubeta) + attr(rval, "assign") <- NULL + attr(rval, "contrasts") <- NULL + rval + } + +##' Overloading of sandwich::sandwich to accommodate non-square bread +##' +##' The sandwich package's sandwich function presumes the bread matrix +##' to be symmetric. Obviously this won't do if the bread is rectangular but not +##' square. +##' +##' This is part of a workaround for an issue in the robustbase code +##' affecting sandwich covariance estimation. +##' The issue in question is issue #6471, robustbase project on R-Forge. +##' +##' @title Sandwich estimate of covariance +##' @param x a fitted model object, as in sandwich::sandwich +##' @param bread. function or matrix, +##' @param meat. function or matrix, as in sandwich::sandwich +##' @param ... additional arguments to downstream methods, as in sandwich::sandwich +##' @return matrix, bread %*% meat %*% t(bread) +##' @author lrd author 2 +LRDsandwich <- function (x, bread. = sandwich::bread, meat. = sandwich::meat, ...) +{ + if (is.list(x) && !is.null(x$na.action)) + class(x$na.action) <- "omit" + if (is.function(bread.)) + bread. <- bread.(x) + if (is.function(meat.)) + meat. <- meat.(x, ...) + n <- NROW(sandwich::estfun(x)) + ## the t() in the below is the only difference from sandwich::sandwich() + return(1/n * (bread. %*% meat. %*% t(bread.))) +} + + +.vcov.avar2 <- function(obj, x=obj$x, posdef.meth = c("posdefify","orig")) +{ ## was .vcov.MM + stopifnot(is.list(ctrl <- obj$control)) + ## works only for MM & SM estimates: +### if (!is.null(ctrl$method) && !ctrl$method %in% c('SM', 'MM')) +### I replaced line above w/ 2 lines below for own reasons unrelated to bug fix -BH + if (!is.null(ctrl$method) && !(nchar(ctrl$method)==2 && + substr(ctrl$method, nchar(ctrl$method),nchar(ctrl$method))=="M") ) + stop('.vcov.avar2() supports only SM or MM estimates') + ## set psi and chi constants + psi <- chi <- ctrl$psi + if (is.null(psi)) stop('parameter psi is not defined') + stopifnot(is.numeric(c.chi <- ctrl$tuning.chi), + is.numeric(c.psi <- ctrl$tuning.psi)) + + ## need (r0, r, scale, x, c.psi,c.chi, bb) + r0 <- obj$init$resid + r <- resid(obj) + scale <- obj$scale + if (is.null(x)) x <- model.matrix(obj) + bb <- 1/2 ## this is always 1/2 for S estimates by convention +### --- start code from .vcov.MM --- + ## scaled residuals + n <- length(r) + stopifnot(n == length(r0), is.matrix(x), n == nrow(x)) + p <- ncol(x) + ## Next 2 lines added post-.vcov.MM, addressing #6471. + ## This assumes initial S-estimate solved sum( loss )/(n-p) == bb, + ## not mean( loss ) == bb as assumed in .vcov.avar1 + adj <- (n-p)/n + bb <- bb * adj + r.s <- r / scale # final scaled residuals + r0.s <- r0 / scale # initial scaled residuals + w <- Mpsi(r.s, cc = c.psi, psi = psi, deriv = 1) + w0 <- Mchi(r0.s, cc = c.chi, psi = chi, deriv = 1) + ## FIXME for multivariate y : + x.wx <- crossprod(x, x * w) + if(inherits(A <- tryCatch(solve(x.wx) * scale, + error=function(e)e), "error")) { + warning("X'WX is almost singular. Consider rather using cov = \".vcov.w\"") + A <- tryCatch(solve(x.wx, tol = 0) * scale, error=function(e)e) + if(inherits(A, "error")) + stop("X'WX is singular. Rather use cov = \".vcov.w\"") + } + a <- A %*% (crossprod(x, w * r.s) / mean(w0 * r0.s)) + w <- Mpsi( r.s, cc = c.psi, psi = psi) + + ## 3) now the standard part (w, x, r0.s, n, A,a, c.chi, bb) + w0 <- Mchi(r0.s, cc = c.chi, psi = chi) + Xww <- crossprod(x, w*w0) + u1 <- A %*% crossprod(x, x * w^2) %*% (n * A) + u2 <- a %*% crossprod(Xww, A) + u3 <- A %*% tcrossprod(Xww, a) + u4 <- mean(w0^2 - bb^2) * tcrossprod(a) + + ## list(cov = matrix((u1 - u2 - u3 + u4)/n, p, p), + ## wt = w / r.s, a = a) +### --- end code from .vcov.MM --- + ret <- (u1 - u2 - u3 + u4)/n + + ## this might not be a positive definite matrix + ## check eigenvalues (symmetric: ensure non-complex) + ev <- eigen(ret, symmetric = TRUE) + if (any(neg.ev <- ev$values < 0)) { ## there's a problem + posdef.meth <- match.arg(posdef.meth) + if(ctrl$trace.lev) + message("fixing ", sum(neg.ev), + " negative eigen([",p,"])values") + Q <- ev$vectors + switch(posdef.meth, + "orig" = { + ## remove negative eigenvalue: + ## transform covariance matrix into eigenbasis + levinv <- solve(Q) + cov.eb <- levinv %*% ret %*% Q + ## set vectors corresponding to negative ev to zero + cov.eb[, neg.ev] <- 0 + ## cov.eb[cov.eb < 1e-16] <- 0 + ## and transform back + ret <- Q %*% cov.eb %*% levinv + }, + "posdefify" = { + ## Instead of using require("sfsmisc") and + ## ret <- posdefify(ret, "someEVadd",eigen.m = ev,eps.ev = 0) + lam <- ev$values + lam[neg.ev] <- 0 + o.diag <- diag(ret)# original one - for rescaling + ret <- Q %*% (lam * t(Q)) ## == Q %*% diag(lam) %*% t(Q) + ## rescale to the original diagonal values + ## D <- sqrt(o.diag/diag(ret)) + ## where they are >= 0 : + D <- sqrt(pmax.int(0, o.diag)/diag(ret)) + ret[] <- D * ret * rep(D, each = p) ## == diag(D) %*% m %*% diag(D) + }, + stop("invalid 'posdef.meth': ", posdef.meth)) + } + attr(ret,"weights") <- w / r.s + attr(ret,"eigen") <- ev + ret +}## end{.vcov.avar2} + diff --git a/inst/rdd-simulation/displaySim.r b/inst/rdd-simulation/displaySim.r new file mode 100644 index 00000000..0c565690 --- /dev/null +++ b/inst/rdd-simulation/displaySim.r @@ -0,0 +1,210 @@ + + +########################################################################################### +### Table 4 simulation (polynomial) +########################################################################################### + + +#' @export +## dgms <- function(tp){ + +## ### the DGMs +## curve(0.5*x,from=-1,to=1,ylim=c(-2,2),xlab='r',ylab='$\\EE[Y|R=r]$',main='Linear') +## abline(v=0,lty=2) +## curve(ifelse(abs(x)>0.5, 3*x+sign(x)*(0.5-3)*0.5,0.5*x),from=-1,to=1,ylim=c(-2,2),xlab='r',ylab='$\\EE[Y|R=r]$',main='Anti-Symmetric') +## abline(v=0,lty=2) +## #curve(ifelse(x>0.5,3*x+(0.5-3)*0.5,0.5*x),from=-1,to=1,ylim=c(-2,2),xlab='r',ylab='$\\EE[Y|R=r]$',main='One-Sided') +## curve(mu4,from=-1,to=1,ylim=c(-2,2),xlab='r',ylab='$\\EE[Y|R=r]$',main='One-Sided') +## abline(v=0,lty=2) +## } + +dgms <- function(){ + lin <- function(x) 0.5*x + as <- function(x) ifelse(abs(x)>0.5,3*x+sign(x)*(0.5-3)*0.5,lin(x)) + mu4 <- function(x) sin(3*x) + p <- ggplot(data = data.frame(x = 0), mapping = aes(x = x))+xlim(-1,1)+ + theme(axis.line=element_blank(), + axis.text.x=element_blank(), + axis.text.y=element_blank(), + axis.ticks=element_blank(), + #axis.title.x='$r$', + #axis.title.y='$\\EE[Y|R=r]$', + legend.position="none", + ## panel.background=element_blank(), + ## panel.border=element_blank(), + ## panel.grid.major=element_blank(), + ## panel.grid.minor=element_blank(), + ## plot.background=element_blank() + )+xlab('$r$')+ylab('$\\EE[Y|R=r]$') + + gridExtra::grid.arrange(p+stat_function(fun=lin)+ggtitle('Linear'), + p+stat_function(fun=as)+ggtitle('Anti-Symmetric'), + p+stat_function(fun=mu4)+ggtitle('Sine'),nrow=1) +} + +resTab <- function(run,full=FALSE){ + llr <- run[c(nrow(run)-1,nrow(run)),] + run <- run[-c(nrow(run)-1,nrow(run)),] + + SEs <- run[seq(1,nrow(run)-1,2),] + ests <- run[seq(2,nrow(run),2),] + + runSEflex <- SEs[seq(1,nrow(SEs),2),] + runSEols <- SEs[seq(2,nrow(SEs),2),] + + runEstflex <- ests[seq(1,nrow(ests),2),] + runEstols <- ests[seq(2,nrow(ests),2),] + + + tabFun <- function(runSE,runEst,full){ + trueVar <- apply(runEst,1,var,na.rm=TRUE) + + if(full){ + ciL=runEst-1.96*runSE + ciH=runEst+1.96*runSE + cover=sign(ciL*ciH)<0 + } + + tab <- rbind( + + VarRelBias= + runSE^2|> + sweep(1,trueVar)|> + rowMeans(na.rm=TRUE)|> + (\(x) x/trueVar)(), + RMSE=apply(runEst,1,function(x) sqrt(mean(x^2,na.rm=TRUE)))) + + if(full) tab <- rbind(tab, + bias=rowMeans(runEst,na.rm=TRUE), + sd=sqrt(trueVar), + ciCover=rowMeans(cover,na.rm=TRUE)) + tab + } + list(tabFLEX=tabFun(runSEflex,runEstflex,full=full), + tabOLS=tabFun(runSEols,runEstols,full=full), + tabLLR=tabFun(rbind(llr[1,]),rbind(llr[2,]),full=full)) +} + + +## resTab <- function(run,full=FALSE){ +## run <- run[,apply(run,2,function(x) !any(is.na(x)))] +## tabFun <- function(ests) +## rbind( +## bias=rowMeans(rbind(ests)), +## RMSE=sqrt(rowMeans(rbind(ests^2)))) + +## tabFunP <- function(ps) +## apply(rbind(ps),1,function(x) mean(x<0.05,na.rm=TRUE)) + + +## out <- sapply(c('sh','ik','ll'),function(mm) tabFun(run[grep(paste0(mm,'.est'),rownames(run)),]), +## simplify=FALSE) + +## if(full) out <- sapply(c('sh','ik','ll'), +## function(mm) rbind(out[[mm]],level= tabFunP(run[grep(paste0(mm,'.p'),rownames(run)),])), +## simplify=FALSE) + +## out +## } + +#' @export +prntTab <- function(totalPoly,maxDeg=4,full=TRUE,md=FALSE){ + tab <- NULL + + if(maxDeg>(nrow(totalPoly[[1]])-2)/4){ + maxDeg <- (nrow(totalPoly[[1]])-2)/4 + if(!full) + warning(paste('There don\'t seem to be that many degrees in the simulation.\n Setting maxDeg=',maxDeg)) + } + + ctab <- function(runname,full){ + run <- totalPoly[[runname]] + res <- resTab(run,full=full) + + cbind(res[['tabFLEX']][,1:maxDeg], + res[['tabOLS']][,1:maxDeg], + res[['tabLLR']]) + } + for(dgm in c('lin','antiSym','wass')){ + tab <- rbind(tab,ctab(paste0(dgm,'_t'),full)) + if(full) if(paste0(dgm,'_norm')%in%names(totalPoly)) tab <- rbind(tab,ctab(paste0(dgm,'_norm'),full)) + } + if(md){ + colnames(tab) <- c(paste('LRD, deg=',1:maxDeg),paste('OLS, deg=',1:maxDeg),'Loc.Lin') + rownames(tab) <- paste(rep(c('lin','antiSym','sine'), + each=nrow(tab)/sum(rownames(tab)=='bias')),#,times=2), + #rep(c('t err','norm err'),each=nrow(tab)/2), + rownames(tab)) + } + return(tab) +} + +#' @export +polyLatex <- function(tab,full,caption='',label='tab:poly',braks='ht'){ + if(NCOL(tab)!=9) stop('This only works with polynomial degree=1,...,4') + cat(' + \\begin{table}[ht] +\\centering +\\begin{tabular}{cr|llll|llll|l',ifelse(full,'|llll|llll|l}','}'),' + \\hline \n') + if(full) cat('&&\\multicolumn{9}{c|}{$t_3$ Errors} &\\multicolumn{9}{c|}{$\\mathcal{N}(0,1)$ Errors} \\\\ \n') + + cat('&& \\multicolumn{4}{c|}{Limitless} & \\multicolumn{4}{c|}{OLS} &\\makecell[c]{Local\\\\Linear}', + ifelse(full,'\\multicolumn{4}{c|}{Limitless} & \\multicolumn{4}{c|}{OLS} &\\makecell[c]{Local\\\\Linear}',''),'\\\\ + \\multicolumn{2}{r|}{\\makecell[r]{Polynomial\\\\Degree}}&1&2&3&4&1&2&3&4&',ifelse(full,'&1&2&3&4&1&2&3&4&n/a','n/a'),' \\\\ +') + for(rr in 1:nrow(tab)){ + if(rr==1) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}Linear\\end{sideways}','Linear'),'}') + if(rr==3) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}Anti-Sym\\end{sideways}','Anti-Sym'),'}') + if(rr==5) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}One-Side\\end{sideways}','One-Side'),'}') + cat('&',rownames(tab)[rr],'&') + cat(paste(sprintf("%.1f", round(tab[rr,],1)),collapse='&')) + cat('\\\\ \n') + } + cat(' + \\hline +\\end{tabular} +\\caption{',caption,'} +\\label{',label,'} +\\end{table}\n',sep='') + +} + +#' @export +polyLatex5 <- function(tab,full,caption='',label='tab:poly'){ + if(NCOL(tab)!=11) stop('This only works with polynomial degree=1,...,5') + tab2 <- tab + for(i in 1:nrow(tab)) for(j in 1:ncol(tab)) + tab2[i,j] <- ifelse(tab[i,j]>10, + sprintf("%i",as.integer(round(tab[i,j]))), + sprintf("%.1f",round(tab[i,j],1))) + + cat(' + \\begin{table}[ht] +\\centering +\\begin{tabular}{ll|ccccc|ccccc|c',ifelse(full,'|llll|llll|l}','}'),' + \\hline \n') + if(full) cat('&&\\multicolumn{11}{c|}{$t_3$ Errors} &\\multicolumn{11}{c|}{$\\mathcal{N}(0,1)$ Errors} \\\\ \n') + + cat('&& \\multicolumn{5}{c|}{Limitless} & \\multicolumn{5}{c|}{OLS} &Local', + ifelse(full,'\\multicolumn{5}{c|}{Limitless} & \\multicolumn{5}{c|}{OLS} &Local',''),'\\\\ + && \\multicolumn{5}{c|}{Polynomial Degree}&\\multicolumn{5}{c|}{Polynomial Degree}&Linear', + ifelse(full,'\\multicolumn{5}{c|}{Polynomial Degree}&\\multicolumn{5}{c|}{Polynomial Degree}&Linear',''),'\\\\ + DGM&Measure&1&2&3&4&5&1&2&3&4&5&',ifelse(full,'&1&2&3&4&1&2&3&4&5&',''),' \\\\ +') + for(rr in 1:nrow(tab)){ + if(rr==1) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}Linear\\end{sideways}','Linear'),'}',sep='') + if(rr==3) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}Anti-Sym\\end{sideways}','\\makecell[c]{Anti-\\\\Sym}'),'}',sep='') + if(rr==5) cat('\\hline\n\\hline\n\\multirow{',ifelse(full,4,2),'}{*}{',ifelse(full,'\\begin{sideways}One-Side\\end{sideways}','Sine'),'}',sep='') + cat('&',rownames(tab)[rr],'&') + cat(paste(tab2[rr,],collapse='&')) + cat('\\\\ \n') + } + cat(' + \\hline +\\end{tabular} +\\caption{',caption,'} +\\label{',label,'} +\\end{table}\n',sep='') + +} diff --git a/inst/rdd-simulation/polynomialSimulation.Rmd b/inst/rdd-simulation/polynomialSimulation.Rmd new file mode 100644 index 00000000..244e51ac --- /dev/null +++ b/inst/rdd-simulation/polynomialSimulation.Rmd @@ -0,0 +1,110 @@ +--- +title: "Polynomial Simulation like in LRD paper" +author: "Adam C Sales & Ben B Hansen" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE,error=FALSE,message=FALSE,warning=FALSE) +knitr::opts_knit$set(root.dir = '.') +``` + +General dependencies. +```{r} +library('knitr') +library('kableExtra') +#library(flexida) +source('simulationFunctions.r') +source('ddsandwich.R') ### +source('displaySim.r') +``` + +```{r} +devtools::load_all("../..") +``` + +Initialization. Note that `nreps=0` corresponds to no simulations, +just print results from previously saved simulations. +In order to re-run the simulations, the `nreps` +variable should have been set to a positive integer before initiating this script. + +To run the simulations in parallel, using the `parallel` package in +`R`, +register a cluster, called `cl` with the desired number of nodes, with +code similar to the following: +```{r eval=FALSE,echo=TRUE} +library(parallel) +cl <- makeCluster(5) +``` + +```{r runLoadOutSim, warning=FALSE, message=FALSE} +if (!exists('nreps') ) nreps <- 0 +nreps +if (nreps) { +library('robustbase') +library('rdd') +library('RItools') +library('sandwich') +library('nnet') + +clust <- FALSE +if(require('parallel')) if(exists('cl')) if(inherits(cl,"cluster")) clust <- TRUE + +if(clust){ + clusterEvalQ(cl,{ + + library('robustbase') + library('rdd') + library('RItools') + library('sandwich') + library('nnet') + devtools::load_all("../..") + # library(flexida) + source('simulationFunctions.r') + source('ddsandwich.R') ### + source('displaySim.r') + }) +} else cl <- NULL +} +``` +## Run the simulation + +```{r runLoadPolySim, warning=FALSE, message=FALSE} +if (nreps) { + set.seed(201609) + st2 <- system.time(totalPoly <- totalPolySim(nreps,cl)) + save(totalPoly,file=paste0("./totalPolySim",Sys.Date(),".RData")) + cat(paste0(date(), ', nreps=', nreps, '\n'), + paste(c(names(st2),'\n', collapse=T)), + st2, + file='totalPolySim-runtime.txt', append=TRUE) +} else{ + psims <- sort(grep('totalPolySim',list.files('.'),value=TRUE),decreasing=TRUE) + + load(paste0('./',psims[1])) +} +``` + +The following gives the results in Table 4 of the paper, in addition +to the break-down of RMSE into bias and variance, and analogous +results for normally-distributed errors. + +```{r} +tab <- prntTab(totalPoly,5,full=TRUE,md=FALSE) +#rownames(tab) <- rep(c('level','RMSE','bias','sd'),6) +colnames(tab) <- gsub('(flex|ols)\\.se.','deg=',colnames(tab))#c(rep(paste0('deg=',1:4),2),'') +colnames(tab)[ncol(tab)] <- 'n/a' +kable(tab,format='html',caption='Full results for polynomial simulation',digits=2)%>% + kable_styling()%>% column_spec( 6,border_right=TRUE)%>%column_spec(11,border_right=TRUE)%>% + add_header_above(c(" " = 1, "flexida" = 5, "OLS" = 5, "Loc. Lin." = 1))%>% + #group_rows("$t_3$ Error",1,12)%>%group_rows("N(0,1) Error",13,24)%>% + group_rows("linear",1,5)%>%group_rows('antiSym',6,10)%>%group_rows('sine',11,15)#%>% + #group_rows("linear",13,16)%>%group_rows('antiSym',17,20)%>%group_rows('oneSide',21,24) +``` + + +Session information +```{r} +sessionInfo() +``` diff --git a/inst/rdd-simulation/polynomialSimulation.md b/inst/rdd-simulation/polynomialSimulation.md new file mode 100644 index 00000000..2936b9e9 --- /dev/null +++ b/inst/rdd-simulation/polynomialSimulation.md @@ -0,0 +1,412 @@ +--- +title: "Polynomial Simulation like in LRD paper" +author: "Adam C Sales & Ben B Hansen" +date: "05 August, 2022" +output: html_document +--- + + + +General dependencies. + +```r +library('knitr') +library('kableExtra') +#library(flexida) +source('simulationFunctions.r') +source('ddsandwich.R') ### +source('displaySim.r') +``` + + +```r +devtools::load_all("../..") +``` + +Initialization. Note that `nreps=0` corresponds to no simulations, +just print results from previously saved simulations. +In order to re-run the simulations, the `nreps` +variable should have been set to a positive integer before initiating this script. + +To run the simulations in parallel, using the `parallel` package in +`R`, +register a cluster, called `cl` with the desired number of nodes, with +code similar to the following: + +```r +library(parallel) +cl <- makeCluster(5) +``` + + +```r +if (!exists('nreps') ) nreps <- 0 +nreps +``` + +``` +## [1] 0 +``` + +```r +if (nreps) { +library('robustbase') +library('rdd') +library('RItools') +library('sandwich') +library('nnet') + +clust <- FALSE +if(require('parallel')) if(exists('cl')) if(inherits(cl,"cluster")) clust <- TRUE + +if(clust){ + clusterEvalQ(cl,{ + + library('robustbase') + library('rdd') + library('RItools') + library('sandwich') + library('nnet') + devtools::load_all("../..") + # library(flexida) + source('simulationFunctions.r') + source('ddsandwich.R') ### + source('displaySim.r') + }) +} else cl <- NULL +} +``` +## Run the simulation + + +```r +if (nreps) { + set.seed(201609) + st2 <- system.time(totalPoly <- totalPolySim(nreps,cl)) + save(totalPoly,file=paste0("./totalPolySim",Sys.Date(),".RData")) + cat(paste0(date(), ', nreps=', nreps, '\n'), + paste(c(names(st2),'\n', collapse=T)), + st2, + file='totalPolySim-runtime.txt', append=TRUE) +} else{ + psims <- sort(grep('totalPolySim',list.files('.'),value=TRUE),decreasing=TRUE) + + load(paste0('./',psims[1])) +} +``` + +The following gives the results in Table 4 of the paper, in addition +to the break-down of RMSE into bias and variance, and analogous +results for normally-distributed errors. + + +```r +tab <- prntTab(totalPoly,5,full=TRUE,md=FALSE) +#rownames(tab) <- rep(c('level','RMSE','bias','sd'),6) +colnames(tab) <- gsub('(flex|ols)\\.se.','deg=',colnames(tab))#c(rep(paste0('deg=',1:4),2),'') +colnames(tab)[ncol(tab)] <- 'n/a' +kable(tab,format='html',caption='Full results for polynomial simulation',digits=2)%>% + kable_styling()%>% column_spec( 6,border_right=TRUE)%>%column_spec(11,border_right=TRUE)%>% + add_header_above(c(" " = 1, "flexida" = 5, "OLS" = 5, "Loc. Lin." = 1))%>% + #group_rows("$t_3$ Error",1,12)%>%group_rows("N(0,1) Error",13,24)%>% + group_rows("linear",1,5)%>%group_rows('antiSym',6,10)%>%group_rows('sine',11,15)#%>% +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Full results for polynomial simulation
flexida
OLS
Loc. Lin.
deg=1 deg=2 deg=3 deg=4 deg=5 deg=1 deg=2 deg=3 deg=4 deg=5 n/a
linear
VarRelBias -0.61 -0.61 -0.76 -0.77 -0.83 0.02 0.00 -0.02 -0.01 0.01 0.25
RMSE 0.25 0.25 0.32 0.32 0.38 0.31 0.47 0.64 0.80 0.96 0.49
bias 0.00 0.00 0.00 0.00 -0.01 0.00 -0.01 -0.01 0.00 0.01 -0.01
sd 0.25 0.25 0.32 0.32 0.38 0.31 0.47 0.64 0.80 0.96 0.49
ciCover 0.77 0.76 0.65 0.64 0.57 0.95 0.95 0.95 0.94 0.94 0.70
antiSym
VarRelBias -0.64 -0.64 -0.78 -0.78 -0.84 0.01 -0.03 -0.02 -0.04 -0.06 0.14
RMSE 0.68 0.68 0.33 0.33 0.41 0.70 0.50 0.65 0.81 0.99 0.51
bias -0.63 -0.63 -0.02 -0.02 0.13 -0.62 0.16 0.17 -0.06 -0.06 0.02
sd 0.26 0.26 0.33 0.33 0.38 0.31 0.47 0.63 0.81 0.99 0.51
ciCover 0.10 0.11 0.63 0.63 0.53 0.46 0.93 0.94 0.94 0.94 0.70
sine
VarRelBias -0.63 -0.63 -0.76 -0.76 -0.83 -0.02 0.00 0.03 0.03 0.00 0.08
RMSE 1.19 1.19 0.37 0.37 0.38 1.21 0.48 0.63 0.79 0.97 0.53
bias 1.17 1.16 0.18 0.18 0.02 1.17 -0.10 -0.06 0.03 0.01 0.09
sd 0.26 0.26 0.32 0.32 0.38 0.32 0.47 0.63 0.79 0.97 0.53
ciCover 0.00 0.00 0.58 0.58 0.56 0.05 0.94 0.95 0.95 0.94 0.67
+ +```r + #group_rows("linear",13,16)%>%group_rows('antiSym',17,20)%>%group_rows('oneSide',21,24) +``` + + +Session information + +```r +sessionInfo() +``` + +``` +## R version 4.1.1 (2021-08-10) +## Platform: x86_64-w64-mingw32/x64 (64-bit) +## Running under: Windows 10 x64 (build 19044) +## +## Matrix products: default +## +## locale: +## [1] LC_COLLATE=English_United States.1252 +## [2] LC_CTYPE=English_United States.1252 +## [3] LC_MONETARY=English_United States.1252 +## [4] LC_NUMERIC=C +## [5] LC_TIME=English_United States.1252 +## +## attached base packages: +## [1] parallel stats graphics grDevices utils datasets methods +## [8] base +## +## other attached packages: +## [1] flexida_0.0.0.9000 nnet_7.3-16 RItools_0.1-18 SparseM_1.81 +## [5] rdd_0.57 Formula_1.2-4 AER_1.2-9 survival_3.2-11 +## [9] car_3.0-11 carData_3.0-4 lmtest_0.9-39 zoo_1.8-9 +## [13] sandwich_3.0-1 robustbase_0.93-9 testthat_3.1.0 kableExtra_1.3.4 +## [17] knitr_1.36 +## +## loaded via a namespace (and not attached): +## [1] svd_0.5 httr_1.4.2 pkgload_1.2.3 viridisLite_0.4.0 +## [5] splines_4.1.1 highr_0.9 cellranger_1.1.0 remotes_2.4.1 +## [9] sessioninfo_1.2.1 pillar_1.6.4 lattice_0.20-44 glue_1.4.2 +## [13] digest_0.6.28 rvest_1.0.1 colorspace_2.0-2 Matrix_1.3-4 +## [17] htmltools_0.5.2 pkgconfig_2.0.3 devtools_2.4.2 haven_2.4.3 +## [21] xtable_1.8-4 purrr_0.3.4 scales_1.1.1 webshot_0.5.2 +## [25] processx_3.5.2 svglite_2.0.0 openxlsx_4.2.4 rio_0.5.27 +## [29] tibble_3.1.5 usethis_2.1.5 ellipsis_0.3.2 cachem_1.0.6 +## [33] withr_2.4.2 cli_3.1.0 magrittr_2.0.1 crayon_1.4.2 +## [37] readxl_1.3.1 memoise_2.0.0 evaluate_0.14 ps_1.6.0 +## [41] fs_1.5.0 fansi_0.5.0 forcats_0.5.1 xml2_1.3.2 +## [45] foreign_0.8-81 pkgbuild_1.2.0 tools_4.1.1 data.table_1.14.2 +## [49] prettyunits_1.1.1 hms_1.1.1 lifecycle_1.0.1 stringr_1.4.0 +## [53] munsell_0.5.0 zip_2.2.0 callr_3.7.0 compiler_4.1.1 +## [57] systemfonts_1.0.3 rlang_0.4.11 grid_4.1.1 rstudioapi_0.13 +## [61] rmarkdown_2.11 abind_1.4-5 curl_4.3.2 R6_2.5.1 +## [65] fastmap_1.1.0 utf8_1.2.2 rprojroot_2.0.2 desc_1.4.0 +## [69] stringi_1.7.5 Rcpp_1.0.7 vctrs_0.3.8 DEoptimR_1.0-9 +## [73] xfun_0.26 +``` diff --git a/inst/rdd-simulation/simulationFunctions.r b/inst/rdd-simulation/simulationFunctions.r new file mode 100644 index 00000000..d88d19b0 --- /dev/null +++ b/inst/rdd-simulation/simulationFunctions.r @@ -0,0 +1,141 @@ +tryNA <- function(expr,num=1){ + out <- try(expr,silent=TRUE) + if(inherits(out,'try-error')) return(rep(NA,num)) + out +} + + +############### +### table 4 simulation (polynomial) +############### +mu3 <- function(x){ + ifelse(x<0,1.27*x-.5*7.18*x^2+0.7*20.21*x^3+1.1*21.54*x^4+1.5*7.33*x^5, + .84*x-0.1*3.00*x^2-0.3*7.99*x^3-0.1*9.01*x^4+3.56*x^5) +} + +### from Wasserman "all of nonparametric stats" p.99 (just the sine function really) +mu4 <- function(x) + sin(3*x) + +makeDataShapes <- function(n,shape,tdist=FALSE,tau=0,plt=FALSE){ + curve <- 3 + R <- runif(n,-1,1) + + yc <- .5*R + if(shape=='antiSym') + yc <- ifelse(abs(R)>0.5,3*R+sign(R)*(0.5-3)*0.5,yc) + if(shape=='oneSide') + yc <- yc+ ifelse(R> 0.5,3*R+(0.5-3)*0.5,yc) + if(shape=='sym') + yc <- ifelse(abs(R)> 0.5,sign(R)*3*R+(sign(R)*0.5-3)*0.5,yc) + if(shape=='cct') + yc <- mu3(R) + if(shape=='poly3') + yc <- 1.75*R^3 + if(shape=='wass') + yc <- mu4(R) + + if(plt) plot(R,yc) + + yc <- yc+if(tdist) rt(n,3) else rnorm(n) + + Z <- R>0 + + Y <- yc+Z*tau + + id <- 1:n + + data.frame(R=R,Z=Z,Y=Y,id=id) +} + + + +olsPoly <- function(dat,deg){ + mod <- lm(Y~poly(R,deg,raw=TRUE)*Z,data=dat) + Z.pos <- which(grepl('Z',names(coef(mod))) & !grepl(':',names(coef(mod)))) + setNames(c(se=summary(mod)$coef[Z.pos,'Std. Error'],est=coef(mod)[Z.pos]), + paste0(c('ols.se.','ols.est.'),deg)) +} + +flexPoly <- function(dat,deg){ + des <- rd_design(Z ~ forcing(R) + unitid(id), data=dat) + +### we can use poly(.) once issue #61 is resolved +### covForm <- Y~Z+poly(R,deg,raw=TRUE) +### until then + covForm <- "Y~R+Z" + if(deg>1) + covForm <- paste0(covForm,'+',paste(paste0("I(R^",2:deg,")"),collapse='+')) + covForm <-as.formula(covForm) + + covMod <- lmrob(covForm,data=dat,method='MM') + + res <- lmitt(Y~adopters(),design=des,offset=cov_adj(covMod),data=dat)|> + summary()|> + getElement('coefficients') + + + setNames(c(res[2,'Std. Error'],res[2,'Estimate']), + paste0(c('flex.se.','flex.est.'),deg)) +} + +llPoly <- function(dat){ + mod <- RDestimate(Y~R,data=dat) + setNames(c(mod$p[1],mod$est[1]),c('ll.p','ll.est')) +} + + +polySim <- function(n,degs=1:5,shape='lin',tdist=TRUE,tau=0){ + dat <- makeDataShapes( n=n,shape=shape,tdist=tdist,tau=tau) + + func <- function(deg){ + c(tryNA(flexPoly(dat,deg),2),tryNA(olsPoly(dat,deg),2)) + } + + c(do.call('c',lapply(degs,func)),tryNA(llPoly(dat),2)) +} + + +polyDisp <- function(sim){ + if(nrow(sim)