Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
anaperezgonzalez authored and cran-robot committed Jan 2, 2019
0 parents commit b993145
Show file tree
Hide file tree
Showing 14 changed files with 1,416 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,16 @@
Package: IPWboxplot
Type: Package
Title: Adapted Boxplot to Missing Observations
Version: 0.1.0
Author: Ana Maria Bianco [aut], Graciela Boente [aut], Ana Perez-Gonzalez [aut]
Maintainer: Ana Perez-Gonzalez <anapg@uvigo.es>
Description: Boxplots adapted to the happenstance of missing observations where drop-out probabilities can be given by the practitioner or modelled using auxiliary covariates. The paper of "Zhang, Z., Chen, Z., Troendle, J. F. and Zhang, J.(2012) <doi:10.1111/j.1541-0420.2011.01712.x>", proposes estimators of marginal quantiles based on the Inverse Probability Weighting method.
Imports: isotone
Suggests: mice, knitr, rmarkdown
License: GPL (>= 2)
NeedsCompilation: no
Encoding: UTF-8
Repository: CRAN
VignetteBuilder: knitr
Packaged: 2019-01-02 10:22:53 UTC; anapg
Date/Publication: 2019-01-02 13:30:10 UTC
13 changes: 13 additions & 0 deletions MD5
@@ -0,0 +1,13 @@
2a50f16a05b483c7e7b93177c6bcf060 *DESCRIPTION
da40518d85ad7191075c56cb91556d5b *NAMESPACE
411584c0fa2f2e8b268661198764711b *R/IPW_ASYM_Boxplot.R
aefc8c2b35aab90d53e7461422efcd06 *R/IPW_boxplot.R
b86c1d252cc3549353fa23b93c4bf0a4 *R/IPW_quantile.R
83a3d493af55fce379fb605bdcad958f *build/vignette.rds
bbad84c1d78db08d7e77b8ce31fd759b *inst/doc/my-vignette.R
e6f5503498bf8994c0df926c19040190 *inst/doc/my-vignette.Rmd
f7e15eee051677129e6f7a9792f0c76c *inst/doc/my-vignette.pdf
44a388c10e4ffa63c6580f0e5215e520 *man/IPW_ASYM_Boxplot.Rd
97840792501b47416339b6adea3d7813 *man/IPW_boxplot.Rd
2367edb2015f89943668b791e250807a *man/IPW_quantile.Rd
e6f5503498bf8994c0df926c19040190 *vignettes/my-vignette.Rmd
5 changes: 5 additions & 0 deletions NAMESPACE
@@ -0,0 +1,5 @@
exportPattern("^[[:alpha:]]+")

importFrom("graphics", "axis", "lines", "plot", "points")
importFrom("stats", "glm")
importFrom(isotone,weighted.fractile)
318 changes: 318 additions & 0 deletions R/IPW_ASYM_Boxplot.R
@@ -0,0 +1,318 @@
library(isotone)

################################################################################################
# FUNCTION USED TO DRAW THE BOXPLOTS ADAPTED TO MISSING VALUES AND SKEWED DISTRIBUTIONS #
################################################################################################
IPW.ASYM.boxplot=function(y,px=NULL,x=NULL,graph=c("IPW","both"),names=c("IPW Asymmetric Boxplot", "NAIVE Asymmetric Boxplot"), size.letter=1.2,
method=c("quartile","octile"), ctea=-4, cteb=3, lim.inf=NULL,lim.sup=NULL,main=" ",xlab = " ", ylab =" ",color="black")
{
############################################################################################################################################
# Arguments
#
# y Required. Numerical vector of values with possible missing values codified NA or NAN with length n.
# px Optional. Numerical vector of probabilities. If not provided a logistic fit is performed using x
# x Optional. The matrix of fully observed variables used to estimate the missing model with dimension nrows=n and ncol=dimension.
# graph Optional. Character string indicating if the plot contains two boxplots ("both") or
# only the boxplot computed with the inversely probability weighted quantiles("IPW").
# The default is "IPW".
# names Optional. Character string to name the boxplots.
# The default is "IPW Asymmetric Boxplot" when graph="IPW" and
# c("IPW Asymmetric Boxplot", "NAIVE Asymmetric Boxplot") when graph= "both"
# size.letter Optional. The font size of names.
# method Optional. Character string indicating if the measure of asymmetry is based on the quartiles ("quartile") or the octiles ("octile").
# The default is "quartile".
# ctea, cteb Optional. Scaling factors multiplied by the asymmetry measure to determine outlier boundary.
# When ctea=cteb=0 the IPW boxplot is obtained. ctea is a negative value while cteb is positive.
# The default ones correspond to the choices in Hubert and Vandervieren (2008) when using the medcouple.
# lim.inf Optional. The lower limit of the plot if supplied by the user.
# lim.sup Optional. The upper limit of the plot if supplied by the user.
# main Optional. Character string to title the plot. The default is "IPW Boxplot".
# xlab Optional. Character string to indicate the label of the horizontal axis.
# ylab Optional. Character string to indicate the label of the vertical axis.
# color Optional. Color for the IPW Boxplot.
############################################################################################################################################

########################################################################
# Value
#
############################################################################################################################################
# Value
#
# px Numerical vector of probabilities.
# IPW.Quartiles Numerical vector of inversely probability weighted quartiles.
# IPW.whisker Numerical vector of lower and upper whisker calculated from IPW quantiles.
# out.IPW Numerical vector of data points detected as atypical by the IPW boxplot.
# SKEW.IPW Skewness measure based on the IPW quartiles (method="quartile") or IPW octiles (method="octile").
# NAIVE.Quartiles Numerical vector of naive quartiles computed from the subset of non-missing values of y. Returned only when graph="both".
# NAIVE.whisker Numerical vector of lower and upper whisker obtained from the Naive quantiles. Returned only when graph="both".
# out.NAIVE Numerical vector of data points detected as atypical by the Naive boxplot. Returned only when graph="both".
# SKEW.NAIVE Skewness measure based on the Naive quartiles (method="quartile").
# or IPW octiles (method="octile"), computed from the subset of non-missing values of y.
# Returned only when graph="both".
########################################################################

#########################################################################
#---Preliminary checks---
#########################################################################

dimension=NCOL(x)

if (is.null(x)=="TRUE" & is.null(px)=="TRUE")
stop("ERROR: It is neccesary to supply the vector of dropout probabilities for each observation or a covariate to estimate it")

if (is.null(px)=="FALSE")
{
if (min(px)<=0)
stop("ERROR: px should take positive values")
if (NROW(y)!=NROW(px))
stop("ERROR: 'y' and 'px' have different lengths")
if (sum(is.na(px))+sum(is.nan(px))>0)
stop(" ERROR: px has missing values")
}


if (is.null(px)=="TRUE")
{
if (NROW(y)!=NROW(x))
stop("ERROR: 'y' and 'x' have different lengths")
if (sum(is.na(x))+sum(is.nan(x))>0)
stop(" ERROR: The covariates matrix has missing observations")
}

if (dimension==1){x=as.vector(x)}

if (sum(is.na(y))+sum(is.nan(y))==length(y))
stop(" ERROR: All values are missing")

GRAPH=graph[1]
COLOR=color[1]
nsamp=length(y)
METHOD=method[1]

delta=rep(1,nsamp)
for (i in 1: nsamp){
if (is.na(y[i])=="TRUE"|is.nan(y[i])=="TRUE")
{
delta[i]<-0
y[i]=NA
}
}


if(is.null(px)=="FALSE"){PROBS="The dropout probability is given"}

if(is.null(px)){
###############################################################
# Estimation of the dropout probability for each observation #
###############################################################
a=glm(delta~x,family="binomial")
px=a$fitted.values
PROBS="LOGISTIC"
}

###############################################################
# Estimation of the IPW QUANTILES AND OUTLIERS DETECTION #
###############################################################

for (i in 1:nsamp) px[i]= replace(px[i],which(px[i]<=10^(-50)),10^(-50))

peso=delta/px
tau= peso/sum(peso)
yp <- y[ tmp <- (!is.na(y))]

mediana.IPW=weighted.fractile(y, tau, p=0.5)
cuantil025.IPW= weighted.fractile(y, tau, 0.25)
cuantil075.IPW= weighted.fractile(y, tau, 0.75)
theta.IPW=c(cuantil025.IPW,mediana.IPW,cuantil075.IPW)
IQR.IPW=theta.IPW[3]-theta.IPW[1]

if(METHOD=="quartile"){
deno=cuantil075.IPW-cuantil025.IPW
if(deno==0)
print("WARNING: Too many ties. The quartiles are equal.")
deno= replace(deno,which(deno<=10^(-50)),10^(-50))
num=(cuantil075.IPW-mediana.IPW)-(mediana.IPW-cuantil025.IPW)
SKEW.IPW=num/deno
}else
{
cuantil0125.IPW= weighted.fractile(y, tau, 0.125)
cuantil0875.IPW= weighted.fractile(y, tau, 0.875)
deno=cuantil0875.IPW-cuantil0125.IPW
if(deno==0)
print("WARNING: Too many ties. The octiles are equal.")
deno= replace(deno,which(deno<=10^(-50)),10^(-50))
num=(cuantil0875.IPW-mediana.IPW)-(mediana.IPW-cuantil0125.IPW)
SKEW.IPW=num/deno
}

cteinf.IPW=ctea*(SKEW.IPW>0) - cteb*(SKEW.IPW<0)
ctesup.IPW= cteb*(SKEW.IPW>0) - ctea*(SKEW.IPW<0)

bigote.IPW=c(theta.IPW[1]-1.5*exp(cteinf.IPW*SKEW.IPW)*IQR.IPW,theta.IPW[3]+1.5*exp(ctesup.IPW*SKEW.IPW)*IQR.IPW)

bigo.IPW=bigote.IPW
bigo.IPW[1]=min(yp[yp>=bigote.IPW[1]])
bigo.IPW[2]=max(yp[yp<=bigote.IPW[2]])

outsup.IPW=yp[yp>bigote.IPW[2]]
outinf.IPW=yp[yp<bigote.IPW[1]]
out.IPW=c(outinf.IPW,outsup.IPW)

###############################################################
# Estimation of the NAIVE QUANTILES AND OUTLIERS DETECTION #
###############################################################

largo=length(yp)
tau.NAIVE=rep(1,length=largo)/largo
mediana.NAIVE=weighted.fractile(yp, tau.NAIVE, p=0.5)
cuantil025.NAIVE= weighted.fractile(yp, tau.NAIVE, 0.25)
cuantil075.NAIVE= weighted.fractile(yp, tau.NAIVE, 0.75)
theta.NAIVE=c(cuantil025.NAIVE,mediana.NAIVE,cuantil075.NAIVE)
IQR.NAIVE=theta.NAIVE[3]-theta.NAIVE[1]

if(METHOD=="quartile"){
deno=cuantil075.NAIVE-cuantil025.NAIVE
if(deno==0)
print("WARNING: Too many ties. The quartiles are equal.")
deno= replace(deno,which(deno<=10^(-50)),10^(-50))
num=(cuantil075.NAIVE-mediana.NAIVE)-(mediana.NAIVE-cuantil025.NAIVE)
SKEW.NAIVE=num/deno
}else
{
cuantil0125.NAIVE= weighted.fractile(yp, tau.NAIVE, 0.125)
cuantil0875.NAIVE= weighted.fractile(yp, tau.NAIVE, 0.875)
deno=cuantil0875.NAIVE-cuantil0125.NAIVE
if(deno==0)
print("WARNING: Too many ties. The octiles are equal.")
deno= replace(deno,which(deno<=10^(-50)),10^(-50))
num=(cuantil0875.NAIVE-mediana.NAIVE)-(mediana.NAIVE-cuantil0125.NAIVE)
SKEW.NAIVE=num/deno
}

cteinf.NAIVE=ctea*(SKEW.NAIVE>0) - cteb*(SKEW.NAIVE<0)
ctesup.NAIVE=cteb*(SKEW.NAIVE>0) - ctea*(SKEW.NAIVE<0)

bigote.NAIVE=c(theta.NAIVE[1]-1.5*exp(cteinf.NAIVE*SKEW.NAIVE)*IQR.NAIVE,theta.NAIVE[3]+1.5*exp(ctesup.NAIVE*SKEW.NAIVE)*IQR.NAIVE)

bigo.NAIVE=bigote.NAIVE
bigo.NAIVE[1]=min(yp[yp>=bigote.NAIVE[1]])
bigo.NAIVE[2]=max(yp[yp<=bigote.NAIVE[2]])

outsup.NAIVE=yp[yp>bigote.NAIVE[2]]
outinf.NAIVE=yp[yp<bigote.NAIVE[1]]
out.NAIVE=c(outinf.NAIVE,outsup.NAIVE)

####################################################################################
#---Now we draw the boxes---
####################################################################################


if(is.null(lim.inf)=="TRUE") lim.inf=min(yp)
if(is.null(lim.sup)=="TRUE") lim.sup=max(yp)
if(min(yp)<lim.inf|lim.sup<max(yp))
print("WARNING: The box is truncated")

if (GRAPH=="both")
{
###################################################################################
# We draw the NAIVE and IPW boxplots in the same graph
###################################################################################

plot(c(-0.1,3.9),c(lim.inf,lim.sup), main = main, xlab = xlab,ylab = ylab, xaxt = "n" ,pch=21,col="white",bg="white")

axis(1, at=c(1,3), labels=names,las=1,cex.axis=size.letter)

rango=0.5
punto=c(1-rango,1+rango)
lines( punto,c(theta.IPW[3],theta.IPW[3]),col=COLOR,lwd=1)
lines( punto,c(theta.IPW[1],theta.IPW[1]),col=COLOR,lwd=1)
lines( punto,c(theta.IPW[2],theta.IPW[2]),col=COLOR,lwd=3)
lines( c(punto[1],punto[1]),c(theta.IPW[1],theta.IPW[3]),col=COLOR,lwd=1)
lines( c(punto[2],punto[2]),c(theta.IPW[1],theta.IPW[3]),col=COLOR,lwd=1)

rango2=0.3
punto2=c(1-rango2,1+rango2)
lines( punto2,c(bigo.IPW[1],bigo.IPW[1]),col=COLOR,lwd=1)
lines(punto2,c(bigo.IPW[2],bigo.IPW[2]),col=COLOR,lwd=1)
lines(c(1,1),c(theta.IPW[3],bigo.IPW[2]),col=COLOR,lwd=1)
lines(c(1,1),c(theta.IPW[1],bigo.IPW[1]),col=COLOR,lwd=1)

eje=rep(1,length=length(outsup.IPW))
points(eje,outsup.IPW,col=COLOR,pch=1)
eje=rep(1,length=length(outinf.IPW))
points(eje,outinf.IPW,col=COLOR,pch=1)

punto.NAIVE=c(3-rango,3+rango)
lines( punto.NAIVE,c(theta.NAIVE[3],theta.NAIVE[3]),col=COLOR,lwd=1)
lines( punto.NAIVE,c(theta.NAIVE[1],theta.NAIVE[1]),col=COLOR,lwd=1)
lines( punto.NAIVE,c(theta.NAIVE[2],theta.NAIVE[2]),col=COLOR,lwd=3)
lines( c(punto.NAIVE[1],punto.NAIVE[1]),c(theta.NAIVE[1],theta.NAIVE[3]),col=COLOR,lwd=1)
lines( c(punto.NAIVE[2],punto.NAIVE[2]),c(theta.NAIVE[1],theta.NAIVE[3]),col=COLOR,lwd=1)

punto.NAIVE2=c(3-rango2,3+rango2)
lines( punto.NAIVE2,c(bigo.NAIVE[1],bigo.NAIVE[1]),col=COLOR,lwd=1)
lines(punto.NAIVE2,c(bigo.NAIVE[2],bigo.NAIVE[2]),col=COLOR,lwd=1)
lines(c(3,3),c(theta.NAIVE[3],bigo.NAIVE[2]),col=COLOR,lwd=1)
lines(c(3,3),c(theta.NAIVE[1],bigo.NAIVE[1]),col=COLOR,lwd=1)

eje=rep(3,length=length(outsup.NAIVE))
points(eje,outsup.NAIVE,col=COLOR,pch=1)
eje=rep(3,length=length(outinf.NAIVE))
points(eje,outinf.NAIVE,col=COLOR,pch=1)

cat("The method used to estimate the dropout probability is ",PROBS,"\n")

cat("IPW Quartiles","\n","25% 50% 75%","\n",round(theta.IPW,4),"\n")
cat("Naive Quartiles","\n","25% 50% 75%","\n",round(theta.NAIVE,4),"\n")

cat("Lower and upper whiskers of the IPW Boxplot","\n","Lower Upper","\n",round(bigo.IPW,4),"\n")
cat("Lower and upper whiskers of the Naive Boxplot","\n","Lower Upper","\n",round(bigo.NAIVE,4),"\n")

cat("Skewness measure computed from the IPW ",METHOD,"\n",round(SKEW.IPW,4),"\n")
cat("Skewness measure computed from the NAIVE ",METHOD,"\n",round(SKEW.NAIVE,4),"\n")

res=list(px=px, IPW.Quartiles=theta.IPW,IPW.whisker=bigo.IPW,out.IPW=out.IPW,SKEW.IPW=SKEW.IPW,NAIVE.Quartiles=theta.NAIVE,NAIVE.whisker=bigo.NAIVE,out.NAIVE=out.NAIVE,SKEW.NAIVE=SKEW.NAIVE)

}else{

###################################################################################
# We draw the IPW boxplot
###################################################################################

plot(c(0,2),c(lim.inf,lim.sup), main = main, xlab = xlab,ylab = ylab, xaxt = "n" ,pch=21,col="white",bg="white")

axis(1, at=c(1), labels=names[1],las=1,cex.axis=size.letter)

rango=0.5
punto=c(1-rango,1+rango)
lines( punto,c(theta.IPW[3],theta.IPW[3]),col=COLOR,lwd=1)
lines( punto,c(theta.IPW[1],theta.IPW[1]),col=COLOR,lwd=1)
lines( punto,c(theta.IPW[2],theta.IPW[2]),col=COLOR,lwd=3)
lines( c(punto[1],punto[1]),c(theta.IPW[1],theta.IPW[3]),col=COLOR,lwd=1)
lines( c(punto[2],punto[2]),c(theta.IPW[1],theta.IPW[3]),col=COLOR,lwd=1)

rango2=0.3
punto2=c(1-rango2,1+rango2)
lines( punto2,c(bigo.IPW[1],bigo.IPW[1]),col=COLOR,lwd=1)
lines(punto2,c(bigo.IPW[2],bigo.IPW[2]),col=COLOR,lwd=1)
lines(c(1,1),c(theta.IPW[3],bigo.IPW[2]),col=COLOR,lwd=1)
lines(c(1,1),c(theta.IPW[1],bigo.IPW[1]),col=COLOR,lwd=1)

eje=rep(1,length=length(outsup.IPW))
points(eje,outsup.IPW,col=COLOR,pch=1)
eje=rep(1,length=length(outinf.IPW))
points(eje,outinf.IPW,col=COLOR,pch=1)

cat("The method used to estimate the dropout probability is ",PROBS,"\n")

cat("IPW Quartiles","\n","25% 50% 75%","\n",round(theta.IPW,4),"\n")

cat("Lower and upper whiskers of the IPW Boxplot","\n","Lower Upper","\n",round(bigo.IPW,4),"\n")

cat("Skewness measure computed from the IPW ",METHOD,"\n",round(SKEW.IPW,4),"\n")

res=list(px=px, IPW.Quartiles=theta.IPW,IPW.whisker=bigo.IPW,out.IPW=out.IPW,SKEW.IPW=SKEW.IPW)
}

return(res)
}

0 comments on commit b993145

Please sign in to comment.