version 0.2.0

cran · Nov 19, 2016 · 5da062d · 5da062d
1 parent cd1efbc
commit 5da062d
Show file tree

Hide file tree

Showing 12 changed files with 198 additions and 72 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,18 +1,18 @@
 Package: uqr
 Type: Package
 Title: Unconditional Quantile Regression
-Version: 0.1.0
-Date: 2016-10-18
+Version: 0.2.0
+Date: 2016-11-19
 Author: Stefano Nembrini <stefanonembrini@gmail.com>
 Maintainer: Stefano Nembrini <stefanonembrini@gmail.com>
-Description: Estimation and Inference for Unconditional Quantile Regression (see Firpo et al. (2009). <DOI:10.3982/ECTA6822>).
+Description: Estimation and Inference for Unconditional Quantile Regression for cross-sectional and panel data (see Firpo et al. (2009). <DOI:10.3982/ECTA6822>).
 License: GPL (>= 2)
 LazyData: TRUE
 Imports: stats, base
 Depends: R(>= 3.3.1), Hmisc(>= 3.17-4), gtools(>= 3.5.0)
 Suggests: knitr
 RoxygenNote: 5.0.1
 NeedsCompilation: no
-Packaged: 2016-10-30 10:38:18 UTC; Nembrini
+Packaged: 2016-11-19 13:59:28 UTC; Nembrini
 Repository: CRAN
-Date/Publication: 2016-10-30 16:58:08
+Date/Publication: 2016-11-19 15:34:43
diff --git a/MD5 b/MD5
@@ -1,11 +1,14 @@
-245fadf2e85be0245b74c7bc84d463bd *DESCRIPTION
-fa29a58a65859ad5953d644deb6b7819 *NAMESPACE
+b908ab14d23964d82d31b0c7df494269 *DESCRIPTION
+6d4df09a93753bb3b2433383048c0108 *NAMESPACE
 05811d56576c5db60c2a1ed898e7788f *R/engel.R
-c36e02ac88a7264bdf481731c8d933eb *R/urq.R
-53d1cc58de7604b3603e3bc6ebecde80 *R/urqCI.R
-0da5e41bb148ee8d51324c37c57a5765 *R/urqb.R
+f8bf6b110c60582ae8a97654666347b8 *R/trust.R
+6844e8bb00b4dd98a4f610ff856a5696 *R/urq.R
+3992635ed9aea52e4ae65595765f8d4e *R/urqCI.R
+936084ec5292cf56238f820833716fcb *R/urqb.R
 2cb41ec2ee5de9777a3577f2e441e17c *data/engel.rda
+f1680069020d86c8eb8ef2b06c7172b9 *data/trust.rda
 c8b6bbd3098f11a999aa85ec1f93a8b3 *man/Engel-Data.Rd
-5abde58b4881e1465c891ff745b57d28 *man/urq.Rd
-42f92ba60d60749b8f8ce2f6698b7e55 *man/urqCI.Rd
-8814ca35adf7e8dd8a6f4fcf5acb2853 *man/urqb.Rd
+e0474c586ab0c997f1611dfa3f26da7a *man/Trust-Data.Rd
+fa216a5f316b8e110f354bd3a44ad2c6 *man/urq.Rd
+2ed594aee10daaa11b89881671d855a0 *man/urqCI.Rd
+150d5e3a8302fbbebacdbc1d1e543f20 *man/urqb.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(urq)
 export(urqCI)
+export(urqb)
 import(Hmisc)
 import(grDevices)
 import(graphics)

diff --git a/R/trust.R b/R/trust.R
@@ -0,0 +1,13 @@
+#' @title Trust Data
+#' @name Trust Data
+#' @description Data on 12 European Countries 
+#' @format A data frame containing 180 observations for 12 countries. Data taken from the Eurobarometer, The Hertie School of Governance. 
+#' \describe{
+#' \item{Trust_in_the_ECB}{Trust in the European Central Bank}
+#' \item{Trust_in_the_EU}{Trust in the European Union}
+#' \item{countryname}{countryname identifier}
+#' \item{year}{year identifier}
+#' }
+#' @docType data
+#' @usage data(trust)
+"trust"
diff --git a/R/urq.R b/R/urq.R
@@ -1,53 +1,100 @@
 #' @title Unconditional Quantile Regression
 #'
 #'@description Returns an object of class \code{urq}. that represents an Unconditional Quantile Regression Fit
-#'@usage urq(formula,data,tau=NULL,kernel=NULL)
+#'@usage urq(formula,data,tau=NULL,kernel=NULL,cre=NULL,id=NULL)
 #'@param formula a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right.
 #'@param data a \code{dataframe} in which to interpret the variables named in the formula 
 #'@param tau the quantile(s) to be estimated, this must be a number (or a vector of numbers) strictly between 0 and 1.
 #'@param kernel a character string giving the smoothing kernel to be used. This must match one of "gaussian", "rectangular", "triangular", "epanechnikov", "biweight", "cosine" or "optcosine", with default "gaussian".
-#'@details This function returns a Recentered Influence Function regression of given quantiles as proposed by Firpo, S., Fortin, N. M., & Lemieux, T. (2009).
+#'@param cre The CRE formula (right hand side only) is a specification of the variables in the CRE component. These are possibly endogenous variables (in the sense that they are affected by the fixed effects) and must be time-varying. If left empty, a cross-sectional analysis is performed.
+#'@param id defines the structure of the panel.
+#'@details This function returns a Recentered Influence Function regression of given quantiles as proposed by Firpo, S., Fortin, N. M., & Lemieux, T. (2009). Panel data analysis is performed extending the correlated random effects (CRE)  model by Mundlak (1978) and Chamberlain (1984) to an unconditional quantile regression framework. See Abrevaya and Dahl (2008) and by Bache et al (2011) for more details.
 #'@export 
 #'@import
 #'stats
 #'@references Firpo, S., Fortin, N. M., & Lemieux, T. (2009). Unconditional quantile regressions. Econometrica, 77(3), 953-973.
+#'@references Mundlak, Y. 1978. On the pooling of time series and cross section data. Econometrica 46: 69-85.
+#'@references Chamberlain G (1984) Panel Data. In: Griliches Z, Intriligator MD (eds) Handbook of Econometrics, vol 2, Elsevier Science B. V., pp 1247-1318
+#'@references Abrevaya, Jason and Christian M. Dahl. 2008. The effects of birth inputs on birthweight. Jounal of Business and Economic Statistics. 26-4. Pages 379-397.
+#'@references Bache, Stefan Holst; Christian M. Dahl; Johannes Tang Kristensen. 2011. Headlights on tobacco road to low birthweight - Evidence from a battery of quantile regression estimators and a heterogeneous panel.
 #'@keywords NULL
 #'@seealso \code{\link{density},\link[uqr]{urqCI}}
 #'@return NULL
 #'@examples 
+#' ### example for cross-sectional data ###
+#' 
 #'data(engel)
 #'formula = foodexp ~ income
 #' rifreg=urq(formula,data = engel)
-urq <- function(formula,data,tau=NULL,kernel=NULL) {
+#' 
+#' ### example for panel data ###
+#' 
+#' data(trust)
+#'formula=Trust_in_the_ECB~Trust_in_the_EU+Trust_in_National_Government
+#'cre=~Trust_in_the_EU+Trust_in_National_Government
+#'rif=urq(formula,data=trust,cre=cre,id="countryname")
+urq <- function(formula,data,tau=NULL,kernel=NULL,cre=NULL,id=NULL) {
   #call <- match.call()
   #mf <- match.call(expand.dots = FALSE)
 
   if(is.null(kernel)) kernel<-"gaussian"
   if(is.null(tau)) tau<-1:9/10
   indicator<-function(condition) ifelse(condition,1,0)
   c1=NULL
+  idx.dep=which(colnames(data)==all.vars(formula)[1])
+  quantile(x=data[,idx.dep],probs=tau)->q
+  density(data[,idx.dep],kernel=kernel)->f
+  approx(f$x, f$y, q)$y->fq
   for (i in 1:length(tau)){
     #which(is.na(data[,idx.dep]))->miss
     #data2=data[-miss,]
-    idx.dep=which(colnames(data)==all.vars(formula)[1])
+
+
 
 
-    quantile(x=data[,idx.dep],probs=tau)->q
-    density(data[,idx.dep],kernel=kernel)->f
-    approx(f$x, f$y, q)$y->fq
+
     RIF=q[i]+((tau[i]-indicator(data[,idx.dep]<q[i]))/fq[i])
     data2=data
     data2[,idx.dep]=RIF
+
+    if (!is.null(cre)){
+      if(is.null(id)){print("id variable is missing")}
+      vars=all.vars(cre)
+      new=rep(NA,length(vars))
+      old=all.vars(formula)[2:length(all.vars(formula))]
+      dep=all.vars(formula)[1]
+      for(i in 1:length(vars)){
+
+        data2[,dim(data2)[2]+1] <- ave(data[,which(colnames(data2)==vars[i])], group=data2[,which(colnames(data2)==id)])
+        new[i]=colnames(data2)[dim(data2)[2]]<-paste(vars[i],sep="","_M")
+      }
+      formula_cre=paste(dep, "~", paste(c(old,new), collapse=" + "))
+      lm=lm(formula_cre,data2,weights=NULL)
+      c=coef(lm)
+      c1=rbind(c1,c)
+      #print(i)
+      data2[,idx.dep]=data[,idx.dep]
+    }
+
+  else{
     lm=lm(formula,data2,weights=NULL)
-    c=coef(lm)
-    c1=rbind(c1,c)
-    #print(i)
-    data2=NULL
+  c=coef(lm)
+  c1=rbind(c1,c)
+#print(i)
+  data2=NULL}
+
+
   }
   RIF=t(c1)
   colnames(RIF)<-paste("tau=",tau)
+  if (!is.null(cre)){formula=formula_cre}
+  if (!is.null(cre)){
+  data=data2
+    #data=data.frame(data,data2[,dim(data)[2]+1])
+  #colnames(data[,idx.dep])=all.vars(formula)[1]
+  }
 
-  fit=list (coefficients=RIF,tau=tau,formula=formula)
+  fit=list (coefficients=RIF,tau=tau,formula=formula,id=id,data=data)
   fit$call <- sys.call()
   class(fit) <- "urq"
   #return(call)

diff --git a/R/urqCI.R b/R/urqCI.R
@@ -1,9 +1,8 @@
-#' @title Unconditional Quantile Regression
+#' @title Inference for Unconditional Quantile Regression 
 #'
 #'@description Returns a summary list for an Unconditional Quantile Regression Fit.
-#'@usage urqCI(urq,data,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cluster=NULL)
+#'@usage urqCI(urq,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cluster=NULL)
 #'@param urq an object of class \code{urq}.
-#'@param data a \code{dataframe} in which to interpret the variables named in the \code{formula} .
 #'@param R the number of bootstrap replications to be used.
 #'@param seed random number generator.
 #'@param colour colour of plot: default is lightblue.
@@ -12,20 +11,31 @@
 #'@param cluster column name of variable to be used in order to obtain cluster robust standard errors and confidence intervals.
 #'@details This function provides standard errors and confidence intervals for the Recentered Influence Function regression fit \code{urq}. If the cluster option is used, standard errors are cluster robust according to the variable supplied by the user, otherwise observations are assumed to be iid. 
 #'Inference is obtained though a bayesian bootstrap drawing observation (or cluster) weights from a Dirichlet distribution. 
-#'If the option graph is TRUE, then a quantile plot is provided showing estimates and confidence intervals (t approximation [polygon] and percentile bootstrap [dashed lines]).
+#'If the option graph is TRUE, then a quantile plot is provided showing estimates and confidence intervals (t approximation).
 #' @references Rubin, D. B. (1981). The bayesian bootstrap. The annals of statistics, 9(1), 130-134.
 #'@export 
 #'@import
 #'graphics grDevices
 #'@keywords NULL
 #'@seealso \code{\link{urq}}
 #'@return NULL
-#'@examples data(engel)
+#'@examples 
+#'### example for cross-sectional data ###
+#'
+#'data(engel)
 #'formula=foodexp ~ income
 #'rifreg=urq(formula=formula,data=engel)
-#'summary=urqCI(urq = rifreg,data = engel,R = 100,graph = TRUE,seed = 1234)
+#'summary=urqCI(urq = rifreg,R = 100,graph = TRUE,seed = 1234)
+#'
+#'### example for panel data ###
+#'
+#'data(trust)
+#'formula=Trust_in_the_ECB~Trust_in_the_EU+Trust_in_National_Government
+#'cre=~Trust_in_the_EU+Trust_in_National_Government
+#'rif=urq(formula,data=trust,cre=cre,id="countryname")
+#'summary=urqCI(urq = rif,R = 100,graph = TRUE,seed = 1234,cluster="countryname")
 
-urqCI=function(urq,data,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cluster=NULL){
+urqCI=function(urq,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cluster=NULL){
   if (is.null(seed)) {
     seed <- runif(1, 0, .Machine$integer.max)
   }
@@ -34,26 +44,26 @@ urqCI=function(urq,data,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cl
   set.seed(seed)
   boot=matrix(ncol=dim(urq$coefficients)[1]*length(urq$tau),nrow=R)
 
+  #data=urq$data
   cat("Bootstrapping\n")
   #print(R)
   for(i in 1:R){
-    if (is.null(cluster)){data$wts=as.numeric(rdirichlet(1, rep(1, dim(data)[1])))}
+    if (is.null(cluster)){urq$data$wts=as.numeric(rdirichlet(1, rep(1, dim(urq$data)[1])))}
     else{
-      idx.cluster=which(colnames(data)==cluster)
-      clusters=names(table(data[,idx.cluster]))
-      data=data[order(data[,idx.cluster]),] 
+      idx.cluster=which(colnames(urq$data)==cluster)
+      clusters=names(table(urq$data[,idx.cluster]))
+      urq$data=urq$data[order(urq$data[,idx.cluster]),] 
       freq=as.numeric(rdirichlet(1, rep(1, length(clusters))))
-      freq=rep(freq,times=table(data[,idx.cluster]))
-      data$wts=freq/sum(freq)
-
+      freq=rep(freq,times=table(urq$data[,idx.cluster]))
+      urq$data$wts=freq/sum(freq)
     }
-
-    boot[i,]=as.numeric(urqb(urq$formula,data=data,tau=urq$tau,cluster=cluster)$coefficients)
+    #data=data,tau=tau,formula=formula,kernel=NULL,cluster=cluster
+    boot[i,]=as.numeric(urqb(data=urq$data,tau=urq$tau,formula=urq$formula,kernel=NULL,cluster=cluster)$coefficients)
     #print(i)
     #weights=NULL
     if (!i %% 50)cat(".")
   }
-
+  
   if (R<=50) cat("Number of replications is small, please consider increasing it.")
   names.urq=rownames(urq$coefficients)
   if(is.null(confidence)) confidence<-0.95
@@ -75,8 +85,8 @@ urqCI=function(urq,data,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cl
   tP <- 2 * pt(-abs(urq.vector/se), dim(boot)[1] - 1)
   lower <- urq.vector + qt(alpha/2, dim(boot)[1] - 1) * se
   upper <- urq.vector + qt(1 - alpha/2, dim(boot)[1] - 1) * se
-  recap=cbind(urq.vector,t(apply(boot,2,quantile,c(alpha/2,1-alpha/2),na.rm=TRUE)),lower,upper,tP,se,rep(urq$tau,each=dim(urq$coefficients)[1]))
-  colnames(recap)=c("Coef","Perc. Lower","Perc. Upper","t Lower","t Upper","P>|t|","Std.Err.","tau")
+  recap=cbind(urq.vector,lower,upper,tP,se,rep(urq$tau,each=dim(urq$coefficients)[1]))
+  colnames(recap)=c("Coef","t Lower","t Upper","P>|t|","Std.Err.","tau")
 
   if (isTRUE(graph) && isTRUE(length(urq$tau)>1)){
     for(i in c(2:dim(urq$coefficients)[1])){
@@ -89,10 +99,8 @@ urqCI=function(urq,data,R=20,seed=NULL,colour=NULL,confidence=NULL,graph=TRUE,cl
       plot(rep(urq$tau, 2), c(cfi[, 2], cfi[, 3]),xaxt = "n" ,type = "n",main=cond,xlab="",ylab="",ylim=c(min=min(cfi[,1:5],na.rm=TRUE),max=max(cfi[,1:5],na.rm=TRUE)))
       axis(1, at=urq$tau, labels=urq$tau,cex.axis=.8,las=2)
 
-      polygon(c(urq$tau, rev(urq$tau)),c(cfi[,4],rev(cfi[,5])),col=colour,border=NA)
+      polygon(c(urq$tau, rev(urq$tau)),c(cfi[,2],rev(cfi[,3])),col=colour,border=NA)
       points(urq$tau,cfi[,1],type="b",lty="longdash",cex = 0.5,pch = 20,col="blue")
-      points(urq$tau,cfi[,2],type="l",lty="longdash",cex = 0.5,pch = 20,col="black")
-      points(urq$tau,cfi[,3],type="l",lty="longdash",cex = 0.5,pch = 20,col="black")
       abline(h=0)
     }
   }

diff --git a/R/urqb.R b/R/urqb.R
@@ -1,43 +1,49 @@
 #' @title Unconditional Quantile Regression
 #'
 #'@description Function Not intended for user. Returns an object of class "urq" that represents an Unconditional Quantile Regression Fit. 
-#'@usage urqb(data,tau=NULL,formula,kernel=NULL,cluster=cluster,wts=wts)
+#'@usage urqb(data,tau,formula,kernel=NULL,cluster=cluster)
 #'@param formula a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right.
 #'@param data a data.frame in which to interpret the variables named in the formula 
 #'@param tau the quantile(s) to be estimated, this must be a number (or a vector of numbers) strictly between 0 and 1.
 #'@param kernel a character string giving the smoothing kernel to be used. This must match one of "gaussian", "rectangular", "triangular", "epanechnikov", "biweight", "cosine" or "optcosine", with default "gaussian".
 #'@param cluster column name of variable to be used in order to obtain cluster robust standard errors.
-#'@param wts weights
 #'@import gtools Hmisc
-#'@keywords internal
+#'@keywords NULL
+#'@export
 #'@seealso \code{\link{density},\link{urq}}
 #'@return NULL
 #'@examples NULL
-urqb <- function(data,tau=NULL,formula,kernel=NULL,cluster=cluster,wts=wts) {
+urqb <- function(data=data,tau=tau,formula=formula,kernel=NULL,cluster=cluster) {
   #library(Hmisc)
   if(is.null(kernel)) kernel<-"gaussian"
-  if(is.null(tau)) tau<-1:9/10
+  #if(is.null(tau)) tau<-1:9/10
   indicator<-function(condition) ifelse(condition,1,0)
   c1=NULL
   for (i in 1:length(tau)){
     #which(is.na(data[,idx.dep]))->miss
     #data2=data[-miss,]
+    formula=as.formula(formula)
+    as.data.frame(data)
     idx.dep=which(colnames(data)==all.vars(formula)[1])
     data2=data
 
-    wtd.quantile(data2[,idx.dep],tau,weights=data2$wts,normwt=TRUE)->q
-    density(data2[,idx.dep],kernel=kernel,weights=data2$wts)->f
+
+    wtd.quantile(data2[,1],tau,weights=data$wts,normwt=TRUE)->q
+    density(data[,1],kernel=kernel,weights=data$wts)->f
     approx(f$x, f$y, q)$y->fq
     RIF=q[i]+((tau[i]-indicator(data2[,idx.dep]<q[i]))/fq[i])
 
     data2[,idx.dep]=RIF
-    lm=lm(formula,data2,weights=wts)
-    c=coef(lm)
-    c1=rbind(c1,c)
-    #print(i)
-    data2=NULL
+
+
+      lm=lm(formula,data2,weights=NULL)
+      c=coef(lm)
+      c1=rbind(c1,c)
+      #print(i)
+      data2=NULL
+    }
 
-  }
+
   RIF=t(c1)
   colnames(RIF)<-paste("tau=",tau)
   fit=list (coefficients=RIF,tau=tau,formula=formula)

diff --git a/data/trust.rda b/data/trust.rda
diff --git a/man/Trust-Data.Rd b/man/Trust-Data.Rd