version 1.0

cran · Oct 19, 2023 · 2f93415 · 2f93415
commit 2f93415
Show file tree

Hide file tree

Showing 9 changed files with 407 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: qcauchyreg
+Title: Quantile Regression Quasi-Cauchy
+Version: 1.0
+Authors@R: 
+    person(given = "Jose Sergio", family = "Case de Oliveira", role = c("aut", "cre"), email = "js_cdo@hotmail.com")
+Description: Quasi-Cauchy quantile regression, proposed by de Oliveira, Ospina, Leiva, Figueroa-Zuniga and Castro (2023) <doi:10.3390/fractalfract7090667>. This regression model is useful for the case where you want to model data of a nature limited to the intervals [0,1], (0,1], [0,1) or (0,1) and you want to use a quantile approach.
+Depends: R (>= 3.5.0)
+License: GPL-3
+Encoding: UTF-8
+Imports: quantreg
+RoxygenNote: 7.2.1
+Maintainer: Jose Sergio Case de Oliveira <js_cdo@hotmail.com>
+Repository: CRAN
+URL: <https://www.r-project.org>
+Date/Publication: 2023-10-19 07:20:05 UTC
+NeedsCompilation: no
+Packaged: 2023-10-18 20:57:30 UTC; js_cd
+Author: Jose Sergio Case de Oliveira [aut, cre]
diff --git a/MD5 b/MD5
@@ -0,0 +1,8 @@
+52b8354057c6a182bbff174df2b3b4bf *DESCRIPTION
+9b719a736cecb54dbacc25b33b68bd6d *NAMESPACE
+872b60133494525fb8528826da36f983 *R/qcreg.R
+18a12572256bc19789f817eaf2a7dd6c *data/Democratization.rda
+e793c03af41e4c6a888adaae24ec6528 *data/Poverty.rda
+e6ef9722b7d881bb7e31b4b98229c4f1 *man/Democratization.Rd
+9cd221194616adf919fd7a7c7122960b *man/Poverty.Rd
+f7fb7cb5ee91461c86ccb122346a14f1 *man/qcreg.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,3 @@
+# Generated by roxygen2: do not edit by hand
+
+export(qcreg)
diff --git a/R/qcreg.R b/R/qcreg.R
@@ -0,0 +1,204 @@
+#' @title Quasi-Cauchy quantile regression
+#' @name qcreg
+#'
+#' @description Returns an object of class \code{rq()} that represents a Quasi-Cauchy quantile regression fit. Quasi-Cauchy quantile regression is useful when you want to perform quantile regression analysis on data limited to the unit range.
+#'
+#'
+#' @usage qcreg(formula, data, tau=0.5, npi=100, criterion="bic", tau_i=0.05, tau_f=0.95) 
+#'
+#' @param formula a formula object, with the response on the left of a ~ operator, and the terms, separated by + operators, on the right.
+#' @param data a \code{data.frame()} composed of the variables that will be used in the model.
+#' @param tau the quantile to be estimated, this is a number strictly between 0 and 1. The default value is 0.5.
+#' @param npi (optional) the number of Pi's that will be considered for choosing the Pi that best fits the model. The default value is 100.
+#' @param criterion (optional) criterion to decide the Pi that fits the model. Choose "aic" for AIC, "bic" for BIC and "R2" for pseudo-R2. Or, indicate a numerical value between 0<Pi<pi to use a particular Pi. The default is the automatic choice of Pi following the BIC criterion.
+#' @param tau_i (optional) if you want to estimate several quantiles simultaneously, enter the lower limit of the range of coatis you want to estimate here. The default value is 0.05.
+#' @param tau_f (optional) if you want to estimate several quantiles simultaneously, enter the upper limit of the range of coatis you want to estimate here. The default value is 0.95.
+#'
+#'
+#'
+#' @details The Quasi-Cauchy quantile regression model is based on the traditional quantile model, proposed by Koenker (2005) (<doi:10.1017/CBO9780511754098>), to which the Quasi-Cauchy link function is added, allowing the estimation of quantile regression when modeling a variable of nature limited to the ranges \[0,1\], (0,1\], \[0,1) or (0,1). For more details on Quasi-Cauchy quantile regression, see de Oliveira, Ospina, Leiva, Figueroa-Zuniga and Castro (2023) (<doi:10.3390/fractalfract7090667>).
+#'
+#'
+#' @return \code{qcreg()} returns an object of class \code{rq()}, hence all outputs of an \code{rq()} object are accessible.
+#' @return \code{index} returns the Pi value used in estimating the model and 4 goodness-of-fit criteria, namely: AIC, BIC, pseudo-R2, adjusted pseudo-R2. 
+#' @return \code{effects} returns the marginal effect on the average.
+#' @return \code{quantregplot} returns argument for graphical visualization of estimates (and confidence intervals) considering a range of values for tau instead of a single value.
+#' @return \code{pis} returns the values of Pi considered in the procedure for choosing the ideal Pi, as well as the corresponding goodness-of-fit criterion values. Available only when Pi is chosen via goodness-of-fit criteria.
+#' @author Jose Sergio Case de Oliveira
+#' 
+#' 
+#' @references \[1\] Koenker, R. W. (2005). Quantile Regression, Cambridge U. Press. <doi:10.1017/CBO9780511754098>
+#' @references \[2\] de Oliveira, J.S.C.; Ospina, R.; Leiva, V.; Figueroa-Zuniga, J.; Castro, C. (2023). Quasi-Cauchy Regression Modeling for Fractiles Based on Data Supported in the Unit Interval. Fractal Fract. 7, 667. <doi:10.3390/fractalfract7090667>
+#' 
+#' 
+#' 
+#' @examples
+#' 
+#' data("Democratization", package = "qcauchyreg")
+#'
+#' fit <- qcreg(democratization ~ schooling + press_freedom, data = Democratization, criterion=1)
+#' summary(fit)
+#' fit$effects
+#' fit$index
+#'
+#' 
+#' 
+#' 
+#' @examples
+#' 
+#'
+#' data("Poverty", package = "qcauchyreg")
+#'
+#' fit2 <- qcreg(poverty ~ population + illiteracy + pc_income, 
+#' data = Poverty, npi=50, criterion="bic")
+#' summary(fit2)
+#' fit2$effects
+#' fit2$index
+#'
+#' plot(fit2$pis, type="l")
+#'
+#' plot(fit2$quantregplot) 
+#' 
+#' 
+#' @export 
+utils::globalVariables(c("stats", "AIC", "as.formula", "model.extract", "model.frame"))
+qcreg=function(formula, data, tau=0.5, npi=100, criterion="bic", tau_i=0.05, tau_f=0.95){
+
+if(inherits(formula, "formula")==0) stop("The argument formula must be a formula. See the documentation!")
+if(inherits(data, "data.frame")==0) stop("The argument data must be a data.frame. See the documentation!")
+if(inherits(tau, "numeric")==0) stop("The argument tau must be a numeric. See the documentation!")
+if(inherits(npi, "numeric")==0) stop("The argument npi must be a numeric. See the documentation!")
+if(inherits(criterion, "character")==1){if(sum(criterion==c("aic","bic","R2"))==0){stop("Invalid character argument. See the documentation!")}}
+if(inherits(criterion, "numeric")==1){if(criterion>=pi){stop("Invalid numeric argument. 0<PI<pi. See the documentation!")}}
+if(inherits(criterion, "numeric")==1){if(criterion<=0){stop("Invalid numeric argument. 0<PI<pi. See the documentation!")}}
+if(inherits(tau_i, "numeric")==0) stop("The argument tau_i must be a numeric. See the documentation!")
+if(inherits(tau_f, "numeric")==0) stop("The argument tau_f must be a numeric. See the documentation!")
+if(tau_i>tau_f) stop("The argument tau_i must be less than tau_f. See the documentation!")
+if(tau_i>0.999) stop("The argument tau_i must be less than 1 and greater than 0. See the documentation!")
+if(tau_i<0.001) stop("The argument tau_i must be less than 1 and greater than 0. See the documentation!")
+if(tau_f>0.999) stop("The argument tau_f must be less than 1 and greater than 0. See the documentation!")
+if(tau_f<0.001) stop("The argument tau_i must be less than 1 and greater than 0. See the documentation!")
+HH=model.frame(formula,data=data)
+Y=as.matrix(model.extract(HH, "response"))
+if(max(Y)>1) stop("The response variable must be in the unit range. See the documentation!")
+if(max(Y)<0) stop("The response variable must be in the unit range. See the documentation!")
+tau=tau
+if(inherits(criterion, "numeric")==1){PI_ext=criterion}
+nrep=npi
+PI=seq(0.001,I(pi-0.001),length=nrep)
+n=dim(HH)[1]
+if(inherits(criterion, "numeric")==0){
+A=matrix(0,nrep,4)
+for(i in 1:nrep){
+G = function(x){I(tan(PI[i]*((x)-0.5)))}
+YY = G(Y)
+HHH=cbind(HH,YY)
+form2 = as.formula(paste("YY~",paste0(formula)[3])) 
+fe = quantreg::rq(form2, tau=tau, data=HHH)
+fe0 = quantreg::rq(YY~1, tau=tau, data=HHH)
+R2 = 1 - fe$rho/fe0$rho
+A[i,1]=PI[i]
+A[i,2]=R2
+A[i,3]=AIC(fe)[1]
+A[i,4]=AIC(fe, k=log(n))[1]}
+colnames(A) = c("PI","R2","AIC","BIC")
+A=data.frame(A)
+PI_o = A$PI[which.max(A$R2)]
+R_o = A[which.max(A[,2]),2]
+PI_o2 = A$PI[which.min(A$AIC)]
+AIC_o = A[which.min(A[,3]),3]
+PI_o3 = A$PI[which.min(A$BIC)]
+BIC_o = A[which.min(A[,4]),4]
+Ind = rbind(c(round(R_o,6), PI_o),c(round(AIC_o,6), PI_o2), c(round(BIC_o,6), PI_o3))
+colnames(Ind) = c("Index", "PI")
+rownames(Ind) = c("Pseudo-R2","AIC","BIC")}
+if(criterion=="R2"){ 
+G = function(x){I(tan(PI_o*((x)-0.5)))} 
+Pp=cbind(A$PI, A$R2)
+colnames(Pp) = c("PI", "Pseudo-R2")
+YY = G(Y)
+HHH=cbind(HH,YY)
+form3 = as.formula(paste("YY~",paste0(formula)[3]))
+gy_o <- quantreg::rq(form3, tau=tau, data=HHH)
+fe0 = quantreg::rq(YY~1, tau=tau, data=HHH)
+R2 = 1 - gy_o$rho/fe0$rho
+AA=matrix(0,1,5)
+colnames(AA) = c("PI","Pseudo-R2","Adjust. pseudo-R2","AIC","BIC")
+rownames(AA) = c("Values")
+AA[1,1]=PI_o
+AA[1,2]=R2
+AA[1,3]=1-(1-R2)*(n-1)/(n-dim(HH)[2])
+AA[1,4]=AIC(gy_o)[1]
+AA[1,5]=AIC(gy_o, k=log(n))[1]}
+if(criterion=="aic"){ 
+G = function(x){I(tan(PI_o2*((x)-0.5)))} 
+Pp=cbind(A$PI, A$AIC)
+colnames(Pp) = c("PI", "AIC")
+YY = G(Y)
+HHH=cbind(HH,YY)
+form3 = as.formula(paste("YY~",paste0(formula)[3]))
+gy_o <- quantreg::rq(form3, tau=tau, data=HHH)
+fe0 = quantreg::rq(YY~1, tau=tau, data=HHH)
+R2 = 1 - gy_o$rho/fe0$rho
+AA=matrix(0,1,5)
+colnames(AA) = c("PI","Pseudo-R2","Adjust. pseudo-R2","AIC","BIC")
+rownames(AA) = c("Values")
+AA[1,1]=PI_o2
+AA[1,2]=R2
+AA[1,3]=1-(1-R2)*(n-1)/(n-dim(HH)[2])
+AA[1,4]=AIC(gy_o)[1]
+AA[1,5]=AIC(gy_o, k=log(n))[1]}
+if(criterion=="bic"){ 
+G = function(x){I(tan(PI_o3*((x)-0.5)))} 
+Pp=cbind(A$PI, A$BIC)
+colnames(Pp) = c("PI", "BIC")
+YY = G(Y)
+HHH=cbind(HH,YY)
+form3 = as.formula(paste("YY~",paste0(formula)[3]))
+gy_o <- quantreg::rq(form3, tau=tau, data=HHH)
+fe0 = quantreg::rq(YY~1, tau=tau, data=HHH)
+R2 = 1 - gy_o$rho/fe0$rho
+AA=matrix(0,1,5)
+colnames(AA) = c("PI","Pseudo-R2","Adjust. pseudo-R2","AIC","BIC")
+rownames(AA) = c("Values")
+AA[1,1]=PI_o3
+AA[1,2]=R2
+AA[1,3]=1-(1-R2)*(n-1)/(n-dim(HH)[2])
+AA[1,4]=AIC(gy_o)[1]
+AA[1,5]=AIC(gy_o, k=log(n))[1]}
+if(inherits(criterion, "numeric")==1){ 
+G = function(x){I(tan(PI_ext*((x)-0.5)))} 
+YY = G(Y)
+HHH=cbind(HH,YY)
+form3 = as.formula(paste("YY~",paste0(formula)[3]))
+gy_o <- quantreg::rq(form3, tau=tau, data=HHH)
+fe0 = quantreg::rq(YY~1, tau=tau, data=HHH)
+R2 = 1 - gy_o$rho/fe0$rho
+A=matrix(0,1,5)
+colnames(A) = c("PI","Pseudo-R2","Adjust. pseudo-R2","AIC","BIC")
+rownames(A) = c("Values")
+A[1,1]=PI_ext
+A[1,2]=R2
+A[1,3]=1-(1-R2)*(n-1)/(n-dim(HH)[2])
+A[1,4]=AIC(gy_o)[1]
+A[1,5]=AIC(gy_o, k=log(n))[1]}
+betas=as.matrix(gy_o$coef)
+Xhat = apply(gy_o$x,2,mean)
+if(criterion=="R2"){
+F_E2 = (PI_o*(1+(sum(Xhat*betas))^2))^(-1)}
+if(criterion=="aic"){
+F_E2 = (PI_o2*(1+(sum(Xhat*betas))^2))^(-1)}
+if(criterion=="bic"){
+F_E2 = (PI_o3*(1+(sum(Xhat*betas))^2))^(-1)}
+if(inherits(criterion, "numeric")==1){
+F_E2 = (PI_ext*(1+(sum(Xhat*betas))^2))^(-1)}
+E2 = F_E2*betas
+Ef=cbind(E2) 
+colnames(Ef) = c("Em")
+quantreg.plot=summary(quantreg::rq(form3, data = HHH, tau = seq(tau_i, tau_f, by = 0.05)))
+if(inherits(criterion, "numeric")==0){gy_o$index=t(AA)}
+if(inherits(criterion, "numeric")==1){gy_o$index=t(A)}
+gy_o$effects=Ef
+gy_o$quantregplot=quantreg.plot
+if(inherits(criterion, "numeric")==0){gy_o$pis=Pp}
+return(gy_o)}
diff --git a/data/Democratization.rda b/data/Democratization.rda
diff --git a/data/Poverty.rda b/data/Poverty.rda
diff --git a/man/Democratization.Rd b/man/Democratization.Rd
@@ -0,0 +1,52 @@
+\name{Democratization}
+\alias{Democratization}
+
+\title{Estimation of Democratization Index}
+
+\description{
+Democratization index from "The Quality of Government Basic Dataset” of the University of Gothenburg.
+}
+
+\usage{data("Democratization")}
+
+\format{
+A data frame containing 138 observations on 4 variables.
+\describe{
+  \item{\code{democratization}}{democratization index of 138 countries, which can take values in [0, 1].}
+  \item{\code{gdp}}{real gross domestic product per capita in thousands of dollars.}
+  \item{\code{schooling}}{average schooling (in years) of people aged 25 years or more.}
+  \item{\code{press_freedom}}{press freedom index of 138 countries. A lower value indicating greater press freedom, while a higher value indicates limited press freedom.}
+}
+}
+
+\details{
+The data set was collected by "The Quality of Government Basic Dataset” of the University of Gothenburg. The data refers to 138 countries observed in 2010. The dependent variable is the democratization index (\code{democratization}), which takes values between 0 and 1 . The closer to 1, the greater the democratization. The explanatory variables are real gross domestic product per capita in thousands of dollars (\code{gdp}), average schooling (in years) of people aged 25 years or more (\code{schooling}) and press freedom index (\code{press_freedom}), a lower value indicating greater press freedom, while a higher value indicates limited press freedom.
+}
+
+\source{
+\url{https://www.gu.se/en/quality-government/qog-data/data-downloads/basic-dataset}
+}
+
+\references{
+de Oliveira, J.S.C.; Ospina, R.; Leiva, V.; Figueroa-Zuniga, J.; Castro, C. (2023).
+  \emph{Quasi-Cauchy Regression Modeling for Fractiles Based on Data Supported in the Unit Interval}. Fractal Fract. 7, 667. \url{doi:10.3390/fractalfract7090667}
+
+}
+
+
+\examples{
+
+data("Democratization", package = "qcauchyreg")
+
+## de Oliveira, J.S.C.; Ospina, R.; Leiva, V.; Figueroa-Zuniga, J.; Castro, C. (2023)
+fit <- qcreg(democratization ~ schooling + press_freedom, data = Democratization, npi=50)
+summary(fit)
+
+fit$effects
+
+plot(fit$pis, type="l")
+
+plot(fit$quantregplot)
+
+
+}
diff --git a/man/Poverty.Rd b/man/Poverty.Rd
@@ -0,0 +1,46 @@
+\name{Poverty}
+\alias{Poverty}
+
+\title{Percentage of extremely poor.}
+
+\description{
+Percentage of extremely poor, by Brazilian municipality for the year 2010. Data made available by \url{http://www.atlasbrasil.org.br/}.
+}
+
+
+
+\usage{data("Poverty")}
+
+\format{
+A data frame containing 5501 observations on 4 variables.
+\describe{
+  \item{\code{poverty}}{Percentage of extremely poor, by Brazilian municipality for the year 2010.}
+  \item{\code{population}}{Total municipal population in 2010, in thousands.}
+  \item{\code{illiteracy}}{Municipal illiteracy rate of people aged 15 or over, in 2010.}
+  \item{\code{pc_income}}{Municipal per capita income (in Brazilian reais), in 2010.}
+}
+}
+
+\details{
+The dataset is available at \url{http://www.atlasbrasil.org.br/}. The data refer to 5501 Brazilian municipalities observed in 2010. The dependent variable is the percentage of people in extreme poverty (\code{poverty}). Naturally, it is a limited variable that takes values in the unit range. The explanatory variables are total population in thousands of people (\code{population}), illiteracy rate of people aged 15 or over (\code{illiteracy}) and per capita income in Brazilian reais (\code{pc_income}). Municipalities with missing information on any of the variables were excluded from the sample.
+}
+
+\source{
+\url{http://www.atlasbrasil.org.br/}
+}
+
+
+
+\examples{
+
+data("Poverty", package = "qcauchyreg")
+
+reg <- qcreg(poverty ~ population + illiteracy + pc_income, data = Poverty, npi=50)
+summary(reg)
+reg$effects
+
+plot(reg$pis, type="l")
+
+plot(reg$quantregplot)
+
+}