version 1.0.0

cran · Jan 20, 2024 · 2e04897 · 2e04897
commit 2e04897
Show file tree

Hide file tree

Showing 26 changed files with 972 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,19 @@
+Package: GiniDecompLY
+Title: Gini Decomposition by Income Sources
+Version: 1.0.0
+Author: Abdessamad Ait Mbarek 
+Maintainer: Abdessamad Ait Mbarek <abdessamad.ambarek@gmail.com>
+Description: Estimation of the effect of each income source on income inequalities based on the decomposition of Lerman and Yitzhaki (1985) <doi:10.2307/1928447>. 
+License: GPL-3
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.2.3
+Imports: dplyr, tidyr, magrittr,
+Suggests: knitr, rmarkdown, testthat (>= 3.0.0)
+Config/testthat/edition: 3
+VignetteBuilder: knitr
+Depends: R (>= 2.10)
+NeedsCompilation: no
+Packaged: 2024-01-19 10:22:47 UTC; HP
+Repository: CRAN
+Date/Publication: 2024-01-19 11:10:09 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,25 @@
+2817a72efbfde78f04650e45086a3f8a *DESCRIPTION
+1d44c66b7a6ea1dd7bfe1eb167c9824e *NAMESPACE
+6663a0e3b5683b07bcb78e3587eb817b *R/data.R
+6fbad91743b78db84d5bb8bce26fb530 *R/gini_correlation.R
+40db9696b791d053c4a18c4b11611ddb *R/gini_decomposition_funs.R
+4b4b873ee24074e04aa619e72ecd1f5f *R/gini_source_decomp_comp.R
+ec36d145ccb149a09fcc6214590228a9 *R/gini_wtd_ord.R
+d434ddc3486986d098b0e8e494720cee *R/utils-globals.R
+1c6e8b08e856faebb9d1bb6d43787883 *R/utils-pipe.R
+1dd3e994a0f6e76b1cfea65d25469d94 *README.md
+2eceed669e366f63bd9e7dbd831b156e *build/vignette.rds
+9c0fcffd73e08250842286159998d4e7 *data/sample_income_data.rda
+73e539e1cdec8b66bbe34473d7983415 *inst/doc/GiniDecompLY.R
+ef78ae3396a6c85df30f287130ba81a9 *inst/doc/GiniDecompLY.Rmd
+a57c10c82756d9d568fb744d0726f437 *inst/doc/GiniDecompLY.html
+2ff058fceeb9830f0426665f6957a03b *man/gini_corr.Rd
+0cc976969b3a4b95fb2c05ff95b43db9 *man/gini_decomp_source.Rd
+1d699dff2c31fd3fd37abb1d97b26442 *man/gini_income_elasticity.Rd
+d755af146aadbe1d4d7e940a610f6bbe *man/pipe.Rd
+1ac8f02aed157213eb9335e1ae6b3954 *man/sample_income_data.Rd
+e71e1d396eb3d91e7a72d9187b4e074d *man/social_welfare_impact.Rd
+19636cd1ad09d4061741306f950869ed *tests/testthat.R
+225e3812b897786befc574f805c96dc2 *tests/testthat/test-gini_wtd_ord.R
+225e3812b897786befc574f805c96dc2 *tests/testthat/test-test_file.R
+ef78ae3396a6c85df30f287130ba81a9 *vignettes/GiniDecompLY.Rmd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,10 @@
+# Generated by roxygen2: do not edit by hand
+
+export("%>%")
+export(gini_corr)
+export(gini_decomp_source)
+export(gini_income_elasticity)
+export(social_welfare_impact)
+importFrom(magrittr,"%>%")
+importFrom(stats,weighted.mean)
+importFrom(tidyr,pivot_longer)
diff --git a/R/data.R b/R/data.R
@@ -0,0 +1,18 @@
+#' Sample income data
+#'
+#' A simulated data set of household income sources.
+#'
+#'
+#' @format
+#' A data frame with 200 rows and 6 columns:
+#' \describe{
+#'   \item{region}{Whether the household's residence is urban or rural}
+#'   \item{sample_wgt}{Sample weight}
+#'   \item{wage}{Wage and salary}
+#'   \item{self_employment_rev}{Earnings from self-employment }
+#'   \item{farming_rev}{Income derived from agricultural activities}
+#'   \item{other_rev}{Other income sources}
+#'   ...
+#' }
+#'
+"sample_income_data"
diff --git a/R/gini_correlation.R b/R/gini_correlation.R
@@ -0,0 +1,32 @@
+#' Gini correlation index
+#'
+#' This function calculates the Gini correlation between two distributions:
+#' the Gini index of \code{x} computed with the observations ranked by \code{y},
+#' divided by the Gini index of \code{x} ranked by itself.
+#'
+#' Observations with a missing value in \code{x}, \code{y} or \code{weights}
+#' are dropped before either index is computed, so that the numerator and the
+#' denominator are always based on the same set of observations.
+#'
+#' @param x a numeric vector holding the distribution of interest (typically non-negative values, e.g. one income source).
+#' @param y a numeric vector, of the same length as \code{x}, containing the distribution with the rank information.
+#' @param weights an optional vector of weights to apply in computation. Should be NULL or a numeric vector.
+#' @returns The value of the Gini correlation, should be between -1 and 1.
+#' @export
+#' @examples
+#'
+#' # Calculate the gini correlation between the salary and total income distributions
+#'
+#'
+#' Salary_distribution <- sample_income_data$wage
+#' Total_income_distribution <- rowSums(sample_income_data[3:6])
+#'
+#' gini_corr(Salary_distribution, Total_income_distribution)
+#'
+#'
+#' @references
+#'
+#' E. Schechtman and S. Yitzhaki (1999) \emph{On the proper bounds of the Gini correlation},
+#' Economics Letters,Volume 63, Issue 2, p. 133-138, ISSN 0165-1765
+#'
+#' Handcock, M. (2016), \emph{Relative Distribution Methods in the Social Sciences}, Springer-Verlag, Inc., New York, 1999 ISBN 0387987789
+#'
+#'
+
+
+gini_corr <- function(x, y, weights = NULL) {
+  if (length(x) != length(y)) {
+    stop("`x` and `y` must have the same length", call. = FALSE)
+  }
+
+  # Restrict all inputs to complete cases up front: the two Gini computations
+  # below must see exactly the same observations, and `x`, `y` and `weights`
+  # must stay aligned element by element.
+  keep <- !(is.na(x) | is.na(y))
+  if (!is.null(weights)) {
+    keep <- keep & !is.na(weights)
+    weights <- weights[keep]
+  }
+  x <- x[keep]
+  y <- y[keep]
+
+  # Numerator: Gini of x ranked by y; denominator: Gini of x ranked by itself.
+  gini_wtd_ord(x, y, weights) / gini_wtd_ord(x, weights = weights)
+}
diff --git a/R/gini_decomposition_funs.R b/R/gini_decomposition_funs.R
@@ -0,0 +1,120 @@
+#' Gini decomposition by income sources
+#'
+#' This function provides a decomposition of Gini index by income sources based on the approach of Lerman and Yitzhaki (1985) <doi:10.2307/1928447> .
+#' It provides a set of indicators :
+#' - `income_source`: Column indicating each income source passed into the function call.
+#' - `Share`: Column indicating the share of the income source to the total income.
+#' - `Gini`: Column showing the Gini index for each income source.
+#' - `Gini_corr`: Column showing the Gini correlation between the income source and the total income.
+#' - `Absolute_Contribution`: Column showing the  absolute contribution of each income source to the global Gini index.
+#' - `Relative_Contribution`: Column indicating the relative contribution of each income source to the global Gini index.
+#'
+#' @param .data A data frame, or data frame extension (e.g. a tibble)
+#' @param ... One or more unquoted expressions separated by commas indicating income sources to consider in the decomposition. Variable names can be used as if they were positions in the data frame.
+#' @param .by A column to group the calculations by.
+#' @param .wgt an optional vector of weights to apply in computation. Should be NULL or a numeric vector.
+#' @returns An object of class `data.frame` containing all the calculated indicators. The data.frame is grouped by the columns passed into `.by` argument.
+#'
+#' @export
+#' @examples
+#'
+#' sample_income_data %>%
+#'   gini_decomp_source(wage, self_employment_rev, farming_rev, other_rev)
+#'
+#' gini_decomp_source(sample_income_data, 3:6, .by = region, .wgt = sample_wgt)
+
+
+gini_decomp_source <- function(.data, ..., .by = NULL, .wgt = NULL) {
+
+  # Share, Gini and Gini_corr for every income source (within each `.by`
+  # group when one is supplied).
+  decomp_components <- gini.source.decomp.comp(.data, ..., .by = {{.by}}, .wgt = {{.wgt}})
+
+  # The components come back ungrouped, so regroup by `.by`: the relative
+  # contribution is normalised by the sum of absolute contributions of the
+  # sources belonging to the same group. The result stays grouped by `.by`.
+  decomp_components %>%
+    dplyr::group_by(dplyr::across({{.by}})) %>%
+    dplyr::mutate(
+      Absolute_Contribution = Share * Gini * Gini_corr,
+      Relative_Contribution = Absolute_Contribution /
+        sum(Absolute_Contribution, na.rm = TRUE)
+    )
+
+}
+
+
+#' Gini income elasticity
+#'
+#' This function computes the elasticity of Gini index associated with a percentage change in the mean income (for each income source).
+#' It provides a set of indicators :
+#' - `income_source`: Column indicating each income source passed into the function call.
+#' - `Share`: Column indicating the share of the income source to the total income.
+#' - `Gini`: Column showing the Gini index for each income source.
+#' - `Gini_corr`: Column showing the Gini correlation between the income source and the total income.
+#' - `Elasticity`: Column indicating the elasticity of Gini index associated with a percentage change in the mean income source.
+#' - `Marginal_Impact`: Column indicating the marginal impact of a change in the mean income source on the overall Gini index.
+
+#' @param .data A data frame, or data frame extension (e.g. a tibble)
+#' @param ... One or more unquoted expressions separated by commas indicating income sources to consider in the decomposition. Variable names can be used as if they were positions in the data frame.
+#' @param .by A column to group the calculations by.
+#' @param .wgt an optional vector of weights to apply in computation. Should be NULL or a numeric vector.
+#' @returns An object of class `data.frame` containing all the calculated indicators. The data.frame is grouped by the columns passed into `.by` argument.
+#' @export
+#' @examples
+#'
+#' sample_income_data %>%
+#'   gini_income_elasticity(wage, self_employment_rev, farming_rev, other_rev,
+#'   .by = region)
+#'
+#' gini_income_elasticity(sample_income_data, 3:6, .by = region, .wgt = sample_wgt)
+
+
+
+gini_income_elasticity <- function(.data, ..., .by = NULL, .wgt = NULL) {
+
+  # Share, Gini and Gini_corr for every income source (within each `.by`
+  # group when one is supplied).
+  decomp_components <- gini.source.decomp.comp(.data, ..., .by = {{.by}}, .wgt = {{.wgt}})
+
+  # Regroup by `.by` so that the denominator, sum(Share * Gini * Gini_corr),
+  # is taken over the sources of the same group only.
+  # Marginal_Impact reuses the Elasticity column created just before it.
+  decomp_components %>%
+    dplyr::group_by(dplyr::across({{.by}})) %>%
+    dplyr::mutate(
+      Elasticity = Gini * Gini_corr / sum(Share * Gini * Gini_corr, na.rm = TRUE),
+      Marginal_Impact = Share * (Elasticity - 1)
+    )
+}
+
+
+#' Growth-redistribution impacts on social welfare function.
+#'
+#' This function provides a Growth-redistribution decomposition of effects (for each income source) on social welfare function defined by Amartya Sen (1970, ISBN:978-0-444-85127-7).
+#'
+#' It provides a set of indicators :
+#' - `income_source`: Column indicating each income source passed into the function call.
+#' - `Share`: Column indicating the share of the income source to the total income.
+#' - `Gini`: Column showing the Gini index for each income source.
+#' - `Gini_corr`: Column showing the Gini correlation between the income source and the total income.
+#' - `Growth_Effect`: Column indicating the effect of growth in the income source on the Social Welfare Function.
+#' - `Redistribution_Effect`: Column indicating the effect of redistribution of the income source on the Social Welfare Function.
+#' - `Total_Variation`: Column adding up both effects to calculate the overall effect of each income source on the Social Welfare Function.
+
+#' @param .data A data frame, or data frame extension (e.g. a tibble)
+#' @param ... One or more unquoted expressions separated by commas indicating income sources to consider in the decomposition. Variable names can be used as if they were positions in the data frame.
+#' @param .by A column to group the calculations by.
+#' @param .wgt an optional vector of weights to apply in computation. Should be NULL or a numeric vector.
+#' @returns An object of class `data.frame` containing all the calculated indicators. The data.frame is grouped by the columns passed into `.by` argument.
+#' @export
+#' @examples
+#'
+#' sample_income_data %>%
+#'   social_welfare_impact(wage, self_employment_rev, farming_rev, other_rev,
+#'   .wgt = sample_wgt)
+#'
+#' social_welfare_impact(sample_income_data, 3:6, .by = region, .wgt = sample_wgt)
+
+
+social_welfare_impact <- function(.data, ..., .by = NULL, .wgt = NULL) {
+
+  # Share, Gini and Gini_corr for every income source (within each `.by`
+  # group when one is supplied).
+  decomp_components <- gini.source.decomp.comp(.data, ..., .by = {{.by}}, .wgt = {{.wgt}})
+
+  # Regroup by `.by`: both effects are scaled by
+  # 1 - sum(Share * Gini * Gini_corr), where the sum runs over the sources of
+  # the same group. Total_Variation adds the two effects for each source.
+  decomp_components %>%
+    dplyr::group_by(dplyr::across({{.by}})) %>%
+    dplyr::mutate(
+      Growth_Effect = Share / (1 - sum(Share * Gini * Gini_corr, na.rm = TRUE)),
+      Redistribution_Effect = - Share * Gini * Gini_corr /
+        (1 - sum(Share * Gini * Gini_corr, na.rm = TRUE)),
+      Total_Variation = Growth_Effect + Redistribution_Effect
+    )
+}
diff --git a/R/gini_source_decomp_comp.R b/R/gini_source_decomp_comp.R
@@ -0,0 +1,43 @@
+#' Gini decomposition components
+#'
+#' Internal helper shared by the exported decomposition functions. For every
+#' selected income source (and every `.by` group, if any) it computes the
+#' share of the source in total income, the Gini index of the source and the
+#' Gini correlation between the source and total income. Total income is the
+#' row-wise sum of the selected income sources.
+#' @param .data A data frame, or data frame extension (e.g. a tibble)
+#' @param ... One or more unquoted expressions separated by commas indicating income sources to consider in the decomposition. Variable names can be used as if they were positions in the data frame.
+#' @param .by A column to group the calculations by.
+#' @param .wgt an optional column of weights to apply in computation. When it selects no column, every observation gets a weight of 1.
+#' @return An ungrouped tibble with one row per income source (and group) and the columns `income_source`, `Share`, `Gini` and `Gini_corr`.
+#' @importFrom stats weighted.mean
+#' @importFrom tidyr pivot_longer
+#' @noRd
+#'
+
+
+
+
+
+gini.source.decomp.comp <- function(.data, ..., .by = NULL, .wgt = NULL) {
+  # Keep only the income sources and the grouping column(s).
+  income_tbl <- dplyr::select(.data, ..., {{.by}})
+
+  # Total income = row-wise sum of every selected source (grouping excluded).
+  income_tbl$Total_Income <- rowSums(dplyr::select(income_tbl, -{{.by}}))
+
+  # `.wgt` selects zero columns when it was left at NULL: fall back to unit
+  # weights in that case.
+  weight_supplied <- ncol(dplyr::select(.data, {{.wgt}})) > 0
+  if (weight_supplied) {
+    income_tbl$W <- dplyr::pull(.data, {{.wgt}})
+  } else {
+    income_tbl$W <- 1
+  }
+
+  # One row per observation x income source.
+  income_long <- tidyr::pivot_longer(
+    income_tbl,
+    cols = - c(Total_Income, W, {{.by}}),
+    names_to = "income_source",
+    values_to = "valeur_revenu"
+  )
+
+  by_source <- dplyr::group_by(income_long, dplyr::across({{.by}}), income_source)
+
+  # The three building blocks of the decomposition, returned ungrouped.
+  dplyr::summarise(
+    by_source,
+    Share = stats::weighted.mean(valeur_revenu, w = W) / stats::weighted.mean(Total_Income, w = W),
+    Gini = gini_wtd_ord(valeur_revenu, weights = W),
+    Gini_corr = gini_corr(valeur_revenu, Total_Income, weights = W),
+    .groups = "drop"
+  )
+
+}
diff --git a/R/gini_wtd_ord.R b/R/gini_wtd_ord.R
@@ -0,0 +1,37 @@
+#' Gini index calculation (option of enforcing order)
+#'
+#' This function calculates the Gini index for a distribution, with an option of sorting according to another distribution.
+#' Observations with a missing value in `x`, `ord` or `weights` are dropped
+#' before the computation, so the three vectors always stay aligned.
+#' @param x a numeric vector holding the distribution of interest (typically non-negative values).
+#' @param ord an optional numeric vector, of the same length as `x`, containing the distribution to sort with. When NULL, `x` is sorted by its own values.
+#' @param weights an optional vector of weights of x to be used in the computation of the Gini coefficient. Should be NULL or a numeric vector.
+#' @return The value of the Gini index.
+#' @noRd
+#'
+
+gini_wtd_ord <- function (x, ord = NULL, weights = NULL)
+{
+  if (is.null(weights)) {
+    weights <- rep(1, length(x))
+  }
+  # Without a ranking variable, rank by x itself.
+  if (is.null(ord)) {
+    ord <- x
+  }
+  if (length(ord) != length(x))
+    stop("`ord` must have the same length as `x`", call. = FALSE)
+
+  # Complete cases only. `ord` has to be filtered together with `x` and
+  # `weights`: otherwise the permutation returned by order() would refer to
+  # positions of the unfiltered data and index past the shortened vectors.
+  keep <- !(is.na(x) | is.na(ord) | is.na(weights))
+  x <- x[keep]
+  ord <- ord[keep]
+  weights <- weights[keep]
+
+  if (!all(weights >= 0))
+    stop("At least one weight is negative", call. = FALSE)
+  if (all(weights == 0))
+    stop("All weights are zero", call. = FALSE)
+  weights <- weights/sum(weights)
+
+  # Sort the values and their weights by increasing rank variable.
+  rank_order <- order(ord)
+  x <- x[rank_order]
+  weights <- weights[rank_order]
+
+  # p: cumulative weight share; nu: cumulative share of the weighted total.
+  p <- cumsum(weights)
+  nu <- cumsum(weights * x)
+  n <- length(nu)
+  nu <- nu/nu[n]
+
+  # Cross-product sum over consecutive (p, nu) points.
+  gini <- sum(nu[-1] * p[-n]) - sum(nu[-n] * p[-1])
+  return(gini)
+}
diff --git a/R/utils-globals.R b/R/utils-globals.R
@@ -0,0 +1 @@
+# Register the column names that the package refers to as bare symbols inside
+# dplyr/tidyr calls, so that R CMD check does not report them as undefined
+# global variables.
+utils::globalVariables(c("Total_Income", "W", "income_source", "valeur_revenu", "Share", "Gini", "Gini_corr", "Elasticity", "Growth_Effect", "Redistribution_Effect", "Absolute_Contribution"))
diff --git a/R/utils-pipe.R b/R/utils-pipe.R
@@ -0,0 +1,12 @@
+#' Pipe operator
+#'
+#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
+#'
+#' @name %>%
+#' @rdname pipe
+#' @keywords internal
+#' @export
+#' @importFrom magrittr %>%
+#' @usage lhs \%>\% rhs
+#' @returns The result of calling 'rhs(lhs)'.
+NULL
diff --git a/README.md b/README.md
@@ -0,0 +1,25 @@
+
+# GiniDecompLY
+
+<!-- badges: start -->
+<!-- badges: end -->
+
+The goal of GiniDecompLY is to use the approach of Lerman and Yitzhaki (1985) to decompose the Gini Index by income sources.
+
+The package provides a set of functions that quantify the effect of each income source on inequalities.
+
+## Installation
+
+You can install the released version of GiniDecompLY from GitHub with:
+
+``` r
+devtools::install_github("A-A-Mbarek/GiniDecompLY")
+```
+
+
+
+## Reference
+
+Lerman R., Yitzhaki S. (1985) Income Inequality Effects by Income Source: A New Approach and Applications to the United States. *The Review of Economics and Statistics*, Vol. 67, No. 1 (Feb., 1985), pp. 151-156.
+
diff --git a/build/vignette.rds b/build/vignette.rds
diff --git a/data/sample_income_data.rda b/data/sample_income_data.rda
diff --git a/inst/doc/GiniDecompLY.R b/inst/doc/GiniDecompLY.R
@@ -0,0 +1,9 @@
+## ---- include = FALSE---------------------------------------------------------
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+
+## ----setup--------------------------------------------------------------------
+library(GiniDecompLY)
+
diff --git a/inst/doc/GiniDecompLY.Rmd b/inst/doc/GiniDecompLY.Rmd
@@ -0,0 +1,19 @@
+---
+title: "GiniDecompLY"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{GiniDecompLY}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(GiniDecompLY)
+```