Prueba de cálculo de salario promedio general  y desagregado por sexo en la Ciudad de México

Serie longitudinal: últimos trimestres de 2021-2024

In [1]:
library(tidyverse)
if(! require('pacman')) install.packages('pacman')
pacman::p_load(srvyr, janitor, stringr, writexl)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.4     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Cargando paquete requerido: pacman



In [2]:
dir_datos <- 'enoe_datos'
if (!dir.exists(dir_datos)) {
    dir.create(dir_datos)
}

In [3]:
descargar_enoe_csv <- function() {
    options(timeout = 90000)

    descargar_extraer <- function(url, destino) {
        temp <- tempfile()
        download.file(url, temp, mode = 'wb')
        unzip(temp, exdir = destino)
        unlink(temp)

    }
    
    #iterar los años de descarga    
    for(i in 21:22) {
        url <- paste0('https://www.inegi.org.mx/contenidos/programas/enoe/15ymas/microdatos/enoe_n_20', i, '_trim4_csv.zip')
        descargar_extraer(url, dir_datos)
    }

    for(i in 23:24) {
        url <- paste0('https://www.inegi.org.mx/contenidos/programas/enoe/15ymas/microdatos/enoe_20', i, '_trim4_csv.zip')
        descargar_extraer(url, dir_datos)
    }
}

In [4]:
descargar_enoe_csv()

In [5]:
archivos <- list.files(path = dir_datos, pattern = 'sdem.*\\.csv$', full.names = TRUE, ignore.case = TRUE)

datos <- archivos %>%
lapply(read_csv, show_col_types = FALSE)

"[1m[22mOne or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)"
"[1m[22mOne or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)"
"[1m[22mOne or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)"
"[1m[22mOne or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)"


In [6]:
tablas <- lapply(datos, function(df) {
    df %>%
    clean_names() %>%
    filter(r_def ==00, (c_res == 1 | c_res == 3),
         eda >= 15  & eda <= 98,
         clase2 == 1) %>%
    select(ent, sex, eda, fac_tri, ingocup)
})


In [7]:
#Listas vacías
variables <- list()
Pocupada <- list()
mujeres <- list()
hombres <- list()

In [8]:
for (i in seq_along(tablas)) {
  # Extraer el año desde el nombre del archivo
  year <- str_extract(archivos[[i]], "\\d{2}(?=\\D*$)")
  fecha <- paste0("31/12/20", year)
  
  variables[[i]] <- tablas[[i]] %>%
    mutate(fecha = fecha)

  Pocupada[[i]] <- variables[[i]] %>%
    summarise(SalarioPromedio = sum(fac_tri * ingocup, na.rm = TRUE) / sum(fac_tri, na.rm = TRUE),
              .groups = "drop") %>%
    mutate(fecha = fecha)

  mujeres[[i]] <- variables[[i]] %>%
    filter(sex == 2) %>%
    summarise(Salario_Promedio_Mujeres = sum(fac_tri * ingocup, na.rm = TRUE) / sum(fac_tri, na.rm = TRUE),
              .groups = "drop") %>%
    mutate(fecha = fecha)

  hombres[[i]] <- variables[[i]] %>%
    filter(sex == 1) %>%
    summarise(Salario_Promedio_Hombres = sum(fac_tri * ingocup, na.rm = TRUE) / sum(fac_tri, na.rm = TRUE),
              .groups = "drop") %>%
    mutate(fecha = fecha)
}

In [9]:
total <- mapply(function(p, m, h){
reduce(list(p, m, h), left_join, by = 'fecha')},
Pocupada, mujeres, hombres, SIMPLIFY = FALSE)

salario_historico <- bind_rows(total)

In [10]:
print(salario_historico)

[90m# A tibble: 4 × 4[39m
  SalarioPromedio fecha      Salario_Promedio_Mujeres Salario_Promedio_Hombres
            [3m[90m<dbl>[39m[23m [3m[90m<chr>[39m[23m                         [3m[90m<dbl>[39m[23m                    [3m[90m<dbl>[39m[23m
[90m1[39m           [4m5[24m751. 31/12/2023                    [4m4[24m983.                    [4m6[24m280.
[90m2[39m           [4m6[24m034. 31/12/2024                    [4m5[24m218.                    [4m6[24m608.
[90m3[39m           [4m4[24m893. 31/12/2021                    [4m4[24m232.                    [4m5[24m324.
[90m4[39m           [4m5[24m248. 31/12/2022                    [4m4[24m476.                    [4m5[24m775.
