In [13]:
import importlib
import analisis_pob_mon_lab
# Re-import the local analysis module so edits made to its .py file are picked
# up without restarting the kernel. As the cell's last expression, the
# reloaded module object is also what the cell displays.
importlib.reload(analisis_pob_mon_lab)

<module 'analisis_pob_mon_lab' from 'E:\\papx\\end_to_end_ml\\nb_pr\\enahopy\\examples\\investigacion\\analisis_pob_mon_lab.py'>

In [14]:
# Instalación (si es necesario): usar %pip para instalar en el entorno del kernel
# %pip install enahopy==0.6.0


import enahopy


# ========== USANDO ENAHOPY ENAHO LOADER ==========
from enahopy.loader import ENAHODataDownloader
from enahopy.loader.io import ENAHOLocalReader


# ========== USANDO ENAHOPY's ENAHOModuleMerger ==========
from enahopy.merger import ENAHOModuleMerger
from enahopy.merger.config import ModuleMergeConfig, ModuleMergeLevel
import logging


# ========== USANDO ENAHOPY ENAHO NULL_ANALYSIS ==========
from enahopy.null_analysis import ENAHONullAnalyzer

import pandas as pd
import numpy as np
from datetime import datetime
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional


# Blanket filter: every warning category is silenced from here on.
warnings.filterwarnings(action='ignore')

# Record the library version and the wall-clock start time of this run.
print("enahopy versión:", enahopy.__version__)
print(f"Inicio del análisis: {datetime.now():%Y-%m-%d %H:%M:%S}")

enahopy versión: 0.6.0
Inicio del análisis: 2025-10-29 19:30:19


In [15]:
# ========== ENAHOModuleMerger SETUP ==========
# Module-level merger objects. Note that the merge functions defined later in
# this notebook build their own config/merger instances locally.

# One logger shared by both mergers.
logger = logging.getLogger('enaho_merger')

# Person-level (individual) merge configuration and its merger.
config = ModuleMergeConfig(merge_level=ModuleMergeLevel.PERSONA)
merger = ENAHOModuleMerger(config, logger)

# Household-level merge configuration and its merger.
config_hogar = ModuleMergeConfig(merge_level=ModuleMergeLevel.HOGAR)
merger_hogar = ENAHOModuleMerger(config_hogar, logger)




In [17]:
def descargar_modulos_enaho(
    año: str = '2024',
    output_dir: str = r"E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data",
) -> Tuple[Dict, Dict]:
    """
    Download several ENAHO modules for a single year using enahopy.

    Args:
        año: Survey year to download, as a string (e.g. '2024').
        output_dir: Directory handed to the downloader as ``output_dir``.
            Defaults to the path that was previously hard-coded in the body,
            so existing calls behave exactly as before.

    Returns:
        A ``(data_multi, modulos_interes)`` tuple:
          * ``data_multi`` is the object returned by
            ``ENAHODataDownloader.download`` (this notebook's
            ``extraer_datasets`` indexes it as
            ``data_multi[(año, module_code)][file_stem]``).
          * ``modulos_interes`` maps each requested module code to its
            human-readable description.
    """
    # Local import keeps the function usable even if the notebook's import
    # cell has not been executed in the current kernel session.
    from enahopy.loader import ENAHODataDownloader

    print(f"\n{'=' * 70}")
    print(f"DESCARGANDO MÓDULOS ENAHO {año}".center(70))
    print(f"{'=' * 70}\n")

    # Module code -> description; the keys are what gets requested below.
    modulos_interes = {
        "01": "Características de vivienda y hogar",
        "02": "Características de los Miembros del Hogar",
        "03": "Educación",
        "04": "Salud",
        "05": "Empleo e Ingresos",
        "34": "Sumarias (Variables Calculadas)"
    }

    downloader = ENAHODataDownloader(verbose=True)

    print("Módulos a descargar:")
    for code, desc in modulos_interes.items():
        print(f"  {code}: {desc}")

    data_multi = downloader.download(
        modules=list(modulos_interes.keys()),
        years=[año],
        output_dir=output_dir,
        decompress=True,
        only_dta=True,
        load_dta=True,
        parallel=True,  # download modules concurrently
        max_workers=3,  # up to 3 modules at a time
        verbose=True
    )
    print("\n✓ Descarga completada")
    return data_multi, modulos_interes


def extraer_datasets(data_multi: Dict, año: str = '2024') -> Dict:
    """
    Pick one table per ENAHO module out of the downloaded mapping, keeping
    only the columns this analysis uses.

    Args:
        data_multi: Nested mapping indexed as
            ``data_multi[(año, module_code)][file_stem]``; each leaf must
            support selection by a list of column names and expose ``.shape``.
        año: Year string, used in the outer key and inside every file stem.

    Returns:
        Dict with the keys 'hogar', 'persona', 'educacion', 'salud', 'empleo'
        and 'sumaria' (in that order), each holding the column-subset table.
    """
    print("Extrayendo datasets...")

    # Identifier columns shared by every household- / person-level table.
    llaves_hogar = ['conglome', 'vivienda', 'hogar']
    llaves_persona = llaves_hogar + ['codperso']

    # (dataset name, module code, file-stem prefix, file-stem suffix, columns)
    plan = (
        ('hogar', '01', 'enaho01-', '-100',
         llaves_hogar + ['p102', 'p103', 'p104', 'p105a', 'p110', 'p111a',
                         'p113a', 'p1121', 'p1144']),
        ('persona', '02', 'enaho01-', '-200',
         llaves_persona + ['p203', 'p203b', 'p207', 'p208a', 'p209']),
        ('educacion', '03', 'enaho01a-', '-300',
         llaves_persona + ['p301a', 'p301b', 'p301c', 'p307']),
        ('salud', '04', 'enaho01a-', '-400',
         llaves_persona + ['p401', 'p4191', 'p4192', 'p4193', 'p4194',
                           'p4195', 'p4196', 'p4197', 'p4198', 'p401h1',
                           'p401h2', 'p401h3', 'p401h4', 'p401h5', 'p401h6',
                           'p401f', 'p401g']),
        ('empleo', '05', 'enaho01a-', '-500',
         llaves_persona + ['ocu500', 'p507', 'i524e1', 'i524a1', 'p513t',
                           'p510a1', 'p512a', 'p558a5', 'i530a', 'p530a',
                           'i538e1', 'p538e1', 'i541a', 'p541a', 'p505r4',
                           'p506r4', 'p511a']),
        ('sumaria', '34', 'sumaria-', '',
         llaves_hogar + ['ubigeo', 'dominio', 'estrato', 'pobreza',
                         'inghog2d', 'gashog2d', 'factor07', 'mieperho',
                         'linea', 'linpe']),
    )

    # Look up each table in plan order and keep only its selected columns.
    datasets = {}
    for nombre, codigo, prefijo, sufijo, columnas in plan:
        tabla_modulo = data_multi[(año, codigo)][prefijo + año + sufijo]
        datasets[nombre] = tabla_modulo[columnas]

    # One summary line per extracted table.
    for nombre, tabla in datasets.items():
        forma = tabla.shape
        print(f"  {nombre}: {forma[0]:,} filas × {forma[1]} columnas")

    return datasets


In [18]:

def fusionar_modulos_individuales(data_persona: pd.DataFrame,
                                  data_educacion: pd.DataFrame,
                                  data_salud: pd.DataFrame,
                                  data_empleo: pd.DataFrame) -> pd.DataFrame:
    """
    Combine the person-level modules (02, 03, 04, 05) into one DataFrame.

    The merge is delegated to enahopy's ``ENAHOModuleMerger`` configured with
    ``ModuleMergeLevel.PERSONA``; module '02' (household members) is the base.

    Args:
        data_persona: Module 02 table (household member characteristics).
        data_educacion: Module 03 table (education).
        data_salud: Module 04 table (health).
        data_empleo: Module 05 table (employment).

    Returns:
        The ``merged_df`` attribute of the merger's result object.
    """
    from enahopy.merger import ENAHOModuleMerger
    from enahopy.merger.config import ModuleMergeConfig, ModuleMergeLevel

    separador = '=' * 70
    print(f"\n{separador}")
    print("PASO 1: FUSIÓN DE MÓDULOS INDIVIDUALES".center(70))
    print(f"{separador}\n")

    print("Fusionando datos individuales (persona + educación + salud + empleo)...\n")

    # Person-level configuration, reused for the merger and the merge call.
    config_persona = ModuleMergeConfig(merge_level=ModuleMergeLevel.PERSONA)
    fusionador = ENAHOModuleMerger(config_persona, logging.getLogger('enaho_merger'))

    # Insertion order is kept exactly as 02, 04, 03, 05.
    modulos_por_codigo = dict([
        ('02', data_persona),
        ('04', data_salud),
        ('03', data_educacion),
        ('05', data_empleo),
    ])

    print("📦 Usando ENAHOModuleMerger de enahopy para fusionar módulos 02, 03, 04, 05...")
    data_individuos = fusionador.merge_multiple_modules(
        modules_dict=modulos_por_codigo,
        base_module='02',
        merge_config=config_persona,
    ).merged_df

    print(f"\n✓ Módulos individuales fusionados: {data_individuos.shape}")
    return data_individuos


In [19]:
def merge_con_hogar(data_caracteristica_hogar: pd.DataFrame,
                      data_sumaria: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the sumaria table (module 34) with the household characteristics
    table (module 01) at household level.

    The merge is delegated to enahopy's ``ENAHOModuleMerger`` configured with
    ``ModuleMergeLevel.HOGAR``; sumaria is passed as the left table and the
    household characteristics as the right table.

    Args:
        data_caracteristica_hogar: Module 01 table (right side of the merge).
        data_sumaria: Module 34 table (left side of the merge).

    Returns:
        The ``merged_df`` attribute of the merger's result object.
    """
    from enahopy.merger import ENAHOModuleMerger
    from enahopy.merger.config import ModuleMergeConfig, ModuleMergeLevel

    # Household-level configuration, reused for the merger and the merge call.
    nivel_hogar = ModuleMergeConfig(merge_level=ModuleMergeLevel.HOGAR)
    fusionador = ENAHOModuleMerger(nivel_hogar, logging.getLogger('enaho_merger'))

    print("\n📦 Usando ENAHOModuleMerger de enahopy para fusionar sumaria con caracteristicas del hogar...")
    resultado = fusionador.merge_modules(
        left_df=data_sumaria,
        right_df=data_caracteristica_hogar,
        left_module='34',
        right_module='01',
        merge_config=nivel_hogar,
    )

    data_con_hogar = resultado.merged_df
    print(f"\n✓ Módulos a nivel Hogares fusionados: {data_con_hogar.shape}")
    return data_con_hogar

In [20]:
# STEP 1: download the INEI data with enahopy and pull out the per-module tables.
data_multi, modulos = descargar_modulos_enaho(año='2024')
datasets = extraer_datasets(data_multi=data_multi, año='2024')

# STEP 2: merge the person-level modules (02, 03, 04, 05).
data_individuos = fusionar_modulos_individuales(
    data_persona=datasets['persona'],
    data_educacion=datasets['educacion'],
    data_salud=datasets['salud'],
    data_empleo=datasets['empleo'],
)

# STEP 3: merge the household-level tables (module 01 and sumaria).
data_hogares = merge_con_hogar(
    data_caracteristica_hogar=datasets['hogar'],
    data_sumaria=datasets['sumaria'],
)


                    DESCARGANDO MÓDULOS ENAHO 2024                    

2025-10-29 19:30:37 | INFO     | enahopy | ENAHOPY logging initialized
2025-10-29 19:30:37 | DEBUG    | enahopy.loader.core.cache | No expired cache entries to clean
Módulos a descargar:
  01: Características de vivienda y hogar
  02: Características de los Miembros del Hogar
  03: Educación
  04: Salud
  05: Empleo e Ingresos
  34: Sumarias (Variables Calculadas)
2025-10-29 19:30:37 | INFO     | enaho_downloader | === Iniciando descarga ENAHO corte transversal ===
2025-10-29 19:30:37 | INFO     | enaho_downloader | Módulos: ['01', '02', '03', '04', '05', '34']
2025-10-29 19:30:37 | INFO     | enaho_downloader | Años: ['2024']
2025-10-29 19:30:37 | INFO     | enaho_downloader | Directorio: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data
2025-10-29 19:30:37 | INFO     | enaho_downloader | Total de descargas programadas: 6
2025-10-29 19:30:37 | INFO     | enaho_downloader | Descarga paralela con 3 wo

Descargando modulo_01_2024.zip:  30%|██▉       | 2.49M/8.38M [00:01<00:03, 1.77MiB/s]
Descargando modulo_03_2024.zip:   0%|          | 0.00/11.9M [00:00<?, ?iB/s][A
Descargando modulo_01_2024.zip:  32%|███▏      | 2.69M/8.38M [00:01<00:03, 1.55MiB/s][A
Descargando modulo_01_2024.zip:  34%|███▍      | 2.86M/8.38M [00:01<00:03, 1.42MiB/s][A
Descargando modulo_01_2024.zip:  36%|███▋      | 3.05M/8.38M [00:02<00:03, 1.35MiB/s][A
Descargando modulo_01_2024.zip:  38%|███▊      | 3.19M/8.38M [00:02<00:04, 1.22MiB/s]A
Descargando modulo_01_2024.zip:  43%|████▎     | 3.59M/8.38M [00:02<00:04, 1.20MiB/s]A
Descargando modulo_01_2024.zip:  45%|████▌     | 3.80M/8.38M [00:02<00:03, 1.39MiB/s]A
Descargando modulo_01_2024.zip:  47%|████▋     | 3.96M/8.38M [00:02<00:03, 1.42MiB/s]A
Descargando modulo_01_2024.zip:  50%|████▉     | 4.18M/8.38M [00:02<00:02, 1.61MiB/s]A
Descargando modulo_01_2024.zip:  53%|█████▎    | 4.41M/8.38M [00:02<00:02, 1.74MiB/s][A
Descargando modulo_01_2024.zip:  58%|█████▊   

2025-10-29 19:31:05 | INFO     | enaho_downloader | Descarga completada: modulo_01_2024.zip (8.0 MB)




Descargando modulo_02_2024.zip:  21%|██        | 1.22M/5.79M [00:00<00:04, 1.12MiB/s][A[A
Descargando modulo_03_2024.zip:  40%|███▉      | 4.70M/11.9M [00:04<00:05, 1.25MiB/s][A

2025-10-29 19:31:05 | INFO     | enaho_downloader | Extraídos 1 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_01_2024
2025-10-29 19:31:05 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_01_2024.zip




Descargando modulo_02_2024.zip:  23%|██▎       | 1.34M/5.79M [00:01<00:03, 1.14MiB/s][A[A
Descargando modulo_03_2024.zip:  41%|████      | 4.87M/11.9M [00:04<00:05, 1.32MiB/s][A

Descargando modulo_02_2024.zip:  26%|██▌       | 1.49M/5.79M [00:01<00:03, 1.20MiB/s][A[A
Descargando modulo_03_2024.zip:  42%|████▏     | 5.01M/11.9M [00:04<00:05, 1.30MiB/s][A

Descargando modulo_02_2024.zip:  29%|██▊       | 1.65M/5.79M [00:01<00:03, 1.30MiB/s][A[A
Descargando modulo_03_2024.zip:  43%|████▎     | 5.16M/11.9M [00:04<00:05, 1.33MiB/s][A

Descargando modulo_02_2024.zip:  31%|███       | 1.79M/5.79M [00:03<00:24, 166kiB/s] [A[A
Descargando modulo_03_2024.zip:  45%|████▍     | 5.30M/11.9M [00:07<00:38, 169kiB/s] [A

Descargando modulo_02_2024.zip:  35%|███▍      | 2.02M/5.79M [00:04<00:14, 260kiB/s][A[A
Descargando modulo_03_2024.zip:  46%|████▋     | 5.52M/11.9M [00:07<00:24, 263kiB/s][A

Descargando modulo_02_2024.zip:  37%|███▋      | 2.14M/5.79M [00:04<00:13, 271kiB/s][A[A

2025-10-29 19:31:12 | INFO     | enaho_downloader | Archivo cargado: enaho01-2024-100.dta (44731 filas)
2025-10-29 19:31:12 | INFO     | enaho_downloader | Descargando módulo 04 año 2024




Descargando modulo_02_2024.zip:  87%|████████▋ | 5.06M/5.79M [00:08<00:00, 1.39MiB/s][A[A

Descargando modulo_02_2024.zip:  91%|█████████ | 5.24M/5.79M [00:08<00:00, 1.25MiB/s][A[A

Descargando modulo_02_2024.zip:  95%|█████████▍| 5.49M/5.79M [00:08<00:00, 1.52MiB/s][A[A

Descargando modulo_02_2024.zip: 100%|██████████| 5.79M/5.79M [00:08<00:00, 683kiB/s] [A[A


2025-10-29 19:31:13 | INFO     | enaho_downloader | Descarga completada: modulo_02_2024.zip (5.5 MB)
2025-10-29 19:31:13 | INFO     | enaho_downloader | Extraídos 3 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_02_2024
2025-10-29 19:31:13 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_02_2024.zip


Descargando modulo_04_2024.zip:   0%|          | 0.00/14.2M [00:00<?, ?iB/s]
Descargando modulo_04_2024.zip:   2%|▏         | 213k/14.2M [00:00<00:18, 774kiB/s] ][A
Descargando modulo_04_2024.zip:   2%|▏         | 295k/14.2M [00:00<00:19, 709kiB/s]s][A
Descargando modulo_04_2024.zip:   3%|▎         | 426k/14.2M [00:00<00:31, 434kiB/s]s][A
Descargando modulo_04_2024.zip:   4%|▎         | 524k/14.2M [00:01<00:25, 536kiB/s]s][A
Descargando modulo_03_2024.zip:  58%|█████▊    | 6.95M/11.9M [00:14<00:47, 105kiB/s] [A
Descargando modulo_04_2024.zip:   4%|▍         | 590k/14.2M [00:01<00:31, 430kiB/s]][A

2025-10-29 19:31:16 | INFO     | enaho_downloader | Archivo cargado: enaho01-2024-200.dta (117721 filas)
2025-10-29 19:31:16 | INFO     | enaho_downloader | Archivo cargado: enaho_tabla_ciuo_88.dta (5195 filas)
2025-10-29 19:31:16 | INFO     | enaho_downloader | Archivo cargado: enaho_tabla_cno_2015.dta (473 filas)


Descargando modulo_04_2024.zip:   6%|▌         | 819k/14.2M [00:01<00:16, 786kiB/s]


2025-10-29 19:31:16 | INFO     | enaho_downloader | Descargando módulo 05 año 2024


Descargando modulo_03_2024.zip:  60%|██████    | 7.14M/11.9M [00:14<00:18, 254kiB/s][A
Descargando modulo_04_2024.zip:   7%|▋         | 967k/14.2M [00:01<00:23, 556kiB/s]][A
Descargando modulo_04_2024.zip:   8%|▊         | 1.16M/14.2M [00:01<00:16, 778kiB/s][A
Descargando modulo_04_2024.zip:  10%|▉         | 1.36M/14.2M [00:01<00:13, 980kiB/s][A
Descargando modulo_03_2024.zip:  64%|██████▍   | 7.62M/11.9M [00:15<00:06, 621kiB/s][A

Descargando modulo_04_2024.zip:  11%|█         | 1.54M/14.2M [00:02<00:11, 1.12MiB/s]
Descargando modulo_03_2024.zip:  65%|██████▌   | 7.77M/11.9M [00:15<00:05, 747kiB/s][A

Descargando modulo_04_2024.zip:  12%|█▏        | 1.69M/14.2M [00:02<00:12, 1.03MiB/s][A[A
Descargando modulo_03_2024.zip:  66%|██████▋   | 7.87M/11.9M [00:15<00:05, 789kiB/s][A

Descargando modulo_05_2024.zip:   1%|▏         | 229k/16.7M [00:00<00:16, 985kiB/s] [A[A
Descargando modulo_04_2024.zip:  13%|█▎        | 1.82M/14.2M [00:02<00:12, 976kiB/s] [A

Descargando modulo_05_20

2025-10-29 19:31:20 | INFO     | enaho_downloader | Descarga completada: modulo_03_2024.zip (11.3 MB)


Descargando modulo_04_2024.zip:  44%|████▍     | 6.24M/14.2M [00:05<00:05, 1.44MiB/s]

Descargando modulo_04_2024.zip:  45%|████▌     | 6.39M/14.2M [00:05<00:05, 1.40MiB/s][A[A

Descargando modulo_04_2024.zip:  46%|████▋     | 6.57M/14.2M [00:06<00:05, 1.36MiB/s][A[A

Descargando modulo_05_2024.zip:  31%|███▏      | 5.23M/16.7M [00:04<00:07, 1.47MiB/s][A[A

2025-10-29 19:31:20 | INFO     | enaho_downloader | Extraídos 1 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_03_2024
2025-10-29 19:31:20 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_03_2024.zip


Descargando modulo_04_2024.zip:  48%|████▊     | 6.77M/14.2M [00:06<00:04, 1.52MiB/s]

Descargando modulo_04_2024.zip:  49%|████▉     | 6.96M/14.2M [00:06<00:04, 1.61MiB/s][A[A

Descargando modulo_04_2024.zip:  51%|█████     | 7.15M/14.2M [00:06<00:04, 1.68MiB/s][A[A

Descargando modulo_04_2024.zip:  52%|█████▏    | 7.36M/14.2M [00:06<00:04, 1.68MiB/s][A[A

Descargando modulo_05_2024.zip:  36%|███▌      | 5.96M/16.7M [00:04<00:07, 1.46MiB/s][A[A

Descargando modulo_04_2024.zip:  53%|█████▎    | 7.53M/14.2M [00:06<00:05, 1.21MiB/s][A[A

Descargando modulo_05_2024.zip:  38%|███▊      | 6.28M/16.7M [00:04<00:07, 1.36MiB/s][A[A

Descargando modulo_05_2024.zip:  39%|███▊      | 6.47M/16.7M [00:04<00:06, 1.50MiB/s][A[A

Descargando modulo_04_2024.zip:  54%|█████▍    | 7.68M/14.2M [00:13<01:10, 92.4kiB/s][A[A

Descargando modulo_04_2024.zip:  56%|█████▋    | 7.99M/14.2M [00:14<00:53, 115kiB/s] [A[A

Descargando modulo_04_2024.zip:  57%|█████▋    | 8.13M/14.2M [00:15<00:40, 1

2025-10-29 19:31:39 | INFO     | enaho_downloader | Archivo cargado: enaho01a-2024-300.dta (106619 filas)
2025-10-29 19:31:39 | INFO     | enaho_downloader | Descargando módulo 34 año 2024




Descargando modulo_04_2024.zip:  92%|█████████▏| 13.0M/14.2M [00:25<00:02, 488kiB/s][A[A

Descargando modulo_04_2024.zip:  93%|█████████▎| 13.2M/14.2M [00:25<00:01, 601kiB/s][A[A

Descargando modulo_04_2024.zip:  94%|█████████▎| 13.3M/14.2M [00:25<00:01, 647kiB/s][A[A

Descargando modulo_04_2024.zip:  95%|█████████▍| 13.4M/14.2M [00:25<00:00, 767kiB/s][A[A

Descargando modulo_05_2024.zip:  74%|███████▍  | 12.3M/16.7M [00:23<00:04, 1.05MiB/s][A[A

Descargando modulo_04_2024.zip:  95%|█████████▌| 13.5M/14.2M [00:25<00:00, 801kiB/s]][A[A

Descargando modulo_04_2024.zip:  96%|█████████▌| 13.6M/14.2M [00:26<00:00, 648kiB/s] [A[A
Descargando modulo_34_2024.zip:   0%|          | 0.00/15.9M [00:00<?, ?iB/s][A

Descargando modulo_04_2024.zip:  97%|█████████▋| 13.7M/14.2M [00:26<00:00, 697kiB/s]][A[A

Descargando modulo_05_2024.zip:  77%|███████▋  | 12.9M/16.7M [00:24<00:03, 1.15MiB/s][A[A
Descargando modulo_04_2024.zip:  98%|█████████▊| 13.8M/14.2M [00:26<00:00, 574kiB/s][

2025-10-29 19:31:42 | INFO     | enaho_downloader | Descarga completada: modulo_04_2024.zip (13.5 MB)



Descargando modulo_34_2024.zip:   2%|▏         | 393k/15.9M [00:01<00:35, 442kiB/s][A

Descargando modulo_05_2024.zip:  87%|████████▋ | 14.5M/16.7M [00:25<00:01, 1.72MiB/s][A[A
Descargando modulo_34_2024.zip:   3%|▎         | 524k/15.9M [00:01<00:24, 624kiB/s][A

Descargando modulo_05_2024.zip:  88%|████████▊ | 14.7M/16.7M [00:25<00:01, 1.82MiB/s][A[A

Descargando modulo_05_2024.zip:  89%|████████▉ | 14.9M/16.7M [00:25<00:01, 1.55MiB/s][A[A

Descargando modulo_05_2024.zip:  90%|█████████ | 15.1M/16.7M [00:25<00:01, 1.46MiB/s][A[A

Descargando modulo_05_2024.zip:  91%|█████████▏| 15.3M/16.7M [00:26<00:00, 1.59MiB/s][A[A
Descargando modulo_34_2024.zip:   4%|▍         | 606k/15.9M [00:01<00:44, 343kiB/s][A

Descargando modulo_05_2024.zip:  93%|█████████▎| 15.5M/16.7M [00:26<00:00, 1.67MiB/s][A[A

Descargando modulo_05_2024.zip:  94%|█████████▍| 15.7M/16.7M [00:26<00:00, 1.74MiB/s][A[A
Descargando modulo_34_2024.zip:   5%|▍         | 770k/15.9M [00:02<00:28, 524kiB/s][A

2025-10-29 19:31:43 | INFO     | enaho_downloader | Extraídos 1 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_04_2024
2025-10-29 19:31:43 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_04_2024.zip




Descargando modulo_05_2024.zip:  95%|█████████▍| 15.9M/16.7M [00:26<00:00, 1.77MiB/s][A[A
Descargando modulo_34_2024.zip:   6%|▌         | 934k/15.9M [00:02<00:21, 699kiB/s][A
Descargando modulo_34_2024.zip:   7%|▋         | 1.05M/15.9M [00:02<00:19, 755kiB/s][A

Descargando modulo_05_2024.zip:  96%|█████████▌| 16.1M/16.7M [00:26<00:00, 1.52MiB/s][A[A
Descargando modulo_34_2024.zip:   8%|▊         | 1.23M/15.9M [00:02<00:15, 969kiB/s][A

Descargando modulo_05_2024.zip:  97%|█████████▋| 16.2M/16.7M [00:26<00:00, 1.43MiB/s][A[A
Descargando modulo_34_2024.zip:   8%|▊         | 1.35M/15.9M [00:02<00:14, 999kiB/s][A

Descargando modulo_05_2024.zip:  98%|█████████▊| 16.4M/16.7M [00:26<00:00, 1.44MiB/s][A[A
Descargando modulo_34_2024.zip:   9%|▉         | 1.50M/15.9M [00:02<00:13, 1.11MiB/s][A

Descargando modulo_05_2024.zip:  99%|█████████▉| 16.5M/16.7M [00:26<00:00, 1.40MiB/s][A[A
Descargando modulo_34_2024.zip:  10%|█         | 1.63M/15.9M [00:02<00:12, 1.15MiB/s][A

Desc

2025-10-29 19:32:00 | INFO     | enaho_downloader | Descarga completada: modulo_05_2024.zip (15.9 MB)


Descargando modulo_34_2024.zip:  33%|███▎      | 5.21M/15.9M [00:19<00:34, 313kiB/s][A
Descargando modulo_34_2024.zip:  33%|███▎      | 5.25M/15.9M [00:19<00:32, 326kiB/s][A
Descargando modulo_34_2024.zip:  33%|███▎      | 5.29M/15.9M [00:20<00:36, 289kiB/s][A
Descargando modulo_34_2024.zip:  33%|███▎      | 5.33M/15.9M [00:20<00:36, 290kiB/s][A
Descargando modulo_34_2024.zip:  34%|███▍      | 5.44M/15.9M [00:20<00:25, 412kiB/s][A
Descargando modulo_34_2024.zip:  34%|███▍      | 5.49M/15.9M [00:20<00:26, 395kiB/s][A
Descargando modulo_34_2024.zip:  35%|███▍      | 5.54M/15.9M [00:20<00:27, 378kiB/s][A
Descargando modulo_34_2024.zip:  35%|███▌      | 5.59M/15.9M [00:20<00:25, 399kiB/s][A
Descargando modulo_34_2024.zip:  35%|███▌      | 5.64M/15.9M [00:21<00:28, 360kiB/s][A
Descargando modulo_34_2024.zip:  36%|███▌      | 5.69M/15.9M [00:21<00:26, 387kiB/s][A
Descargando modulo_34_2024.zip:  36%|███▌      | 5.73M/15.9M [00:21<00:26, 383kiB/s][A
Descargando modulo_34_2024.zip: 

2025-10-29 19:32:13 | INFO     | enaho_downloader | Extraídos 5 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_05_2024



Descargando modulo_34_2024.zip:  61%|██████    | 9.72M/15.9M [00:32<00:12, 503kiB/s]

2025-10-29 19:32:13 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_05_2024.zip


[A
Descargando modulo_34_2024.zip:  61%|██████▏   | 9.77M/15.9M [00:32<00:12, 487kiB/s][A
Descargando modulo_34_2024.zip:  62%|██████▏   | 9.83M/15.9M [00:32<00:13, 438kiB/s][A
Descargando modulo_34_2024.zip:  62%|██████▏   | 9.88M/15.9M [00:32<00:13, 445kiB/s][A
Descargando modulo_34_2024.zip:  62%|██████▏   | 9.96M/15.9M [00:32<00:11, 528kiB/s][A
Descargando modulo_34_2024.zip:  63%|██████▎   | 10.1M/15.9M [00:32<00:08, 667kiB/s][A
Descargando modulo_34_2024.zip:  64%|██████▎   | 10.2M/15.9M [00:32<00:08, 665kiB/s][A
Descargando modulo_34_2024.zip:  64%|██████▍   | 10.2M/15.9M [00:33<00:08, 648kiB/s][A
Descargando modulo_34_2024.zip:  65%|██████▍   | 10.3M/15.9M [00:33<00:08, 677kiB/s][A
Descargando modulo_34_2024.zip:  65%|██████▌   | 10.4M/15.9M [00:33<00:07, 756kiB/s][A
Descargando modulo_34_2024.zip:  66%|██████▌   | 10.5M/15.9M [00:33<00:07, 751kiB/s][A
Descargando modulo_34_2024.zip:  66%|██████▋   | 10.6M/15.9M [00:33<00:07, 696kiB/s][A
Descargando modulo_34_2024.z

2025-10-29 19:32:46 | INFO     | enaho_downloader | Descarga completada: modulo_34_2024.zip (15.2 MB)
2025-10-29 19:32:51 | INFO     | enaho_downloader | Extraídos 2 archivos en: E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_34_2024
2025-10-29 19:32:51 | INFO     | enaho_downloader | Archivo ZIP eliminado: modulo_34_2024.zip
2025-10-29 19:33:08 | INFO     | enaho_downloader | Archivo cargado: sumaria-2024-12g.dta (33691 filas)
2025-10-29 19:33:11 | INFO     | enaho_downloader | Archivo cargado: enaho01a-2024-400.dta (110451 filas)
2025-10-29 19:33:17 | INFO     | enaho_downloader | Archivo cargado: sumaria-2024.dta (33691 filas)
2025-10-29 19:33:22 | INFO     | enaho_downloader | Archivo cargado: enaho01a-2024-500.dta (85992 filas)
2025-10-29 19:33:22 | INFO     | enaho_downloader | Archivo cargado: enaho_tabla_ciiu_rev3.dta (296 filas)
2025-10-29 19:33:22 | INFO     | enaho_downloader | Archivo cargado: enaho_tabla_ciiu_rev4.dta (419 filas)
2025-10-29 19:33:22

In [21]:
from analisis_pob_mon_lab import pipeline_completo, analisis_descriptivo_ponderado


# Run the processing pipeline from the local analysis module on the merged
# person-level and household-level tables; the CIIU and CNO lookup tables are
# read from the two .dta paths given below.
df_final = pipeline_completo(
    df_individuos=data_individuos,
    df_hogares_enaho=data_hogares,
    ruta_ciiu=r'E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_05_2024\enaho_tabla_ciiu_rev4.dta',
    ruta_cno=r'E:\papx\end_to_end_ml\nb_pr\enahopy\examples\investigacion\data\modulo_05_2024\enaho_tabla_cno_2015.dta',
)

# Follow-up step, currently disabled (weighted descriptive analysis):
# resultados = analisis_descriptivo_ponderado(
#     df=df_final,
#     var_pobreza='es_pobre_monetario',
#     peso='factor07'
# )


INICIANDO PIPELINE COMPLETO DE PROCESAMIENTO

→ Cargando tablas CIIU y CNO...
  ✓ CIIU: 52 códigos cargados
  ✓ CNO: 46 códigos cargados

→ Limpiando variables individuales...
  ✓ p208a: 113755 valores válidos
  ✓ p513t: 62874 valores válidos
  ✓ i524e1: 25327 valores válidos
  ✓ p301b: 100072 valores válidos
  ✓ p301c: 23740 valores válidos

→ Creando características individuales...
  • Calculando informalidad laboral...
    ✓ 43078/61878 informales (69.6%)
  • Identificando personas con discapacidad...
    ✓ 6,052 personas con discapacidad (5.1%)
  • Identificando personas con seguro de salud...
    ✓ Encontradas 8 columnas de seguro: p4191, p4192, p4193, p4194, p4195, p4196, p4197, p4198
    DEBUG - Valores únicos en p4191:
                 'no' :   85,929 ( 73.0%)
            'essalud' :   24,422 ( 20.7%)
                  nan :    7,370 (  6.3%)
      • essalud: 24,422 personas (20.7%)
      • seguro_privado: 1,070 personas (0.9%)
      • entidad_prestadora: 753 personas (0.6%)
 

In [23]:
# Preview 15 random rows. The fixed random_state makes the preview identical
# on every run, so the displayed output is reproducible after
# Restart Kernel -> Run All.
df_final.sample(15, random_state=42)

Unnamed: 0,conglome,vivienda,hogar,n_personas,n_ninos,n_adultos_mayores,n_edad_trabajar,n_ocupados,n_informales,n_discapacitados,...,tasa_informalidad_hogar,prop_asegurados,prop_discapacitados,ingreso_laboral_percapita,ingreso_por_hora,carga_ninos,carga_adultos_mayores,ocupados_per_capita,es_pobre_monetario,pobreza_laboral
14368,17278,388,11,4,0,0,2,2,2.0,0,...,1.0,0.25,0.0,0.0,0.0,0.0,0.0,1.0,0,1
30826,20126,62,22,3,1,0,2,1,1.0,0,...,1.0,1.0,0.0,5341.666504,95.386905,0.333333,0.0,0.333333,1,0
11788,16849,28,11,3,1,0,2,1,0.0,0,...,0.0,0.666667,0.0,3965.0,61.953125,0.333333,0.0,0.333333,0,0
14783,17335,111,11,4,0,1,3,2,0.0,1,...,0.0,1.0,0.25,11943.75,140.514706,0.0,0.25,0.5,0,0
8518,16358,74,11,4,0,0,4,3,2.0,0,...,0.666667,1.0,0.0,16994.5,124.959559,0.0,0.0,0.75,0,0
20316,18324,44,11,4,1,0,3,1,0.0,0,...,0.0,1.0,0.0,3911.75,81.494792,0.25,0.0,0.25,0,0
438,15078,47,11,7,3,0,4,4,4.0,0,...,1.0,1.0,0.0,5753.428711,71.407801,0.428571,0.0,0.571429,0,0
19517,18152,131,11,2,0,0,2,2,1.0,0,...,0.5,0.5,0.0,11306.5,47.110417,0.0,0.0,1.0,0,0
21133,18477,20,11,8,0,0,6,3,1.0,0,...,0.333333,0.5,0.0,5657.5,72.532051,0.0,0.0,0.5,0,0
238,15042,140,11,5,2,0,3,1,1.0,0,...,1.0,0.8,0.0,7467.200195,233.35,0.4,0.0,0.2,0,0


In [24]:
# Persist the final table as CSV in the current working directory, without
# writing the row index as a column.
df_final.to_csv(path_or_buf='dataframe_final_2024.csv', index=False)