In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')


In [2]:
# === CARGA DE DATOS ===
def load_education_data():
    """Read the education CSV exports into a dict of DataFrames.

    Each file is read with ';' as delimiter when its path contains
    'education' but not 'municipality', and with ',' otherwise. Whenever a
    frame has an 'id', 'id_education' or 'id_municipality' column, that
    column is cast to ``str``.

    Returns:
        dict: dataset name ('education', 'admissions', 'enrollment',
        'edu_municipality', 'municipality') -> pandas.DataFrame.
    """
    sources = (
        ('education', '../downloads/normalizacion/education.csv'),
        ('admissions', '../downloads/normalizacion/education_admition.csv'),
        ('enrollment', '../downloads/normalizacion/education_enrollment.csv'),
        ('edu_municipality', '../downloads/normalizacion/education_municipality.csv'),
        ('municipality', '../downloads/normalizacion/municipality.csv'),
    )
    key_columns = ('id', 'id_education', 'id_municipality')

    frames = {}
    for dataset, csv_path in sources:
        # Comma is the fallback; only pure "education*" files use semicolons.
        if 'municipality' in csv_path or 'education' not in csv_path:
            separator = ','
        else:
            separator = ';'

        frame = pd.read_csv(csv_path, delimiter=separator)
        for column in key_columns:
            if column in frame.columns:
                frame[column] = frame[column].astype(str)
        frames[dataset] = frame

    return frames

# Load every dataset once; this dict is passed to both analysis functions in later cells.
data = load_education_data()


In [3]:
# === CONFIGURACIÓN DE CICLOS EDUCATIVOS ===
def get_cycle_config():
    """Return the per-cycle settings used by the analyses.

    Returns:
        dict: cycle key -> {'name': display label,
        'a√±os_permanencia': number of years a student stays in the cycle}.
    """
    # (cycle key, display label, years of permanence)
    cycle_specs = (
        ('infantil_i_ciclo', 'Infantil I', 3),
        ('infantil_ii_ciclo', 'Infantil II', 3),
        ('primaria', 'Primaria', 6),
        ('eso', 'ESO', 4),
    )
    return {
        key: {'name': label, 'a√±os_permanencia': years}
        for key, label, years in cycle_specs
    }

# Shared cycle settings; passed to both analysis functions in later cells.
cycle_config = get_cycle_config()


In [4]:
# === ANÁLISIS DE OCUPACIÓN POR CENTRO EDUCATIVO ===
# Key of the "years of permanence" entry, spelled exactly as in the cycle
# configuration dict this notebook builds.
_PERMANENCE_KEY = 'a√±os_permanencia'


def _years_in(*frames):
    """Return the distinct 'year' values of `frames` in a stable order."""
    found = set()
    for frame in frames:
        found.update(frame['year'].unique())
    # Sorting by string form removes any dependence on set/hash ordering, so
    # iteration (and tie-breaking between years) is reproducible across runs.
    return sorted(found, key=str)


def _total_for_year(frame, year):
    """Sum the 'total' column over the rows of `frame` whose 'year' equals `year`."""
    return frame[frame['year'] == year]['total'].sum()


def _peak_capacity(cycle_enrollment, cycle_admitted, cycle_settings):
    """Find the year with the highest estimated seats for one center and cycle.

    Estimated seats for a year = enrolled total / years of permanence. Years
    with neither enrolled nor admitted students are ignored. On ties the first
    year in sorted order wins.

    Returns a dict with 'seats', 'enrolled', 'admitted' and 'year'; all zeros
    and year 'N/A' when no year qualifies.
    """
    peak = None
    for year in _years_in(cycle_enrollment, cycle_admitted):
        enrolled = _total_for_year(cycle_enrollment, year)
        admitted = _total_for_year(cycle_admitted, year)
        if not (enrolled > 0 or admitted > 0):
            continue
        seats = enrolled / cycle_settings[_PERMANENCE_KEY]
        if peak is None or seats > peak['seats']:
            peak = {'seats': seats, 'enrolled': enrolled, 'admitted': admitted, 'year': year}

    if peak is None:
        return {'seats': 0, 'enrolled': 0, 'admitted': 0, 'year': 'N/A'}
    return peak


def _infantil_ii_seats_by_year(center_enrollment, cycle_config):
    """Map each year with infantil II enrollment (> 0) to enrolled / years of permanence."""
    infantil_ii = center_enrollment[center_enrollment['cycle'] == 'infantil_ii_ciclo']
    seats_by_year = {}
    for year in _years_in(infantil_ii):
        enrolled = _total_for_year(infantil_ii, year)
        if enrolled > 0:
            seats_by_year[year] = enrolled / cycle_config['infantil_ii_ciclo'][_PERMANENCE_KEY]
    return seats_by_year


def _summarize_cycle(cycle, center_enrollment, center_admissions, cycle_config, target_year):
    """Compute every `<cycle>_*` column for one center, in output column order."""
    cycle_settings = cycle_config[cycle]
    cycle_enrollment = center_enrollment[center_enrollment['cycle'] == cycle]
    cycle_admissions = center_admissions[center_admissions['cycle'] == cycle]
    cycle_admitted = cycle_admissions[cycle_admissions['type_solicitude'] == 'Admitidas']

    peak = _peak_capacity(cycle_enrollment, cycle_admitted, cycle_settings)
    seats = peak['seats']
    peak_enrolled = peak['enrolled']

    # Figures for the target year only.
    enrolled_target = _total_for_year(cycle_enrollment, target_year)
    target_admissions = cycle_admissions[cycle_admissions['year'] == target_year]
    requested = target_admissions[
        target_admissions['type_solicitude'] == 'Presentadas'
    ]['total'].sum()
    admitted = target_admissions[
        target_admissions['type_solicitude'] == 'Admitidas'
    ]['total'].sum()

    columns = {}
    if cycle == 'primaria':
        # Primaria admissions are topped up with the center's own infantil II
        # per-grade estimate for the target year (students assumed to move up
        # without filing an application).
        infantil_seats = _infantil_ii_seats_by_year(center_enrollment, cycle_config)
        transitions = infantil_seats.get(target_year, 0)
        admitted_adjusted = admitted + transitions
        columns[f'{cycle}_transiciones_infantil'] = transitions
        columns[f'{cycle}_max_plazas_infantil_ii'] = (
            max(infantil_seats.values()) if infantil_seats else 0
        )
    else:
        admitted_adjusted = admitted
        columns[f'{cycle}_transiciones_infantil'] = 0

    is_active = seats > 0 or requested > 0
    columns.update({
        f'{cycle}_max_matriculados': peak_enrolled,
        f'{cycle}_max_admitidos': peak['admitted'],
        f'{cycle}_max_total_estudiantes': peak_enrolled,
        f'{cycle}_a√±o_max': peak['year'],
        f'{cycle}_matriculados_objetivo': enrolled_target,
        f'{cycle}_plazas_estimadas': seats,
        f'{cycle}_solicitudes_presentadas': requested,
        f'{cycle}_solicitudes_admitidas': admitted,
        f'{cycle}_solicitudes_admitidas_corregidas': admitted_adjusted,
        f'{cycle}_tasa_ocupacion': (enrolled_target / peak_enrolled) if peak_enrolled > 0 else 0,
        f'{cycle}_ratio_demanda': (requested / seats) if seats > 0 else 0,
        f'{cycle}_ratio_admision_capacidad': (admitted_adjusted / seats) if seats > 0 else 0,
        f'{cycle}_eficiencia_admision': (admitted / requested) if requested > 0 else 0,
        f'{cycle}_activo': 1 if is_active else 0,
    })
    return columns


def _build_center_record(center_id, center_name, center_type, center_enrollment,
                         center_admissions, cycle_config, valid_cycles, target_year):
    """Assemble the full output row for one center (per-cycle columns plus center totals)."""
    record = {
        'id_education': center_id,
        'center_name': center_name,
        'center_type': center_type,
    }

    active_cycles = 0
    total_seats = 0
    total_peak_enrolled = 0
    total_enrolled = 0
    total_requested = 0
    total_admitted_adjusted = 0
    total_admitted = 0

    for cycle in valid_cycles:
        columns = _summarize_cycle(
            cycle, center_enrollment, center_admissions, cycle_config, target_year
        )
        record.update(columns)

        # Only cycles flagged active contribute to the center-level totals.
        if columns[f'{cycle}_activo'] == 1:
            active_cycles += 1
            total_seats += columns[f'{cycle}_plazas_estimadas']
            total_peak_enrolled += columns[f'{cycle}_max_matriculados']
            total_enrolled += columns[f'{cycle}_matriculados_objetivo']
            total_requested += columns[f'{cycle}_solicitudes_presentadas']
            total_admitted_adjusted += columns[f'{cycle}_solicitudes_admitidas_corregidas']
            total_admitted += columns[f'{cycle}_solicitudes_admitidas']

    record.update({
        'ciclos_activos': active_cycles,
        'total_plazas_estimadas': total_seats,
        'total_matriculados': total_enrolled,
        'total_max_matriculados': total_peak_enrolled,
        'total_solicitudes_presentadas': total_requested,
        'total_solicitudes_admitidas': total_admitted_adjusted,
        'total_solicitudes_admitidas_reales': total_admitted,
        'tasa_ocupacion_centro': (total_enrolled / total_peak_enrolled) if total_peak_enrolled > 0 else 0,
        'ratio_demanda_centro': (total_requested / total_seats) if total_seats > 0 else 0,
        'ratio_admision_capacidad_centro': (total_admitted_adjusted / total_seats) if total_seats > 0 else 0,
        'eficiencia_admision_centro': (total_admitted / total_requested) if total_requested > 0 else 0,
    })
    return record


def analyze_center_occupancy(data, cycle_config, target_year="2022-2023"):
    """Compare applications against estimated capacity for every education center.

    For each center and each of the four cycles (infantil I, infantil II,
    primaria, ESO) the function estimates seats per grade as
    ``enrolled / years of permanence`` for every year, keeps the year with the
    highest estimate, and relates the target year's enrollment and
    applications to it. Centers without any active cycle (no estimated seats
    and no submitted applications) are left out.

    Args:
        data: dict with DataFrames under 'education' (columns 'id',
            'name_short', optionally 'description_short'), 'enrollment'
            (columns 'id_education', 'cycle', 'year', 'total') and
            'admissions' (same columns plus 'type_solicitude').
        cycle_config: dict keyed by cycle name; each value holds the years of
            permanence for that cycle.
        target_year: value of the 'year' column used for the target-year
            figures. Defaults to "2022-2023".

    Returns:
        pandas.DataFrame with one row per center that has at least one active
        cycle, ordered by center id (string order). When no center qualifies
        the frame is empty but still carries the full column layout, so
        downstream column lookups such as ``df['ciclos_activos']`` keep working.
    """
    valid_cycles = ['infantil_i_ciclo', 'infantil_ii_ciclo', 'primaria', 'eso']

    education = data['education']
    admissions = data['admissions']
    admissions = admissions[admissions['cycle'].isin(valid_cycles)]
    enrollment = data['enrollment']
    enrollment = enrollment[enrollment['cycle'].isin(valid_cycles)]

    center_ids = set(enrollment['id_education'].unique())
    center_ids.update(admissions['id_education'].unique())

    records = []
    # Sorted iteration: a bare set would make the row order change from one
    # kernel run to the next (string hashing is randomized per process).
    for center_id in sorted(center_ids, key=str):
        matches = education[education['id'] == center_id]
        if matches.empty:
            continue
        center_info = matches.iloc[0]

        record = _build_center_record(
            center_id,
            center_info['name_short'],
            center_info.get('description_short', 'No especificado'),
            # Slice once per center; the per-cycle work then runs on these small frames.
            enrollment[enrollment['id_education'] == center_id],
            admissions[admissions['id_education'] == center_id],
            cycle_config,
            valid_cycles,
            target_year,
        )
        if record['ciclos_activos'] > 0:
            records.append(record)

    if records:
        return pd.DataFrame(records)

    # No qualifying center: derive the column layout from a record built on
    # empty slices, and return an empty frame that still has every column.
    template = _build_center_record(
        None, None, None,
        enrollment.iloc[0:0], admissions.iloc[0:0],
        cycle_config, valid_cycles, target_year,
    )
    return pd.DataFrame(columns=list(template))

# Per-center occupancy table for all centers with at least one active cycle.
centers_df = analyze_center_occupancy(data, cycle_config)
# NOTE(review): the function above only emits centers whose 'ciclos_activos' is > 0,
# so this filter appears to keep every row; `centers_activos` is not referenced
# again in the cells visible here.
centers_activos = centers_df[centers_df['ciclos_activos'] > 0]


In [5]:
# === ANÁLISIS DE COBERTURA EDUCATIVA POR MUNICIPIO ===
def load_demographic_data():
    """Read the municipal demographics CSV and pair it with an age-range lookup.

    Returns:
        tuple: (DataFrame read from municipality_demographics.csv,
        dict mapping each age-range label to the list of education cycles
        that overlap it).
    """
    cycles_by_age_range = dict([
        ('0-4', ['infantil_i_ciclo', 'infantil_ii_ciclo']),
        ('5-9', ['infantil_ii_ciclo', 'primaria']),
        ('10-14', ['primaria', 'eso']),
        ('15-19', ['eso']),
    ])
    population = pd.read_csv('../downloads/normalizacion/municipality_demographics.csv')
    return population, cycles_by_age_range

def analyze_municipal_coverage(data, centers_df, cycle_config):
    """Build one education-coverage row per municipality.

    A municipality is reported only when it appears in
    ``data['edu_municipality']``, is found in ``data['municipality']``, has
    demographic rows for the most recent demographics year, and has at least
    one of its centers present in `centers_df`.

    For each cycle the capacity (sum of the centers' peak enrollment) is
    compared with a need estimated from the five-year population ranges,
    splitting each range evenly per year of age.

    Args:
        data: dict holding the 'edu_municipality' and 'municipality' DataFrames.
        centers_df: per-center table with the `<cycle>_*` columns read below,
            plus 'id_education' and 'center_name'.
        cycle_config: dict keyed by cycle name; every analysed cycle must be present.

    Returns:
        pandas.DataFrame with one row per reported municipality.
    """
    cycle_names = ['infantil_i_ciclo', 'infantil_ii_ciclo', 'primaria', 'eso']
    school_age_ranges = ['0-4', '5-9', '10-14', '15-19']
    # Indexed by the number of cycles (0-4) that have at least one center.
    access_labels = (
        "Sin Acceso",
        "Acceso Limitado",
        "Acceso Parcial",
        "Acceso Bueno",
        "Acceso Completo",
    )

    population_all_years, _unused_age_mapping = load_demographic_data()
    center_links = data['edu_municipality']
    municipality_table = data['municipality']

    newest_year = population_all_years['year'].max()
    population = population_all_years[population_all_years['year'] == newest_year].copy()

    rows = []
    for municipality_id in center_links['id_municipality'].unique():
        matches = municipality_table[municipality_table['id'] == municipality_id]
        if matches.empty:
            continue
        town = matches.iloc[0]
        secondary_id = town['id_secondary']

        town_population = population[
            population['id_secondary_municipality'] == secondary_id
        ].copy()
        if town_population.empty:
            continue

        linked_center_ids = center_links[
            center_links['id_municipality'] == municipality_id
        ]['id_education'].unique()
        town_centers = centers_df[centers_df['id_education'].isin(linked_center_ids)].copy()
        if town_centers.empty:
            continue

        row = {
            'id_municipality': municipality_id,
            'municipality_name': town['name'],
            'total_centers': len(town_centers),
            'total_population_0_19': 0,
        }

        # Population per school-age range (other ranges are ignored).
        for _, record in town_population.iterrows():
            bracket = record['range']
            headcount = record['total']
            if bracket in school_age_ranges:
                row['total_population_0_19'] += headcount
                row[f'population_{bracket}'] = headcount

        # Supply side: aggregate the centers' figures per cycle.
        for cycle in cycle_names:
            _ = cycle_config[cycle]  # lookup kept: an unknown cycle raises KeyError here

            peak_enrolled = town_centers[f'{cycle}_max_matriculados'].sum()
            seats = town_centers[f'{cycle}_plazas_estimadas'].sum()
            requests = town_centers[f'{cycle}_solicitudes_presentadas'].sum()
            admitted = town_centers[f'{cycle}_solicitudes_admitidas_corregidas'].sum()

            offering = town_centers[town_centers[f'{cycle}_activo'] == 1]
            offering_count = len(offering)
            if offering_count > 0:
                offering_names = ', '.join(offering['center_name'].tolist())
            else:
                offering_names = 'Ninguno'

            row[f'{cycle}_capacity'] = peak_enrolled
            row[f'{cycle}_plazas_estimadas'] = seats
            row[f'{cycle}_max_matriculados'] = peak_enrolled
            row[f'{cycle}_solicitudes'] = requests
            row[f'{cycle}_admitidos'] = admitted
            row[f'{cycle}_num_centers'] = offering_count
            row[f'{cycle}_centers_names'] = offering_names

        # Demand side: each five-year range contributes n/5 of its population
        # for every year of age that falls inside the cycle.
        ages_0_4 = row.get('population_0-4', 0)
        ages_5_9 = row.get('population_5-9', 0)
        ages_10_14 = row.get('population_10-14', 0)
        ages_15_19 = row.get('population_15-19', 0)
        needs = {
            'infantil_i_ciclo': (ages_0_4 * 3) / 5,
            'infantil_ii_ciclo': (ages_0_4 * 2) / 5 + (ages_5_9 * 1) / 5,
            'primaria': (ages_5_9 * 4) / 5 + (ages_10_14 * 2) / 5,
            'eso': (ages_10_14 * 3) / 5 + (ages_15_19 * 1) / 5,
        }

        total_estimated_need = 0
        total_capacity = 0
        cycles_with_need = 0
        cycles_covered = 0

        for cycle, estimated_need in needs.items():
            capacity = row[f'{cycle}_capacity']

            if estimated_need > 0:
                coverage_ratio = capacity / estimated_need
                is_covered = coverage_ratio >= 1
            else:
                # No estimated need: any capacity at all counts as covered.
                coverage_ratio = float('inf') if capacity > 0 else 0
                is_covered = capacity > 0

            row[f'{cycle}_estimated_need'] = estimated_need
            row[f'{cycle}_coverage_ratio'] = coverage_ratio
            row[f'{cycle}_is_covered'] = is_covered
            row[f'{cycle}_deficit'] = max(0, estimated_need - capacity)

            if not estimated_need > 0:
                continue
            cycles_with_need += 1
            total_estimated_need += estimated_need
            total_capacity += capacity
            if is_covered:
                cycles_covered += 1

        if total_estimated_need > 0:
            overall_coverage_ratio = total_capacity / total_estimated_need
        else:
            overall_coverage_ratio = 0
        if cycles_with_need > 0:
            coverage_percentage = cycles_covered / cycles_with_need * 100
        else:
            coverage_percentage = 0

        # Access: how many cycles have at least one active center.
        cycles_with_access = sum(
            1 for cycle in cycle_names if row[f'{cycle}_num_centers'] > 0
        )
        access_percentage = (cycles_with_access / len(cycle_names)) * 100
        access_classification = access_labels[cycles_with_access]

        has_mandatory_education = (
            row['primaria_num_centers'] > 0 and row['eso_num_centers'] > 0
        )
        has_basic_education = (
            row['infantil_ii_ciclo_num_centers'] > 0 and has_mandatory_education
        )

        row['total_estimated_need'] = total_estimated_need
        row['total_capacity'] = total_capacity
        row['overall_coverage_ratio'] = overall_coverage_ratio
        row['cycles_with_need'] = cycles_with_need
        row['cycles_covered'] = cycles_covered
        row['coverage_percentage'] = coverage_percentage
        row['is_fully_covered'] = cycles_covered == cycles_with_need and cycles_with_need > 0
        row['total_deficit'] = max(0, total_estimated_need - total_capacity)
        row['cycles_with_access'] = cycles_with_access
        row['access_percentage'] = access_percentage
        row['access_classification'] = access_classification
        row['has_basic_education'] = has_basic_education
        row['has_mandatory_education'] = has_mandatory_education
        row['missing_cycles'] = [
            cycle for cycle in cycle_names if row[f'{cycle}_num_centers'] == 0
        ]

        rows.append(row)

    return pd.DataFrame(rows)

# Per-municipality coverage table, derived from the per-center results in centers_df.
municipal_coverage_df = analyze_municipal_coverage(data, centers_df, cycle_config)


In [6]:
# === IMPRIMIR COLUMNAS DE LOS DATAFRAMES PARA DOCUMENTACIÓN ===

def _print_column_listing(lead, heading, frame):
    """Print a banner, the column count and a numbered list of `frame`'s columns."""
    rule = "=" * 80
    print(lead + rule)
    print(heading)
    print(rule)
    print(f"Total de columnas: {len(frame.columns)}")
    print("\nListado de columnas:")
    for position, column in enumerate(frame.columns, start=1):
        print(f"{position:2d}. {column}")


# Same report for both result tables; the second banner is preceded by a blank line.
_print_column_listing("", "üìä COLUMNAS DEL DATAFRAME: centers_df", centers_df)
_print_column_listing("\n", "üèõÔ∏è COLUMNAS DEL DATAFRAME: municipal_coverage_df", municipal_coverage_df)




üìä COLUMNAS DEL DATAFRAME: centers_df
Total de columnas: 75

Listado de columnas:
 1. id_education
 2. center_name
 3. center_type
 4. infantil_i_ciclo_transiciones_infantil
 5. infantil_i_ciclo_max_matriculados
 6. infantil_i_ciclo_max_admitidos
 7. infantil_i_ciclo_max_total_estudiantes
 8. infantil_i_ciclo_a√±o_max
 9. infantil_i_ciclo_matriculados_objetivo
10. infantil_i_ciclo_plazas_estimadas
11. infantil_i_ciclo_solicitudes_presentadas
12. infantil_i_ciclo_solicitudes_admitidas
13. infantil_i_ciclo_solicitudes_admitidas_corregidas
14. infantil_i_ciclo_tasa_ocupacion
15. infantil_i_ciclo_ratio_demanda
16. infantil_i_ciclo_ratio_admision_capacidad
17. infantil_i_ciclo_eficiencia_admision
18. infantil_i_ciclo_activo
19. infantil_ii_ciclo_transiciones_infantil
20. infantil_ii_ciclo_max_matriculados
21. infantil_ii_ciclo_max_admitidos
22. infantil_ii_ciclo_max_total_estudiantes
23. infantil_ii_ciclo_a√±o_max
24. infantil_ii_ciclo_matriculados_objetivo
25. infantil_ii_ciclo_plazas_es