In [None]:
%pip install duckdb pandas numpy pyspainmobility requests

<h1 align="center"><b>Building a 3-Tier Data Lakehouse for Mobility Analysis in Spain</b></h1>
<h3 align="center"><b style="color:gray">Silver Layer</b></h3>
<h4 align="right">Joan Fernández Navarro & Borja Albert Gramaje</h4>
<h3><b>Table of Contents</b></h3>
<ul style = "list-style-type: none; line-height: 0.5em;">
    <li><a href="#holidays"><h5>1. Spanish Holidays (Open Holidyas API)</h5></a></li>
    <li><a href="#mitma"><h5>2. Spanish Ministry of Transport, Mobility and Urban Agenda (MITMA) Open Data</h5></a></li>
    <ul style = "list-style-type: none; line-height: 1em;">
        <li><a href="#od"><h5>1.1. Origin-destination (OD) trip matrices</h5></a></li>
        <li><a href="#people"><h5>1.2. People by day</h5></a></li>
        <li><a href="#overnight"><h5>1.3. Overnight stays</h5></a></li>
        <li><a href="#zones"><h5>1.4. Zones</h5></a></li>
        <li><a href="#zones"><h5>1.5. Centroids</h5></a></li>
    </ul>
    <li><a href="#ine"><h5>3. Spanish National Statistics Institute (INE)</h5></a></li>
    <ul style = "list-style-type: none; line-height: 1em;">
        <li><a href="#population"><h5>3.1. Population by municipio (Padrón)</h5></a></li>
        <li><a href="#income"><h5>3.2. Income by distrito</h5></a></li>
        <li><a href="#business"><h5>3.3. Business by municipio</h5></a></li>
    </ul>
    </ul>
</ul>

In [2]:
import os
import duckdb
import requests
import pandas as pd
from pyspainmobility import Mobility, Zones

BASE_PATH = f"{os.getcwd()}/../../raw"
LAKE_LAYER = "bronze"

con = duckdb.connect("./../../mobility.db")

def SQL(q):
    """Run SQL (printed for clarity) and return a DataFrame."""
    return con.execute(q).fetchdf()

print("DuckDB version:", con.sql("SELECT version();").fetchone()[0])

DuckDB version: v1.4.2


<h2 id="holidays"><b>1. Spanish Holidays (Open Holidyas API)</b></h2>

In [4]:
SQL("""
    INSTALL httpfs;
    LOAD httpfs;
""")

SQL("""
    -- La tabla temporal es visible solo para la sesión que la creó.
    CREATE OR REPLACE TEMP TABLE spanish_holidays AS
    SELECT 
        startDate AS fecha,
        name[1].text AS nombre
    FROM read_json(
        'https://openholidaysapi.org/PublicHolidays?countryIsoCode=ES&languageIsoCode=ES&validFrom=2023-01-01&validTo=2023-12-31',
        format='array'
    )
    WHERE nationwide = true;
""")

Unnamed: 0,Count
0,9


In [5]:
SQL("SELECT * FROM spanish_holidays")

Unnamed: 0,fecha,nombre
0,2023-01-06,Epifanía del Señor
1,2023-04-07,Viernes Santo
2,2023-05-01,Día del Trabajador
3,2023-08-15,Asunción de la Virgen
4,2023-10-12,Fiesta Nacional de España
5,2023-11-01,Todos los Santos
6,2023-12-06,Día de la Constitución Española
7,2023-12-08,Inmaculada Concepción
8,2023-12-25,Navidad


<h2 id="mitma"><b>1. Spanish Ministry of Transport, Mobility and Urban Agenda (MITMA) Open Data</b></h2>

<h2 id="od"><b>1.1. Origin-destination (OD) trip matrices</b></h2>

```mermaid
flowchart TD

    %% --------------------------
    %% Bronze Sources
    %% --------------------------
    B1[bronze_mitma_od_distritos]:::bronze
    B2[bronze_mitma_od_municipios]:::bronze
    B3[bronze_mitma_od_gau]:::bronze
    H[spanish_holidays]:::bronze

    %% --------------------------
    %% Transform blocks
    %% --------------------------
    subgraph T1[Transformations]
        direction TB
        C1[Parse fecha]
        C2[Convert periodo → hora]
        C3[Numerical Casting]
        C4[Boolean normalization]
        C5[Holidays Flags]
        C6[Add zone_level]
    end

    %% --------------------------
    %% Individual transform outputs
    %% --------------------------
    S1[silver_od_distritos]:::silver
    S2[silver_od_municipios]:::silver
    S3[silver_od_gau]:::silver

    %% --------------------------
    %% Final unified table
    %% --------------------------
    ALL[silver_od_all]:::target

    %% --------------------------
    %% Flows bronze → transform
    %% --------------------------
    B1 --> T1 --> S1
    B2 --> T1 --> S2
    B3 --> T1 --> S3
    H --> C5

    %% --------------------------
    %% UNION ALL final
    %% --------------------------
    S1 --> ALL
    S2 --> ALL
    S3 --> ALL

    %% --------------------------
    %% Styles
    %% --------------------------
    classDef bronze fill:#f2d7d5,stroke:#a93226,color:#000;
    classDef silver fill:#d6eaf8,stroke:#2e86c1,color:#000;
    classDef target fill:#d5f5e3,stroke:#1d8348,color:#000,font-weight:bold;

```

![Descripción de la imagen](./schemas/silver_od.png)

In [None]:
"""
Generates a unified silver_od_all table directly from all Bronze MITMA OD tables.
Adds:
  - Type casting
  - Weekend / holiday flags
  - NULL filtering for required fields
  - zone_level field
"""

SQL("""
CREATE OR REPLACE TABLE silver_od_all AS
WITH base AS (

    -------------------------------------------------------------------
    -- DISTRITOS
    -------------------------------------------------------------------
    SELECT
        'distritos' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        CAST(periodo AS SMALLINT) AS hora,

        origen AS origen_zone_id,
        destino AS destino_zone_id,

        CAST(viajes AS DOUBLE)    AS viajes,
        CAST(viajes_km AS DOUBLE) AS viajes_km,

        distancia,
        actividad_origen,
        actividad_destino,
        residencia,
        renta,
        edad,
        sexo,

        CASE WHEN estudio_destino_posible ILIKE 'si' THEN TRUE
             WHEN estudio_destino_posible ILIKE 'no' THEN FALSE END
             AS estudio_destino_posible,

        CASE WHEN estudio_origen_posible ILIKE 'si' THEN TRUE
             WHEN estudio_origen_posible ILIKE 'no' THEN FALSE END
             AS estudio_origen_posible

    FROM bronze_mitma_od_distritos


    UNION ALL

    -------------------------------------------------------------------
    -- MUNICIPIOS
    -------------------------------------------------------------------
    SELECT
        'municipios' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        CAST(periodo AS SMALLINT) AS hora,

        origen AS origen_zone_id,
        destino AS destino_zone_id,

        CAST(viajes AS DOUBLE)    AS viajes,
        CAST(viajes_km AS DOUBLE) AS viajes_km,

        distancia,
        actividad_origen,
        actividad_destino,
        residencia,
        renta,
        edad,
        sexo,

        CASE WHEN estudio_destino_posible ILIKE 'si' THEN TRUE
             WHEN estudio_destino_posible ILIKE 'no' THEN FALSE END,
        CASE WHEN estudio_origen_posible ILIKE 'si' THEN TRUE
             WHEN estudio_origen_posible ILIKE 'no' THEN FALSE END

    FROM bronze_mitma_od_municipios


    UNION ALL

    -------------------------------------------------------------------
    -- GAU
    -------------------------------------------------------------------
    SELECT
        'gau' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        CAST(periodo AS SMALLINT) AS hora,

        origen AS origen_zone_id,
        destino AS destino_zone_id,

        CAST(viajes AS DOUBLE)    AS viajes,
        CAST(viajes_km AS DOUBLE) AS viajes_km,

        distancia,
        actividad_origen,
        actividad_destino,
        residencia,
        renta,
        edad,
        sexo,

        CASE WHEN estudio_destino_posible ILIKE 'si' THEN TRUE
             WHEN estudio_destino_posible ILIKE 'no' THEN FALSE END,
        CASE WHEN estudio_origen_posible ILIKE 'si' THEN TRUE
             WHEN estudio_origen_posible ILIKE 'no' THEN FALSE END

    FROM bronze_mitma_od_gau
),

enriched AS (
    SELECT
        *,
        CASE WHEN dayofweek(fecha) IN (6,7) THEN TRUE ELSE FALSE END AS is_weekend,
        CASE WHEN fecha IN (SELECT fecha FROM spanish_holidays) 
            THEN TRUE 
            ELSE FALSE 
        END AS is_holiday
    FROM base
),

filtered AS (
    SELECT *
    FROM enriched
    WHERE 
        -- Campos críticos: descartar registros incompletos
        fecha IS NOT NULL
        AND hora IS NOT NULL
        AND origen_zone_id IS NOT NULL
        AND destino_zone_id IS NOT NULL
        AND viajes IS NOT NULL
        AND viajes_km IS NOT NULL
        AND distancia IS NOT NULL
)

SELECT * FROM filtered;
""")

print("Created unified Silver table: silver_od_all")

Created unified Silver table: silver_od_all


In [7]:
SQL("""
    SELECT *,
    FROM silver_od_all
    ORDER BY viajes DESC
    LIMIT 20
""")

Unnamed: 0,zone_level,fecha,hora,origen_zone_id,destino_zone_id,viajes,viajes_km,distancia,actividad_origen,actividad_destino,residencia,renta,edad,sexo,estudio_destino_posible,estudio_origen_posible,is_weekend,is_holiday
0,gau,2022-03-03,8,GAU Madrid,GAU Madrid,18996.93,90782.038,2-10,casa,frecuente,28,>15,45-65,mujer,False,False,False,False
1,gau,2022-03-03,8,GAU Madrid,GAU Madrid,18831.837,91311.245,2-10,casa,frecuente,28,10-15,45-65,mujer,False,False,False,False
2,gau,2022-03-04,8,GAU Madrid,GAU Madrid,17562.185,84415.297,2-10,casa,frecuente,28,10-15,45-65,mujer,False,False,False,False
3,gau,2022-03-04,14,GAU Madrid,GAU Madrid,17542.066,21311.297,0.5-2,frecuente,casa,28,10-15,45-65,mujer,False,False,False,False
4,gau,2022-03-04,8,GAU Madrid,GAU Madrid,17125.443,80962.213,2-10,casa,frecuente,28,>15,45-65,mujer,False,False,False,False
5,gau,2022-03-03,8,GAU Madrid,GAU Madrid,16717.248,20911.137,0.5-2,casa,frecuente,28,10-15,45-65,mujer,False,False,False,False
6,gau,2022-03-03,8,GAU Madrid,GAU Madrid,16692.804,79708.001,2-10,casa,frecuente,28,>15,25-45,mujer,False,False,False,False
7,gau,2022-03-03,8,GAU Madrid,GAU Madrid,16673.719,80961.202,2-10,casa,frecuente,28,10-15,25-45,mujer,False,False,False,False
8,gau,2022-03-03,8,GAU Madrid,GAU Madrid,16513.202,79211.988,2-10,casa,frecuente,28,10-15,45-65,hombre,False,False,False,False
9,gau,2022-03-03,14,GAU Madrid,GAU Madrid,16409.672,20103.389,0.5-2,frecuente,casa,28,10-15,45-65,mujer,False,False,False,False


In [5]:
SQL(f"""
    SELECT zone_level, COUNT(*)
    FROM silver_od_all
    GROUP BY zone_level;
""")

Unnamed: 0,zone_level,count_star()
0,municipios,34779907
1,gau,21164797
2,distritos,55441797


In [None]:
SQL(f"""
    SELECT *
    FROM silver_od_all 
    LIMIT 10;
""")

Unnamed: 0,zone_level,fecha,hora,origen_zone_id,destino_zone_id,viajes,viajes_km,distancia,actividad_origen,actividad_destino,residencia,renta,edad,sexo,estudio_destino_posible,estudio_origen_posible,is_weekend,is_holiday
0,distritos,2022-03-05,12,1402110,1402109,13256.0,51465.0,2-10,casa,frecuente,14,10-15,0-25,mujer,False,False,True,False
1,distritos,2022-03-05,12,1402110,1402109,23736.0,147094.0,2-10,casa,frecuente,14,10-15,25-45,hombre,False,False,True,False
2,distritos,2022-03-05,12,1402110,1402109,276.0,133873.0,2-10,casa,frecuente,14,10-15,25-45,mujer,False,False,True,False
3,distritos,2022-03-05,12,1402110,1402109,16952.0,75381.0,2-10,casa,frecuente,14,10-15,45-65,hombre,False,False,True,False
4,distritos,2022-03-05,12,1402110,1402109,37069.0,181165.0,2-10,casa,frecuente,14,10-15,45-65,mujer,False,False,True,False
5,distritos,2022-03-05,12,1402110,1402109,10065.0,68155.0,2-10,casa,frecuente,14,10-15,65-100,hombre,False,False,True,False
6,distritos,2022-03-05,12,1402110,1402109,24452.0,113582.0,2-10,casa,frecuente,14,10-15,65-100,mujer,False,False,True,False
7,distritos,2022-03-05,12,1402110,1402109,5934.0,5376.0,2-10,casa,no_frecuente,14,10-15,25-45,hombre,False,False,True,False
8,distritos,2022-03-05,12,1402110,1402109,345.0,16547.0,2-10,casa,no_frecuente,14,10-15,25-45,mujer,False,False,True,False
9,distritos,2022-03-05,12,1402110,1402109,2119.0,6927.0,2-10,casa,no_frecuente,14,10-15,45-65,hombre,False,False,True,False


In [8]:
SQL(f"""
    SELECT is_weekend, count(*) as total
    FROM silver_od_all 
    GROUP BY is_weekend;
""")

Unnamed: 0,is_weekend,total
0,False,76123130
1,True,35263371


<h2 id="people"><b>1.2. People by day</b></h2>

```mermaid
flowchart TD
    %% --------------------------
    %% Bronze Sources
    %% --------------------------
    B1[bronze_mitma_peple_day_distritos]:::bronze
    B2[bronze_mitma_peple_day_municipios]:::bronze
    B3[bronze_mitma_peple_day_gau]:::bronze

    %% --------------------------
    %% Transform blocks
    %% --------------------------
    subgraph T1[Transformations]
        direction TB
        C1[Parse fecha]
        C3[Numerical Casting]
        C6[Add zone_level]
    end

    %% --------------------------
    %% Individual transform outputs
    %% --------------------------
    S1[silver_peple_day_distritos]:::silver
    S2[silver_peple_day_municipios]:::silver
    S3[silver_peple_day_gau]:::silver

    %% --------------------------
    %% Final unified table
    %% --------------------------
    ALL[silver_peple_day_all]:::target

    %% --------------------------
    %% Flows bronze → transform
    %% --------------------------
    B1 --> T1 --> S1
    B2 --> T1 --> S2
    B3 --> T1 --> S3

    %% --------------------------
    %% UNION ALL final
    %% --------------------------
    S1 --> ALL
    S2 --> ALL
    S3 --> ALL

    %% --------------------------
    %% Styles
    %% --------------------------
    classDef bronze fill:#f2d7d5,stroke:#a93226,color:#000;
    classDef silver fill:#d6eaf8,stroke:#2e86c1,color:#000;
    classDef target fill:#d5f5e3,stroke:#1d8348,color:#000,font-weight:bold;

```

![Descripción de la imagen](./schemas/silver_people_day.png)

In [9]:
"""
Generates a unified silver_people_day_all table directly from all Bronze MITMA People Day tables.
Adds:
  - Type casting
  - zone_level field
  - Filtering of incomplete/null rows
"""

SQL("""
CREATE OR REPLACE TABLE silver_people_day_all AS
WITH base AS (

    -------------------------------------------------------------------
    -- DISTRITOS
    -------------------------------------------------------------------
    SELECT
        'distritos' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        edad,
        sexo,
        numero_viajes,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_people_day_distritos

    UNION ALL

    -------------------------------------------------------------------
    -- MUNICIPIOS
    -------------------------------------------------------------------
    SELECT
        'municipios' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        edad,
        sexo,
        numero_viajes,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_people_day_municipios

    UNION ALL

    -------------------------------------------------------------------
    -- GAU
    -------------------------------------------------------------------
    SELECT
        'gau' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        edad,
        sexo,
        numero_viajes,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_people_day_gau
),

filtered AS (
    SELECT *
    FROM base
    WHERE
        -- Required fields: avoid null or garbage rows
        fecha IS NOT NULL
        AND zona_pernoctacion IS NOT NULL
        AND edad IS NOT NULL
        AND sexo IS NOT NULL
        AND numero_viajes IS NOT NULL
        AND personas IS NOT NULL
)

SELECT * FROM filtered;
""")

print("Created unified Silver table: silver_people_day_all")


Created unified Silver table: silver_people_day_all


In [10]:
SQL(f"""
    SELECT zone_level, COUNT(*)
    FROM silver_people_day_all
    GROUP BY zone_level;
""")

Unnamed: 0,zone_level,count_star()
0,gau,182214
1,municipios,231688
2,distritos,340448


In [11]:
SQL(f"""
    SELECT *
    FROM silver_people_day_all 
    LIMIT 10;
""")

Unnamed: 0,zone_level,fecha,zona_pernoctacion,edad,sexo,numero_viajes,personas
0,distritos,2022-03-01,1001,0-25,hombre,0,125296.0
1,distritos,2022-03-01,1001,0-25,hombre,2,115378.0
2,distritos,2022-03-01,1001,0-25,hombre,2+,176630.0
3,distritos,2022-03-01,1001,0-25,mujer,0,125069.0
4,distritos,2022-03-01,1001,0-25,mujer,2,117712.0
5,distritos,2022-03-01,1001,0-25,mujer,2+,117712.0
6,distritos,2022-03-01,1001,25-45,hombre,0,111041.0
7,distritos,2022-03-01,1001,25-45,hombre,1,13820.0
8,distritos,2022-03-01,1001,25-45,hombre,2,94913.0
9,distritos,2022-03-01,1001,25-45,hombre,2+,147164.0


<h2 id="overnight"><b>1.3. Overnight stays</b></h2>

```mermaid
flowchart TD

    %% --------------------------
    %% Bronze Sources
    %% --------------------------
    B1[bronze_mitma_overnight_stay_distritos]:::bronze
    B2[bronze_mitma_overnight_stay_municipios]:::bronze
    B3[bronze_mitma_overnight_stay_gau]:::bronze

    %% --------------------------
    %% Transform blocks
    %% --------------------------
    subgraph T1[Transformations]
        direction TB
        C1[Parse fecha]
        C3[Numerical Casting]
        C6[Add zone_level]
    end

    %% --------------------------
    %% Individual transform outputs
    %% --------------------------
    S1[silver_overnight_stay_distritos]:::silver
    S2[silver_overnight_stay_municipios]:::silver
    S3[silver_overnight_stay_gau]:::silver

    %% --------------------------
    %% Final unified table
    %% --------------------------
    ALL[silver_overnight_stay_all]:::target

    %% --------------------------
    %% Flows bronze → transform
    %% --------------------------
    B1 --> T1 --> S1
    B2 --> T1 --> S2
    B3 --> T1 --> S3

    %% --------------------------
    %% UNION ALL final
    %% --------------------------
    S1 --> ALL
    S2 --> ALL
    S3 --> ALL

    %% --------------------------
    %% Styles
    %% --------------------------
    classDef bronze fill:#f2d7d5,stroke:#a93226,color:#000;
    classDef silver fill:#d6eaf8,stroke:#2e86c1,color:#000;
    classDef target fill:#d5f5e3,stroke:#1d8348,color:#000,font-weight:bold;

```

![Descripción de la imagen](./schemas/silver_overnight.png)

In [12]:
"""
Generates a unified silver_people_day_all table directly from all Bronze MITMA People Day tables.
Adds:
  - Type casting
  - zone_level field
  - Filtering of incomplete/null rows
"""

SQL("""
CREATE OR REPLACE TABLE silver_overnight_stay_all AS
WITH base AS (

    -------------------------------------------------------------------
    -- DISTRITOS
    -------------------------------------------------------------------
    SELECT
        'distritos' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        zona_residencia,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_overnight_stay_distritos

    UNION ALL

    -------------------------------------------------------------------
    -- MUNICIPIOS
    -------------------------------------------------------------------
    SELECT
        'municipios' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        zona_residencia,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_overnight_stay_municipios

    UNION ALL

    -------------------------------------------------------------------
    -- GAU
    -------------------------------------------------------------------
    SELECT
        'gau' AS zone_level,
        strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
        zona_pernoctacion,
        zona_residencia,
        CAST(REPLACE(personas, '.', '') AS DOUBLE) AS personas
    FROM bronze_mitma_overnight_stay_gau
),

filtered AS (
    SELECT *
    FROM base
    WHERE
        -- Required fields: avoid null or garbage rows
        fecha IS NOT NULL
        AND zona_pernoctacion IS NOT NULL
        AND zona_residencia IS NOT NULL
        AND personas IS NOT NULL
)

SELECT * FROM filtered;
""")

print("Created unified Silver table: silver_overnight_stay_all")


Created unified Silver table: silver_overnight_stay_all


In [13]:
SQL(f"""
    SELECT zone_level, COUNT(*)
    FROM silver_overnight_stay_all
    GROUP BY zone_level;
""")

Unnamed: 0,zone_level,count_star()
0,municipios,741266
1,distritos,947839
2,gau,574703


In [14]:
SQL(f"""
    SELECT *
    FROM silver_overnight_stay_all 
    LIMIT 10;
""")

Unnamed: 0,zone_level,fecha,zona_pernoctacion,zona_residencia,personas
0,distritos,2022-03-01,01001,1001,2733784.0
1,distritos,2022-03-01,01004_AM,1001,2514.0
2,distritos,2022-03-01,01009_AM,1001,18431.0
3,distritos,2022-03-01,01017_AM,1001,2922.0
4,distritos,2022-03-01,01051,1001,7831.0
5,distritos,2022-03-01,01058_AM,1001,10600.0
6,distritos,2022-03-01,0105901,1001,10301.0
7,distritos,2022-03-01,0105902,1001,3243.0
8,distritos,2022-03-01,0105903,1001,2514.0
9,distritos,2022-03-01,0105904,1001,13588.0


<h2 id="zones"><b>1.4. Zones</b></h2>

```mermaid
flowchart TD

    %% --------------------------
    %% Bronze Sources
    %% --------------------------
    B1[bronze_mitma_zones_distritos]:::bronze
    B2[bronze_mitma_zones_municipios]:::bronze
    B3[bronze_mitma_zones_gau]:::bronze

    %% --------------------------
    %% Transform blocks
    %% --------------------------
    subgraph T1[Transformations]
        direction TB
        C1[Parse Geometry]
        C3[Numerical Casting]
        C6[Add zone_level]
        C5[Centroid calculation]
        C4[Filtering by zone type]
        C2[Link distrito-municipio]
    end

    %% --------------------------
    %% Individual transform outputs
    %% --------------------------
    S1[silver_zones_distritos]:::silver
    S2[silver_zones_municipios]:::silver
    S3[silver_zones_gau]:::silver

    %% --------------------------
    %% Final unified table
    %% --------------------------
    ALL[silver_zones_all]:::target

    %% --------------------------
    %% Flows bronze → transform
    %% --------------------------
    B1 --> T1 --Length is 7--> S1
    B2 --> T1 --Length is 5--> S2
    B3 --> T1 --Length is 8--> S3

    %% --------------------------
    %% UNION ALL final
    %% --------------------------
    S1 --> ALL
    S2 --> ALL
    S3 --> ALL

    %% --------------------------
    %% Styles
    %% --------------------------
    classDef bronze fill:#f2d7d5,stroke:#a93226,color:#000;
    classDef silver fill:#d6eaf8,stroke:#2e86c1,color:#000;
    classDef target fill:#d5f5e3,stroke:#1d8348,color:#000,font-weight:bold;

```

![Descripción de la imagen](./schemas/silver_zones.png)

In [15]:
SQL("""
    INSTALL spatial;
    LOAD spatial;
""") 

Unnamed: 0,Success


In [4]:
SQL("""
CREATE OR REPLACE TABLE silver_zones_all AS
WITH base AS (
    -------------------------------------------------------------------
    -- DISTRITOS
    -------------------------------------------------------------------
    SELECT
        id,
        name,
        'distritos' AS zone_level,
        CAST(REPLACE(population, '.', '') AS DOUBLE) AS population,
        ST_Multi(ST_GeomFromText(geometry)) AS geometry_obj,
        -- calculo de centroide para usarlo en el modelo de gravedad
        ST_Centroid(geometry_obj) AS centroid,
        ST_X(ST_Centroid(geometry_obj)) AS centroid_longitude,
        ST_Y(ST_Centroid(geometry_obj)) AS centroid_latitude,
        substring(id, 1, 5) AS municipio_id
    FROM bronze_mitma_distritos
    WHERE length(id) = 7

    UNION ALL

    -------------------------------------------------------------------
    -- MUNICIPIOS
    -------------------------------------------------------------------
    SELECT
        id,
        name,
        'municipios' AS zone_level,
        CAST(REPLACE(population, '.', '') AS DOUBLE) AS population,
        ST_Multi(ST_GeomFromText(geometry)) AS geometry_obj,
        ST_Centroid(geometry_obj) AS centroid,
        ST_X(ST_Centroid(geometry_obj)) AS centroid_longitude,
        ST_Y(ST_Centroid(geometry_obj)) AS centroid_latitude,
        id AS municipio_id
    FROM bronze_mitma_municipios
    WHERE length(id) = 5

    UNION ALL

    -------------------------------------------------------------------
    -- GAU
    -------------------------------------------------------------------
    SELECT
        id,
        name,
        'gau' AS zone_level,
        CAST(REPLACE(population, '.', '') AS DOUBLE) AS population,
        ST_Multi(ST_GeomFromText(geometry)) AS geometry_obj,
        ST_Centroid(geometry_obj) AS centroid,
        ST_X(ST_Centroid(geometry_obj)) AS centroid_longitude,
        ST_Y(ST_Centroid(geometry_obj)) AS centroid_latitude,
        NULL AS municipio_id
    FROM bronze_mitma_gau
    WHERE length(id) = 8
),
filtered AS (
    SELECT *
    FROM base
    WHERE
        id IS NOT NULL
        AND name IS NOT NULL
        AND population IS NOT NULL
        AND geometry_obj IS NOT NULL
)
SELECT * FROM filtered;
""")

print("Created unified Silver table: silver_zones_all")


CatalogException: Catalog Error: Scalar Function with name "st_multi" is not in the catalog, but it exists in the spatial extension.

Please try installing and loading the spatial extension:
INSTALL spatial;
LOAD spatial;



In [18]:
SQL(f"""
    SELECT zone_level, COUNT(*)
    FROM silver_zones_all
    GROUP BY zone_level;
""")

Unnamed: 0,zone_level,count_star()
0,municipios,1645
1,gau,911
2,distritos,1565


In [3]:
SQL(f"""
    SELECT 
        * EXCLUDE (geometry_obj, centroid), 
        -- esto tiene que darnos un punto en geojson
        ST_AsGeoJSON(centroid) AS centroid,
        -- esto tiene que darnos un multipolygon en geojson
        ST_AsGeoJSON(geometry_obj) AS geojson 
    FROM silver_zones_all 
    LIMIT 10;
""")

CatalogException: Catalog Error: Scalar Function with name "st_asgeojson" is not in the catalog, but it exists in the spatial extension.

Please try installing and loading the spatial extension:
INSTALL spatial;
LOAD spatial;



<h2 id="ine"><b>2. Spanish National Statistics Institute (INE)</b></h2>

In [19]:
con.close()