In [None]:
%pip install duckdb pandas numpy pyspainmobility requests

In [5]:
import duckdb
import pandas

BASE_PATH = '../../raw'
LAKE_LAYER = 'silver'

con = duckdb.connect('../../mobility.db')

def SQL(q):
    """Run SQL (printed for clarity) and return a DataFrame."""
    return con.execute(q).fetchdf()

print("DuckDB version:", con.sql("SELECT version();").fetchone()[0]) # type: ignore

DuckDB version: v1.4.2


In [4]:
con.close()

```sql
-- viajes distristos
-- se castea en vez de VARCHAR de 'si/no' a un booleano de true o false
CREATE TABLE silver_mitma_od_districts (
    fecha DATE, -- de TEXT a DATE
    hora SMALLINT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes DOUBLE,
    viajes_km DOUBLE,
    estudio_destino_posible BOOLEAN,  -- de VARCHAR a BOOLEAN
    estudio_origen_posible BOOLEAN,   -- de VARCHAR a BOOLEAN
);

-- Viajes municipios
-- silver_mitma_od_municipalities / bronze_mitma_viajes_municipios
CREATE TABLE silver_mitma_od_municipalities (
    fecha DATE,  -- de TEXT a DATE
    hora SMALLINT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes DOUBLE,
    viajes_km DOUBLE,
    estudio_destino_posible BOOLEAN, -- de VARCHAR a BOOLEAN
    estudio_origen_posible BOOLEAN,  -- de VARCHAR a BOOLEAN
);

-- Viajes GAU
-- bronze_mitma_od_gau / bronze_mitma_viajes_gau
CREATE TABLE silver_mitma_od_gau (
    fecha DATE,  -- de TEXT a DATE
    hora SMALLINT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes DOUBLE,
    viajes_km DOUBLE,
    estudio_destino_posible BOOLEAN, -- de VARCHAR a BOOLEAN
    estudio_origen_posible BOOLEAN,  -- de VARCHAR a BOOLEAN
);
```

In [None]:
def load_od_matrices(type = "districts"):
    """ Load MITMA OD matrices from bronze to silver layer, transforming data types as needed."""

    dataset = 'od'
    table_name = f"{LAKE_LAYER}_mitma_{dataset}_{type}"

    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha DATE, -- de TEXT a DATE
            hora SMALLINT, -- Rename from 'periodo' to 'hora'. Converted to SMALLINT
            origen TEXT,
            destino TEXT,
            distancia TEXT,
            actividad_origen TEXT,
            actividad_destino TEXT,
            residencia TEXT,
            renta TEXT,
            edad TEXT,
            sexo TEXT,
            viajes DOUBLE, -- de VARCHAR a DOUBLE
            viajes_km DOUBLE, -- de VARCHAR a DOUBLE
            estudio_destino_posible BOOLEAN,  -- de VARCHAR a BOOLEAN
            estudio_origen_posible BOOLEAN,   -- de VARCHAR a BOOLEAN
        );
    """)

    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
            CAST(periodo AS SMALLINT) AS hora,
            origen,
            destino,
            distancia,
            actividad_origen,
            actividad_destino,
            residencia,
            renta,
            edad,
            sexo,
            CAST(viajes AS DOUBLE) AS viajes,
            CAST(viajes_km AS DOUBLE) AS viajes_km,
            CASE 
                WHEN estudio_destino_posible IN ('si', 'SI', 'sí') THEN TRUE 
                WHEN estudio_destino_posible IN ('no', 'NO') THEN FALSE 
                ELSE NULL
            END AS estudio_destino_posible,
            CASE 
                WHEN estudio_origen_posible IN ('si', 'SI', 'sí') THEN TRUE 
                WHEN estudio_origen_posible IN ('no', 'NO') THEN FALSE 
                ELSE NULL
            END AS estudio_origen_posible,
        FROM bronze_mitma_{dataset}_{type};
""")

In [7]:
load_od_matrices(type="distritos")
load_od_matrices(type="municipios")
load_od_matrices(type="gau")

In [10]:
SQL(f"SELECT * FROM {LAKE_LAYER}_mitma_od_distritos LIMIT 5;")

Unnamed: 0,fecha,hora,origen,destino,distancia,actividad_origen,actividad_destino,residencia,renta,edad,sexo,viajes,viajes_km,estudio_destino_posible,estudio_origen_posible
0,2022-03-01,8,1001,01002,10-50,casa,frecuente,1,10-15,,,2.764,125.486,False,False
1,2022-03-01,17,1001,01002,10-50,no_frecuente,casa,1,10-15,,,6.526,303.751,False,False
2,2022-03-01,0,1001,01009_AM,2-10,casa,frecuente,1,10-15,,,10.279,67.832,False,False
3,2022-03-01,0,1001,01009_AM,2-10,frecuente,casa,1,>15,,,4.591,42.419,False,False
4,2022-03-01,2,1001,01009_AM,2-10,casa,frecuente,1,10-15,,,2.539,13.819,False,False


In [11]:
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_od_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_od_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_od_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_od_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_od_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_od_gau;
""")

Unnamed: 0,name,count_star()
0,silver_mitma_od_gau,20827473
1,silver_mitma_od_municipios,34684127
2,silver_mitma_od_distritos,55862966


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_districts (
  fecha DATE,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,   -- 0,1,2,2+ (mantener TEXT)
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_municipalities (
  fecha DATE,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_gau (
  fecha DATE,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [12]:
def load_people_day(type = "districts", start_date='2022-03-01', end_date='2022-03-03'):
    """ Load MITMA People Day data from bronze to silver layer, transforming data types as needed. """
    dataset = 'people_day'
    table_name = f'{LAKE_LAYER}_mitma_{dataset}_{type}'

    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha DATE,
            zona_pernoctacion TEXT,
            edad TEXT,
            sexo TEXT,
            numero_viajes TEXT,
            personas DOUBLE,
        );
    """)

    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
            zona_pernoctacion,
            edad,
            sexo,
            numero_viajes,
            CAST(personas AS DOUBLE) AS personas,
        FROM bronze_mitma_{dataset}_{type};
    """)

In [13]:
load_people_day(type="distritos")
load_people_day(type="municipios")
load_people_day(type="gau")

In [18]:
SQL(f"SELECT * FROM {LAKE_LAYER}_mitma_people_day_distritos LIMIT 5;")

Unnamed: 0,fecha,zona_pernoctacion,edad,sexo,numero_viajes,personas
0,2022-03-01,1001,0-25,hombre,0,125.296
1,2022-03-01,1001,0-25,hombre,2,115.378
2,2022-03-01,1001,0-25,hombre,2+,176.63
3,2022-03-01,1001,0-25,mujer,0,125.069
4,2022-03-01,1001,0-25,mujer,2,117.712


In [17]:
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_people_day_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_people_day_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_people_day_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_gau;
""")

Unnamed: 0,name,count_star()
0,silver_mitma_people_day_gau,182214
1,silver_mitma_people_day_municipios,231688
2,silver_mitma_people_day_distritos,340448


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_districts (
  fecha DATE,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_municipalities (
  fecha DATE,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_gau (
  fecha DATE,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas DOUBLE,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [22]:

def load_overnight_stay(type = "districts"):
    dataset = 'overnight_stay'
    table_name = f'{LAKE_LAYER}_mitma_{dataset}_{type}'

    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha DATE,
            zona_residencia TEXT,
            zona_pernoctacion TEXT,
            personas DOUBLE,
        );
    """)

    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            strptime(CAST(fecha AS VARCHAR), '%Y%m%d')::DATE AS fecha,
            zona_residencia,
            zona_pernoctacion,
            CAST(personas AS DOUBLE) AS personas,
        FROM bronze_mitma_{dataset}_{type};
""")

In [23]:
load_overnight_stay(type="distritos")
load_overnight_stay(type="municipios")
load_overnight_stay(type="gau")

In [25]:
SQL(f"SELECT * FROM {LAKE_LAYER}_mitma_overnight_stay_distritos LIMIT 5;")

Unnamed: 0,fecha,zona_residencia,zona_pernoctacion,personas
0,2022-03-01,1001,01001,2733.784
1,2022-03-01,1001,01004_AM,2.514
2,2022-03-01,1001,01009_AM,18.431
3,2022-03-01,1001,01017_AM,2.922
4,2022-03-01,1001,01051,7.831


In [26]:
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_gau;
""")

Unnamed: 0,name,count_star()
0,silver_mitma_overnight_stay_municipios,741266
1,silver_mitma_overnight_stay_distritos,947839
2,silver_mitma_overnight_stay_gau,574703


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_districts (
  id TEXT,
  name TEXT,
  population DOUBLE,
  geometry GEOMETRY,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_municipalities (
  id TEXT,
  name TEXT,
  population DOUBLE,
  geometry GEOMETRY,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_gau (
  id TEXT,
  name TEXT,
  population DOUBLE,
  geometry GEOMETRY,
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [32]:

def load_zones(type = "districts"):
    table_name = f'{LAKE_LAYER}_mitma_{type}'

    SQL("""
        INSTALL spatial;
        LOAD spatial;
    """)

    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            id TEXT,
            name TEXT,
            population DOUBLE,
            geometry GEOMETRY,
        );
    """)

    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            id,
            name,
            CAST(population AS DOUBLE) AS population,
            ST_Multi(ST_GeomFromText(geometry)) AS geometry
        FROM bronze_mitma_{type};
    """)

In [33]:
load_zones(type="distritos")
load_zones(type="municipios")
load_zones(type="gau")

In [34]:
SQL(f"SELECT * FROM bronze_mitma_distritos LIMIT 5;")

Unnamed: 0,id,name,population,geometry,loaded_at,source_file
0,01001,Alegría-Dulantzi,2925.0,MULTIPOLYGON (((-2.534282365031893 42.78795118...,2025-11-16 16:29:23.829537,../../raw/MITMA/distritos/zones.csv.gz
1,01002,Amurrio,10307.0,MULTIPOLYGON (((-2.975672535119074 42.93785360...,2025-11-16 16:29:23.829537,../../raw/MITMA/distritos/zones.csv.gz
2,01004_AM,Artziniega agregacion de municipios,3005.0,MULTIPOLYGON (((-3.1400593014901315 43.1616524...,2025-11-16 16:29:23.829537,../../raw/MITMA/distritos/zones.csv.gz
3,01009_AM,Asparrena agregacion de municipios,4599.0,POLYGON ((-2.331385516435793 42.81775192629513...,2025-11-16 16:29:23.829537,../../raw/MITMA/distritos/zones.csv.gz
4,01010,Ayala/Aiara,2951.0,POLYGON ((-3.0007561458990124 43.0679967058346...,2025-11-16 16:29:23.829537,../../raw/MITMA/distritos/zones.csv.gz


In [None]:
SQL(f"""
    SELECT 
        * EXCLUDE (geometry), 
        ST_AsGeoJSON(geometry) AS geojson 
    FROM {LAKE_LAYER}_mitma_distritos
    LIMIT 5;
""")

Unnamed: 0,id,name,population,geojson
0,01001,Alegría-Dulantzi,2925.0,"{""type"":""MultiPolygon"",""coordinates"":[[[[-2.53..."
1,01002,Amurrio,10307.0,"{""type"":""MultiPolygon"",""coordinates"":[[[[-2.97..."
2,01004_AM,Artziniega agregacion de municipios,3005.0,"{""type"":""MultiPolygon"",""coordinates"":[[[[-3.14..."
3,01009_AM,Asparrena agregacion de municipios,4599.0,"{""type"":""MultiPolygon"",""coordinates"":[[[[-2.33..."
4,01010,Ayala/Aiara,2951.0,"{""type"":""MultiPolygon"",""coordinates"":[[[[-3.00..."


In [35]:
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_gau;
""")

Unnamed: 0,name,count_star()
0,silver_mitma_municipios,2618
1,silver_mitma_distritos,3792
2,silver_mitma_gau,2086


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_ine_padron_municipios (
  cod        VARCHAR,
  nombre     VARCHAR,
  fk_unidad  INTEGER,
  fk_escala  INTEGER,
  data_txt   TEXT,
  data       JSON,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  -- loaded_by TEXT DEFAULT CURRENT_USER,
  source_file TEXT
);

In [None]:
import requests
import pandas as pd
import os
def get_padron_by_municipio(year: int):
    url = f"https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/29005?date={year}0101:{year}1231"

    response = requests.get(url, timeout=120)
    data = response.json()

    df = pd.json_normalize(data)
    return df

def load_padron(year = 2023):
    dataset_path = f'../raw/INE/padron_municipios'
    table_name = f'bronze_ine_padron_municipios'

    filename = f'padron_municipios_{year}.csv.gz'

    full_path = os.path.abspath(dataset_path)

    os.makedirs(dataset_path, exist_ok=True)
    
    if(not os.path.isfile(f'{dataset_path}/{filename}')):
        df = get_padron_by_municipio(year)
        # ya que no nos devuelve un csv, lo guardamos y además comprimido
        if df is not None:
            df.to_csv(f"{dataset_path}/{filename}", index=False, compression='gzip')
        else:
            raise ValueError("Zones.get_zone_geodataframe() returned None")

    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            cod        VARCHAR,
            nombre     VARCHAR,
            fk_unidad  INTEGER,
            fk_escala  INTEGER,
            data_txt   TEXT,
            data       JSON,
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            COD        AS cod,
            Nombre     AS nombre,
            FK_Unidad::INTEGER AS fk_unidad,
            FK_Escala::INTEGER AS fk_escala,
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    REGEXP_REPLACE(
                        REPLACE(data, '''', '"'),
                        '\\bTrue\\b', 'true'
                        ),
                        '\\bFalse\\b', 'false'
                ),
                '\\bNone\\b', 'null'
            ) AS data_txt,
            CAST(data_txt AS JSON) AS data,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{dataset_path}/{filename}',
            filename = true,
            all_varchar = true
        );
    """)

In [76]:
load_padron(2023)

In [89]:
SQL("SELECT * FROM bronze_ine_padron_municipios LIMIT 5;")

Unnamed: 0,cod,nombre,fk_unidad,fk_escala,data_txt,data,loaded_at,source_file
0,DPOP19723,Ababuj. Total. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 12:57:31.251038,../raw/INE/padron_municipios/padron_municipios...
1,DPOP19724,Ababuj. Hombres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 12:57:31.251038,../raw/INE/padron_municipios/padron_municipios...
2,DPOP19725,Ababuj. Mujeres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 12:57:31.251038,../raw/INE/padron_municipios/padron_municipios...
3,DPOP17671,Abades. Total. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 12:57:31.251038,../raw/INE/padron_municipios/padron_municipios...
4,DPOP17672,Abades. Hombres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 12:57:31.251038,../raw/INE/padron_municipios/padron_municipios...


In [93]:
import requests
import pandas as pd

def get_poblacion_quinq_by_municipio(year: int) -> pd.DataFrame:
    # Tabla 33974: "Población por sexo, municipios y edad (grupos quinquenales)" (Padrón Continuo)
    # tip=A fuerza periodicidad anual; tv=periodo:<YYYY> filtra al año solicitado
    url = f"https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/68535?nult=1&tip=A&date={year}0101:{year}1231"
    print("Fetching data from URL:", url)
    resp = requests.get(url, timeout=180)
    resp.raise_for_status()
    data = resp.json()
    df = pd.json_normalize(data)
    return df

df = get_poblacion_quinq_by_municipio(2023)
df.head()

Fetching data from URL: https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/68535?nult=1&tip=A&date=20230101:20231231


Unnamed: 0,status
0,No puede mostrarse por restricciones de volumen


In [19]:
con.close()