In [None]:
%pip install duckdb pandas numpy pyspainmobility requests

In [16]:
import duckdb
import pandas

# Root directory for raw downloads; the loaders below build their paths under it.
# It is a relative path, so it resolves against the notebook's working directory.
BASE_PATH = '../../raw'
# Prefix applied to every table name created by the loaders in this notebook.
LAKE_LAYER = 'bronze'

# Shared DuckDB connection to the on-disk database file; closed via con.close() further down.
con = duckdb.connect('../../mobility.db')

def SQL(q):
    """Execute the SQL text `q` on the shared connection and return the result as a DataFrame.

    The statement itself is not echoed; only the fetched result is returned.
    """
    return con.execute(q).fetchdf()

# Sanity check: report the DuckDB engine version.
print("DuckDB version:", con.sql("SELECT version();").fetchone()[0]) # type: ignore

DuckDB version: v1.4.2


```sql
-- viajes distritos
-- se castea en vez de VARCHAR de 'si/no' a un booleano de true o false
-- bronze_mitma_od_districts / bronze_mitma_viajes_distritos
CREATE TABLE bronze_mitma_od_districts (
    fecha TEXT,
    periodo TEXT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes TEXT,
    viajes_km TEXT,
    -- Columnas extras añadidas para auditoria. 
    loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- loaded_by TEXT DEFAULT CURRENT_USER,
    source_file TEXT
);

-- Viajes municipios
-- bronze_mitma_od_municipalities / bronze_mitma_viajes_municipios
CREATE TABLE bronze_mitma_od_municipalities (
    fecha TEXT,
    periodo TEXT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes TEXT,
    viajes_km TEXT,
    -- Columnas extras añadidas para auditoria. 
    loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- loaded_by TEXT DEFAULT CURRENT_USER,
    source_file TEXT
);

-- Viajes GAU
-- bronze_mitma_od_gau / bronze_mitma_viajes_gau
CREATE TABLE bronze_mitma_od_gau (
    fecha TEXT,
    periodo TEXT,
    origen TEXT,
    destino TEXT,
    distancia TEXT,
    actividad_origen TEXT,
    actividad_destino TEXT,
    residencia TEXT,
    renta TEXT,
    edad TEXT,
    sexo TEXT,
    viajes TEXT,
    viajes_km TEXT,
    -- Columnas extras añadidas para auditoria. 
    loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- loaded_by TEXT DEFAULT CURRENT_USER,
    source_file TEXT
);
```

In [17]:
from pyspainmobility import Mobility
import os

def load_od_matrices(type = "districts", start_date='2022-03-01', end_date='2022-03-03'):
    """Download MITMA origin-destination (OD) trip files and reload them into DuckDB.

    Raw files live under ``{BASE_PATH}/MITMA/od_{type}``. The target table
    ``{LAKE_LAYER}_mitma_od_{type}`` is dropped and rebuilt on every call, keeping
    every source column as TEXT and adding two audit columns (``loaded_at`` and
    ``source_file``).

    Args:
        type: Zoning level. Forwarded to ``Mobility(zones=...)`` and embedded in the
            directory and table names. (Shadows the builtin ``type``; the name is
            kept so existing keyword calls keep working.)
        start_date: Forwarded unchanged to ``Mobility(start_date=...)``.
        end_date: Forwarded unchanged to ``Mobility(end_date=...)``.

    Note:
        The download is skipped whenever the directory already holds ``*.csv.gz``
        files, so ``start_date``/``end_date`` only take effect on a cold cache.
    """
    dataset = 'od'
    dataset_path = f'{BASE_PATH}/MITMA/{dataset}_{type}'
    full_path = os.path.abspath(dataset_path)

    table_name = f'{LAKE_LAYER}_mitma_{dataset}_{type}'

    os.makedirs(dataset_path, exist_ok=True)

    # Only download when no raw CSV is cached yet. Checking for the files that are
    # actually ingested (instead of "directory is empty") keeps stray entries such
    # as .DS_Store or a leftover parquet from masking a missing download.
    has_raw_files = any(entry.endswith('.csv.gz') for entry in os.listdir(dataset_path))
    if not has_raw_files:
        data = Mobility(
            version=2,
            zones=type,
            start_date=start_date,
            end_date=end_date,
            output_directory=full_path,
        )

        data.get_od_data(keep_activity=True)

    # Full refresh: rebuild the table from scratch on every call.
    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha TEXT,
            periodo TEXT,
            origen TEXT,
            destino TEXT,
            distancia TEXT,
            actividad_origen TEXT,
            actividad_destino TEXT,
            residencia TEXT,
            renta TEXT,
            edad TEXT,
            sexo TEXT,
            viajes TEXT,
            viajes_km TEXT,
            estudio_destino_posible TEXT,
            estudio_origen_posible TEXT,
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    # all_varchar keeps the raw text untouched; filename exposes the originating
    # file of each row so it can be stored in source_file.
    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            fecha,
            periodo,
            origen,
            destino,
            distancia,
            actividad_origen,
            actividad_destino,
            residencia,
            renta,
            edad,
            sexo,
            viajes,
            viajes_km,
            estudio_destino_posible,
            estudio_origen_posible,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{dataset_path}/*.csv.gz',
            filename = true,
            all_varchar = true
        );
    """)

In [18]:
# Load the OD matrices for the three zoning levels used in this notebook.
load_od_matrices(type="distritos")
load_od_matrices(type="municipios")
load_od_matrices(type="gau")

In [19]:
# Preview five rows of the district-level OD table.
SQL(f"SELECT * FROM {LAKE_LAYER}_mitma_od_distritos LIMIT 5;")

Unnamed: 0,fecha,periodo,origen,destino,distancia,actividad_origen,actividad_destino,residencia,renta,edad,sexo,viajes,viajes_km,estudio_destino_posible,estudio_origen_posible,loaded_at,source_file
0,20220301,8,1001,01002,10-50,casa,frecuente,1,10-15,,,2.764,125.486,no,no,2025-11-16 16:49:05.867514,../../raw/MITMA/od_distritos/20220301_Viajes_d...
1,20220301,17,1001,01002,10-50,no_frecuente,casa,1,10-15,,,6.526,303.751,no,no,2025-11-16 16:49:05.867514,../../raw/MITMA/od_distritos/20220301_Viajes_d...
2,20220301,0,1001,01009_AM,2-10,casa,frecuente,1,10-15,,,10.279,67.832,no,no,2025-11-16 16:49:05.867514,../../raw/MITMA/od_distritos/20220301_Viajes_d...
3,20220301,0,1001,01009_AM,2-10,frecuente,casa,1,>15,,,4.591,42.419,no,no,2025-11-16 16:49:05.867514,../../raw/MITMA/od_distritos/20220301_Viajes_d...
4,20220301,2,1001,01009_AM,2-10,casa,frecuente,1,10-15,,,2.539,13.819,no,no,2025-11-16 16:49:05.867514,../../raw/MITMA/od_distritos/20220301_Viajes_d...


In [14]:
# TODO: alter the table to rename `periodo` to `hora`; the column was created as `periodo` instead of `hora`.
# Row count per OD table. UNION does not guarantee the order of the output rows.
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_od_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_od_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_od_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_od_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_od_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_od_gau;
""")

Unnamed: 0,name,count_star()
0,bronze_mitma_od_gau,20827473
1,bronze_mitma_od_municipios,34684127
2,bronze_mitma_od_distritos,55862966


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_districts (
  fecha TEXT,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,   -- 0,1,2,2+ (mantener TEXT)
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);

-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_municipalities (
  fecha TEXT,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);

-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_people_day_gau (
  fecha TEXT,
  zona_pernoctacion TEXT,
  edad TEXT,
  sexo TEXT,
  numero_viajes TEXT,
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [20]:
from pyspainmobility import Mobility
import os

def load_people_day(type = "districts", start_date='2022-03-01', end_date='2022-03-03'):
    """Download MITMA "people per day" files and reload them into DuckDB.

    Raw files live under ``{BASE_PATH}/MITMA/people_day_{type}``. The target table
    ``{LAKE_LAYER}_mitma_people_day_{type}`` is dropped and rebuilt on every call,
    keeping every source column as TEXT and adding two audit columns
    (``loaded_at`` and ``source_file``).

    Args:
        type: Zoning level. Forwarded to ``Mobility(zones=...)`` and embedded in the
            directory and table names. (Shadows the builtin ``type``; the name is
            kept so existing keyword calls keep working.)
        start_date: Forwarded unchanged to ``Mobility(start_date=...)``.
        end_date: Forwarded unchanged to ``Mobility(end_date=...)``.

    Note:
        The download is skipped whenever the directory already holds ``*.csv.gz``
        files, so ``start_date``/``end_date`` only take effect on a cold cache.
    """
    dataset = 'people_day'
    dataset_path = f'{BASE_PATH}/MITMA/{dataset}_{type}'
    full_path = os.path.abspath(dataset_path)

    table_name = f'{LAKE_LAYER}_mitma_{dataset}_{type}'

    os.makedirs(dataset_path, exist_ok=True)

    # Only download when no raw CSV is cached yet. Checking for the files that are
    # actually ingested (instead of "directory is empty") keeps stray entries such
    # as .DS_Store or a leftover parquet from masking a missing download.
    has_raw_files = any(entry.endswith('.csv.gz') for entry in os.listdir(dataset_path))
    if not has_raw_files:
        data = Mobility(
            version=2,
            zones=type,
            start_date=start_date,
            end_date=end_date,
            output_directory=full_path,
        )

        data.get_number_of_trips_data()

    # Full refresh: rebuild the table from scratch on every call.
    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha TEXT,
            zona_pernoctacion TEXT,
            edad TEXT,
            sexo TEXT,
            numero_viajes TEXT,
            personas TEXT,
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    # all_varchar keeps the raw text untouched; filename exposes the originating
    # file of each row so it can be stored in source_file.
    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            fecha,
            zona_pernoctacion,
            edad,
            sexo,
            numero_viajes,
            personas,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{dataset_path}/*.csv.gz',
            filename = true,
            all_varchar = true
        );
    """)

In [21]:
# Load the people-per-day data for the three zoning levels used in this notebook.
load_people_day(type="distritos")
load_people_day(type="municipios")
load_people_day(type="gau")

Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/personas/ficheros-diarios/2022-03/20220301_Personas_dia_distritos.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/personas/ficheros-diarios/2022-03/20220301_Personas_dia_distritos.csv.gz
Saved 784312 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_distritos/20220301_Personas_distritos_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/personas/ficheros-diarios/2022-03/20220302_Personas_dia_distritos.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/personas/ficheros-diarios/2022-03/20220302_Personas_dia_distritos.csv.gz
Saved 774109 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_distritos/20220302_Personas_distritos_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/es

100%|██████████| 3/3 [00:00<00:00, 14.21it/s]


Concatenating all the dataframes....
Writing the parquet file....
Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_distritos/Personas_distritos_2022-03-01_2022-03-03_v2.parquet
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/personas/ficheros-diarios/2022-03/20220301_Personas_dia_municipios.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/personas/ficheros-diarios/2022-03/20220301_Personas_dia_municipios.csv.gz
Saved 530935 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_municipios/20220301_Personas_municipios_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/personas/ficheros-diarios/2022-03/20220302_Personas_dia_municipios.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/personas/ficheros-diarios/

100%|██████████| 3/3 [00:00<00:00, 21.84it/s]

Concatenating all the dataframes....
Writing the parquet file....





Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_municipios/Personas_municipios_2022-03-01_2022-03-03_v2.parquet
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/personas/ficheros-diarios/2022-03/20220301_Personas_dia_GAU.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/personas/ficheros-diarios/2022-03/20220301_Personas_dia_GAU.csv.gz
Saved 418238 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_gau/20220301_Personas_GAU_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/personas/ficheros-diarios/2022-03/20220302_Personas_dia_GAU.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/personas/ficheros-diarios/2022-03/20220302_Personas_dia_GAU.csv.gz
Saved 410613 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/

100%|██████████| 3/3 [00:00<00:00, 26.85it/s]

Concatenating all the dataframes....
Writing the parquet file....
Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/people_day_gau/Personas_GAU_2022-03-01_2022-03-03_v2.parquet





In [23]:
# Preview five rows of the district-level people-per-day table.
SQL("SELECT * FROM bronze_mitma_people_day_distritos LIMIT 5;")

Unnamed: 0,fecha,zona_pernoctacion,edad,sexo,numero_viajes,personas,loaded_at,source_file
0,20220301,1001,0-25,hombre,0,125.296,2025-11-16 16:06:59.908747,../../raw/MITMA/people_day_distritos/20220301_...
1,20220301,1001,0-25,hombre,2,115.378,2025-11-16 16:06:59.908747,../../raw/MITMA/people_day_distritos/20220301_...
2,20220301,1001,0-25,hombre,2+,176.63,2025-11-16 16:06:59.908747,../../raw/MITMA/people_day_distritos/20220301_...
3,20220301,1001,0-25,mujer,0,125.069,2025-11-16 16:06:59.908747,../../raw/MITMA/people_day_distritos/20220301_...
4,20220301,1001,0-25,mujer,2,117.712,2025-11-16 16:06:59.908747,../../raw/MITMA/people_day_distritos/20220301_...


In [28]:
# Row count per people-per-day table. UNION does not guarantee the order of the output rows.
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_people_day_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_people_day_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_people_day_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_people_day_gau;
""")

Unnamed: 0,name,count_star()
0,bronze_mitma_people_day_gau,182214
1,bronze_mitma_people_day_distritos,340448
2,bronze_mitma_people_day_municipios,231688


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_districts (
  fecha TEXT,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_municipalities (
  fecha TEXT,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```
```sql
-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_overnight_stay_gau (
  fecha TEXT,
  zona_residencia TEXT,
  zona_pernoctacion TEXT,
  personas TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [25]:
from pyspainmobility import Mobility
import os

def load_overnight_stay(type = "districts", start_date='2022-03-01', end_date='2022-03-03'):
    """Download MITMA overnight-stay files and reload them into DuckDB.

    Raw files live under ``{BASE_PATH}/MITMA/overnight_stay_{type}``. The target
    table ``{LAKE_LAYER}_mitma_overnight_stay_{type}`` is dropped and rebuilt on
    every call, keeping every source column as TEXT and adding two audit columns
    (``loaded_at`` and ``source_file``).

    Args:
        type: Zoning level. Forwarded to ``Mobility(zones=...)`` and embedded in the
            directory and table names. (Shadows the builtin ``type``; the name is
            kept so existing keyword calls keep working.)
        start_date: Forwarded unchanged to ``Mobility(start_date=...)``.
        end_date: Forwarded unchanged to ``Mobility(end_date=...)``.

    Note:
        The download is skipped whenever the directory already holds ``*.csv.gz``
        files, so ``start_date``/``end_date`` only take effect on a cold cache.
    """
    dataset = 'overnight_stay'
    dataset_path = f'{BASE_PATH}/MITMA/{dataset}_{type}'
    full_path = os.path.abspath(dataset_path)

    table_name = f'{LAKE_LAYER}_mitma_{dataset}_{type}'

    os.makedirs(dataset_path, exist_ok=True)

    # Only download when no raw CSV is cached yet. Checking for the files that are
    # actually ingested (instead of "directory is empty") keeps stray entries such
    # as .DS_Store or a leftover parquet from masking a missing download.
    has_raw_files = any(entry.endswith('.csv.gz') for entry in os.listdir(dataset_path))
    if not has_raw_files:
        data = Mobility(
            version=2,
            zones=type,
            start_date=start_date,
            end_date=end_date,
            output_directory=full_path,
        )
        data.get_overnight_stays_data()

    # Full refresh: rebuild the table from scratch on every call.
    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            fecha TEXT,
            zona_residencia TEXT,
            zona_pernoctacion TEXT,
            personas TEXT,
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    # all_varchar keeps the raw text untouched; filename exposes the originating
    # file of each row so it can be stored in source_file.
    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            fecha,
            zona_residencia,
            zona_pernoctacion,
            personas,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{dataset_path}/*.csv.gz',
            filename = true,
            all_varchar = true
        );
    """)

In [26]:
# Load the overnight-stay data for the three zoning levels used in this notebook.
load_overnight_stay(type="distritos")
load_overnight_stay(type="municipios")
load_overnight_stay(type="gau")

Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_distritos.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_distritos.csv.gz
Saved 1853342 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_distritos/20220301_Pernoctaciones_distritos_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/pernoctaciones/ficheros-diarios/2022-03/20220302_Pernoctaciones_distritos.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-distritos/pernoctaciones/ficheros-diarios/2022-03/20220302_Pernoctaciones_distritos.csv.gz
Saved 1643373 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_distritos/20220302_Pernoctaciones_distritos_v2.csv.gz
Downlo

100%|██████████| 3/3 [00:00<00:00,  9.74it/s]


Concatenating all the dataframes....
Writing the parquet file....
Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_distritos/Pernoctaciones_distritos_2022-03-01_2022-03-03_v2.parquet
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_municipios.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_municipios.csv.gz
Saved 1468082 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_municipios/20220301_Pernoctaciones_municipios_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-municipios/pernoctaciones/ficheros-diarios/2022-03/20220302_Pernoctaciones_municipios.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basi

100%|██████████| 3/3 [00:00<00:00, 12.72it/s]


Concatenating all the dataframes....
Writing the parquet file....
Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_municipios/Pernoctaciones_municipios_2022-03-01_2022-03-03_v2.parquet
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_GAU.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/pernoctaciones/ficheros-diarios/2022-03/20220301_Pernoctaciones_GAU.csv.gz
Saved 1255035 bytes to /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_gau/20220301_Pernoctaciones_GAU_v2.csv.gz
Downloading file from https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/pernoctaciones/ficheros-diarios/2022-03/20220302_Pernoctaciones_GAU.csv.gz
Downloading: https://movilidad-opendata.mitma.es/estudios_basicos/por-GAU/pernoctaciones/ficheros-diarios/2022-03/20

100%|██████████| 3/3 [00:00<00:00, 13.92it/s]


Concatenating all the dataframes....
Writing the parquet file....
Parquet file generated successfully at  /Users/bgramaje/workspace/MUCEIM/bigdata/muceim-bigdata_project/raw/MITMA/overnight_stay_gau/Pernoctaciones_GAU_2022-03-01_2022-03-03_v2.parquet


In [29]:
# Preview five rows of the district-level overnight-stay table.
SQL("SELECT * FROM bronze_mitma_overnight_stay_distritos LIMIT 5;")

Unnamed: 0,fecha,zona_residencia,zona_pernoctacion,personas,loaded_at,source_file
0,20220301,1001,01001,2733.784,2025-11-16 16:08:51.009323,../../raw/MITMA/overnight_stay_distritos/20220...
1,20220301,1001,01004_AM,2.514,2025-11-16 16:08:51.009323,../../raw/MITMA/overnight_stay_distritos/20220...
2,20220301,1001,01009_AM,18.431,2025-11-16 16:08:51.009323,../../raw/MITMA/overnight_stay_distritos/20220...
3,20220301,1001,01017_AM,2.922,2025-11-16 16:08:51.009323,../../raw/MITMA/overnight_stay_distritos/20220...
4,20220301,1001,01051,7.831,2025-11-16 16:08:51.009323,../../raw/MITMA/overnight_stay_distritos/20220...


In [30]:
# Row count per overnight-stay table. UNION does not guarantee the order of the output rows.
SQL(f"""
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_distritos' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_distritos
    UNION
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_municipios' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_municipios
    UNION
    SELECT '{LAKE_LAYER}_mitma_overnight_stay_gau' as name, count(*) FROM {LAKE_LAYER}_mitma_overnight_stay_gau;
""")

Unnamed: 0,name,count_star()
0,bronze_mitma_overnight_stay_gau,574703
1,bronze_mitma_overnight_stay_distritos,947839
2,bronze_mitma_overnight_stay_municipios,741266


```sql
-- Distritos
CREATE TABLE IF NOT EXISTS bronze_mitma_districts (
  id TEXT,
  name TEXT,
  population TEXT,
  geometry TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

```sql
-- Municipios
CREATE TABLE IF NOT EXISTS bronze_mitma_municipalities (
  id TEXT,
  name TEXT,
  population TEXT,
  geometry TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

```sql
-- GAU
CREATE TABLE IF NOT EXISTS bronze_mitma_gau (
  id TEXT,
  name TEXT,
  population TEXT,
  geometry TEXT,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  source_file TEXT
);
```

In [3]:
%load_ext autoreload

In [7]:
%autoreload 2
# pyspainmobility has a bug when downloading the zones, so it has to be patched by hand.
# Open Zones.py in the library's source code and change lines 129, 134 and 141
# to use os.path.join(self.output_path, "<file name>") instead of os.path.join(data_directory, "<file name>").
from pyspainmobility import Zones
import os
import pandas as pd

def load_zones(type = "districts"):
    """Fetch the MITMA zoning for one level, cache it as CSV and reload it into DuckDB.

    The zoning is written once to ``{BASE_PATH}/MITMA/{type}/zones.csv.gz``; later
    calls reuse that file. The target table ``{LAKE_LAYER}_mitma_{type}`` is dropped
    and rebuilt on every call, with every column kept as TEXT plus two audit
    columns (``loaded_at`` and ``source_file``).

    Args:
        type: Zoning level. Forwarded to ``Zones(zones=...)`` and embedded in the
            directory and table names. (Shadows the builtin ``type``; the name is
            kept so existing keyword calls keep working.)

    Raises:
        ValueError: If ``Zones.get_zone_geodataframe()`` returns ``None``.
    """
    dataset_path = f'{BASE_PATH}/MITMA/{type}'
    table_name = f'{LAKE_LAYER}_mitma_{type}'
    full_path = os.path.abspath(dataset_path)
    zones_file = f'{dataset_path}/zones.csv.gz'

    os.makedirs(dataset_path, exist_ok=True)

    if not os.path.isfile(zones_file):
        data = Zones(
            version=2,
            zones=type,
            output_directory=full_path,
        )

        dfdata = data.get_zone_geodataframe()
        if dfdata is None:
            raise ValueError("Zones.get_zone_geodataframe() returned None")

        print(dfdata.head())
        # The library hands back a dataframe rather than a CSV, so persist it here,
        # gzip-compressed. index=True writes the dataframe index as the first column
        # (presumably the zone id selected as `id` below — confirm).
        dfdata.to_csv(zones_file, index=True, compression='gzip')

    # Full refresh: rebuild the table from scratch on every call.
    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            id TEXT,
            name TEXT,
            population TEXT,
            geometry TEXT, -- no geometry porque estamos en BRONZE LAYER
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    # Read exactly the file written/guarded above rather than every *.csv.gz in the
    # folder, so any other gzip CSV left in the directory is never ingested.
    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            id,
            name,
            population,
            geometry,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{zones_file}',
            filename = true,
            all_varchar = true
        );
    """)

In [None]:
# Load the zoning tables for the three zoning levels used in this notebook.
load_zones(type="distritos")
load_zones(type="municipios")
load_zones(type="gau")

In [6]:
# Row count per zoning table. UNION does not guarantee the order of the output rows.
SQL("""
    SELECT 'bronze_mitma_distritos' as name, count(*) FROM bronze_mitma_distritos
    UNION
    SELECT 'bronze_mitma_municipios' as name, count(*) FROM bronze_mitma_municipios
    UNION
    SELECT 'bronze_mitma_gau' as name, count(*) FROM bronze_mitma_gau;
""")

Unnamed: 0,name,count_star()
0,bronze_mitma_municipios,2618
1,bronze_mitma_gau,2086
2,bronze_mitma_distritos,3792


```sql
-- Padrón municipal (INE)
CREATE TABLE IF NOT EXISTS bronze_ine_padron_municipios (
  cod        VARCHAR,
  nombre     VARCHAR,
  fk_unidad  INTEGER,
  fk_escala  INTEGER,
  data_txt   TEXT,
  data       JSON,
  -- Columnas extras añadidas para auditoria. 
  loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  -- loaded_by TEXT DEFAULT CURRENT_USER,
  source_file TEXT
);
```

In [10]:
import requests
import pandas as pd

def get_padron_by_municipio(year: int):
    """Fetch INE table 29005 for one calendar year and flatten it into a DataFrame.

    Args:
        year: Year used to build the ``date={year}0101:{year}1231`` filter.

    Returns:
        The JSON payload flattened with ``pandas.json_normalize``.

    Raises:
        requests.HTTPError: If the service answers with an HTTP error status.
    """
    url = f"https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/29005?date={year}0101:{year}1231"

    response = requests.get(url, timeout=120)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    return pd.json_normalize(response.json())

def load_padron(year = 2023):
    """Fetch the INE municipal padrón for one year, cache it as CSV and reload it into DuckDB.

    The API response is cached at
    ``{BASE_PATH}/INE/padron_municipios/padron_municipios_{year}.csv.gz``; later
    calls reuse that file. The table ``{LAKE_LAYER}_ine_padron_municipios`` is
    dropped and rebuilt on every call from the file of the requested year only.

    Args:
        year: Year passed to ``get_padron_by_municipio`` and embedded in the cache
            file name.

    Raises:
        ValueError: If the fetched frame is ``None`` or lacks any column that the
            INSERT below selects, in which case nothing is cached.
    """
    import os  # local import: this cell itself only imports requests and pandas

    dataset_path = f'{BASE_PATH}/INE/padron_municipios'
    table_name = f'{LAKE_LAYER}_ine_padron_municipios'

    filename = f'padron_municipios_{year}.csv.gz'
    cache_file = f'{dataset_path}/{filename}'

    os.makedirs(dataset_path, exist_ok=True)

    if not os.path.isfile(cache_file):
        df = get_padron_by_municipio(year)
        if df is None:
            raise ValueError("get_padron_by_municipio() returned None")

        # Validate before caching: a non-data answer (e.g. a lone `status` column)
        # would otherwise be written to disk and break every later run, because
        # the cached file is never re-downloaded.
        required = {'cod', 'nombre', 'fk_unidad', 'fk_escala', 'data'}
        missing = required - {str(column).lower() for column in df.columns}
        if missing:
            raise ValueError(
                f"INE response for {year} is missing expected columns: {sorted(missing)}"
            )

        # The API returns JSON rather than a CSV, so persist it here, gzip-compressed.
        df.to_csv(cache_file, index=False, compression='gzip')

    # Full refresh: rebuild the table from scratch on every call.
    SQL(f"DROP TABLE IF EXISTS {table_name};")
    SQL(f"""
        CREATE TABLE IF NOT EXISTS {table_name}(
            cod        VARCHAR,
            nombre     VARCHAR,
            fk_unidad  INTEGER,
            fk_escala  INTEGER,
            data_txt   TEXT,
            data       JSON,
            -- Columnas extras añadidas para auditoria. 
            loaded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            -- loaded_by TEXT DEFAULT CURRENT_USER,
            source_file TEXT
        );
    """)

    # to_csv stored the nested `Data` values as Python reprs (single quotes,
    # True/False/None). The nested REPLACE/REGEXP_REPLACE calls rewrite that text
    # into JSON syntax before the cast. NOTE(review): this breaks if a value ever
    # contains an apostrophe — confirm against the data.
    SQL(f"""
        INSERT INTO {table_name}
        SELECT
            COD        AS cod,
            Nombre     AS nombre,
            FK_Unidad::INTEGER AS fk_unidad,
            FK_Escala::INTEGER AS fk_escala,
            REGEXP_REPLACE(
                REGEXP_REPLACE(
                    REGEXP_REPLACE(
                        REPLACE(data, '''', '"'),
                        '\\bTrue\\b', 'true'
                        ),
                        '\\bFalse\\b', 'false'
                ),
                '\\bNone\\b', 'null'
            ) AS data_txt,
            CAST(data_txt AS JSON) AS data,
            CURRENT_TIMESTAMP AS loaded_at,
            -- CURRENT_USER AS loaded_by,
            filename AS source_file
        FROM read_csv(
            '{cache_file}',
            filename = true,
            all_varchar = true
        );
    """)

In [11]:
# Load the 2023 padrón into the bronze table.
load_padron(2023)

In [12]:
# Preview five rows of the padrón table.
SQL("SELECT * FROM bronze_ine_padron_municipios LIMIT 5;")

Unnamed: 0,cod,nombre,fk_unidad,fk_escala,data_txt,data,loaded_at,source_file
0,DPOP19723,Ababuj. Total. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 16:42:55.625390,../../raw/INE/padron_municipios/padron_municip...
1,DPOP19724,Ababuj. Hombres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 16:42:55.625390,../../raw/INE/padron_municipios/padron_municip...
2,DPOP19725,Ababuj. Mujeres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 16:42:55.625390,../../raw/INE/padron_municipios/padron_municip...
3,DPOP17671,Abades. Total. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 16:42:55.625390,../../raw/INE/padron_municipios/padron_municip...
4,DPOP17672,Abades. Hombres. Total habitantes. Personas.,3,1,"[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...","[{""Fecha"": 1672527600000, ""FK_TipoDato"": 1, ""F...",2025-11-16 16:42:55.625390,../../raw/INE/padron_municipios/padron_municip...


In [93]:
import requests
import pandas as pd

def get_poblacion_quinq_by_municipio(year: int) -> pd.DataFrame:
    """Query the INE DATOS_TABLA endpoint for one year and flatten the JSON reply.

    The request targets table 68535 with ``nult=1``, ``tip=A`` and a
    ``date={year}0101:{year}1231`` filter.

    NOTE(review): the previous comment described table 33974 ("Población por sexo,
    municipios y edad (grupos quinquenales)", Padrón Continuo) and a
    ``tv=periodo:<YYYY>`` filter, neither of which matches the URL actually
    requested — confirm which table and filter are intended.

    The service can answer with a status message instead of data: the recorded run
    for 2023 returned a single ``status`` column reading "No puede mostrarse por
    restricciones de volumen". Such a payload is returned as-is, not raised.

    Args:
        year: Year interpolated into the ``date`` filter.

    Returns:
        The JSON payload flattened with ``pandas.json_normalize``.

    Raises:
        requests.HTTPError: If the service answers with an HTTP error status.
    """
    endpoint = f"https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/68535?nult=1&tip=A&date={year}0101:{year}1231"
    print("Fetching data from URL:", endpoint)

    reply = requests.get(endpoint, timeout=180)
    reply.raise_for_status()

    return pd.json_normalize(reply.json())

# Fetch the 2023 data and preview the first rows.
df = get_poblacion_quinq_by_municipio(2023)
df.head()

Fetching data from URL: https://servicios.ine.es/wstempus/js/ES/DATOS_TABLA/68535?nult=1&tip=A&date=20230101:20231231


Unnamed: 0,status
0,No puede mostrarse por restricciones de volumen


In [20]:
# Close the shared DuckDB connection opened at the top of the notebook.
con.close()