In [26]:
# Install dependencies
%pip install --upgrade pip 
%pip install pandas requests

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [27]:
import requests
import xml.etree.ElementTree as ET
import os
import hashlib
import time
import json
from datetime import datetime, timezone

In [28]:
input_url = "https://sitservicios.lapaz.bo/geoserver/ows"
data_dir = "../data"

In [29]:
r = requests.get(f"{input_url}?service=WFS&version=1.0.0&request=GetCapabilities")
tree = ET.fromstring(r.content)

In [30]:
ns = {"wfs": "http://www.opengis.net/wfs"}
layer_names = [el.text for el in tree.findall(".//wfs:Name", ns)]

In [31]:
def get_xml_metadata(layer_name):
    r = requests.get(
        f"{input_url}?service=WFS&version=1.0.0&request=DescribeFeatureType&typeName={layer_name}"
    )
    tree = ET.fromstring(r.content)

    metadata = {}
    for element in tree.findall(
        ".//xsd:element", {"xsd": "http://www.w3.org/2001/XMLSchema"}
    ):
        name = element.get("name")
        type_ = element.get("type")
        metadata[name] = type_

    return metadata

In [None]:
def create_metadata(file_path, wfs_url, name, resp):

    with open(file_path, "rb") as f_in:
        file_contents = f_in.read()

    file_hash = hashlib.sha256(file_contents).hexdigest()

    metadata = {
        "hash": file_hash,
        "source": wfs_url,
        "request_status": resp.status_code,
        "unix_timestamp": int(time.time()),
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "xml_metadata": get_xml_metadata(name),
    }

    metadata_filename = name.replace(":","_") + ".metadata.json"

    with open(f"{data_dir}/{metadata_filename}", "w", encoding="utf-8") as meta_file:
        json.dump(metadata, meta_file, indent=4, ensure_ascii=False)

In [None]:
def download_layer(layer_name, format="application/json"):
    wfs_url = f"{input_url}?service=WFS&version=1.0.0&request=GetFeature&typeName={layer_name}&outputFormat={format}"
    r = requests.get(wfs_url)

    if r.status_code == 200:
        file_path = os.path.join(data_dir, f"{layer_name.replace(":","_")}.geojson")
        with open(file_path, "wb") as f_out:
            f_out.write(r.content)

        create_metadata(file_path, input_url, layer_name, r)
        print(f"Downloaded and saved {layer_name} to {file_path}")
    else:
        print(f"Failed to download {layer_name}: {r.status_code}")

In [34]:
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_layer(name, format="application/json"):
    print(f"Descargando: {name}")
    try:
        download_layer(name, format)
    except json.decoder.JSONDecodeError:
        if format == "application/json":
            print(
                f"⚠️ Error de decodificación JSON para {name}, reintentando como GeoJSON"
            )
            download_layer(name, format="geojson")
            return
        else:
            print(f"❌ Error de decodificación JSON para {name}, no se pudo descargar")
    except Exception as e:
        print(f"⚠️ Excepción con {name}: {e}")


with ThreadPoolExecutor() as executor:
    results = {executor.submit(process_layer, name): name for name in layer_names}
    for future in as_completed(results):
        pass  # Opcional: usa future.result() para recoger resultados si hace falta

Descargando: WFS
Descargando: lapaz:riesgo_36zonas
Descargando: sit:actividadesEconomicasEBA
Descargando: lpm:aeropuertos
Descargando: sit:agenciasviajes
Descargando: sit:agenciascooperacion
Descargando: lapaz:rest2006_area
Descargando: sit:ap_sector_chucura
Descargando: sit:ap_municipales2018
Descargando: sit:ap_nacional
Descargando: movilidad:areasradiotaxis
Descargando: lapaz:areasequipamiento


Downloaded and saved lapaz:riesgo_36zonas to ../data/lapaz:riesgo_36zonas.geojson
Descargando: sit:arqueo_muyaltasensibilidad
Downloaded and saved sit:actividadesEconomicasEBA to ../data/sit:actividadesEconomicasEBA.geojson
Downloaded and saved sit:ap_sector_chucura to ../data/sit:ap_sector_chucura.geojson
Descargando: sit:equipamientoareas
Descargando: sit:smc_arqueologico
Downloaded and saved lapaz:rest2006_area to ../data/lapaz:rest2006_area.geojson
Descargando: culturas:asociaciones
Downloaded and saved WFS to ../data/WFS.geojson
Descargando: sim:obras_sgeo_sim
Downloaded and saved sit:agenciasviajes to ../data/sit:agenciasviajes.geojson
Descargando: lpm:bancos
Downloaded and saved sit:ap_municipales2018 to ../data/sit:ap_municipales2018.geojson
Downloaded and saved movilidad:areasradiotaxis to ../data/movilidad:areasradiotaxis.geojson
Descargando: sit:bancos
Descargando: sit:bibliotecas
Downloaded and saved sit:ap_nacional to ../data/sit:ap_nacional.geojson
Descargando: sit:bicicl