In [None]:
# Install dependencies
%pip install --upgrade pip 
%pip install pandas requests

In [None]:
import requests
import xml.etree.ElementTree as ET
import os
import hashlib
import time
import json
from datetime import datetime, timezone

In [None]:
# Base OWS endpoint of the GeoServer instance; the cells below append the WFS
# query string (service, version, request, ...) to this URL.
input_url = "https://sitservicios.lapaz.bo/geoserver/ows"
# Output directory (relative path) where the downloaded layers and their
# ".metadata.json" companion files are written.
data_dir = "../data"

In [None]:
# Fetch the WFS 1.0.0 capabilities document that lists the published layers.
# The timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() stops here on an HTTP error instead of trying to parse an
# error page as if it were the capabilities XML.
r = requests.get(
    f"{input_url}?service=WFS&version=1.0.0&request=GetCapabilities",
    timeout=60,
)
r.raise_for_status()
tree = ET.fromstring(r.content)

In [None]:
ns = {"wfs": "http://www.opengis.net/wfs"}
# Collect only the <Name> children of <FeatureType> elements. A bare
# ".//wfs:Name" search also matches <Service>/<Name> in a WFS 1.0.0 capabilities
# document, which would put the service name into the list of layers to download.
# Elements with empty text are skipped so the list never contains None.
layer_names = [
    el.text.strip()
    for el in tree.findall(".//wfs:FeatureType/wfs:Name", ns)
    if el.text and el.text.strip()
]

In [None]:
def get_xml_metadata(layer_name):
    """Return the attribute schema of a WFS layer as a ``{name: type}`` dict.

    Issues a WFS 1.0.0 ``DescribeFeatureType`` request against ``input_url`` and
    collects the ``name`` and ``type`` attributes of every ``xsd:element`` found
    in the response.

    Args:
        layer_name: Value sent as the ``typeName`` query parameter.

    Returns:
        dict mapping each element's ``name`` attribute to its ``type``
        attribute (``None`` when the element declares no ``type``).

    Raises:
        requests.HTTPError: if the server answers with an HTTP error status.
        xml.etree.ElementTree.ParseError: if the response body is not valid XML.
    """
    # Passing the query as `params` lets requests URL-encode the layer name,
    # so names containing spaces, "&" or other reserved characters are sent intact.
    r = requests.get(
        input_url,
        params={
            "service": "WFS",
            "version": "1.0.0",
            "request": "DescribeFeatureType",
            "typeName": layer_name,
        },
        timeout=60,  # never block a worker thread forever on a stalled server
    )
    r.raise_for_status()
    tree = ET.fromstring(r.content)

    xsd_ns = {"xsd": "http://www.w3.org/2001/XMLSchema"}
    # Elements without a "name" attribute (e.g. ones using ref=) are skipped;
    # otherwise they would all collapse into a single meaningless None key.
    return {
        element.get("name"): element.get("type")
        for element in tree.findall(".//xsd:element", xsd_ns)
        if element.get("name") is not None
    }

In [None]:
def create_metadata(file_path, wfs_url, name, resp):
    """Write a ``<name>.metadata.json`` file describing a downloaded layer.

    The metadata records the SHA-256 hash of the saved file, the source URL,
    the HTTP status of the download, the current time (Unix seconds and ISO-8601
    UTC) and the layer schema returned by ``get_xml_metadata``.

    Args:
        file_path: Path of the already-saved layer file; its bytes are hashed.
        wfs_url: URL stored under the ``"source"`` key.
        name: Layer name; passed to ``get_xml_metadata`` and used (with ``":"``
            replaced by ``"_"``) to build the metadata file name.
        resp: Response object of the download; only ``status_code`` is read.
    """
    with open(file_path, "rb") as f_in:
        file_hash = hashlib.sha256(f_in.read()).hexdigest()

    # Read the clock once so both timestamp fields describe the same instant;
    # two separate reads can disagree when they straddle a second boundary.
    now = datetime.now(timezone.utc)

    metadata = {
        "hash": file_hash,
        "source": wfs_url,
        "request_status": resp.status_code,
        "unix_timestamp": int(now.timestamp()),
        "timestamp": now.isoformat(),
        "xml_metadata": get_xml_metadata(name),
    }

    # Build the path with os.path.join, the same way download_layer does.
    metadata_path = os.path.join(data_dir, name.replace(":", "_") + ".metadata.json")

    with open(metadata_path, "w", encoding="utf-8") as meta_file:
        json.dump(metadata, meta_file, indent=4, ensure_ascii=False)

In [None]:
def download_layer(layer_name, format="application/json"):
    """Download one WFS layer and save it as ``<layer>.geojson`` plus metadata.

    Sends a WFS 1.0.0 ``GetFeature`` request to ``input_url``. On HTTP 200 the
    body is written to ``data_dir`` (with ``":"`` in the layer name replaced by
    ``"_"``) and ``create_metadata`` is called for the saved file. On any other
    status a failure message is printed and nothing is written.

    Args:
        layer_name: Value sent as the ``typeName`` query parameter.
        format: Value sent as the ``outputFormat`` query parameter.

    Raises:
        json.JSONDecodeError: if a JSON output format was requested but the
            response body is not valid JSON. ``process_layer`` catches this to
            retry with another output format.
    """
    # `params` lets requests URL-encode the layer name and output format.
    r = requests.get(
        input_url,
        params={
            "service": "WFS",
            "version": "1.0.0",
            "request": "GetFeature",
            "typeName": layer_name,
            "outputFormat": format,
        },
        timeout=120,  # never block a worker thread forever on a stalled server
    )
    name = layer_name.replace(":", "_")

    if r.status_code != 200:
        print(f"Failed to download {layer_name}: {r.status_code}")
        return

    # A 200 response is not necessarily the requested document. Validate the
    # body before saving so a non-JSON payload is never stored as .geojson.
    if "json" in format.lower():
        json.loads(r.content)

    # Make sure the output directory exists (safe when called from several threads).
    os.makedirs(data_dir, exist_ok=True)
    file_path = os.path.join(data_dir, f"{name}.geojson")
    with open(file_path, "wb") as f_out:
        f_out.write(r.content)

    # NOTE(review): the base endpoint, not the full GetFeature URL, is recorded
    # as the metadata "source" — kept as in the original; confirm this is intended.
    create_metadata(file_path, input_url, layer_name, r)
    print(f"Downloaded and saved {layer_name} to {file_path}")

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_layer(name, format="application/json"):
    """Download one layer, retrying once as ``"geojson"`` on a JSON decode error.

    All failures are reported with ``print`` rather than raised, so one bad
    layer does not abort the rest of the batch.

    Args:
        name: Layer name passed to ``download_layer``.
        format: Output format for the first attempt. The retry only happens
            when this is ``"application/json"``.
    """
    print(f"Descargando: {name}")
    try:
        download_layer(name, format)
        return
    except json.decoder.JSONDecodeError:
        if format != "application/json":
            print(f"❌ Error de decodificación JSON para {name}, no se pudo descargar")
            return
        print(
            f"⚠️ Error de decodificación JSON para {name}, reintentando como GeoJSON"
        )
    except Exception as e:
        print(f"⚠️ Excepción con {name}: {e}")
        return

    # Retry outside the first handler, with its own error handling. Previously
    # an exception raised by the retry escaped this function and was silently
    # lost inside the worker's future.
    try:
        download_layer(name, format="geojson")
    except json.decoder.JSONDecodeError:
        print(f"❌ Error de decodificación JSON para {name}, no se pudo descargar")
    except Exception as e:
        print(f"⚠️ Excepción con {name}: {e}")


# Download every layer concurrently. `results` maps each future back to the
# layer name it was submitted for.
with ThreadPoolExecutor() as executor:
    results = {executor.submit(process_layer, name): name for name in layer_names}
    for future in as_completed(results):
        # future.result() re-raises anything the worker raised; without this
        # call a failed task would disappear without a trace.
        try:
            future.result()
        except Exception as e:
            print(f"⚠️ Excepción con {results[future]}: {e}")