In [None]:
from getpass import getpass
from owslib.csw import CatalogueServiceWeb as CSW
from owslib.ows import BoundingBox
from pyproj import CRS, Transformer
from shapely.geometry import box, Polygon
from shapely.ops import transform

import pandas as pd
import geopandas as gpd
import logging

In [None]:
# Grab the root logger and lower its threshold to INFO so that
# INFO-level records are not filtered out.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)

In [None]:
# Leave both fields blank to connect without authentication:
# an empty answer is turned into None.
auth = {
    "username": getpass(prompt="Digite o usuário") or None,
    "password": getpass(prompt="Digite a senha") or None,
}

In [None]:
# CSW client for the SGB GeoNetwork catalogue; `auth` supplies the
# optional username/password keyword arguments.
csw = CSW(
    "https://geoservicos.sgb.gov.br/geonetwork/srv/eng/csw",
    **auth,
)

## Get results

In [None]:
start = 1
page_size = 100

results = []

# Page through the catalogue, `page_size` records at a time, collecting one
# plain dict per record in `results`.
while True:
    csw.getrecords2(startposition=start, maxrecords=page_size, esn="full")  # ESN=full brings the bboxes
    page = csw.results
    logging.info(page)

    # Flatten every public attribute of the record object into a row.
    for record in csw.records.values():
        results.append(
            {prop: getattr(record, prop) for prop in dir(record) if not prop.startswith("_")}
        )

    start = page.get("nextrecord")
    returned = page.get("returned")
    matches = page.get("matches")

    # Stop when there is no next position, when the page came back empty, or
    # when the next position lies past the total number of matches. Checking
    # only for a falsy `nextrecord` can loop forever on servers that report
    # `matches + 1` on the last page instead of 0.
    finished = (
        not start
        or returned == 0
        or (matches is not None and start > matches)
    )
    if finished:
        logging.info("Fetch results done")
        break


def bbox_to_wgs84(minx, miny, maxx, maxy, crs):
    """Build a rectangle from an extent and express it in EPSG:4326.

    Parameters
    ----------
    minx, miny, maxx, maxy
        Corner coordinates of the bounding box, given in ``crs``.
    crs
        Identifier of the source CRS, passed to ``pyproj.CRS``. A falsy
        value or ``'EPSG:4326'`` means the box is returned untransformed.

    Returns
    -------
    The shapely geometry of the box, reprojected to EPSG:4326 when needed.
    """
    target_crs = 'EPSG:4326'
    rectangle = box(minx, miny, maxx, maxy)

    # Nothing to reproject: no CRS given, or it is already the target one.
    if not crs or crs == target_crs:
        return rectangle

    # always_xy=True makes the transformer take and return (x, y) ordered coordinates.
    reproject = Transformer.from_crs(CRS(crs), CRS(target_crs), always_xy=True).transform
    return transform(reproject, rectangle)
    

# Convert the collected records to a GeoDataFrame indexed by record identifier.
# Three columns are derived from each record's bounding box:
#   bbox_crs   - CRS label normalised to an EPSG identifier
#   bbox       - (minx, miny, maxx, maxy) as floats
#   bbox_wgs84 - the box as a geometry in EPSG:4326 (used as the active geometry)
metadata = gpd.GeoDataFrame(
    pd.DataFrame(results)
        .set_index("identifier")
        .assign(
            # NOTE(review): on an object column, `.str` methods yield NaN for
            # non-string values, so such codes end up as the EPSG:4326
            # fallback below - confirm that is intended.
            bbox_crs = lambda df: df.bbox.apply(lambda val: val.crs.code)
                .str.replace(r"[\s\W]+", "", regex=True)     # Strip whitespace and non-word characters
                .str.replace("1984", "84", regex=False)      # Normalize 1984 to 84
                .str.replace(r"^4326$", "WGS84", regex=True) # A bare 4326 code means WGS84
                .str.replace(r"^SouthAmericanDatum", "", regex=True) # Drop a leading "SouthAmericanDatum"
                # Map the known datum labels to EPSG identifiers (exact matches only).
                .replace({
                    "WGS84": "EPSG:4326",
                    "SIRGAS2000": "EPSG:4674",
                    "SAD69": "EPSG:4291",
                    "CórregoAlegre": "EPSG:4225",
                })
                # Missing CRS: fall back to EPSG:4326.
                .fillna("EPSG:4326"),
            bbox = lambda df: df.bbox.apply(
                lambda val: tuple(float(num) for num in (val.minx, val.miny, val.maxx, val.maxy))
            ),
            # Row-wise: needs both the float tuple and the normalised CRS computed above.
            bbox_wgs84 = lambda df: df.apply(
                lambda val: bbox_to_wgs84(*val.bbox, crs=val.bbox_crs),
                axis="columns"
            )
        )
        # Alphabetical column order.
        .pipe(
            lambda df: df.reindex(columns=sorted(df.columns))
        ),
    geometry="bbox_wgs84",
    crs="EPSG:4326"
)

metadata.to_parquet("metadata-geo.parquet")
metadata.info()

In [None]:
# Preview five randomly chosen rows of the result.
metadata.sample(n=5)