In [None]:
import os
import io
import requests
import zipfile
import geopandas as gpd
import ibis
from ibis import _
from cng.utils import *
from cng.h3 import *
from minio import Minio

# DuckDB connection (via ibis) requesting the "spatial" and "h3" extensions.
con = ibis.duckdb.connect(extensions=["spatial", "h3"])
# NOTE(review): `endpoint` is not referenced again in the visible cells; the
# MinIO client below uses a hard-coded host instead — confirm which is intended.
endpoint = os.environ.get("AWS_S3_ENDPOINT", "minio.carlboettiger.info")
# Helper from the star imports; presumably configures S3 credentials on `con` — confirm.
set_secrets(con)

# MinIO credentials are read from the environment (None when unset).
access_key = os.environ.get("MINIO_KEY")
secret_key = os.environ.get("MINIO_SECRET")

# Object-store client used for the uploads further down.
client = Minio("minio.carlboettiger.info", access_key=access_key, secret_key=secret_key, secure=True)

duckdb_install_h3()

# usage: t.mutate(geom_valid = ST_MakeValid(t.geom))
# NOTE(review): `dt` is not imported by name in the visible cells; presumably it
# arrives through one of the star imports above — confirm.
@ibis.udf.scalar.builtin
def ST_MakeValid(geom) -> dt.geometry:
 """Declare the backend's built-in ``ST_MakeValid`` function to ibis.

 The body is only a placeholder: the decorator registers this signature as a
 builtin scalar UDF, so the call is expected to be carried out by the database
 function of the same name rather than by Python code.

 Args:
  geom: Geometry expression to pass to the function.

 Returns:
  An expression annotated as ``dt.geometry``.
 """
 ...

In [None]:
import requests
import zipfile
import os

def download(url, zip_path, out_dir, layer=None):
    """Download a ZIP archive from ``url`` and extract it into ``out_dir``.

    Parameters
    ----------
    url : str
        Address of the archive to fetch.
    zip_path : str
        Local path the downloaded archive is written to.
    out_dir : str
        Directory the archive is extracted into; created if it does not exist.
    layer : optional
        Unused by this function. It is optional so that both three-argument
        and existing four-argument calls work.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status (for example an expired
        signed URL). Nothing is written to ``zip_path`` in that case.
    zipfile.BadZipFile
        If the downloaded file is not a valid ZIP archive.
    """
    print("Downloading...")
    # The context manager releases the connection even on failure; the timeout
    # stops a stalled server from hanging the notebook indefinitely.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail before touching the disk so an HTTP error page is never saved
        # as if it were the archive.
        r.raise_for_status()
        with open(zip_path, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    print("Download complete!")

    # Make sure output folder exists
    os.makedirs(out_dir, exist_ok=True)

    # Extract ZIP
    print("Extracting...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(out_dir)
    print(f"Extraction complete! Files are in: {out_dir}")

In [None]:
import fiona 
def list_layers(gdb):
    """Print the name of every layer found in the dataset at ``gdb``, one per line."""
    for layer_name in fiona.listlayers(gdb):
        print(layer_name)

In [None]:
import geopandas as gpd
def gdb_to_parquet(gdb, layer, parquet):
    """Read one layer of a geodatabase, reproject it, and write it to Parquet.

    Args:
        gdb: Path of the geodatabase to read.
        layer: Name of the layer to read from ``gdb``.
        parquet: Destination passed to ``to_parquet``.

    The ``Shape`` column is renamed to ``geometry``, one record is excluded by
    its ``FID_VectorAnalysisSelfUni1`` value, the geometry is converted from
    ``ESRI:102039`` to ``EPSG:4326``, and a ``row_n`` column is added.
    """
    source = con.read_geo(gdb, layer=layer)
    renamed = source.rename(geometry="Shape")
    # Drop the one record with a problematic geometry.
    kept = renamed.filter(_.FID_VectorAnalysisSelfUni1 != 1152569)
    reprojected = kept.mutate(
        geometry=_.geometry.convert('ESRI:102039', 'EPSG:4326'),
        row_n=ibis.row_number(),
    )
    reprojected.to_parquet(parquet)


# PAD-US 4.1

In [None]:
# This signed link no longer works (note the X-Amz-Date / X-Amz-Expires
# parameters), so the download call below is left commented out.
pad_41_url = "https://prod-is-usgs-sb-prod-content.s3.us-west-2.amazonaws.com/6759b69fd34edfeb8710a3ea/PADUS4_1VectorAnalysis_OtherExtents.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20251113T223956Z&X-Amz-SignedHeaders=host&X-Amz-Expires=86399&X-Amz-Credential=AKIAI7K4IX6D4QLARINA%2F20251113%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Signature=631e010af16362f8670dfae0c2f114c3e2d22b84aeaaf86064e8ec0c21f102f5"
# Output paths: the archive shares its base name with the extraction folder.
out_dir = "PADUS4_1VectorAnalysis_PADUS_Only"
zip_path = f"{out_dir}.zip"

# download(pad_41_url, zip_path, out_dir)

In [None]:
# Destination of the raw archive in the object store.
bucket_name = 'public-biodiversity'
folder = 'pad-us-4_1'
subfolder = 'no_overlap'
# Object key inside the bucket: <folder>/<subfolder>/<archive name>.
zip_url = '/'.join((folder, subfolder, zip_path))
# client.fput_object(bucket_name,zip_url, zip_path)

In [None]:
# The geodatabase sits inside the extraction folder under the same base name;
# print the layers it contains.
gdb = '/'.join([out_dir, out_dir + '.gdb'])
list_layers(gdb)

In [None]:
# Convert the selected layer to Parquet, written directly to the S3 URL.
folder = 'pad-us-4_1'
layer = 'PADUS4_1VectorAnalysis_PADUS_Only_Simp_SingP'
parquet_file = 'pad-us-4_1_no_overlap.parquet'
parquet_url = 's3://public-biodiversity/' + '/'.join([folder, subfolder, parquet_file])
gdb_to_parquet(gdb, layer, parquet_url)

In [None]:
# Re-export the Parquet data as a .fgb file, once locally and once to the bucket.
geobuf_file = 'pad-us-4_1_no_overlap.fgb'
geobuf_url = 's3://public-biodiversity/' + '/'.join([folder, subfolder, geobuf_file])
# Materialise the table and tag it with its coordinate reference system.
gdf = (
    con.read_parquet(parquet_url)
    .execute()
    .set_crs('epsg:4326')
)
for destination in (geobuf_file, geobuf_url):
    gdf.to_file(destination)

In [None]:
%%time
pmtiles_file = 'pad-us-4_1_no_overlap.pmtiles'
pmtiles = f'{folder}/{subfolder}/{pmtiles_file}'
source_layer_name = re.sub(r'\W+', '', os.path.splitext(os.path.basename(pmtiles_file))[0])

options =[f'--layer={source_layer_name}',
            '--extend-zooms-if-still-dropping',  
         ]
new_pmtiles = to_pmtiles(geobuf_file, pmtiles_file, options = options )
client.fput_object(bucket_name, pmtiles, new_pmtiles)

# Visualize

In [None]:
import leafmap.maplibregl as leafmap
# Public HTTPS address of the uploaded tileset.
folder = 'pad-us-4_1'
pmtiles_file = 'pad-us-4_1_no_overlap.pmtiles'
pmtiles = '/'.join(['https://minio.carlboettiger.info/public-biodiversity', folder, subfolder, pmtiles_file])

# Draw the tiles on a "positron" basemap; the trailing `m` displays the map.
m = leafmap.Map(style="positron")
m.add_pmtiles(pmtiles)
m