# PAD-US v4

U.S. Geological Survey (USGS) Gap Analysis Project (GAP), 2024, Protected Areas Database of the United States (PAD-US) 4.0: U.S. Geological Survey data release, https://doi.org/10.5066/P96WBCHS. 




In [None]:
# pip install git+https://github.com/boettiger-lab/cng-python.git


In [1]:
import ibis
import streamlit as st
from ibis import _
from cng.utils import (
    ST_MakeValid,
    s3_cp,
    set_secrets,
    to_geojson,
    to_pmtiles,
)

# DuckDB connection (database "tmp") with the spatial extension requested.
con = ibis.duckdb.connect("tmp", extensions=["spatial"])

# s3-write permissions: hand the MinIO key/secret from streamlit's secrets to the connection.
set_secrets(
    con,
    st.secrets["MINIO_KEY"],
    st.secrets["MINIO_SECRET"],
)

# Optional memory cap, left disabled:
# con.raw_sql("SET MEMORY_LIMIT='10G'")

In [3]:
from pathlib import Path
import zipfile

# Named `zip_path` rather than `zip` so the builtin `zip()` is not shadowed
# for the rest of the notebook.
zip_path = "PADUS4_0_Geodatabase.zip"

# Unpack the downloaded geodatabase archive into the working directory.
# When the archive is not present this cell is a no-op.
if Path(zip_path).exists():
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall()


In [4]:
# Convert one layer of the PAD-US geodatabase to FlatGeobuf with geopandas,
# because reading it directly with DuckDB did not work:
#con.sql(f"select * from st_read_meta('{gdb}')").execute()  # no metadata?
# using duckdb + try_cast doesn't work either.
gdb = "PADUS4_0_Geodatabase.gdb"
layer = "PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement"

fgb = "pad-us-4.fgb"

if Path(gdb).exists():
    ## UGH, duckdb still complains 'Geometry type 11 not supported'
    import geopandas
    # `engine` (not `driver`) selects the I/O backend for read_file.
    gdf = geopandas.read_file(gdb, layer=layer, engine="pyogrio")
    # Write the FlatGeobuf that the next step reads; the driver is named
    # explicitly rather than inferred from the extension.
    gdf.to_file(fgb, driver="FlatGeobuf")
    # CRS of the source layer as a string, kept for reference.
    crs = gdf.crs.to_string()

In [6]:
# Sadly cannot do it this way
# con.read_geo(gdb, layer = layer, keep_wkb = True).mutate(geom = SHAPE.try_cast("geometry")).to_parquet('test.parquet')


In [8]:
# Build the parquet copy of PAD-US from the FlatGeobuf written by geopandas.
# Reading the original data directly in DuckDB fails with
# "Geometry type 11 not supported", e.g.:
# SELECT TRY_CAST(wkb_geometry AS GEOMETRY) FROM st_read('some_dataset.fgb', keep_wkb := true);

if Path(fgb).exists():
    (
        con
        # read the same path the guard above checked
        .read_geo(fgb)
        # Keep "Easement" and "Fee" features, plus "Proclamation" features whose
        # Mang_Name is "TRIB". Filtering first means only the retained rows are
        # reprojected and repaired; the predicate does not involve the geometry.
        .filter((_.FeatClass.isin(["Easement", "Fee"])) | (
            (_.FeatClass == "Proclamation") & (_.Mang_Name == "TRIB"))
            )
        # reproject from the (hard-coded) source CRS to EPSG:4326
        .mutate(geom = _.geom.convert('ESRI:102039', 'EPSG:4326'))
        # repair geometries and add a row-number column
        .mutate(geom = ST_MakeValid(_.geom),
                row_n = ibis.row_number())
        .to_parquet('s3://public-biodiversity/pad-us-4/pad-us-4.parquet')
    )


In [6]:
# This unit crosses the antimeridian, which creates issues, so it is dropped.
# Better solutions:
#   https://maplibre.org/maplibre-gl-js/docs/examples/line-across-180th-meridian/
#   https://github.com/gadomski/antimeridian
dropme = "Papahanaumokuakea Marine National Monument"
s3 = 's3://public-biodiversity/pad-us-4/pad-us-4.parquet'
parquet = con.read_parquet(s3)

# `Unit_Nm != dropme` evaluates to NULL for rows with a missing Unit_Nm, and a
# filter discards NULL rows, so units without a name are kept explicitly.
# NOTE(review): this reads from and overwrites the same object; confirm the
# backend finishes reading before the destination is replaced.
(
    parquet
    .filter(_.Unit_Nm.isnull() | (_.Unit_Nm != dropme))
    .to_parquet(s3)
)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [5]:
# Export the parquet table to GeoJSON, then convert that file to PMTiles.
# (Columns could optionally be selected/excluded on `parquet` before exporting.)
parquet = con.read_parquet(
    's3://public-biodiversity/pad-us-4/pad-us-4.parquet'
)

to_geojson(parquet, "pad-us-4.geojson", con=con)
pmtiles = to_pmtiles(
    "pad-us-4.geojson",
    "pad-us-4.pmtiles",
)

# pmtiles cannot stream to/from buckets, so the local file is uploaded afterwards.
# Note the layer name for pmtiles: 'padus4'
s3_cp(
    'pad-us-4.pmtiles',
    "s3://public-biodiversity/pad-us-4/pad-us-4.pmtiles",
    "minio",
    key=st.secrets["MINIO_KEY"],
    secret=st.secrets["MINIO_SECRET"],
)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

For layer 0, using name "padus4"
633636 features, 607180492 bytes of geometry and attributes, 45832193 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes
tile 1/0/0 size is 881403 (probably really 881403) with detail 12, >500000    
Going to try keeping the sparsest 45.38% of the features to make it fit
tile 1/0/0 size is 826138 (probably really 826138) with detail 12, >500000    
Going to try keeping the sparsest 21.97% of the features to make it fit
tile 1/0/0 size is 737574 (probably really 737574) with detail 12, >500000    
Going to try keeping the sparsest 11.92% of the features to make it fit
tile 1/0/0 size is 633799 (probably really 633799) with detail 12, >500000    
Going to try keeping the sparsest 7.52% of the features to make it fit
tile 1/0/0 size is 534715 (probably really 534715) with detail 12, >500000    
Going to try keeping the sparsest 5.63% of the features to make it fit
tile 2/0/1 size is 1361070 (probably really 1361070) with detail 12, >500000    
Goi

Successfully generated PMTiles file: pad-us-4.pmtiles


In [7]:
# Display the `parquet` table expression bound in an earlier cell.
parquet

In [2]:
# Intersections between the two datasets can be computed as well.

# Alternative public URL for the same SVI file (unused string literal):
"https://data.source.coop/cboettig/social-vulnerability/2022/SVI2022_US_tract.parquet"
svi = "s3://public-data/social-vulnerability/2022/SVI2022_US_tract.parquet" # faster with local

# t1: selected columns of the SVI table, with `Shape` renamed to `geom`.
t1 = (
    con.read_parquet(svi)
    .select(
        _.ST_ABBR,
        _.STATE,
        _.COUNTY,
        _.FIPS,
        _.RPL_THEMES,
        _.Shape,
        _.Shape_Area,
    )
    .rename(geom="Shape")
)

# t2: unit name and geometry from the PAD-US parquet.
t2 = (
    con.read_parquet('s3://public-biodiversity/pad-us-4/pad-us-4.parquet')
    .select(_.Unit_Nm, _.geom)
)

In [None]:
# Left join on geometry intersection: every t1 row is retained and paired with
# each t2 row whose geometry intersects it; the result goes to the bucket.
t1.join(t2, t1.geom.intersects(t2.geom), how="left").to_parquet(
    "s3://public-biodiversity/pad-us-4/pad-by-tract.parquet"
)

In [24]:
# Right join on geometry intersection: every t2 row is retained and paired with
# each t1 row whose geometry intersects it; the result goes to the bucket.
t1.join(t2, t1.geom.intersects(t2.geom), how="right").to_parquet(
    "s3://public-biodiversity/pad-us-4/tract-by-pad.parquet"
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 18h 32min 3s, sys: 21.7 s, total: 18h 32min 24s
Wall time: 9h 16min 32s


In [28]:
# Row counts of t1 and t2, shown as a (t1, t2) tuple.
tuple(tbl.count().execute() for tbl in (t1, t2))

(84120, 414767)

In [9]:
# use ST number to get congressional districts
# https://www2.census.gov/geo/tiger/TIGER2024/CD/tl_2024_56_cd119.zip