# PAD-US v4

U.S. Geological Survey (USGS) Gap Analysis Project (GAP), 2024, Protected Areas Database of the United States (PAD-US) 4.0: U.S. Geological Survey data release, https://doi.org/10.5066/P96WBCHS. 




In [5]:
# Imports for this notebook's setup.
import ibis
from ibis import _

import streamlit as st
from cng.utils import set_secrets

# DuckDB connection (through ibis) with the spatial extension requested.
con = ibis.duckdb.connect(extensions=["spatial"])

# s3-write permissions: pass the MinIO key/secret from the Streamlit secrets
# store to `set_secrets` together with the connection.
set_secrets(
    con,
    st.secrets["MINIO_KEY"],
    st.secrets["MINIO_SECRET"],
)



In [None]:
import zipfile
from pathlib import Path

# Downloaded PAD-US 4.0 geodatabase archive (see the citation at the top).
# Named `padus_zip` rather than `zip` so the builtin `zip()` is not shadowed.
padus_zip = Path("../PADUS4_0_Geodatabase.zip")

with zipfile.ZipFile(padus_zip, "r") as zip_ref:
    # Extract next to the archive instead of into the current directory, so
    # the geodatabase ends up where it is read from later
    # ("../PADUS4_0_Geodatabase.gdb").
    # NOTE(review): assumes the .gdb folder sits at the top level of the
    # archive -- confirm against the downloaded file.
    zip_ref.extractall(padus_zip.parent)


In [17]:
# Path to the extracted PAD-US 4.0 file geodatabase.
gdb = "../PADUS4_0_Geodatabase.gdb"

# Earlier attempts that did not work:
#   con.sql(f"select * from st_read_meta('{gdb}')").execute()  # no metadata?
#   duckdb + try_cast: still complains 'Geometry type 11 not supported'
# so the layer is read with geopandas instead and re-written as FlatGeobuf.
import geopandas

gdf = geopandas.read_file(
    gdb,  # was the undefined name `pad_us_4_gdb`
    layer="PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement",
    engine="pyogrio",  # was `driver="pygrio"`: wrong keyword and misspelled
)

# Local FlatGeobuf copy that later cells read back through DuckDB.
gdf.to_file("pad-us-4.fgb")

# CRS of the source layer as a string.
crs = gdf.crs.to_string()

In [17]:


# Attempt to read the geodatabase layer directly with DuckDB's st_read and
# write it out as parquet. In the recorded run this cell fails with
# "WKB Reader: Geometry type 11 not supported".
layer = "PADUS4_0Combined_Proclamation_Marine_Fee_Designation_Easement"

query = f'''
SELECT TRY_CAST(SHAPE AS GEOMETRY)
  FROM st_read('{gdb}', keep_wkb := true, layer = '{layer}')
'''

con.sql(query).to_parquet('test.parquet')

NotImplementedException: Not implemented Error: WKB Reader: Geometry type 11 not supported

In [None]:
# Note kept from earlier experiments -- "Geometry type 11 not supported":
#   SELECT TRY_CAST(wkb_geometry AS GEOMETRY) FROM st_read('some_dataset.fgb', keep_wkb := true);

# Row selection: keep every Easement and Fee feature, plus Proclamation
# features whose manager name is "TRIB".
is_fee_or_easement = _.FeatClass.isin(["Easement", "Fee"])
is_trib_proclamation = (_.FeatClass == "Proclamation") & (_.Mang_Name == "TRIB")

# Read the FlatGeobuf copy, reproject geometries from ESRI:102039 to
# EPSG:4326, apply the selection, and write the result to S3.
pad_us_fgb = con.read_geo("pad-us-4.fgb")
pad_us_4326 = pad_us_fgb.mutate(geom = _.geom.convert('ESRI:102039', 'EPSG:4326'))
pad_us_selected = pad_us_4326.filter(is_fee_or_easement | is_trib_proclamation)

# `gpd` only captures whatever `to_parquet` returns; the parquet file is the
# real output of this cell.
gpd = pad_us_selected.to_parquet('s3://public-biodiversity/pad-us-4/pad-us-4.parquet')


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [None]:


"https://data.source.coop/cboettig/social-vulnerability/2022/SVI2022_US_tract.parquet"
# SVI 2022 tract-level parquet (S3 copy of the public URL noted just above).
svi = "s3://public-data/social-vulnerability/2022/SVI2022_US_tract.parquet" # faster with local

# t1: SVI tracts, trimmed to the columns used below, with the geometry column
# renamed from "Shape" to "geom" so it matches the PAD-US table.
# Fix: `FIPS` was a bare name (NameError); it must be the deferred `_.FIPS`.
t1 = (
    con.read_parquet(svi, "svi")
    .select(_.ST_ABBR, _.STATE, _.FIPS, _.RPL_THEMES, _.Shape, _.Shape_Area)
    .rename(geom = "Shape")
)

# t2: PAD-US units written earlier, reduced to unit name and geometry.
# Fix: the original `.select(, _Unit_Nm, _.geom)` was a syntax error (leading
# comma) and referenced `_Unit_Nm` instead of `_.Unit_Nm`.
# NOTE(review): a column may have been dropped before the leading comma --
# confirm that unit name + geometry is the intended selection.
t2 = (
    con.read_parquet('s3://public-biodiversity/pad-us-4/pad-us-4.parquet')
    .select(_.Unit_Nm, _.geom)
)

In [23]:
# Left join: keep every row of t1 and attach the t2 rows whose geometry
# intersects it, then write the result to S3.
# The recorded run of this cell took roughly 9.5 hours of wall time.
geoms_intersect = t1.geom.intersects(t2.geom)
pad_by_tract = t1.left_join(t2, geoms_intersect)
pad_by_tract.to_parquet("s3://public-biodiversity/pad-us-4/pad-by-tract.parquet")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 18h 50min 39s, sys: 5min 48s, total: 18h 56min 27s
Wall time: 9h 28min 22s


In [24]:
# Right join: keep every row of t2 and attach the t1 rows whose geometry
# intersects it, then write the result to S3.
# The recorded run of this cell took roughly 9.25 hours of wall time.
geoms_intersect = t1.geom.intersects(t2.geom)
tract_by_pad = t1.right_join(t2, geoms_intersect)
tract_by_pad.to_parquet("s3://public-biodiversity/pad-us-4/tract-by-pad.parquet")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 18h 32min 3s, sys: 21.7 s, total: 18h 32min 24s
Wall time: 9h 16min 32s


In [28]:
# Row counts of the two inputs to the spatial joins, as a (t1, t2) tuple.
tuple(tbl.count().execute() for tbl in (t1, t2))

(84120, 414767)

In [9]:
# TODO: use each state's ST number to fetch its congressional districts, e.g. for ST = 56:
# https://www2.census.gov/geo/tiger/TIGER2024/CD/tl_2024_56_cd119.zip