# Preprocessing

In [None]:
from cng.utils import *
from cng.h3 import *
from ibis import _
import ibis.selectors as s
import os
from osgeo import gdal
from minio import Minio
import streamlit 
from datetime import timedelta
import geopandas as gpd
import re

# The imports above bind `streamlit` (without an alias) and only `_` / `s`
# from ibis, so bind the `st` and `ibis` names used below explicitly rather
# than relying on whatever the `cng` star imports happen to re-export.
import ibis
import streamlit as st

# Get signed URLs to access license-controlled layers:
# MinIO client whose credentials come from the Streamlit secrets store.
key = st.secrets["MINIO_KEY"]
secret = st.secrets["MINIO_SECRET"]
client = Minio("minio.carlboettiger.info", key, secret)

# DuckDB connection (through ibis) requesting the spatial and h3 extensions.
con = ibis.duckdb.connect(extensions=["spatial", "h3"])
endpoint = os.getenv("AWS_S3_ENDPOINT", "minio.carlboettiger.info")
duckdb_install_h3()

# Helper from the cng star imports; presumably configures S3 credentials on
# `con` -- confirm against cng.utils.
set_secrets(con)

In [None]:
# All Census 2024 outputs live under one bucket prefix.
census_path = "s3://public-census/2024/"

# Polygon layers.
state_file = f"{census_path}state/2024_us_state.parquet"
county_file = f"{census_path}county/2024_us_county.parquet"

# H3 (zoom 8) layers.
state_h3_file = f"{census_path}state/2024_us_state_h3_z8.parquet"
county_h3_file = f"{census_path}county/2024_us_county_h3_z8.parquet"
city_h3_file = f"{census_path}places_subdivisions/2024_us_places_subdivisions_h3_z8.parquet"

#### Converting data to hexes at zoom 8

In [None]:
def h3_from_geom(con, name, cols, save_path, zoom = 8):
    """
    Computes hexes directly from geometry.

    Runs two statements on `con`. The first (re)creates the temp table `t2`
    holding, for every polygon part produced by `ST_Dump(geom)`, the list of
    H3 cells at resolution `zoom`. The second unnests that list to one row per
    cell, adds the cell boundary as `geom`, and writes the result to
    `save_path` as parquet.

    Parameters
    ----------
    con : connection object exposing `raw_sql()` and `sql()`.
    name : name of a table/view on `con` that has a `geom` column.
    cols : attribute columns to carry through; either a ready-made SQL
        column list (str) or any iterable of column names (list, tuple, ...).
    save_path : destination handed to `to_parquet`.
    zoom : H3 resolution; also names the output column (`h{zoom}`).

    Returns
    -------
    None
    """
    # Only a str is passed through verbatim. Testing for `list` alone meant a
    # tuple (or any other iterable) was interpolated by its repr, which is not
    # valid SQL.
    cols = cols if isinstance(cols, str) else ", ".join(cols)
    con.raw_sql(f'''
    CREATE OR REPLACE TEMP TABLE t2 AS
    SELECT {cols},
           h3_polygon_wkt_to_cells_string(ST_Force2D(dump.geom), {zoom}) AS h{zoom}
    FROM (
        SELECT {cols}, UNNEST(ST_Dump(geom)) AS dump
        FROM {name}
    )
    ''')
    con.sql(f'''
        SELECT {cols}, UNNEST(h{zoom}) AS h{zoom},
        ST_GeomFromText(h3_cell_to_boundary_wkt(UNNEST(h{zoom}))) AS geom
        FROM t2
    ''').to_parquet(save_path)
    return None

# TPL Conservation Almanac

Hexing this data at zoom 8 level

In [None]:
# Presigned GET URL (valid for 2 hours) for "tpl.parquet" in the "shared-tpl" bucket.
tpl = client.get_presigned_url(
    "GET",
    "shared-tpl",
    "tpl.parquet",
    expires=timedelta(hours=2),
)

# Attribute columns to carry into the hexed output; only consumed by the
# h3_from_geom call at the bottom of this cell, which is currently commented out.
cols = ['fid', 'tpl_id', 'state_id', 'state', 'county', 'municipality',
        'site', 'acres', 'year', 'date', 'owner','owner_type','manager',
        'manager_type','purchase_type','easement','easement_type',
        'access_type','purpose_type','duration_type','data_provider',
        'data_source','source_date','data_aggregator','comments','amount',
        'program_id','program','sponsor_id','sponsor','sponsor_type','FIPS']

# Census state attributes without geometry; joined below on the shared `state` column.
state_ids = con.read_parquet(state_file).drop('geom')

# Rename the source columns to snake_case, reproject the geometry from
# ESRI:102039 to EPSG:4326, and keep only rows whose `state` value also
# appears in the Census state table (inner join).
tpl_table = (con.read_parquet(tpl)
    .rename(tpl_id = "TPL_ID", state = "State", county = "County", municipality = "Municipality", site = "Site_Name",
            acres = "Reported_Acres", area = "Shape_Area", year = "Close_Year", date = "Close_Date", owner = "Owner_Name",
            owner_type = "Owner_Type", manager = "Manager_Name", manager_type = "Manager_Type",
            purchase_type = "Purchase_Type", easement = "EasementHolder_Name", easement_type = "EasementHolder_Type",
            access_type = "Public_Access_Type", purpose_type = "Purpose_Type", duration_type = "Duration_Type",
            data_provider = "Data_Provider", data_source = "Data_Source", source_date = "Source_Date",
            data_aggregator = "Data_Aggregator", comments = "Comments", amount = "Amount", program_id = 'Program_ID',
            program = 'Program_Name', sponsor_id = "Sponsor_ID", sponsor = "Sponsor_Name", sponsor_type = "Sponsor_Type")
    .mutate(geom = _.geom.convert("ESRI:102039", "EPSG:4326"))
    .inner_join(state_ids, 'state'))
    
# Register the result as table 'tpl' on the connection so it can be referenced by name.
con.create_table('tpl', tpl_table, overwrite=True)
# h3_from_geom(con, 'tpl', cols, save_path = 's3://shared-tpl/conservation_almanac/z8/tpl_h3_z8.parquet')

#### Generate PMTiles

In [None]:
# Publish the reprojected table, then build vector tiles from a local copy:
# parquet -> GeoJSON -> PMTiles -> upload.
tpl_table.to_parquet('s3://shared-tpl/conservation_almanac/tpl.parquet')
tpl_table.to_parquet('tpl_epsg4326.parquet') #local copy to use to_geojson
to_geojson('tpl_epsg4326.parquet', "tpl.geojson")
pmtiles = to_pmtiles("tpl.geojson", "tpl.pmtiles")
# NOTE(review): "minio" is presumably the name of the configured remote/alias -- confirm against s3_cp.
s3_cp('tpl.pmtiles', "s3://shared-tpl/conservation_almanac/tpl.pmtiles", "minio")

# Census

Getting polygons and FIPS codes from Census state, county, place, and subdivision data. 



#### State

In [None]:
# TIGER 2024 state boundaries, read straight from the zipped shapefile.
url = "/vsizip//vsicurl/https://www2.census.gov/geo/tiger/TIGER2024/STATE/tl_2024_us_state.zip"
# The state-name column has to be called `state` (not `name`): the H3 step
# below selects it by that name, and the TPL, county and landvote cells all
# join or select on `state` from this file.
state = (con.read_geo(url)
            .mutate(geom = _.geom.convert('EPSG:4269','EPSG:4326'))
            .rename(FIPS = "GEOID", state_id = "STUSPS", state = "NAME")
            .select('FIPS','state_id','state','geom')
             )
state.to_parquet(state_file)

#get h3
# Register the saved file as table 'state' so h3_from_geom can query it by name.
con.read_parquet(state_file, table_name = 'state')
cols = ['state','state_id','FIPS']
h3_from_geom(con, 'state', cols, save_path = state_h3_file)

#### County

In [None]:
%%time
## CT counties changed to "planning regions" in 2022, so I'm grabbing older data to get the county boundaries 
# Each shapefile is read exactly once, directly inside the pipeline that uses
# it; a bare `con.read_geo(url)` whose result is discarded would only fetch
# the remote zip a second time.
url = "/vsizip//vsicurl/https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip"
CT_counties = (con.read_geo(url)
            .mutate(geom = _.geom.convert('EPSG:4269','EPSG:4326'))
            .rename(FIPS = "GEOID", county = "NAMELSAD")
            .select('FIPS','STATEFP','county','geom')
            .filter(_.STATEFP == '09')
             )

# US counties 
# The 2024 rows for STATEFP '09' are not filtered out, so the result holds
# both the 2024 Connecticut entries and the 2020 counties appended here.
url = "/vsizip//vsicurl/https://www2.census.gov/geo/tiger/TIGER2024/COUNTY/tl_2024_us_county.zip"
county = (con.read_geo(url)
            .mutate(geom = _.geom.convert('EPSG:4269','EPSG:4326'))
            .rename(FIPS = "GEOID", county = "NAMELSAD")
            .select('FIPS','STATEFP','county','geom')
            .union(CT_counties)
         ) 

#adding states to counties
# The state table's FIPS is the state code, matched against the county's STATEFP.
state_ids = con.read_parquet(state_file).drop('geom')
county.inner_join(state_ids, [state_ids.FIPS == county.STATEFP]).select('FIPS','state_id','state','county','geom').to_parquet(county_file)

#get h3
con.read_parquet(county_file, table_name = 'county')
cols = ['state_id','state','county','FIPS']
# h3_from_geom(con, 'county', cols, save_path = county_h3_file)


#### Cities (places + subdivisions)

Note: Some cities are listed in both "Places" and "Subdivisions", so we will use `distinct()` to avoid duplicates.

In [None]:
# Case-insensitive regex for jurisdiction-type words (plus any whitespace in
# front of them) that are stripped from city names to form the `name` join key.
# NOTE(review): there is no word boundary before the alternation, so a word that
# merely ends in one of these (e.g. "...town") also matches -- confirm intended.
match_pattern = r"(?i)\s*(city|town|village|charter|municipality|Borough)\b"
# match_pattern = r"(?i)(?<![a-z])(?:city|town|charter|municipality|[Bb]orough)(?![a-z])"

# Output columns shared by both Census reference tables below.
city_cols = ["state_id","county","FIPS","name",'city']

# Census "place by county" reference file. FIPS is STATEFP + COUNTYFP, i.e. the
# county code (assumes both columns are read as strings -- TODO confirm).
places_url = "https://www2.census.gov/geo/docs/reference/codes2020/national_place_by_county2020.txt"
places_fips = (con.read_csv(places_url)
               .rename(state_id = "STATE", county = "COUNTYNAME", city = "PLACENAME")
               .mutate(name=_.city.re_replace(match_pattern, "").strip())
               .mutate(FIPS = _.STATEFP + _.COUNTYFP)
               .select(city_cols))

# Census county-subdivision reference file, shaped the same way as places_fips.
subdivisions_url = "https://www2.census.gov/geo/docs/reference/codes2020/national_cousub2020.txt"
subdivisions_fips = (con.read_csv(subdivisions_url)
                     .rename(state_id = "STATE", county = "COUNTYNAME", city = "COUSUBNAME")
                     .mutate(name=_.city.re_replace(match_pattern, "").strip())
                     .mutate(FIPS = _.STATEFP + _.COUNTYFP)
                     .select(city_cols))

#get unique -> some cities are listed in both places and subdivisions
city_fips = places_fips.union(subdivisions_fips).distinct() 

#get h3 from counties 
# The join key is the county FIPS, so every city row picks up the hexes (and
# geom) of its county rather than a city-specific footprint.
county_h3 = con.read_parquet(county_h3_file)
city_fips.inner_join(county_h3, 'FIPS').select('FIPS','state_id','state','county','city','name','geom','h8').to_parquet(city_h3_file)


# Landvote

We want to join Landvote data with TPL Conservation Almanac, but Landvote doesn't have spatial data.

However, we can join Landvote with Census data to get FIPS codes and hexes. 
- First, need to split up landvote into its 4 jurisdiction types: state, county, special district, and municipal
- Join states with Census "states" to get state FIPS/hex
- Join counties with Census "counties" to get county FIPS/hex
- Join special districts with Census "places" and "subdivisions" to get county FIPS/hex
- Join municipals with Census "places" and "subdivisions" to get county FIPS/hex
- Then join all municipal, county, special district, and state data back together!



In [None]:
# Presigned GET URL (valid for 2 hours) for the landvote CSV in the "shared-tpl" bucket.
landvote_csv = client.get_presigned_url(
    "GET",
    "shared-tpl",
    "landvote/landvote_utf8.csv",
    expires=timedelta(hours=2),
)
# Runs of whitespace, collapsed to a single space after words are removed.
collapse_spaces = r"\s+"
# Jurisdiction-type words removed from "Jurisdiction Name" to build the `name` join key.
# Rebinds `match_pattern`, so later cells see this version.
match_pattern = r"(?i)\b(city|town|charter|municipality|Borough)\b"
# Pipeline: normalize the state abbreviation, derive the cleaned `name`, assign
# `landvote_id` via row_number over (state_id, jurisdiction, Date), strip '$' and
# ',' from "Conservation Funds Approved" and cast it to float, add `year`, then
# rename the remaining source columns to snake_case.
landvote_ = (con.read_csv(landvote_csv) #it skips the row with a unicode error 
            .rename(jurisdiction = "Jurisdiction Type", state_id = "State")
            .mutate(state_id = _.state_id.substitute({'Ore':'OR'}))
            .mutate(name=_['Jurisdiction Name'].re_replace(match_pattern, "").strip())
            .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
            .mutate(landvote_id=ibis.row_number().over(order_by=[_.state_id, _.jurisdiction, _.Date]))
            .mutate(_['Conservation Funds Approved'].replace('$', '')
                    .replace(',', '').cast('float').name('Conservation Funds Approved'))
            .mutate(year = _.Date.year())
            .rename(date = "Date", description = "Description", finance_mechanism = "Finance Mechanism",
                    other_comments = '"Other" Comment', purpose = "Purpose", total_funds_at_stake = "Total Funds at Stake",
                    conservation_funds_at_stake = "Conservation Funds at Stake", total_funds_approved = "Total Funds Approved",
                    conservation_funds_approved = "Conservation Funds Approved", passed = "Pass?", status = "Status", 
                    percent_yes = '% Yes', percent_no = '% No', notes = 'Notes', voted_acq_measure = "Voted Acq. Measure")
            )

#landvote_id is made with a window function, which can be a bit buggy, so it helps to materialize it after generating 
# Executing once and storing the result as table "landvote" freezes the ids,
# so every later cell sees the same landvote_id for a given row.
landvote_with_ids = landvote_.execute()  
landvote = con.create_table("landvote", landvote_with_ids, overwrite = True)

# Output schema shared by every jurisdiction level. 'h8' is deliberately last:
# the non-hex variants select final_columns[:-1] to drop it.
final_columns = ['landvote_id','FIPS',
    'state_id','state','county',
    'city','jurisdiction','year','date',
    'description','finance_mechanism',
    'other_comments','purpose',
    'total_funds_at_stake',
    'conservation_funds_at_stake',
    'total_funds_approved',
    'conservation_funds_approved',
    'passed','status','percent_yes','percent_no',
    'notes','voted_acq_measure',
    'geom','h8']

#### State level

In [None]:
# Hexed (zoom 8) state polygons written by the Census section.
state_z8 = con.read_parquet(state_h3_file)

# State-level measures: the jurisdiction name is the state name, and the
# county / county_fips / city slots are filled with the literal string 'None'.
placeholder = ibis.literal('None')
states = (
    landvote
    .filter(_.jurisdiction == "State")
    .rename(state="Jurisdiction Name")
    .mutate(county=placeholder, county_fips=placeholder, city=placeholder)
)

# Match on the state name, ignoring case.
same_state_name = states.state.upper() == state_z8.state.upper()
landvote_state_z8 = states.inner_join(state_z8, [same_state_name]).select(final_columns)

In [None]:
# Non-hex version: the same case-insensitive state-name join, against the plain
# state polygons, keeping every output column except the trailing 'h8'.
state_geo = con.read_parquet(state_file)
landvote_state_geo = (
    states
    .inner_join(state_geo, [states.state.upper() == state_geo.state.upper()])
    .select(final_columns[:-1])
)

#### County level

In [None]:
# Two alternatives: group 1 captures the words in front of "County"; group 2
# captures the words after "of". Only group 1 is extracted below.
county_match_pattern = r"(?i)(?:(\b[\w-]+(?:\s[\w-]+)*)\sCounty\b|of\s+([\w-]+(?:\s[\w-]+)*))"
# Manual overrides keyed on the full jurisdiction/county string (note that some
# keys differ only by leading/trailing spaces); the value is the join key to use.
county_vals = {'Columbus and Franklin County Metro Parks':'Franklin',
                ' Columbus and Franklin County Metro Parks':'Franklin',
                'Athens-Clarke County': 'Clarke',
                'City and County of San Francisco':'San Francisco',
                'Cleveland Metropolitan Park District':'Cuyahoga',
                'Denver City and County':'Denver',
                'East Baton Rouge Parish':'East Baton Rouge Parish',
                'Five Rivers MetroParks':'Montgomery',
                'Forest Preserve District of DuPage County':'DuPage',
                'Forest Preserve District of Kane County':'Kane',
                'Forest Preserves of Cook County':'Cook',
                'Great Parks of Hamilton County':'Hamilton',
                'Jacksonville':'Duval',
                'James City County': 'James City',
                'Johnny Appleseed Park District':'Allen',
                'Licking Park District':'Licking',
                'Matanuska-Susitna Borough':'Matanuska-Susitna Borough',
                'MetroParks of Butler County':'Butler',
                ' Metropolitan Park District of Toledo Area':'Lucas',
                'Metropolitan Park District of the Toledo Area':'Lucas',
                'Metropolitan Park District of Toledo Area':'Lucas',
                'Metropolitan Park District of Toledo Area ':'Lucas',
                'Park District of Ottawa County':'Ottawa',
                'Portage Park District':'Portage',
                'Preservation Park District of Delaware County':'Delaware',
                'Preservation Parks of Delaware County':'Delaware',
                'Santa Clara Valley Water District': 'Santa Clara',
                'St. Tammany Parish':'St. Tammany Parish',
                'Summit Metro Parks':'Summit'}

# Census side: `name` is the regex extraction from `county`, unless the full
# `county` string has a manual override in county_vals.
county_z8 = (con.read_parquet(county_h3_file)
            .mutate(name=_.county.re_extract(county_match_pattern, 1).strip())
            .mutate(name = _.county.substitute(value = county_vals,else_= _.name))
             )

# Landvote side: the same two steps, except the extraction runs on the
# already-cleaned `name` while the override lookup uses the raw jurisdiction
# name (renamed to `county`).
counties = (landvote.filter(_.jurisdiction == "County")
            .rename(county = "Jurisdiction Name")
            .mutate(city = ibis.literal('None'))
            .mutate(name=_.name.re_extract(county_match_pattern, 1).strip())
            .mutate(name = _.county.substitute(value = county_vals,else_= _.name))
           )

# Join on the derived name (case-insensitive) within the same state.
landvote_county_z8 = (counties
    .inner_join(county_z8, [counties.name.upper() == county_z8.name.upper(), counties.state_id == county_z8.state_id])
    .select(final_columns)
                     )

In [None]:
# Non-hex version: derive the same `name` join key on the plain county
# polygons (regex extraction first, then the manual overrides).
county_geo = con.read_parquet(county_file)
county_geo = county_geo.mutate(name=_.county.re_extract(county_match_pattern, 1).strip())
county_geo = county_geo.mutate(name=_.county.substitute(value=county_vals, else_=_.name))

# Same keys as the hexed join: derived name (case-insensitive) + state.
county_geo_keys = [
    counties.name.upper() == county_geo.name.upper(),
    counties.state_id == county_geo.state_id,
]
landvote_county_geo = (
    counties
    .inner_join(county_geo, county_geo_keys)
    .select(final_columns[:-1])  # everything except the trailing 'h8'
)

#### Special District Level


In [None]:
# Words stripped from special-district and Census city names before matching;
# a broader list than the municipal pattern (district/authority/park terms, "1", "2", ...).
sd_match_pattern = r"(?i)\b(city|town|CDP|CCD|village|charter|municipality|Borough|Park District|Authority|Basin|Mountains|2|1|District|Services|Special|Preservation|Assessment|Initiative|Open Space|Metro|Parks|Community|Recreation District)\b"
# Census cities (hexed) with `name` recomputed from `city` using the special-district pattern.
sd_z8 = (
    con.read_parquet(city_h3_file)
    .mutate(name=_.city.re_replace(sd_match_pattern, ""))
    .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
)

# Manual overrides: full district name -> the `name` value to match against Census cities.
sd_vals = {'Tri-Lakes Park and Recreation District':'Monument',
           'Urban Drainage and Flood Control District':'Denver',
           'Blue Heron Recreation District':'Phoenix',
           'Mountains Recreation and Conservation Authority':'Santa Monica',
           'St. Helena Parish Recreation and Parks District':'Greensburg',
           'West Geauga Park and Recreation District':'Chardon',
           'Marin County Open Space District':'San Rafael',
          }

# filtering landvote to just special districts 
# `name` is cleaned the same way as sd_z8.name, then replaced by the manual
# override when the raw district name (now `city`) is a key of sd_vals.
sd = (landvote.filter(_.jurisdiction == "Special District")
            .rename(city = "Jurisdiction Name")
            .mutate(name=_.name.re_replace(sd_match_pattern, ""))
            .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
            .mutate(name=_.city.substitute(value=sd_vals, else_=_.name))
     )

# detecting if a record has multiple counties listed in the notes field 
# i.e. notes mention "counties"/"Counties", or contain "County" more than once
# (splitting on "County" yields occurrences + 1 pieces).
multiple_counties_ = (
    sd
    .filter(~_.notes.isnull())
    .filter( 
        (_.notes.contains("counties")) |
        (_.notes.contains("Counties")) |
        (_.notes.split("County").length()-1>1) 
    )
)

#extracting multiple counties from notes column 
@ibis.udf.scalar.python
def extract_counties_udf(note: str) -> list[str]:
    """Return the counties named in `note`, each formatted as '<Name> County'.

    A match is a run of capitalised words, optionally separated by commas
    and/or "and", that is immediately followed by whitespace and one of
    "County", "county", "Counties" or "counties". Each run is split on the
    commas / "and" and every non-empty piece gets the " County" suffix, so
    "Adams, Boulder and Jefferson counties" yields
    ["Adams County", "Boulder County", "Jefferson County"].
    """
    run_of_names = re.compile(
        r"((?:[A-Z][a-zA-Z.\'-]*(?:\s+[A-Z][a-zA-Z.\'-]*)*)(?:,\s*)?(?:\s+and\s+)?)+(?=\s+(?:[Cc]ounty|[Cc]ounties))"
    )
    separators = re.compile(r',\s*|\s+and\s+')
    # group(0) is the whole run; the capturing group only holds its last repetition.
    return [
        f"{piece.strip()} County"
        for hit in run_of_names.finditer(note)
        for piece in separators.split(hit.group(0))
        if piece.strip()
    ]

# One row per county extracted from the notes: the list column is unnested
# and then stored as `county`.
multiple_counties = (multiple_counties_
    .mutate(county_list=extract_counties_udf(_.notes))
    .unnest(["county_list"])
    .mutate(county=_.county_list)
    .drop("county_list")
)

# Materialized as a Python list so the later filters can exclude these measures.
multiple_counties_ids = multiple_counties.select('landvote_id').distinct().execute()['landvote_id'].to_list()

# Only has 1 county in the notes field
# Take the first "<Name> County" phrase from the notes; when the notes yield an
# empty string, fall back to the same extraction on the district name (`city`);
# rows that are still empty are dropped here and handled by `manually_fill`.
single_county_pattern = r'([A-Z][a-zA-Z]+(?:\s[A-Z][a-zA-Z]*)*\sCounty)\.?'
single_county = (sd
    .filter(~_.notes.isnull())
    .filter(_.landvote_id.notin(multiple_counties_ids))
    .mutate(county=_.notes.re_extract(single_county_pattern, 1).strip())
    .mutate(county=_.county.cases(
        ('',_.city.re_extract(single_county_pattern, 1).strip()),
        else_ = _.county))
    .filter(_.county != '')
)
single_county_ids= single_county.select('landvote_id').distinct().execute()['landvote_id'].to_list()

# Nothing in notes, need to join with census data to get county
# Remaining districts are matched to Census cities on cleaned name
# (case-insensitive) + state; the county then comes from the Census side.
manually_fill = (sd
    .filter(_.landvote_id.notin(multiple_counties_ids))
    .filter(_.landvote_id.notin(single_county_ids))
    .inner_join(sd_z8,[_.name.upper() == sd_z8.name.upper(),
                        _.state_id == sd_z8.state_id]) 
    .select(final_columns)
    .distinct()
)

# Districts whose county came from the notes, with one county-name override applied.
sd_county_vals = {'Western Summit County':'Summit County'}
sd_with_counties = single_county.union(multiple_counties).mutate(county=_.county.substitute(value=sd_county_vals, else_=_.county))

#since we are joining on counties, there may be duplicate hexes because of the cities 
# Join on the full county name (case-insensitive) + state, then append the
# districts that were resolved through the Census city match.
landvote_sd_z8 = (sd_with_counties
                .inner_join(county_z8.distinct(), [sd_with_counties.county.upper() == county_z8.county.upper(), 
                                                        sd_with_counties.state_id == county_z8.state_id])
                .select(final_columns)
                .union(manually_fill)
              )

In [None]:
# Non-hex version: collapse the hexed result back to one row per measure/county
# by dropping the hex columns, then attach the plain county polygons.
# NOTE: this rebinds `sd`, which the special-district cell above defined as the
# filtered landvote table; re-run that cell before re-running its dependents.
sd = landvote_sd_z8.drop('h8','geom').distinct()
landvote_sd_geo = (sd.inner_join(county_geo,[sd.county.upper() == county_geo.county.upper(), sd.state_id == county_geo.state_id])
    .select(final_columns[:-1]))


#### Municipal level

Because there isn't a 1 to 1 match from municipals to Census data, we need to use both "Places" and "Subdivisions". 

In [None]:
# Manual overrides for municipal measures: raw landvote jurisdiction name ->
# the `name` value to match against the Census city table. Keys are matched
# exactly, so the trailing space in some keys is significant.
municipal_vals = {
    "Addison": "Addison village",
    "Anderson Township Park District": "Anderson township",
    "Bainbridge Island Metropolitan Park & Recreation District": "Bainbridge Island",
    "Bainbridge Island Metropolitan Park and Recreation District ": "Bainbridge Island",
    "Bel-Ridge": "Bel-Ridge village",
    "Bend Park and Recreation District": "Bend",
    "Boardman Township Park District": "Boardman township",
    "Carney's Point Township": "Carneys Point township",
    "Castro Valley": "Castro Valley CDP",
    "Charter Township of Meridian": "Meridian township",
    "Charter Township of Oakland": "Oakland township",
    "Corrales": "Corrales village",
    "Dobbs Ferry": "Dobbs Ferry village",
    "Downers Grove Park District": "Downers Grove village",
    "Gates Mills": "Gates Mills village",
    "Glen Ellyn Park District": "Glen Ellyn village",
    "Hillsborough": "Hillsborough township",
    "Irvington": "Irvington village",
    "Lake Zurich": "Lake Zurich village",
    "Lake in the Hills": "Lake in the Hills village",
    "Libertyville": "Libertyville township",
    "Loch Arbor Village": "Loch Arbour Village",
    "Lockport Township Park District": "Lockport township",
    "Moapa": "Moapa CDP",
    "Nunda": "Nunda township",
    "Orland Park": "Orland Park village",
    "Park Ridge Recreation and Park District": "Park Ridge",
    "Peapack-Gladstone Borough": "Peapack and Gladstone",
    "Princeton Township": "Princeton",
    "Romeoville": "Romeoville village",
    "San Diego Open Space Park Facilities District No. 1": "San Diego",
    "Seattle Park District": "Seattle",
    "Stookey": "Stookey township",
    "Tarrytown": "Tarrytown village",
    "Tofte": "Tofte township",
    "Village of Corrales": "Corrales village",
    "Village of Lake Barrington": "Lake Barrington village",
    "Village of Los Ranchos de Albuquerque": "Los Ranchos de Albuquerque village",
    "West Paterson Borough": "Woodland Park",
    "Westampton": "Westampton township",
    "Willamalane Park and Recreation District": "Springfield",
    "Wilmette Park District": "Wilmette village",  
}
collapse_spaces = r"\s+"
# Census cities (hexed) with `name` recomputed from `city` using whichever
# `match_pattern` is bound when this cell runs.
city_z8 = (
    con.read_parquet(city_h3_file)
    .mutate(name=_.city.re_replace(match_pattern, ""))
    .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
)

# filter to only cities
# `name` falls back to the cleaned landvote name unless the raw jurisdiction
# name (now `city`) has a manual override in municipal_vals.
municipals = (landvote.filter(_.jurisdiction == "Municipal")
            .rename(city = "Jurisdiction Name")
            .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
            .mutate(name = _.city.substitute(value = municipal_vals, else_= _.name))
             )

# join with census data 
city_joined = (municipals.inner_join(city_z8, [municipals.name.upper() == city_z8.name.upper(), 
                                                municipals.state_id == city_z8.state_id]).select(final_columns))

# handling cities with multiple counties
# A measure that yields more than one distinct non-hex row matched more than
# one Census record (e.g. a city name present in several counties).
dupes = city_joined.drop('h8','geom').distinct().group_by("landvote_id").agg(county_count = _.count()).filter(_.county_count > 1)
duplicate_ids = dupes.execute()['landvote_id'].to_list()

# 105 that are already filled in, manually scraping the counties from the notes 
# The notes must consist solely of "<Name> County" or "<Name> Co" (optional
# trailing period); anything else extracts '' and is dropped by the ' County' filter.
pattern = r'^\s*([A-Z][a-z]+(?:\s[A-Z][a-z]+)*)\s(?:County|Co)\.?\s*$'
counties_filled = (municipals.filter(_.landvote_id.isin(duplicate_ids))
                    .filter(~_.notes.isnull())
                    .mutate(county=_.notes.re_extract(pattern, 1).strip()+ ibis.literal(' County'))
                    .filter(_.county !=' County')
                  )

# since we added the county, join it with the rest of the census data 
counties_filled_join = (counties_filled
    .inner_join(city_z8,[counties_filled.name.upper() == city_z8.name.upper(),
                          counties_filled.county.upper() == city_z8.county.upper(), 
                          counties_filled.state_id == city_z8.state_id])
    .select(final_columns))

counties_filled_ids = counties_filled_join.select('landvote_id').distinct().execute()['landvote_id'].to_list()

# join with the rest of the municipal data
# Measures disambiguated through the notes replace their ambiguous matches;
# everything else keeps the plain name + state match.
landvote_city_z8 = city_joined.filter(~_.landvote_id.isin(counties_filled_ids)).union(counties_filled_join).distinct()

In [None]:
# Rebinds `match_pattern` once more for the non-hex municipal join below.
match_pattern = r"(?i)\b(city|town|charter|municipality|[Bb]orough)\b"

# Census cities with county polygons: join the city FIPS table to county_geo on
# FIPS, discard the right-hand duplicates of clashing columns, and rebuild
# `name` from `city` with the pattern above.
city_geo = (city_fips.inner_join(county_geo, 'FIPS').select(~s.endswith('_right')).drop('name')
    .mutate(name=_.city.re_replace(match_pattern, ""))
    .mutate(name=_.name.re_replace(collapse_spaces, " ").strip()))

# Measures whose county was recovered from the notes: match on name + county + state.
municipals_counties = (counties_filled
    .mutate(name=_.city.re_replace(match_pattern, ""))
    .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
    .mutate(name = _.city.substitute(value = municipal_vals, else_= _.name))
    .inner_join(city_geo,[_.name.upper() == city_geo.name.upper(),
                          _.county.upper() == city_geo.county.upper(), 
                          _.state_id == city_geo.state_id])
    .select(final_columns[:-1])
                      )

# All remaining municipal measures: match on name + state only.
other_municipals = (municipals.filter(~_.landvote_id.isin(counties_filled_ids))
    .mutate(name=_.city.re_replace(match_pattern, ""))
    .mutate(name=_.name.re_replace(collapse_spaces, " ").strip())
    .mutate(name = _.city.substitute(value = municipal_vals, else_= _.name))
    .inner_join(city_geo,[_.name.upper() == city_geo.name.upper(),_.state_id == city_geo.state_id])
    .select(final_columns[:-1]))

landvote_city_geo = municipals_counties.union(other_municipals).distinct() 

#### Joining all the landvote data with census
Note: `landvote_joined_z8` and `landvote_joined_geo` have more unique rows than `landvote` because some cities/special districts span multiple counties. Each additional county creates a new row.

In [None]:
# Stack the four jurisdiction levels (hexed) in the order
# city -> county -> special district -> state, then publish.
landvote_joined_z8 = landvote_city_z8
for level_z8 in (landvote_county_z8, landvote_sd_z8, landvote_state_z8):
    landvote_joined_z8 = landvote_joined_z8.union(level_z8)
landvote_joined_z8.to_parquet("s3://shared-tpl/landvote/z8/landvote_h3_z8.parquet")


# and non-hex version 
landvote_joined_geo = landvote_city_geo
for level_geo in (landvote_county_geo, landvote_sd_geo, landvote_state_geo):
    landvote_joined_geo = landvote_joined_geo.union(level_geo)
landvote_joined_geo.to_parquet("s3://shared-tpl/landvote/landvote_geom.parquet")

#### Generate PMTiles

In [None]:
# Presigned GET URL (valid for 2 hours) for the non-hex landvote parquet
# written to the "shared-tpl" bucket by the previous cell.
parquet = client.get_presigned_url(
    "GET",
    "shared-tpl",
    "landvote/landvote_geom.parquet",
    expires=timedelta(hours=2),
)
# parquet -> GeoJSON -> PMTiles -> upload next to the parquet.
to_geojson(parquet, "landvote_geom.geojson")
pmtiles = to_pmtiles("landvote_geom.geojson", "landvote_geom.pmtiles")
s3_cp('landvote_geom.pmtiles', "s3://shared-tpl/landvote/landvote_geom.pmtiles", "minio")