# Census: Downloading subdivisions data
---

I used these links and copied the URL that the zip file downloads from. The URL has the same format for every state, so you only need to download one file to work out the pattern.



In [1]:
import os
import io
import requests
import zipfile
import tempfile
import shutil
import geopandas as gpd
import ibis
from ibis import _
from cng.utils import *
from cng.h3 import *

# Open a DuckDB connection through ibis with the spatial and h3 extensions.
# duckdb_install_h3 / set_secrets come from cng; presumably they install the
# h3 extension and configure the S3 credentials used for writing -- TODO confirm.
duckdb_install_h3()
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
con.raw_sql("SET THREADS=100;")
set_secrets(con)

# Destination for the converted parquet files.
bucket = "public-census"
s3_prefix = "2025/cousub"

# Get the FIPS code and abbreviation for each state, skipping the territory codes.
# Both columns come from ONE query so the two lists are guaranteed to line up
# row for row (they are zipped together later), and the remote file is only
# read once.
fips_url = 'https://www2.census.gov/geo/docs/reference/codes2020/national_state2020.txt'
states = (
    con.read_csv(fips_url)
    .filter(_.STATE.notin(['AS','GU','MP','PR','UM','VI']))
    .select("STATEFP", "STATE")
    .execute()
)
# The CSV reader may infer STATEFP as an integer, which drops the leading zero
# (e.g. "01" -> 1); the TIGER file names need the two-digit form.
fips_codes = [str(code).zfill(2) for code in states["STATEFP"].tolist()]
state_names = states["STATE"].tolist()


In [10]:
def shape_to_parquet(fips_codes, state_names, s3_prefix, bucket):
    """Download each state's TIGER 2025 county-subdivision shapefile and save it as parquet.

    For every state: download the zip, extract it to a temporary directory,
    read the shapefile, convert the geometry from EPSG:4269 to EPSG:4326,
    attach the state/county names from the Census county-subdivision
    reference file, and write the result to
    ``s3://{bucket}/{s3_prefix}/tl_2025_{fips}_cousub.parquet``.

    States whose download fails or whose zip lacks the expected ``.shp`` are
    reported with ``print`` and skipped.

    Args:
        fips_codes: State FIPS codes (str or int); zero-padded to two digits.
        state_names: Labels used in the progress messages, parallel to
            ``fips_codes``.
        s3_prefix: Key prefix inside the bucket.
        bucket: Destination bucket name.

    Returns:
        None.
    """
    states = list(zip(fips_codes, state_names))
    if not states:
        # Nothing to convert; avoid fetching the reference file for no reason.
        return

    # these towns are in landvote and need to be manually changed to match with census spatial data
    replace_vals = {'Amesbury Town city': 'Amesbury city',
                    'Watertown Town city': 'Watertown city',
                    'Easthampton Town city': 'Easthampton city',
                    'Methuen Town city': 'Methuen city'}

    county_vals = {'St. Louis city': 'St. Louis County'}

    # Reference table used to add the county to each subdivision. It is the
    # same for every state, so it is built once here instead of once per loop
    # iteration (each build re-reads the remote file).
    places_url = "https://www2.census.gov/geo/docs/reference/codes2020/national_cousub2020.txt"
    reference = (
        con.read_csv(places_url)
        .rename(state='STATE', county='COUNTYNAME', cousubname='COUSUBNAME')
        .mutate(cousubname=_.cousubname.substitute(value=replace_vals, else_=_.cousubname))
        .mutate(county=_.county.substitute(value=county_vals, else_=_.county))
    )

    for fips, state_name in states:
        # File names use the two-digit FIPS code; pad in case the code arrived
        # as an int or without its leading zero.
        state_fips = str(fips).zfill(2)

        # 1 shape file per state, need to get each one with fips code
        shapefile_prefix = f"tl_2025_{state_fips}_cousub"
        shp_name = f"{shapefile_prefix}.shp"
        url = f"https://www2.census.gov/geo/tiger/TIGER2025/COUSUB/{shapefile_prefix}.zip"
        print(f"Processing state {state_name}: {url}")

        # download zip; a timeout keeps one stalled connection from hanging the
        # whole run, and connection errors are skipped like HTTP errors are.
        try:
            response = requests.get(url, timeout=60)
        except requests.RequestException as err:
            print(f"Failed to download for state {state_name}: {err}")
            continue
        if response.status_code != 200:
            print(f"Failed to download for state {state_name}")
            continue

        zip_bytes = io.BytesIO(response.content)

        # extract zip in temp directory (removed automatically when the block exits)
        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(zip_bytes) as zf:
                if shp_name not in zf.namelist():
                    print(f"Shapefile not found for {state_name}")
                    continue

                zf.extractall(temp_dir)

            # Only this state's reference rows may take part in the join:
            # subdivision names repeat across states, so joining on the name
            # against the national table attaches other states' counties.
            # The cast makes the comparison work whether STATEFP was read as
            # text ("01") or as a number (1).
            # NOTE(review): names can also repeat across counties within one
            # state, so the name join may still yield more than one row per
            # subdivision -- confirm whether a county key should be added.
            counties = (
                reference
                .filter(_.STATEFP.cast("int64") == int(fips))
                .select('state', 'county', 'cousubname')
            )

            shp_path = os.path.join(temp_dir, shp_name)
            # The source CRS is hard-coded as EPSG:4269; it can be checked
            # with geopandas via gpd.read_file(shp_path).crs.
            gdf = (
                con.read_geo(shp_path)
                .rename(name='NAME', geometry='geom', cousubname='NAMELSAD')
                .cast({'geometry': 'geometry'})
                .mutate(geometry=_.geometry.convert('EPSG:4269', 'EPSG:4326'))
                .join(counties, 'cousubname', how='inner')
            )

            # convert to parquet (must happen while the temp directory still exists)
            parquet_name = f"{shapefile_prefix}.parquet"
            parquet_path = f"s3://{bucket}/{s3_prefix}/{parquet_name}"
            gdf.to_parquet(parquet_path)

    return

In [11]:
# Convert every state's county-subdivision shapefile to parquet.
shape_to_parquet(
    fips_codes=fips_codes,
    state_names=state_names,
    s3_prefix=s3_prefix,
    bucket=bucket,
)

Processing state MO: https://www2.census.gov/geo/tiger/TIGER2025/COUSUB/tl_2025_29_cousub.zip
