# Census: Downloading places data
---




In [2]:
import os
import io
import requests
import zipfile
import tempfile
import shutil
import geopandas as gpd
import ibis
from ibis import _
from cng.utils import *
from cng.h3 import *

# Set up a DuckDB-backed ibis connection with the spatial and h3 extensions.
duckdb_install_h3()
con = ibis.duckdb.connect(extensions = ["spatial", "h3"])
con.raw_sql("SET THREADS=100;")
set_secrets(con)

# Destination for the generated parquet files.
bucket = "public-census"
s3_prefix = "2025/places"

# Get the FIPS code and STATE value for each state.
# The reference file is read and executed once so that both lists come from
# the same result and are guaranteed to stay positionally aligned.
fips_url = 'https://www2.census.gov/geo/docs/reference/codes2020/national_state2020.txt'
excluded_codes = ['AS','GU','MP','PR','UM','VI']  # STATE values skipped below
state_table = (con.read_csv(fips_url)
    .filter(_.STATE.notin(excluded_codes))
    .select("STATEFP", "STATE")
    .execute()
)
fips_codes = state_table["STATEFP"].tolist()
state_names = state_table["STATE"].tolist()


In [3]:
def shape_to_parquet(fips_codes, state_names, s3_prefix, bucket):
    '''
    download zip/shape file -> convert to parquet -> save to minio

    For every (fips, state) pair, the TIGER 2025 "place" zip archive is
    downloaded, unpacked into a temporary directory, read through the
    module-level ibis connection `con`, converted from EPSG:4269 to
    EPSG:4326, left-joined to the place-by-county reference table and
    written to s3://{bucket}/{s3_prefix}/tl_2025_{fips}_place.parquet.

    States whose download does not return HTTP 200, or whose archive does
    not contain the expected .shp member, are reported and skipped.

    fips_codes:  state FIPS codes as used in the TIGER file names
    state_names: STATE values paired positionally with fips_codes; used in
                 the log messages and to pick that state's county rows
    s3_prefix:   key prefix inside the bucket
    bucket:      destination bucket name

    Returns None.
    '''
    # The place-by-county table is the same for every state, so register it
    # once instead of re-reading it on each loop iteration.
    places_url = "https://www2.census.gov/geo/docs/reference/codes2020/national_place_by_county2020.txt"
    all_counties = (con.read_csv(places_url)
        .rename(state='STATE', county='COUNTYNAME', placename='PLACENAME')
        # NOTE(review): the original chain had an unfinished
        # `.mutate(placename=` step here (a syntax error). Its intended
        # expression is not recoverable, so it was dropped -- re-add any
        # placename normalisation if the join keys turn out not to match.
        .select('state', 'county', 'placename')
    )

    for fips, state in zip(fips_codes, state_names):

        # 1 shape file per state, need to get each one with fips code
        shapefile_prefix = f"tl_2025_{fips}_place"
        shp_name = f"{shapefile_prefix}.shp"
        url = f"https://www2.census.gov/geo/tiger/TIGER2025/PLACE/{shapefile_prefix}.zip"
        print(f"Processing state {state}: {url}")

        # download zip
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to download for state {state}")
            continue

        # extract zip in temp directory
        with tempfile.TemporaryDirectory() as temp_dir:
            with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
                if shp_name not in zf.namelist():
                    print(f"Shapefile not found for {state}")
                    continue

                zf.extractall(temp_dir)

            # adding the counties to the places
            # Only this state's rows are joined: place names repeat across
            # states, so joining on placename against the nationwide table
            # would attach counties from other states.
            # Assumes the STATE column of the place-by-county file uses the
            # same values as `state_names` -- TODO confirm.
            counties = all_counties.filter(_.state == state)

            shp_path = os.path.join(temp_dir, shp_name)
            gdf = (con.read_geo(shp_path)
                .rename(name='NAME', geometry='geom', placename='NAMELSAD')
                .cast({'geometry': 'geometry'})
                .mutate(geometry=_.geometry.convert('EPSG:4269', 'EPSG:4326'))
                .join(counties, 'placename', how='left')
            )

            # convert to parquet (must happen while temp_dir still exists,
            # because the expression reads the extracted shapefile lazily)
            parquet_name = f"{shapefile_prefix}.parquet"
            parquet_path = f"s3://{bucket}/{s3_prefix}/{parquet_name}"
            gdf.to_parquet(parquet_path)

In [4]:
# Convert every state's places shapefile and upload it as parquet.
shape_to_parquet(
    fips_codes=fips_codes,
    state_names=state_names,
    s3_prefix=s3_prefix,
    bucket=bucket,
)

Processing state AL: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_01_place.zip
Processing state AK: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_02_place.zip
Processing state AZ: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_04_place.zip
Processing state AR: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_05_place.zip
Processing state CA: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_06_place.zip
Processing state CO: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_08_place.zip
Processing state CT: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_09_place.zip
Processing state DE: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_10_place.zip
Processing state DC: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_11_place.zip
Processing state FL: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_12_place.zip
Processing state GA: https://www2.census.gov/geo/tiger/TIGER2025/PLACE/tl_2025_13_place.zip

In [None]:
# Read back all parquet files under the places prefix and show the first rows.
cities_url = 's3://public-census/2025/places/**'
cities = con.read_parquet(cities_url)
cities.head().execute()