In [2]:
# !pip install censusgeocode

In [3]:
import glob
import json
import requests
import pandas as pd
from pprint import pprint



# Census Examples 

This notebook uses the `censusgeocode` package in Python (which is simply a wrapper around the US Census Bureau's official Geocoder API) to get census geographies for a list of addresses or lat/longs.

- https://pypi.org/project/censusgeocode/

### Step 1 | Grab your data at the address level

In [5]:
# Load the address-level 311 heat/hot-water complaints.
# low_memory=False makes pandas parse the file in a single pass, so each
# column's dtype is inferred from all of its values at once instead of
# chunk by chunk (chunked inference is what triggers mixed-type warnings).
df = pd.read_csv('311_heat_hotwater.csv', low_memory=False)
df

  df = pd.read_csv('311_heat_hotwater.csv')


Unnamed: 0,Unique Key,Created Date,Closed Date,Agency,Agency Name,Complaint Type,Descriptor,Location Type,Incident Zip,Incident Address,...,Bridge Highway Segment,Latitude,Longitude,Location,Zip Codes,Community Districts,Borough Boundaries,City Council Districts,Police Precincts,Police Precinct
0,60551398,03/11/2024 11:59:51 PM,,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,APARTMENT ONLY,RESIDENTIAL BUILDING,11233.0,2140 FULTON STREET,...,,40.678309,-73.911508,"(40.67830884414116, -73.91150841210155)",13516.0,55.0,2.0,17.0,46.0,46.0
1,60550029,03/11/2024 11:59:30 PM,,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,APARTMENT ONLY,RESIDENTIAL BUILDING,11106.0,31-35 CRESCENT STREET,...,,40.764684,-73.927184,"(40.76468368198577, -73.92718359841146)",16863.0,39.0,3.0,4.0,72.0,72.0
2,60555358,03/11/2024 11:59:09 PM,,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,APARTMENT ONLY,RESIDENTIAL BUILDING,10473.0,1965 LAFAYETTE AVENUE,...,,40.822300,-73.856192,"(40.82230025720139, -73.85619211834099)",11611.0,58.0,5.0,31.0,26.0,26.0
3,60554001,03/11/2024 11:58:09 PM,,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,APARTMENT ONLY,RESIDENTIAL BUILDING,10459.0,919 PROSPECT AVENUE,...,,40.821710,-73.900789,"(40.821710476862634, -73.90078928961124)",10937.0,34.0,5.0,43.0,25.0,25.0
4,60548718,03/11/2024 11:56:53 PM,,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,ENTIRE BUILDING,RESIDENTIAL BUILDING,11414.0,84-10 153 AVENUE,...,,40.666377,-73.849975,"(40.66637663749487, -73.84997505769617)",15314.0,62.0,3.0,41.0,64.0,64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196536,58981137,10/01/2023 12:11:25 AM,10/02/2023 02:02:34 AM,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,ENTIRE BUILDING,RESIDENTIAL BUILDING,10032.0,76 ST NICHOLAS PLACE,...,,40.829462,-73.940797,"(40.829461589531974, -73.94079691863257)",12428.0,37.0,4.0,36.0,19.0,19.0
196537,59439994,10/01/2023 12:10:52 AM,10/03/2023 12:00:00 AM,HPD,Department of Housing Preservation and Develop...,Heat/Hot Water,Apartment Only,Apartment,11221.0,441 WILSON AVENUE,...,,40.693805,-73.913590,"(40.693804667081345, -73.91359021281768)",18181.0,42.0,2.0,37.0,53.0,53.0
196538,58986035,10/01/2023 12:10:09 AM,10/01/2023 08:03:34 PM,HPD,Department of Housing Preservation and Develop...,HEAT/HOT WATER,ENTIRE BUILDING,RESIDENTIAL BUILDING,10452.0,15 CLARKE PLACE EAST,...,,40.838102,-73.918274,"(40.83810190786086, -73.91827438644238)",10930.0,50.0,5.0,42.0,27.0,27.0
196539,59436693,10/01/2023 12:09:48 AM,10/01/2023 12:00:00 AM,HPD,Department of Housing Preservation and Develop...,Heat/Hot Water,Entire Building,Building-Wide,10452.0,15 EAST CLARKE PLACE,...,,40.838102,-73.918274,"(40.83810190786086, -73.91827438644238)",10930.0,50.0,5.0,42.0,27.0,27.0


### Step 2 | Geocode Lat/Long if they're not already present

The lat/longs already exist in this dataset, so nothing needs to be done here. Census geocode has a function to go from address --> lat/long, but I haven't had time to implement it here. Message me if you're struggling with this step.

### Step 3 | Get Census Geographies

In [None]:
# Code adapted from:
# https://gis.stackexchange.com/questions/363830/applying-the-censusgeocode-package-to-an-entire-dataframe-of-geocoded-data
# Defines a geocode function that accepts lat/long and spits out geographies
# The code then runs that function in parallel (for speed).

import pandas as pd
import censusgeocode as cg
from concurrent.futures import ThreadPoolExecutor
from tqdm.notebook import tqdm

import requests_cache
# HTTP session from requests_cache, named "geocode_cache" and backed by the
# filesystem; geocode() below sends every request through it.
# NOTE(review): presumably this lets re-runs reuse earlier responses instead
# of re-querying the Census API — confirm the cache expiry settings.
cache = requests_cache.CachedSession("geocode_cache", backend="filesystem")

def geocode(lat, lng, timeout=30):
    """
    Look up the Census block record for a single coordinate.

    Sends one GET request to the Census Geocoder "geographies/coordinates"
    endpoint through the module-level ``cache`` session and returns the
    first entry of the response's ``Census Blocks`` list.

    Parameters
    ----------
    lat, lng : float
        Latitude (sent as ``y``) and longitude (sent as ``x``).
    timeout : float, optional
        Seconds to wait for the HTTP response. Without a timeout a single
        stalled connection could block a worker thread indefinitely.

    Returns
    -------
    dict or None
        The first ``Census Blocks`` record, or ``None`` when a coordinate is
        missing or the lookup fails for any reason. Failures are printed
        rather than raised so one bad row does not abort a whole batch.
    """
    # A missing coordinate can never be geocoded; skip the network round trip.
    if pd.isna(lat) or pd.isna(lng):
        print(f"Error geocoding ({lat}, {lng}): missing coordinate")
        return None

    try:
        url = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates"
        params = {
            "x": lng,
            "y": lat,
            "benchmark": "Public_AR_Census2020",
            "vintage": "Census2020_Census2020",
            "format": "json"
        }
        response = cache.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        # Only the first matching block is kept.
        census = data['result']['geographies']['Census Blocks'][0]
        return census
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # decide what to do with the missing row.
        print(f"Error geocoding ({lat}, {lng}): {e}")
        return None

def bulk_geocode(latitudes, longitudes, max_workers=None):
    """
    Geocode a list of latitudes and longitudes in parallel (for speed).

    Parameters
    ----------
    latitudes, longitudes : iterable of float
        Coordinates to look up, paired element by element. Both must have
        the same length.
    max_workers : int, optional
        Size of the thread pool; ``None`` keeps ThreadPoolExecutor's default.

    Returns
    -------
    pandas.DataFrame
        One row per input coordinate, in input order. Rows whose lookup
        failed (``geocode`` returned ``None``) are left empty (NaN) so the
        result stays positionally aligned with the inputs.

    Raises
    ------
    ValueError
        If the two inputs differ in length.
    """
    # Use the caller's arguments (not a global frame) and materialize them so
    # that generators work and the progress-bar total is known up front.
    latitudes = list(latitudes)
    longitudes = list(longitudes)
    if len(latitudes) != len(longitudes):
        raise ValueError(
            f"latitudes and longitudes must have the same length "
            f"({len(latitudes)} != {len(longitudes)})"
        )

    with ThreadPoolExecutor(max_workers=max_workers) as tpe:
        # map() yields results in input order, regardless of completion order.
        mapped_results = tpe.map(geocode, latitudes, longitudes)
        data = list(tqdm(mapped_results, total=len(latitudes)))

    # A None among dict records can break the DataFrame constructor; an empty
    # record becomes an all-NaN row instead, preserving row alignment.
    records = [row if row is not None else {} for row in data]
    return pd.DataFrame(records)

# Geocode every row's coordinates, then preview the first few results.
census_geos_df = bulk_geocode(
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)
census_geos_df.head()


In [None]:
# Keep only the identifier columns that describe each Census geography.
to_keep = ['GEOID', 'STATE', 'COUNTY', 'TRACT', 'BLOCK']
census_geos_df = census_geos_df.loc[:, to_keep]
census_geos_df

In [None]:
# Attach the Census columns to the original rows side by side. Both frames
# get a fresh 0..n-1 index first so that rows pair up by position.
df_with_geos = pd.concat(
    (frame.reset_index(drop=True) for frame in (df, census_geos_df)),
    axis="columns",
)

df_with_geos.head()

# Step 4 | Pick a geographical level and get Census data
Do you want Census data at the state level? county? tract? block?

1. Pick a geographical level.
2. See `census-example.ipynb` if you want to learn how to get Census data at your desired level

# Hope that helps!