In [15]:
# !pip install censusgeocode

In [1]:
import glob
import json
import requests
import pandas as pd
from pprint import pprint

# Census Examples 

This notebook uses the `censusgeocode` package in Python (which is simply a wrapper around the US Census' official Geocoder API) to get census geographies for list of addresses or lat/longs

- https://pypi.org/project/censusgeocode/

### Step 1 | Grab your data at the address level

In [58]:
df = pd.read_csv('plants_with_coordinates.csv')

In [59]:
df.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812


In [60]:
df['plant_name'].nunique()

12633

In [61]:
df.shape

(12661, 14)

### Step 2 | Geoode Lat/Long if they're not already present

It already exists in this dataset. Census geocode has a function to go from addresss --> lat/long, but I haven't had time to implement it here. This dataset already has lat/longs. Message me if you're struggling with this step.

In [63]:
import pandas as pd
import requests_cache
import time
import os
from tqdm import tqdm

# Enable caching to speed up repeated requests
cache = requests_cache.CachedSession("geocode_cache", backend="filesystem")

def geocode(lat, lng, retries=3, delay=1):
    url = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates"
    params = {
        "x": lng,
        "y": lat,
        "benchmark": "Public_AR_Census2020",
        "vintage": "Census2020_Census2020",
        "format": "json"
    }
    for attempt in range(retries):
        try:
            response = cache.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            tract = data['result']['geographies']['Census Tracts'][0]
            return tract
        except Exception as e:
            if attempt < retries - 1:
                time.sleep(delay * (attempt + 1))  # Exponential backoff
            else:
                return {"error": str(e), "lat": lat, "lng": lng}

def process_chunk(chunk, chunk_index, save_prefix="census_geos_part"):
    results = []
    for _, row in tqdm(chunk.iterrows(), total=len(chunk), desc=f"Processing chunk {chunk_index + 1}"):
        lat, lng = row['latitude'], row['longitude']
        result = geocode(lat, lng)
        results.append(result)

    # Convert the results into a DataFrame
    df_chunk = pd.DataFrame(results)
    output_path = f"{save_prefix}_{chunk_index + 1}.csv"
    df_chunk.to_csv(output_path, index=False)

    # Free memory by deleting intermediate data
    del results, df_chunk  
    return output_path

def geocode_in_chunks_safe(df, chunk_size=500, save_prefix="census_geos_part"):
    total_chunks = (len(df) + chunk_size - 1) // chunk_size  # Calculate number of chunks
    
    for i in range(total_chunks):
        output_file = f"{save_prefix}_{i + 1}.csv"
        print(f"Processing chunk {i + 1} of {total_chunks}")
        
        # Skip checking if file exists; process each chunk regardless
        chunk = df.iloc[i*chunk_size : (i+1)*chunk_size]  # Select the chunk
        process_chunk(chunk, i, save_prefix=save_prefix)
        time.sleep(2)  # Sleep between requests to avoid hitting API rate limits

# Now, run this on your DataFrame
geocode_in_chunks_safe(df)


Processing chunk 1 of 26


Processing chunk 1: 100%|█| 500/500 [00:03<00:00, 140.60it/s


Processing chunk 2 of 26


Processing chunk 2: 100%|█| 500/500 [00:04<00:00, 123.38it/s


Processing chunk 3 of 26


Processing chunk 3: 100%|█| 500/500 [00:00<00:00, 950.74it/s


Processing chunk 4 of 26


Processing chunk 4: 100%|█| 500/500 [00:00<00:00, 919.80it/s


Processing chunk 5 of 26


Processing chunk 5: 100%|█| 500/500 [00:03<00:00, 138.65it/s


Processing chunk 6 of 26


Processing chunk 6: 100%|█| 500/500 [00:07<00:00, 67.96it/s]


Processing chunk 7 of 26


Processing chunk 7: 100%|█| 500/500 [00:00<00:00, 956.13it/s


Processing chunk 8 of 26


Processing chunk 8: 100%|█| 500/500 [00:03<00:00, 125.46it/s


Processing chunk 9 of 26


Processing chunk 9: 100%|█| 500/500 [00:07<00:00, 65.37it/s]


Processing chunk 10 of 26


Processing chunk 10: 100%|█| 500/500 [00:00<00:00, 911.20it/


Processing chunk 11 of 26


Processing chunk 11: 100%|█| 500/500 [00:00<00:00, 1010.32it


Processing chunk 12 of 26


Processing chunk 12: 100%|█| 500/500 [00:07<00:00, 67.47it/s


Processing chunk 13 of 26


Processing chunk 13: 100%|█| 500/500 [00:00<00:00, 1025.18it


Processing chunk 14 of 26


Processing chunk 14: 100%|█| 500/500 [00:04<00:00, 123.31it/


Processing chunk 15 of 26


Processing chunk 15: 100%|█| 500/500 [00:04<00:00, 115.49it/


Processing chunk 16 of 26


Processing chunk 16: 100%|█| 500/500 [00:00<00:00, 808.17it/


Processing chunk 17 of 26


Processing chunk 17: 100%|█| 500/500 [00:00<00:00, 875.25it/


Processing chunk 18 of 26


Processing chunk 18: 100%|█| 500/500 [00:00<00:00, 859.93it/


Processing chunk 19 of 26


Processing chunk 19: 100%|█| 500/500 [00:03<00:00, 140.33it/


Processing chunk 20 of 26


Processing chunk 20: 100%|█| 500/500 [00:03<00:00, 128.21it/


Processing chunk 21 of 26


Processing chunk 21: 100%|█| 500/500 [00:00<00:00, 901.72it/


Processing chunk 22 of 26


Processing chunk 22: 100%|█| 500/500 [00:00<00:00, 938.71it/


Processing chunk 23 of 26


Processing chunk 23: 100%|█| 500/500 [00:00<00:00, 1006.18it


Processing chunk 24 of 26


Processing chunk 24: 100%|█| 500/500 [00:00<00:00, 864.95it/


Processing chunk 25 of 26


Processing chunk 25: 100%|█| 500/500 [00:15<00:00, 33.03it/s


Processing chunk 26 of 26


Processing chunk 26: 100%|█| 161/161 [00:00<00:00, 809.72it/


In [64]:
import glob

# Adjust this pattern to match your saved files
chunk_files = sorted(glob.glob("census_geos_part_*.csv"))

# Load and concatenate them
combined_df = pd.concat([pd.read_csv(f) for f in chunk_files], ignore_index=True)

# Optional: Save combined result to disk
combined_df.to_csv("census_geocoded_full.csv", index=False)

# Check it out
combined_df.head()


Unnamed: 0,POP100,GEOID,CENTLAT,AREAWATER,STATE,BASENAME,OID,LSADC,FUNCSTAT,INTPTLAT,...,CENTLON,HU100,AREALAND,INTPTLON,MTFCC,UR,COUNTY,error,lat,lng
0,3494.0,36093030000.0,42.881043,69888.0,36.0,325.02,20790740000000.0,CT,S,42.881277,...,-73.913102,1320.0,7206301.0,-73.911677,G5020,M,93.0,,,
1,6075.0,25023540000.0,41.857932,4840087.0,25.0,5442.0,20790210000000.0,CT,S,41.858697,...,-70.744259,2689.0,69485830.0,-70.745874,G5020,M,23.0,,,
2,5862.0,25021410000.0,42.119699,338878.0,25.0,4141.0,20790260000000.0,CT,S,42.120093,...,-71.205044,2145.0,28397869.0,-71.203693,G5020,M,21.0,,,
3,6876.0,44003020000.0,41.633007,129489.0,44.0,209.03,20790230000000.0,CT,S,41.633184,...,-71.517378,2301.0,31664883.0,-71.524229,G5020,M,3.0,,,
4,7319.0,34035050000.0,40.562949,620006.0,34.0,510.0,20790310000000.0,CT,S,40.563551,...,-74.571689,2702.0,11276319.0,-74.570409,G5020,U,35.0,,,


In [65]:
combined_df.shape

(12661, 23)

In [66]:
combined_df['GEOID'] = combined_df['GEOID'].apply(
    lambda x: str(int(float(x))).zfill(11) if pd.notnull(x) else x
)

In [67]:
to_keep = ['GEOID', 'STATE', 'COUNTY', 'TRACT']
census_geos_df = combined_df[to_keep]
census_geos_df

Unnamed: 0,GEOID,STATE,COUNTY,TRACT
0,36093032502,36.0,93.0,32502.0
1,25023544200,25.0,23.0,544200.0
2,25021414100,25.0,21.0,414100.0
3,44003020903,44.0,3.0,20903.0
4,34035051000,34.0,35.0,51000.0
...,...,...,...,...
12656,33007950900,33.0,7.0,950900.0
12657,23005004001,23.0,5.0,4001.0
12658,16019970100,16.0,19.0,970100.0
12659,06105000500,6.0,105.0,500.0


In [68]:
df_with_geos = pd.concat(
    [ 
        df.reset_index(drop=True),
        census_geos_df.reset_index(drop=True)
    ], 
    axis=1)

df_with_geos.head()

Unnamed: 0,plant_name,Plant Code,State,Sector Name,Prime Movers,Fuel Types,Primary Technology,withdrawal_volume_million_gallons,discharge_volume_million_gallons,consumption_volume_million_gallons,energy_category,map_link,longitude,latitude,GEOID,STATE,COUNTY,TRACT
0,(3K) 59 Hetcheltown Rd,66729,NY,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-73.91048,42.87657&level=14",-73.91048,42.87657,36093032502,36.0,93.0,32502.0
1,0 Hammond St CSG,64876,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-70.726675,41.808547&level=14",-70.726675,41.808547,25023544200,25.0,23.0,544200.0
2,1 Commercial,67464,MA,IPP Non-CHP,"BA, PV","MWH, SUN",Multiple,,,,Other,"map/?center=-71.237,42.115&level=14",-71.237,42.115,25021414100,25.0,21.0,414100.0
3,"10 Briggs Solar NG, LLC (East)",62781,RI,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-71.49625,41.63269&level=14",-71.49625,41.63269,44003020903,44.0,3.0,20903.0
4,"10 Finderne Avenue Solar, LLC",64023,NJ,IPP Non-CHP,PV,SUN,Solar Photovoltaic,,,,Renewable Energy Sources,"map/?center=-74.57594,40.55812&level=14",-74.57594,40.55812,34035051000,34.0,35.0,51000.0


In [69]:
df_with_geos['plant_name'].nunique()

12633

In [70]:
df_with_geos.to_csv('plants_with_geos.csv')

In [31]:
df_grants = pd.read_csv('US Environmental Protection Agency Environmental Justice Grants - Data.csv')

In [32]:
df_grants.head()

Unnamed: 0,Announcement Date,Award Date,Project Title,Project Description,Recipient,Funding Source,Type of Award,Funding Status,Federal Award Identification Number,Award Amount,...,Zip Code,City,County,State,Assistance Listing,Program,Investment Category,Website Url,Announcement Url,Data Extract Date
0,Date of announcement (for data on projects in ...,Date of award or rebate.,Descriptive title of the project.,Brief description of the project.,Name of recipient of the award.,Bipartisan Infrastructure Law (BIL) or Inflati...,Identifies if a grant is primary or sub-award.,Selected or Awarded.,Federal Award Identification Number (FAIN).,Funds (dollars) received from BIL or IRA.,...,Zip code of award/project place of performance.,City of award/project place of performance.,County of award/project place of performance.,State of award/project place of performance.,The Assistance Listing Number (formerly known ...,Bipartisan Infrastructure Law or Inflation Red...,"Investment category (Climate Action, Air Inves...",URL containing more detailed information about...,URL containing press release (announcing fundi...,Date data extracted.
1,,05-31-2023,Morrisonville CUSD 1,With funding from the Bipartisan Infrastructur...,Morrisonville Cusd 1,BIL,Primary,Awarded,,790000,...,,,,IL,,Clean School Bus Rebates,Air Investments,https://www.epa.gov/cleanschoolbus/awarded-cle...,https://www.epa.gov/cleanschoolbus/awarded-cle...,01-31-2025
2,,01-07-2025,Saint Paul Island Renewable Energy Integration...,The purpose of this award is to provide fundin...,Aleut Community of Saint Paul Island,IRA,Primary,Awarded,84105401,14820331,...,,St Paul,Aleutians West Census Area,AK,66.046,CPRG - Implementation Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025
3,,07-10-2024,Cleanup Cooperative Agreement for City of West...,"Brownfields are real property, the expansion, ...",CITY OF WESTBROOK,BIL,Primary,Awarded,00A01199,4000000,...,,Westbrook,,ME,66.818,Brownfields Projects,Land Investments,https://www.epa.gov/brownfields/bipartisan-inf...,,01-31-2025
4,,07-19-2023,Inflation Reduction Act &ndash; Climate Pollut...,The purpose of this grant agreement is to prov...,Sacramento Metropolitan AQMD,IRA,Primary,Awarded,98T74301,1000000,...,,,Sacramento County,CA,66.046,CPRG - Planning Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025


In [17]:
df_grants = df_grants.drop(0, axis=0).reset_index(drop=True)

In [18]:
df_grants.head()

Unnamed: 0,Announcement Date,Award Date,Project Title,Project Description,Recipient,Funding Source,Type of Award,Funding Status,Federal Award Identification Number,Award Amount,...,Zip Code,City,County,State,Assistance Listing,Program,Investment Category,Website Url,Announcement Url,Data Extract Date
0,,05-31-2023,Morrisonville CUSD 1,With funding from the Bipartisan Infrastructur...,Morrisonville Cusd 1,BIL,Primary,Awarded,,790000,...,,,,IL,,Clean School Bus Rebates,Air Investments,https://www.epa.gov/cleanschoolbus/awarded-cle...,https://www.epa.gov/cleanschoolbus/awarded-cle...,01-31-2025
1,,01-07-2025,Saint Paul Island Renewable Energy Integration...,The purpose of this award is to provide fundin...,Aleut Community of Saint Paul Island,IRA,Primary,Awarded,84105401,14820331,...,,St Paul,Aleutians West Census Area,AK,66.046,CPRG - Implementation Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025
2,,07-10-2024,Cleanup Cooperative Agreement for City of West...,"Brownfields are real property, the expansion, ...",CITY OF WESTBROOK,BIL,Primary,Awarded,00A01199,4000000,...,,Westbrook,,ME,66.818,Brownfields Projects,Land Investments,https://www.epa.gov/brownfields/bipartisan-inf...,,01-31-2025
3,,07-19-2023,Inflation Reduction Act &ndash; Climate Pollut...,The purpose of this grant agreement is to prov...,Sacramento Metropolitan AQMD,IRA,Primary,Awarded,98T74301,1000000,...,,,Sacramento County,CA,66.046,CPRG - Planning Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025
4,,09-11-2023,Large Diameter Water Main Dead Ends Eliminatio...,This Bipartisan Infrastructure Law (BIL) (also...,DC Water and Sewer Authority,BIL,Primary,Awarded,95328501,5133600,...,,,,DC,66.468,Drinking Water State Revolving Fund,Water Investments,https://www.epa.gov/dwsrf,,01-31-2025


In [33]:
import pandas as pd
import requests_cache
import time
import os
from tqdm import tqdm

# Enable caching to speed up repeated requests
cache = requests_cache.CachedSession("geocode_cache", backend="filesystem")

def geocode(lat, lng, retries=3, delay=1):
    url = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates"
    params = {
        "x": lng,
        "y": lat,
        "benchmark": "Public_AR_Census2020",
        "vintage": "Census2020_Census2020",
        "format": "json"
    }
    for attempt in range(retries):
        try:
            response = cache.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            tract = data['result']['geographies']['Census Tracts'][0]
            return tract
        except Exception as e:
            if attempt < retries - 1:
                time.sleep(delay * (attempt + 1))  # Exponential backoff
            else:
                return {"error": str(e), "lat": lat, "lng": lng}

def process_chunk(chunk, chunk_index, save_prefix="census_geos_part"):
    results = []
    for _, row in tqdm(chunk.iterrows(), total=len(chunk), desc=f"Processing chunk {chunk_index + 1}"):
        lat, lng = row['Latitude'], row['Longitude']
        result = geocode(lat, lng)
        results.append(result)

    # Convert the results into a DataFrame
    df_chunk = pd.DataFrame(results)
    output_path = f"{save_prefix}_{chunk_index + 1}.csv"
    df_chunk.to_csv(output_path, index=False)

    # Free memory by deleting intermediate data
    del results, df_chunk  
    return output_path

def geocode_in_chunks_safe(df, chunk_size=500, save_prefix="grants_geos_part"):
    total_chunks = (len(df) + chunk_size - 1) // chunk_size  # Calculate number of chunks
    
    for i in range(total_chunks):
        output_file = f"{save_prefix}_{i + 1}.csv"
        print(f"Processing chunk {i + 1} of {total_chunks}")
        
        # Skip checking if file exists; process each chunk regardless
        chunk = df.iloc[i*chunk_size : (i+1)*chunk_size]  # Select the chunk
        process_chunk(chunk, i, save_prefix=save_prefix)
        time.sleep(2)  # Sleep between requests to avoid hitting API rate limits

# Now, run this on your DataFrame
geocode_in_chunks_safe(df_grants)


Processing chunk 1 of 11


Processing chunk 1: 100%|█| 500/500 [00:03<00:00, 126.25it/s


Processing chunk 2 of 11


Processing chunk 2: 100%|█| 500/500 [00:00<00:00, 954.09it/s


Processing chunk 3 of 11


Processing chunk 3: 100%|█| 500/500 [00:00<00:00, 940.66it/s


Processing chunk 4 of 11


Processing chunk 4: 100%|█| 500/500 [00:00<00:00, 880.27it/s


Processing chunk 5 of 11


Processing chunk 5: 100%|█| 500/500 [00:00<00:00, 903.77it/s


Processing chunk 6 of 11


Processing chunk 6: 100%|█| 500/500 [00:00<00:00, 903.81it/s


Processing chunk 7 of 11


Processing chunk 7: 100%|█| 500/500 [00:01<00:00, 496.90it/s


Processing chunk 8 of 11


Processing chunk 8: 100%|█| 500/500 [00:00<00:00, 879.07it/s


Processing chunk 9 of 11


Processing chunk 9: 100%|█| 500/500 [00:00<00:00, 757.50it/s


Processing chunk 10 of 11


Processing chunk 10: 100%|█| 500/500 [00:00<00:00, 1069.83it


Processing chunk 11 of 11


Processing chunk 11: 100%|█| 226/226 [00:00<00:00, 906.61it/


In [36]:
import glob

# Adjust this pattern to match your saved files
chunk_files = sorted(glob.glob("grants_geos_part_*.csv"))

# Load and concatenate them
grants_df = pd.concat([pd.read_csv(f) for f in chunk_files], ignore_index=True)

# Optional: Save combined result to disk
grants_df.to_csv("grants_geocoded_full.csv", index=False)

# Check it out
grants_df.head()


Unnamed: 0,error,lat,lng,POP100,GEOID,CENTLAT,AREAWATER,STATE,BASENAME,OID,...,NAME,OBJECTID,TRACT,CENTLON,HU100,AREALAND,INTPTLON,MTFCC,UR,COUNTY
0,400 Client Error: for url: https://geocoding....,Latitude of award/project place of performance.,Longitude of award/project place of performance.,,,,,,,,...,,,,,,,,,,
1,,,,4796.0,17021960000.0,39.441237,4022030.0,17.0,9590.0,20790150000000.0,...,Census Tract 9590,83511.0,959000.0,-89.387215,2119.0,491006900.0,-89.388553,G5020,M,21.0
2,,,,978.0,2016000000.0,52.593343,22079080000.0,2.0,1.0,2079045000000.0,...,Census Tract 1,17011.0,100.0,-176.45497,555.0,8556804000.0,178.338813,G5020,R,16.0
3,,,,5175.0,23005000000.0,43.710389,128617.0,23.0,27.0,20790200000000.0,...,Census Tract 27,48443.0,2700.0,-70.362032,2289.0,12719360.0,-70.36523,G5020,M,5.0
4,,,,1609.0,6067009000.0,38.438152,0.0,6.0,93.32,207903700000000.0,...,Census Tract 93.32,66858.0,9332.0,-121.339287,498.0,7870005.0,-121.339287,G5020,M,67.0


In [37]:
grants_df['GEOID'] = grants_df['GEOID'].apply(
    lambda x: str(int(float(x))).zfill(11) if pd.notnull(x) else x
)

In [38]:
to_keep = ['GEOID', 'STATE', 'COUNTY', 'TRACT']
grants_geos_df = grants_df[to_keep]
grants_geos_df

Unnamed: 0,GEOID,STATE,COUNTY,TRACT
0,,,,
1,17021959000,17.0,21.0,959000.0
2,02016000100,2.0,16.0,100.0
3,23005002700,23.0,5.0,2700.0
4,06067009332,6.0,67.0,9332.0
...,...,...,...,...
5221,04001944301,4.0,1.0,944301.0
5222,72107955001,72.0,107.0,955001.0
5223,17201004004,17.0,201.0,4004.0
5224,27035951100,27.0,35.0,951100.0


In [39]:
grants_with_geos = pd.concat(
    [ 
        df_grants.reset_index(drop=True),
        grants_geos_df.reset_index(drop=True)
    ], 
    axis=1)

grants_with_geos.head()

Unnamed: 0,Announcement Date,Award Date,Project Title,Project Description,Recipient,Funding Source,Type of Award,Funding Status,Federal Award Identification Number,Award Amount,...,Assistance Listing,Program,Investment Category,Website Url,Announcement Url,Data Extract Date,GEOID,STATE,COUNTY,TRACT
0,Date of announcement (for data on projects in ...,Date of award or rebate.,Descriptive title of the project.,Brief description of the project.,Name of recipient of the award.,Bipartisan Infrastructure Law (BIL) or Inflati...,Identifies if a grant is primary or sub-award.,Selected or Awarded.,Federal Award Identification Number (FAIN).,Funds (dollars) received from BIL or IRA.,...,The Assistance Listing Number (formerly known ...,Bipartisan Infrastructure Law or Inflation Red...,"Investment category (Climate Action, Air Inves...",URL containing more detailed information about...,URL containing press release (announcing fundi...,Date data extracted.,,,,
1,,05-31-2023,Morrisonville CUSD 1,With funding from the Bipartisan Infrastructur...,Morrisonville Cusd 1,BIL,Primary,Awarded,,790000,...,,Clean School Bus Rebates,Air Investments,https://www.epa.gov/cleanschoolbus/awarded-cle...,https://www.epa.gov/cleanschoolbus/awarded-cle...,01-31-2025,17021959000.0,17.0,21.0,959000.0
2,,01-07-2025,Saint Paul Island Renewable Energy Integration...,The purpose of this award is to provide fundin...,Aleut Community of Saint Paul Island,IRA,Primary,Awarded,84105401,14820331,...,66.046,CPRG - Implementation Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025,2016000100.0,2.0,16.0,100.0
3,,07-10-2024,Cleanup Cooperative Agreement for City of West...,"Brownfields are real property, the expansion, ...",CITY OF WESTBROOK,BIL,Primary,Awarded,00A01199,4000000,...,66.818,Brownfields Projects,Land Investments,https://www.epa.gov/brownfields/bipartisan-inf...,,01-31-2025,23005002700.0,23.0,5.0,2700.0
4,,07-19-2023,Inflation Reduction Act &ndash; Climate Pollut...,The purpose of this grant agreement is to prov...,Sacramento Metropolitan AQMD,IRA,Primary,Awarded,98T74301,1000000,...,66.046,CPRG - Planning Grant,Climate Action,https://www.epa.gov/inflation-reduction-act/ab...,,01-31-2025,6067009332.0,6.0,67.0,9332.0


In [40]:
grants_with_geos.to_csv('grants_with_geos.csv')

### Step 3 | Get Census Geographies

In [None]:
# Code adapted from:
# https://gis.stackexchange.com/questions/363830/applying-the-censusgeocode-package-to-an-entire-dataframe-of-geocoded-data
# Defines a geocode function that accepts lat/long and spits out geographies
# The code then runs that funciton in parllel (for speed).

import pandas as pd
import censusgeocode as cg
from concurrent.futures import ThreadPoolExecutor
from tqdm.notebook import tqdm

import requests_cache
cache = requests_cache.CachedSession("geocode_cache", backend="filesystem")

def geocode(lat, lng):
    try:
        url = "https://geocoding.geo.census.gov/geocoder/geographies/coordinates"
        params = {
            "x": lng,
            "y": lat,
            "benchmark": "Public_AR_Census2020",
            "vintage": "Census2020_Census2020",
            "format": "json"
        }
        response = cache.get(url, params=params)
        response.raise_for_status()
        data = response.json()
        census = data['result']['geographies']['Census Tracts'][0]
        return census
    except Exception as e:
        print(f"Error geocoding ({lat}, {lng}): {e}")
        return None

def bulk_geocode(latitudes, longitudes):
    """
    Geocode a list of latitudes and longitudes in parallel (for speed).
    """

    with ThreadPoolExecutor() as tpe:
        latitudes = df['latitude']
        longitudes = df['longitude']
        mapped_results = tpe.map(geocode, latitudes, longitudes)
        mapped_results = [result for result in mapped_results if result is not None]
        data = list(tqdm(mapped_results, total=len(mapped_results)))  # Use filtered results

    return pd.DataFrame(data)

census_geos_df = bulk_geocode(df['latitude'], df['longitude']) 
census_geos_df.head()

Error geocoding (29.449827, -101.06011): 'Census Tracts'


In [None]:
to_keep = ['GEOID', 'STATE', 'COUNTY', 'TRACT']
census_geos_df = census_geos_df[to_keep]
census_geos_df

In [None]:
df_with_geos = pd.concat(
    [ 
        df.reset_index(drop=True),
        census_geos_df.reset_index(drop=True)
    ], 
    axis=1)

df_with_geos.head()

In [None]:
df_with_geos = df_with_geos.drop(columns=["Unnamed: 0"])

In [None]:
df_with_geos.to_csv('plants_with_geos.csv', index=False)

# Step 4 | Pick a geographical level and get Census data
Do you want Census data at the state level? county? tract? block?

1. Pick a geographical level.
2. See `census-example.ipynb` if you want to learn how to get Census data at your desired level

# Hope that helps!