Map CBSA to census tracts. Note that CBSA contains MSA data.

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
target_epsg = 4269  # What the CBSA files use natively

In [2]:
fn_cbsa = '/Volumes/Extreme SSD/energy_communities/raw_input/cb_2023_us_all_20m/cb_2023_us_cbsa_20m.zip'
fn_csa = '/Volumes/Extreme SSD/energy_communities/raw_input/cb_2023_us_all_20m/cb_2023_us_csa_20m.zip'
fn_out_cbsa_to_tract = '/Volumes/Extreme SSD/energy_communities/clean_input/geography/cbsa_to_tract.parquet'

In [3]:
CbsaRaw = gpd.read_file(fn_cbsa)
CbsaRaw['cbsa_type'] = CbsaRaw['LSAD'].replace({'M1':'metro', 'M2':'micro'})
Cbsa = CbsaRaw.rename(columns={'CBSAFP':'cbsa_fips', 'GEOIDFQ':'cbsa_geoidfq', 'GEOID':'cbsa_geoid', 'NAME':'cbsa_name'})
Cbsa = Cbsa[['cbsa_name', 'cbsa_type', 'cbsa_fips', 'cbsa_geoid', 'cbsa_geoidfq',  'geometry']]
Cbsa = Cbsa.set_geometry('geometry')
Cbsa = Cbsa.to_crs(epsg=target_epsg)
Cbsa.head()

Unnamed: 0,cbsa_name,cbsa_type,cbsa_fips,cbsa_geoid,cbsa_geoidfq,geometry
0,"Laurel, MS",micro,29860,29860,310M700US29860,"POLYGON ((-89.40123 31.79686, -89.31656 31.802..."
1,"Fort Payne, AL",micro,22840,22840,310M700US22840,"POLYGON ((-86.11889 34.40384, -86.05771 34.475..."
2,"Washington Court House, OH",micro,47920,47920,310M700US47920,"POLYGON ((-83.65333 39.71688, -83.25244 39.695..."
3,"Fort Wayne, IN",metro,23060,23060,310M700US23060,"POLYGON ((-85.68657 41.17838, -85.65272 41.178..."
4,"Cedartown, GA",micro,16340,16340,310M700US16340,"POLYGON ((-85.42107 34.08081, -85.25807 34.079..."


In [12]:
# CbsaRaw['LSAD'].unique()  # Just metro or micro areas

In [4]:
census_dirs = '/Volumes/Extreme SSD/energy_communities/raw_input/census_tract/2023'
list_of_census_files = [ f.path for f in os.scandir(census_dirs) if f.is_dir() ]
print(list_of_census_files[:3])

['/Volumes/Extreme SSD/energy_communities/raw_input/census_tract/2023/tl_2023_04_tract', '/Volumes/Extreme SSD/energy_communities/raw_input/census_tract/2023/tl_2023_02_tract', '/Volumes/Extreme SSD/energy_communities/raw_input/census_tract/2023/tl_2023_01_tract']


In [5]:
intersection_list = []

for census_subfolder in tqdm(list_of_census_files):
    CensusObservationRaw = gpd.read_file(census_subfolder)
    CensusObservation = CensusObservationRaw.rename(columns={'GEOID':'census_geoid', 'GEOIDFQ':'census_geoidfq', 'geometry':'census_geometry'})
    CensusObservation = CensusObservation[['census_geoid', 'census_geoidfq', 'census_geometry']]
    CensusObservation = CensusObservation.set_geometry('census_geometry')
    CensusObservation = CensusObservation.to_crs(epsg=target_epsg)

    Intersection = gpd.sjoin(Cbsa, CensusObservation, how='inner', predicate='intersects')
    IntersectionIdOnly = Intersection[['cbsa_name', 'cbsa_type', 'cbsa_fips', 'cbsa_geoid', 'cbsa_geoidfq', 'census_geoid', 'census_geoidfq']]
    
    if len(Intersection) > 0:
        intersection_list.append(IntersectionIdOnly)
    else:
        pass

  0%|          | 0/56 [00:00<?, ?it/s]

In [13]:
Concat = pd.concat(intersection_list, axis=0, ignore_index=True)
Concat

Unnamed: 0,cbsa_name,cbsa_type,cbsa_fips,cbsa_geoid,cbsa_geoidfq,census_geoid,census_geoidfq
0,"St. George, UT",metro,41100,41100,310M700US41100,04015950101,1400000US04015950101
1,"St. George, UT",metro,41100,41100,310M700US41100,04015950103,1400000US04015950103
2,"Yuma, AZ",metro,49740,49740,310M700US49740,04027980005,1400000US04027980005
3,"Yuma, AZ",metro,49740,49740,310M700US49740,04027011407,1400000US04027011407
4,"Yuma, AZ",metro,49740,49740,310M700US49740,04027011405,1400000US04027011405
...,...,...,...,...,...,...,...
94060,"Ponce, PR",metro,38660,38660,310M700US38660,72107954901,1400000US72107954901
94061,"Ponce, PR",metro,38660,38660,310M700US38660,72001956500,1400000US72001956500
94062,"Ponce, PR",metro,38660,38660,310M700US38660,72093960100,1400000US72093960100
94063,"Ponce, PR",metro,38660,38660,310M700US38660,72001956400,1400000US72001956400


In [14]:
Concat.duplicated().sum() == 0

True

In [15]:
Concat.to_parquet(fn_out_cbsa_to_tract)