In [1]:
import pandas as pd, geopandas as gpd

In [2]:
# input: directory holding the census tracts shapefile
cal_tracts_file = '../data/tl_2010_06_tract10/'

# output: csv file that the tract centroids are written to
tract_centroids_file = '../data/bay_tract_centroids.csv'

# bay area counties, each paired with its 3-character county fips code
counties = dict([
    ('Alameda', '001'),
    ('Contra Costa', '013'),
    ('Marin', '041'),
    ('Napa', '055'),
    ('San Francisco', '075'),
    ('San Mateo', '081'),
    ('Santa Clara', '085'),
    ('Solano', '095'),
    ('Sonoma', '097'),
])

In [3]:
# read all of the tracts in the shapefile into a geodataframe,
# then display how many rows were loaded
gdf_cal = gpd.read_file(cal_tracts_file)
gdf_cal.shape[0]

8057

In [4]:
# pull the county fips code out of each tract's GEOID10 (characters at
# zero-based positions 2 through 4) and store it as its own column
gdf_cal['county_fips'] = gdf_cal['GEOID10'].str[2:5]

# keep only the tracts whose county fips code belongs to a bay area county,
# then display how many tracts remain
in_bay_area = gdf_cal['county_fips'].isin(counties.values())
gdf_bay = gdf_cal[in_bay_area]
len(gdf_bay)

1588

In [5]:
# calculate the centroid of each tract polygon then extract lat and lng coordinates.
# centroid is a planar calculation, and geopandas warns that results are likely
# incorrect when it is run on a geographic (degrees) crs. presumably this shapefile
# is in such a crs, as the lat/lng outputs imply -- TODO confirm via gdf_bay.crs.
# so: project to epsg:3310 (NAD83 / California Albers, a projected crs covering
# California), take the centroids there, then convert the points back to the
# shapefile's own crs so the saved coordinates stay in the original units.
if gdf_bay.crs is None:
    # no crs defined, so there is nothing to reproject from: use coordinates as-is
    centroids = gdf_bay.centroid
else:
    centroids = gdf_bay.to_crs(epsg=3310).centroid.to_crs(gdf_bay.crs)

# point coordinates are available as vectorized GeoSeries attributes, which keep
# the same index as gdf_bay (no per-row lambda needed)
lng = centroids.x
lat = centroids.y

In [6]:
# collect the tract identifier and its centroid coordinates as the columns
# of a plain dataframe for saving, then display its row count
columns_to_save = dict(GEOID10=gdf_bay['GEOID10'], lat=lat, lng=lng)
df_save = pd.DataFrame(columns_to_save)
len(df_save)

1588

In [7]:
# write the centroids to the output csv file as utf-8, leaving out the index column
df_save.to_csv(path_or_buf=tract_centroids_file,
               encoding='utf-8',
               index=False)