In [11]:
import geopandas as gpd
import requests
import io
import pandas as pd

# Path of the shapefile that is loaded with geopandas further down in the notebook.
# NOTE(review): the constant says "BLK" (block) but the file name contains "tract" —
# presumably this is tract-level geometry; confirm and consider renaming.
CENSUS_BLK_SHP = 'tl_2020_06_tract10.shp'

In [42]:
# Query the Census ACS 5-year (2019) data-profile endpoint: NAME plus the two
# percent estimates DP02_0060PE and DP02_0061PE for every tract (tract:*) in
# state 06 (California).
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here on an HTTP error instead of letting
# the next cell try to parse an error page as data.
response = requests.get(
    'https://api.census.gov/data/2019/acs/acs5/profile?get=NAME,DP02_0060PE,DP02_0061PE&for=tract:*&in=state:06',
    timeout=60,
)
response.raise_for_status()
response

<Response [200]>

In [45]:
# The response body is a JSON array of rows whose first row holds the column names.
# Parse it as JSON rather than deleting brackets from the raw text: the text
# approach would also strip any '[' or ']' that appears inside a field, depends on
# the exact line layout of the payload, and lets read_csv reinterpret strings such
# as "NA" as missing values.
# `content` is kept as the name of the intermediate object because a later cell
# deletes it by name.
content = response.json()
header, *records = content

# Values are taken exactly as delivered (presumably JSON strings, as the original
# text-based parsing assumed), so leading 0s in state/county/tract are not removed.
# JSON nulls become None, which isnull()/dropna() treat as missing.
pop_df = pd.DataFrame(records, columns=header)
pop_df

Unnamed: 0,NAME,DP02_0060PE,DP02_0061PE,state,county,tract
0,"Census Tract 5087.04, Santa Clara County, Cali...",5.3,4.1,06,085,508704
1,"Census Tract 5079.04, Santa Clara County, Cali...",1.4,0.4,06,085,507904
2,"Census Tract 5085.04, Santa Clara County, Cali...",4.5,4.8,06,085,508504
3,"Census Tract 5085.05, Santa Clara County, Cali...",0.5,1.5,06,085,508505
4,"Census Tract 5094.03, Santa Clara County, Cali...",4.7,4.5,06,085,509403
...,...,...,...,...,...,...
8052,"Census Tract 1041.08, Los Angeles County, Cali...",22.8,16.4,06,037,104108
8053,"Census Tract 1042.03, Los Angeles County, Cali...",29.6,15.0,06,037,104203
8054,"Census Tract 1042.04, Los Angeles County, Cali...",21.0,18.0,06,037,104204
8055,"Census Tract 1044.03, Los Angeles County, Cali...",26.6,9.8,06,037,104403


In [46]:
# ACS variable code -> output column name for the two education estimates.
edu_cols = {'DP02_0060PE': 'N_9Grade', 'DP02_0061PE': 'N_Diploma'}

# Built as one chain that returns a new frame at every step, so no column is ever
# assigned onto a filtered slice (which raises SettingWithCopyWarning).
# NOTE(review): the Census API may encode unavailable estimates as large negative
# sentinel values (e.g. -666666666) instead of null; such rows would pass through
# unchanged here — confirm and filter if needed.
pop_df = (
    pop_df
    # GEOID10 = state + county + tract; this is the key used to join with the shapefile
    .assign(GEOID10=lambda d: d['state'].astype(str) + d['county'].astype(str) + d['tract'].astype(str))
    # Drop tracts that are missing either estimate
    .dropna(subset=list(edu_cols))
    .rename(columns=edu_cols)
    # The estimates arrive as text; convert them to numbers
    .astype({new_name: float for new_name in edu_cols.values()})
    # Only keep the join key and the two education columns
    .loc[:, ['GEOID10', 'N_9Grade', 'N_Diploma']]
)
pop_df

Unnamed: 0,GEOID10,N_9Grade,N_Diploma
0,06085508704,5.3,4.1
1,06085507904,1.4,0.4
2,06085508504,4.5,4.8
3,06085508505,0.5,1.5
4,06085509403,4.7,4.5
...,...,...,...
8052,06037104108,22.8,16.4
8053,06037104203,29.6,15.0
8054,06037104204,21.0,18.0
8055,06037104403,26.6,9.8


In [47]:
# Drop the intermediate parsing object and the raw HTTP response so their memory
# can be reclaimed; pop_df already holds everything needed from them.
del content
del response

In [48]:
# Load the shapefile and reduce it to one row per GEOID10:
#   - a column named 'GEOID', if present, is renamed to 'GEOID10' so the key
#     matches the one built for pop_df
#   - only the join key and the geometry are kept
#   - repeated GEOID10 values are dropped, keeping the first occurrence
shp_df = (
    gpd.read_file(CENSUS_BLK_SHP)
    .rename(columns={'GEOID': 'GEOID10'})
    [['GEOID10', 'geometry']]
    .drop_duplicates('GEOID10')
)
shp_df

Unnamed: 0,GEOID10,geometry
0,06037650004,"POLYGON ((-118.33514 33.88002, -118.33320 33.8..."
1,06037650401,"POLYGON ((-118.35360 33.85025, -118.35359 33.8..."
2,06037650901,"POLYGON ((-118.32875 33.84683, -118.32868 33.8..."
3,06037500403,"POLYGON ((-118.07272 33.99598, -118.07210 33.9..."
4,06037212702,"POLYGON ((-118.33292 34.05894, -118.33211 34.0..."
...,...,...
8052,06037910001,"POLYGON ((-117.97775 34.55814, -117.97767 34.5..."
8053,06037920011,"POLYGON ((-118.54254 34.48617, -118.54247 34.4..."
8054,06037920012,"POLYGON ((-118.45431 34.59261, -118.45415 34.5..."
8055,06037920030,"POLYGON ((-118.50267 34.41850, -118.50229 34.4..."


In [49]:
# Attach the education columns to the tract geometries, matching rows on GEOID10.
# This is an inner join: a tract that is missing from either frame is left out.
df = shp_df.merge(pop_df, how='inner', on='GEOID10')
df

Unnamed: 0,GEOID10,geometry,N_9Grade,N_Diploma
0,06037650004,"POLYGON ((-118.33514 33.88002, -118.33320 33.8...",3.4,4.5
1,06037650401,"POLYGON ((-118.35360 33.85025, -118.35359 33.8...",1.9,2.3
2,06037650901,"POLYGON ((-118.32875 33.84683, -118.32868 33.8...",2.7,2.0
3,06037500403,"POLYGON ((-118.07272 33.99598, -118.07210 33.9...",10.1,9.7
4,06037212702,"POLYGON ((-118.33292 34.05894, -118.33211 34.0...",3.9,4.1
...,...,...,...,...
8051,06037910001,"POLYGON ((-117.97775 34.55814, -117.97767 34.5...",20.9,15.7
8052,06037920011,"POLYGON ((-118.54254 34.48617, -118.54247 34.4...",15.2,5.1
8053,06037920012,"POLYGON ((-118.45431 34.59261, -118.45415 34.5...",3.8,4.2
8054,06037920030,"POLYGON ((-118.50267 34.41850, -118.50229 34.4...",4.8,6.0


In [50]:
# Write the joined geometries and education columns to disk as a shapefile
output_path = 'Education.shp'
df.to_file(output_path)