In [1]:
import pandas as pd
import geopandas as gpd
import shapely
import requests
import json
import math

## CENSUS Block Group Cartographic Boundary Files

In [2]:
# Load the Census block-group cartographic boundary shapefile (cb_2021_47 file)
fips_bg = gpd.read_file(
    '../data/cb_2021_us_all_500k/cb_2021_47_bg_500k/cb_2021_47_bg_500k.shp'
)

In [3]:
# Keep only the Davidson County (COUNTYFP 037) block groups, with id and shape
is_davidson_bg = fips_bg['COUNTYFP'] == '037'
davidson_fips_bg = fips_bg.loc[is_davidson_bg, ['GEOID', 'geometry']]

In [5]:
# Write the Davidson block-group table to disk as CSV
davidson_fips_bg.to_csv(path_or_buf = '../data/davidson_fips_bg.csv')

## CENSUS Tract Cartographic Boundary Files

In [6]:
# Load the Census tract cartographic boundary shapefile (cb_2021_us file)
fips_tract = gpd.read_file(
    '../data/cb_2021_us_all_500k/cb_2021_us_tract_500k/cb_2021_us_tract_500k.shp'
)

In [7]:
# Extract TN, Davidson county.
# The tract shapefile read into `fips_tract` is the nationwide (`_us_`) file and
# county codes are only unique within a state, so the state code (47) has to be
# matched together with the county code (037). Filtering on COUNTYFP alone
# would also keep county 037 of every other state.
is_davidson_tract = (fips_tract['STATEFP'] == '47') & (fips_tract['COUNTYFP'] == '037')
davidson_fips_tract = fips_tract.loc[is_davidson_tract, ['GEOID', 'geometry']]

In [8]:
# Write the Davidson tract table to disk as CSV
davidson_fips_tract.to_csv(path_or_buf = '../data/davidson_fips_tract.csv')

## Metro Parks Boundaries API
Metro park API offered by https://data.nashville.gov/

In [9]:
# API query parameters: the columns to return and the maximum number of rows
select = 'select=the_geom,name,common_nam,year_estab,status,acres'
limit = '&$limit=50000'

# Endpoint: the literal '$' placed before `select` yields the '$select=' keyword
query_url = 'https://data.nashville.gov/resource/544k-ba3u.geojson?$' + select + limit

In [10]:
# Download the parks GeoJSON from the query URL straight into a GeoDataFrame
parks = gpd.read_file(filename = query_url)

In [11]:
# Keep only the park attributes of interest together with the geometry
park_columns = ['name', 'common_nam', 'year_estab', 'status', 'acres', 'geometry']
parks = parks[park_columns]

In [12]:
# Write the parks table to disk as CSV
parks.to_csv(path_or_buf = '../data/parks.csv')

## Spatial Join FIPS + Parks Boundaries

In [13]:
# Print the coordinate reference system of both Census boundary layers
for boundary_layer in (davidson_fips_bg, davidson_fips_tract):
    print(boundary_layer.crs)

epsg:4269
epsg:4269


In [14]:
# Reproject the parks layer into the CRS of the Census boundaries (EPSG:4269).
# `set_crs(..., allow_override = True)` only replaces the CRS label and leaves
# the coordinates untouched, so it would mislabel the parks' coordinates rather
# than convert them. `to_crs` performs the actual transformation and is a no-op
# when the layer is already in EPSG:4269, so re-running this cell is safe.
if parks.crs is None:
    # NOTE(review): assumes the GeoJSON feed uses EPSG:4326 (the GeoJSON default) — confirm
    parks = parks.set_crs(epsg = 4326)
parks = parks.to_crs(epsg = 4269)

In [15]:
def _attach_park_numbers(fips_gdf, parks_gdf):
    """Left-join Census areas to the parks they intersect.

    Parameters
    ----------
    fips_gdf : GeoDataFrame with 'GEOID' and 'geometry' columns.
    parks_gdf : GeoDataFrame of park polygons; only its geometry and row index
        are used.

    Returns
    -------
    DataFrame sorted by 'GEOID' with one row per (area, intersecting park)
    pair. 'park_no' holds the row index of the park in `parks_gdf` as a digit
    string, or NaN when the area intersects no park.
    """
    joined = (
        gpd.sjoin(
            fips_gdf, parks_gdf[['geometry']],
            how = 'left',
            # `predicate` replaces the deprecated `op` keyword (geopandas >= 0.10)
            predicate = 'intersects')
        .sort_values('GEOID')
        .drop(columns = ['geometry'])
        .rename(columns = {'index_right' : 'park_no'})
    )

    # clean-up: unmatched rows make the park index a float column ('12.0', 'nan').
    # Keep only the leading integer part. The previous pattern '(\d+).' required
    # one extra character after the digits, which truncated plain integer
    # indices ('12' -> '1'), and it was not a raw string.
    joined['park_no'] = joined['park_no']\
        .astype(str).str.extract(r'(\d+)', expand = False)

    return joined


# Spatial join between block group and parks gdf
davidson_fips_bg_park = _attach_park_numbers(davidson_fips_bg, parks)

# Spatial join between tract and parks gdf
davidson_fips_tract_park = _attach_park_numbers(davidson_fips_tract, parks)

In [16]:
# Write both park-join tables to disk as CSV
for joined_frame, csv_path in (
    (davidson_fips_bg_park, '../data/davidson_fips_bg_park.csv'),
    (davidson_fips_tract_park, '../data/davidson_fips_tract_park.csv'),
):
    joined_frame.to_csv(csv_path)

## American Community Survey 5-Year API: Block Group(2013-2021)

In [17]:
# Read the API credentials from the local api_key.json file
with open('api_key.json') as key_file:
    credentials = json.load(key_file)

In [18]:
# Load the table of ACS variables to download (read below via its 'index' and 'label' columns)
acs5_bg_variables_list = pd.read_csv(filepath_or_buffer = '../data/acs5_variables.csv')

In [19]:
# CENSUS API
census_api_key = '&key=' + credentials['census']
host = 'https://api.census.gov/data'
dataset = '/acs/acs5'

# Location: every block group of every tract in state 47, county 037
location = '&for=block%20group:*&in=state:47&in=county:037&in=tract:*'

# Geography columns returned with every row; joined in this order they form the geoid
geo_columns = ['state', 'county', 'tract', 'block group']

# Variables sent per request. 49 is the request size this loop already used.
chunk_size = 49

# Split the variable codes into consecutive, non-overlapping chunks.
# The previous slicing (n*50 : (n+1)*50-1) advanced by 50 but took only 49
# codes, so every 50th variable was never requested, and it issued an empty
# request whenever the number of variables was an exact multiple of 50.
variable_codes = acs5_bg_variables_list['index'].tolist()
variable_chunks = [
    variable_codes[start : start + chunk_size]
    for start in range(0, len(variable_codes), chunk_size)
]

# year range
years = range(2013, 2021+1)

# one combined frame per year, concatenated once at the end
yearly_frames = []

for y in years :
    # one frame per variable chunk, indexed by geoid
    chunk_frames = []

    for chunk in variable_chunks :
        # set variables
        variables = '?get=' + ','.join(chunk)

        # query
        query_url = f"{host}/{y}{dataset}{variables}{location}{census_api_key}"

        # get response; fail on an HTTP error instead of on an obscure JSON error
        response = requests.get(query_url, timeout = 60)
        response.raise_for_status()
        rows = response.json()

        # data clean-up: the first row of the payload is the header
        temp = pd.DataFrame(rows[1:], columns = rows[0])
        temp['geoid'] = temp['state'].str.cat(temp[['county', 'tract', 'block group']])
        temp = temp.drop(columns = geo_columns).set_index('geoid')
        chunk_frames.append(temp)

    # combine the chunks side by side, aligned on geoid rather than on row
    # position, so a single 'geoid' column results and rows cannot be mismatched
    temp_all_index = pd.concat(chunk_frames, axis = 1, join = 'outer')\
        .rename_axis('geoid')\
        .reset_index()

    # add year column
    temp_all_index['year'] = y
    yearly_frames.append(temp_all_index)

    print(y)

# concat all years
acs5_bg = pd.concat(yearly_frames, axis = 0)

2013
2014
2015
2016
2017
2018
2019
2020
2021


In [21]:
# Swap the variable codes in the column names for their labels
bg_code_to_label = acs5_bg_variables_list.set_index('index')['label']
acs5_bg = acs5_bg.rename(columns = bg_code_to_label)

In [33]:
# Keep the first occurrence of every column name and drop later repeats
bg_repeated_column = acs5_bg.columns.duplicated()
acs5_bg = acs5_bg.loc[:, ~bg_repeated_column]

In [48]:
# Inspect the geoid column (state + county + tract + block group codes)
acs5_bg['geoid']

1      470370169003
2      470370169005
3      470370188013
4      470370191053
5      470370191061
           ...     
483    470370196001
484    470370196002
485    470370196003
486    470379801001
487    470379802001
Name: geoid, Length: 4285, dtype: object

In [47]:
# Preview the geoid with its last character removed (12 -> 11 characters in the
# output shown); the result is only displayed, not stored
acs5_bg['geoid'].str.slice(stop=-1)

1      47037016900
2      47037016900
3      47037018801
4      47037019105
5      47037019106
          ...     
483    47037019600
484    47037019600
485    47037019600
486    47037980100
487    47037980200
Name: geoid, Length: 4285, dtype: object

In [38]:
# Write the block-group ACS table to disk as CSV
acs5_bg.to_csv(path_or_buf = '../data/acs5_bg.csv')

## American Community Survey 5-Year API: Tract(2009-2021)

In [24]:
# Tract variable list: the block-group list minus the codes containing 'B09' or
# 'B15' (removed as unsupported)
bg_codes = acs5_bg_variables_list['index']
is_unsupported = bg_codes.str.contains('B09') | bg_codes.str.contains('B15')
acs5_tract_variables_list = acs5_bg_variables_list.loc[~is_unsupported]

In [25]:
# CENSUS API
census_api_key = '&key=' + credentials['census']
host = 'https://api.census.gov/data'
dataset = '/acs/acs5'

# Location: every tract in state 47, county 037
location = '&for=tract:*&in=state:47&in=county:037'

# Geography columns returned with every row; joined in this order they form the geoid
geo_columns = ['state', 'county', 'tract']

# Variables sent per request. 49 is the request size this loop already used.
chunk_size = 49

# Split the variable codes into consecutive, non-overlapping chunks.
# The previous loop ran only trunc(len / 50) times, so the trailing partial
# chunk was never requested, and its slice (n*50 : n*50+49) skipped every 50th
# variable as well.
variable_codes = acs5_tract_variables_list['index'].tolist()
variable_chunks = [
    variable_codes[start : start + chunk_size]
    for start in range(0, len(variable_codes), chunk_size)
]

# year range
years = range(2009, 2021+1)

# one combined frame per year, concatenated once at the end
yearly_frames = []

for y in years :
    # one frame per variable chunk, indexed by geoid
    chunk_frames = []

    for chunk in variable_chunks :
        # set variables
        variables = '?get=' + ','.join(chunk)

        # query
        query_url = f"{host}/{y}{dataset}{variables}{location}{census_api_key}"

        # get response; fail on an HTTP error instead of on an obscure JSON error
        response = requests.get(query_url, timeout = 60)
        response.raise_for_status()
        rows = response.json()

        # data clean-up: the first row of the payload is the header
        temp = pd.DataFrame(rows[1:], columns = rows[0])
        temp['geoid'] = temp['state'].str.cat(temp[['county', 'tract']])
        temp = temp.drop(columns = geo_columns).set_index('geoid')
        chunk_frames.append(temp)

    # combine the chunks side by side, aligned on geoid rather than on row
    # position, so a single 'geoid' column results and rows cannot be mismatched
    temp_all_index = pd.concat(chunk_frames, axis = 1, join = 'outer')\
        .rename_axis('geoid')\
        .reset_index()

    # add year column
    temp_all_index['year'] = y
    yearly_frames.append(temp_all_index)

    print(y)

# concat all years
acs5_tract = pd.concat(yearly_frames, axis = 0)

2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [26]:
# Swap the variable codes in the column names for their labels
tract_code_to_label = acs5_tract_variables_list.set_index('index')['label']
acs5_tract = acs5_tract.rename(columns = tract_code_to_label)

In [39]:
# Keep the first occurrence of every column name and drop later repeats
tract_repeated_column = acs5_tract.columns.duplicated()
acs5_tract = acs5_tract.loc[:, ~tract_repeated_column]

In [40]:
# Write the tract ACS table to disk as CSV
acs5_tract.to_csv(path_or_buf = '../data/acs5_tract.csv')

## Decennial

In [None]:
# Load the decennial census table from CSV.
# NOTE(review): the file name reads DECENNIALSF32000 / H009 while the variable is
# called dec_2010 — confirm which census year this table belongs to.
dec_2010 = pd.read_csv('../data/DECENNIALSF32000.H009-2023-04-17T195749.csv')

In [None]:
# Transpose the table, promote the first transposed row to column names, drop
# that row from the data, and shorten the grouping column's name to 'label'
dec_transposed = dec_2010.T.reset_index()
dec_transposed.columns = dec_transposed.iloc[0]
dec_2010 = dec_transposed.iloc[1:].rename(columns = {'Label (Grouping)' : 'label'})

In [None]:
# Display the reshaped decennial table
dec_2010

## Variables list

ACS 5 year Variables

# get variables.json file
endpoint = 'https://api.census.gov/data/2000/dec/sf3profile/variables.json'
response = requests.get(endpoint, timeout = 60)
# Stop here on an HTTP error rather than on a confusing JSON/KeyError below
response.raise_for_status()

# convert json to df: one row per variable, with its code in the 'index' column
variables_dec_json = response.json()
variables_dec_list = pd.DataFrame(variables_dec_json['variables']).transpose().reset_index()

# Decennial variables whose code contains 'H039'
variables_dec_list.loc[variables_dec_list['index'].str.contains('H039')]

# Decennial variables whose label contains 'Move'
variables_dec_list.loc[variables_dec_list['label'].str.contains('Move')]

# get variables.json file
endpoint = 'https://api.census.gov/data/2021/acs/acs5/variables.json'
response = requests.get(endpoint, timeout = 60)
# Stop here on an HTTP error rather than on a confusing JSON/KeyError below
response.raise_for_status()

# convert json to df: one row per variable, with its code in the 'index' column
variables_acs5_json = response.json()
variables_acs5_list = pd.DataFrame(variables_acs5_json['variables']).transpose().reset_index()

# Look up selected B02001 / B03001 variables.
# `variables_list` was never defined in this notebook; the ACS variable table
# built from variables.json is `variables_acs5_list`, so that frame is used here.
codes_of_interest = [
    'B02001_001E', 'B02001_002E', 'B02001_003E', 'B02001_004E',
    'B02001_005E', 'B02001_006E', 'B02001_007E', 'B02001_008E',
    'B02001_009E', 'B02001_010E', 'B03001_003E',
]
variables_acs5_list.loc[variables_acs5_list['index'].isin(codes_of_interest)]

# 1st row of each group (after sorting by variable code)
variables_cat = variables_acs5_list\
    .sort_values('index')\
    .groupby('group').nth(0)

income 19  x
race 02 X
household type B09019 X
Educational Attainment B15003 x
occupation industry C24010 (full time over 16) x
Family Type by Presence and Age of Own Children B11003 D
Household Type by Household Size B11016 D
Housing Units B25001 X
Occupancy Status B25002 X
Tenure B25003 X
Total Population in Occupied Housing Units by Tenure by Year Householder Moved Into Unit B25026 NOT AVAILABLE
housing Value B25075 x
Lower Value Quartile (Dollars) B25076 x
Median Value (Dollars) B25077 x 
Upper Value Quartile (Dollars) B25078 x
Mortgage Status and Selected Monthly Owner Costs B25087
Median Contract Rent (Dollars) B25058
Median Selected Monthly Owner Costs as a Percentage of Household Income B25092
Family Income B19101

# NOTE(review): `Census` and `states` are not imported anywhere in the visible
# notebook (they look like the `census` and `us` packages — confirm and import
# them in the first cell). `variables` must hold the variable codes to request;
# the last visible assignment to it is a '?get=...' query string — confirm.

# one frame per year, concatenated once at the end
yearly_results = []

for y in range(2009, 2020+1) :

    c = Census(credentials['census'], year = y)
    temp = c.acs5.state_county_tract(variables, states.TN.fips, '037', Census.ALL)

    # save data to temp_df
    temp = pd.DataFrame(temp)
    temp['year'] = y

    # Build the geoid from whichever geography columns the response contains.
    # A tract-level query is not expected to return 'block group', which the
    # previous hard-coded column list required.
    geo_columns = [
        col for col in ['state', 'county', 'tract', 'block group']
        if col in temp.columns
    ]
    temp['geoid'] = temp[geo_columns].apply(''.join, axis = 1)
    temp = temp.drop(columns = geo_columns)

    yearly_results.append(temp)

    print(y)

# combine all years into `res`; the previous code accumulated into `ref`, a
# name that was never defined, while `res` stayed empty
res = pd.concat(yearly_results)