In [1]:
import requests
import json
import pandas as pd
import geopandas as gpd
import shapely

## CENSUS Block Group

In [2]:
# Load the 2021 cartographic-boundary block-group shapefile.
# NOTE(review): the "47" in the file name is presumably the state FIPS code
# (the Census queries further down use state:47) — confirm.
fips = gpd.read_file('../data/cb_2021_47_bg_500k/cb_2021_47_bg_500k.shp')

In [3]:
# Restrict the block groups to county FIPS '037' and keep only the
# identifier and geometry columns.
in_county = fips['COUNTYFP'] == '037'
davidson_fips = fips.loc[in_county][['GEOID', 'geometry']]

In [None]:
# Save the county block groups (GEOID + geometry) as CSV; the frame's index
# is written as the first, unnamed column.
davidson_fips.to_csv('../data/davidson_fips.csv')

## Metro Parks Boundaries API
Metro park API offered by https://data.nashville.gov/

In [4]:
# Query-string pieces for the parks dataset
select = 'select=the_geom,name,common_nam,year_estab,status,acres'
limit = '&$limit=50000'

# endpoint: the literal '$' in front of `select` yields the '$select' parameter
base_url = 'https://data.nashville.gov/resource/544k-ba3u.geojson'
query_url = base_url + '?$' + select + limit

In [5]:
# Create a GeoDataFrame by reading the GeoJSON returned by query_url
# (this performs a network request).
parks = gpd.read_file(query_url)

In [6]:
# Clean up dataframe: keep the requested attribute columns plus the geometry,
# in a fixed order.
parks = parks[['name', 'common_nam', 'year_estab', 'status', 'acres', 'geometry']]

In [None]:
# Save the parks table as CSV; the frame's index is written as the first,
# unnamed column.
parks.to_csv('../data/parks.csv')

## Spatial Join BG + Parks Boundaries

In [7]:
davidson_fips.crs

<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - NAD83
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [14]:
# Reproject the parks layer into the block groups' CRS (EPSG:4269, as shown
# by davidson_fips.crs) so both layers share coordinates for the spatial join.
# to_crs() transforms the coordinates; set_crs(..., allow_override=True) only
# relabels them and leaves the geometries in the CRS they were read in
# (presumably EPSG:4326, since the source is GeoJSON — confirm with parks.crs).
# Re-running this cell is harmless: reprojecting to the same CRS is a no-op.
parks = parks.to_crs(davidson_fips.crs)

In [15]:
# Attach to every block group the parks whose geometry intersects it.
# how='left' keeps block groups that match no park (their park columns are NaN).
# `predicate` replaces the `op` keyword, which GeoPandas deprecated in 0.10
# and removed in later releases.
park_cols = parks[['common_nam', 'year_estab', 'geometry']]
davidson_fips_park = (
    gpd.sjoin(davidson_fips, park_cols, how = 'left', predicate = 'intersects')
    .sort_values(['GEOID', 'year_estab'])
    .drop(columns = 'geometry')
    # index_right holds the row label of the matched park in `parks`
    .rename(columns = {'index_right' : 'park_no'})
)

In [16]:
# Save the block-group/park join result as CSV.
# NOTE: the same path is written again further down, after park_no is cleaned.
davidson_fips_park.to_csv('../data/davidson_fips_park.csv')

In [55]:
# Cast park_no to str so the .str accessor can be used on it in the next step.
davidson_fips_park['park_no'] = davidson_fips_park['park_no'].astype(str)

In [59]:
# Keep only the first run of digits of each park_no string (e.g. '12.0' -> '12');
# strings without digits, such as 'nan', become NaN.
# The raw string avoids the invalid '\d' escape sequence, and dropping the
# trailing '.' from the pattern stops a value without a decimal point from
# losing its last digit ('12' previously matched as '1').
# expand=False returns a Series, which is what a single-column assignment expects.
davidson_fips_park['park_no'] = davidson_fips_park['park_no'].str.extract(r'(\d+)', expand = False)

In [60]:
# Overwrite the earlier export now that park_no has been cleaned.
davidson_fips_park.to_csv('../data/davidson_fips_park.csv')

## Decennial

In [61]:
# Load the downloaded Decennial table export (H009, per the file name).
# NOTE(review): the file name says DECENNIALSF32000 while the variable is
# called dec_2010 — confirm which census year this table belongs to.
dec_2010 = pd.read_csv('../data/DECENNIALSF32000.H009-2023-04-17T195749.csv')

In [69]:
# Reshape the raw table: transpose it, promote the first row to the header,
# drop that row from the data, and shorten the label column's name.
# NOTE: this cell rebuilds dec_2010 from itself, so it is not idempotent —
# running it a second time transposes the already reshaped table again.
# Reload the CSV before re-running.
dec_2010 = dec_2010.T.reset_index()
dec_2010.columns = dec_2010.iloc[0]
dec_2010 = dec_2010[1:]
dec_2010 = dec_2010.rename(columns = {'Label (Grouping)' : 'label'})

In [75]:
# Pull the number that follows 'Group ' and precedes a comma out of each label.
# The raw string avoids the invalid '\d' escape sequence of a plain literal.
dec_2010['label'].str.extract(r'Group (\d+),')

Unnamed: 0,0
1,
2,
3,
4,
5,
6,
7,
8,


In [71]:
dec_2010

Unnamed: 0,label,"Block Group 1, Census Tract 101.01, Davidson County, Tennessee","Block Group 2, Census Tract 101.01, Davidson County, Tennessee","Block Group 3, Census Tract 101.01, Davidson County, Tennessee","Block Group 4, Census Tract 101.01, Davidson County, Tennessee","Block Group 5, Census Tract 101.01, Davidson County, Tennessee","Block Group 1, Census Tract 101.02, Davidson County, Tennessee","Block Group 2, Census Tract 101.02, Davidson County, Tennessee","Block Group 3, Census Tract 101.02, Davidson County, Tennessee","Block Group 4, Census Tract 101.02, Davidson County, Tennessee",...,"Block Group 1, Census Tract 191.09, Davidson County, Tennessee","Block Group 2, Census Tract 191.09, Davidson County, Tennessee","Block Group 3, Census Tract 191.09, Davidson County, Tennessee","Block Group 1, Census Tract 191.10, Davidson County, Tennessee","Block Group 2, Census Tract 191.10, Davidson County, Tennessee","Block Group 3, Census Tract 191.10, Davidson County, Tennessee","Block Group 1, Census Tract 191.11, Davidson County, Tennessee","Block Group 1, Census Tract 191.12, Davidson County, Tennessee","Block Group 1, Census Tract 191.13, Davidson County, Tennessee","Block Group 1, Census Tract 191.14, Davidson County, Tennessee"
1,Total:,291,526,557,346,456,306,256,265,328,...,559,441,621,366,275,608,1867,890,2528,756
2,Householder who is White alone,291,465,557,334,426,27,0,40,207,...,354,379,425,302,205,473,1330,575,1855,699
3,Householder who is Black or African Americ...,0,61,0,0,12,270,244,218,101,...,152,42,167,48,34,95,412,216,572,28
4,Householder who is American Indian and Ala...,0,0,0,0,0,0,0,7,0,...,0,0,0,0,9,0,9,0,9,0
5,Householder who is Asian alone,0,0,0,0,10,9,12,0,20,...,0,20,8,8,27,8,45,29,33,0
6,Householder who is Native Hawaiian and Oth...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Householder who is Some other race alone,0,0,0,0,0,0,0,0,0,...,26,0,4,0,0,21,11,59,41,9
8,Householder who is Two or more races,0,0,0,12,8,0,0,0,0,...,27,0,17,8,0,11,60,11,18,20


## American Community Survey 5-Year API

In [125]:
# API key
# Read credentials from a JSON file in the notebook's working directory;
# a later cell reads the Census key from credentials['census'].
with open('api_key.json') as fi:
    credentials = json.load(fi)

IndentationError: expected an indented block (3494986504.py, line 3)

In [None]:
DP4_C57

In [135]:
# 2000 Decennial SF3 profile: variable DP4_C57 for every block group in
# state 47, county 037.
# The API key is taken from `credentials` (loaded from api_key.json) instead
# of being hardcoded in the URL, so the secret is not stored in the notebook.
query_url = (
    "https://api.census.gov/data/2000/dec/sf3profile"
    "?get=DP4_C57&for=block%20group:*&in=state:47&in=county:037&in=tract:*"
    "&key=" + credentials['census']
)

# get response and create df
response = requests.get(query_url)
# Surface HTTP errors directly instead of failing later with an opaque JSONDecodeError
response.raise_for_status()
temp_df = pd.DataFrame(response.json())

In [129]:
temp_df

Unnamed: 0,0,1,2,3,4
0,DP4_C57,state,county,tract,block group
1,20,47,037,010101,1
2,66,47,037,010101,2
3,68,47,037,010101,3
4,24,47,037,010101,4
...,...,...,...,...,...
463,73,47,037,019110,3
464,842,47,037,019111,1
465,177,47,037,019112,1
466,1145,47,037,019113,1


In [132]:
## 2010
# NOTE(review): the heading says 2010 but the URL queries the 2000 'dec/pl'
# dataset, and this request previously ended in a JSONDecodeError — confirm
# that H039001 is actually published in this dataset/year.
# The API key is taken from `credentials` (loaded from api_key.json) instead
# of being hardcoded in the URL.
query_url = (
    "https://api.census.gov/data/2000/dec/pl?get=H039001&for=state:*"
    "&key=" + credentials['census']
)

# get response and create df
response = requests.get(query_url)
# Surface HTTP errors directly instead of failing later with an opaque JSONDecodeError
response.raise_for_status()
temp_df = pd.DataFrame(response.json())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [32]:
# Variable code -> readable name for the race / Hispanic-origin variables
race_dict = {'B02001_001E' : 'Total',
             'B02001_002E' : 'White',
             'B02001_003E' : 'Black',
             'B02001_004E' : 'American Indian',
             'B02001_005E' : 'Asian',
             'B02001_006E' : 'Native Hawaiian',
             'B02001_007E' : 'Some other race alone',
             'B02001_008E' : 'Two or more races',
             'B03001_003E' : 'Hispanic or Latino'}

# Comma-separated code list for the API's `get=` parameter, derived from the
# dictionary so the two cannot drift apart (dicts preserve insertion order).
race_var = ','.join(race_dict)

# Codes of table B25038 (001E through 015E)
moved_var = 'B25038_001E,B25038_002E,B25038_003E,B25038_004E,B25038_005E,B25038_006E,B25038_007E,B25038_008E,B25038_009E,B25038_010E,B25038_011E,B25038_012E,B25038_013E,B25038_014E,B25038_015E'

# TODO: map the B25038 codes to readable names. The assignment was left
# unfinished (`moved_dict = `), which made the whole cell a SyntaxError;
# an empty dict keeps the cell runnable until the labels are filled in.
moved_dict = {}

In [78]:
# ACS 5-years
census_api_key = '&key=' + credentials['census']
host = 'https://api.census.gov/data'
dataset = '/acs/acs5'
location = '&for=block%20group:*&in=state:47&in=county:037&in=tract:*'

# Variables
variables = 'B02001_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B02001_009E,B02001_010E,B03001_003E'

# Inclusive span of ACS 5-year releases to download. The loop used to cover
# range(2012, 2013+1) although it was documented as "2013 to 2020"; the range
# now matches that stated span.
first_year, last_year = 2013, 2020

# Collect one frame per year and concatenate once at the end, rather than
# growing res_df inside the loop.
yearly_frames = []

for y in range(first_year, last_year + 1):

    # query
    query_url = f"{host}/{y}{dataset}?get={variables}{location}{census_api_key}"

    # get response; fail with the HTTP status rather than an opaque JSONDecodeError
    response = requests.get(query_url)
    response.raise_for_status()
    rows = response.json()

    # first row of the payload is the header, the rest are data rows
    temp_df = pd.DataFrame(rows[1:], columns = rows[0])

    # clean-up: tag the year and build geoid as state + county + tract + block group
    temp_df = temp_df\
        .assign(year = y,
                geoid = temp_df['state'].str.cat(temp_df[['county', 'tract', 'block group']]))\
        .drop(columns = ['state', 'county', 'tract', 'block group'])

    yearly_frames.append(temp_df)

    print(y)

# ignore_index gives the combined table a unique row index instead of
# repeating each year's own row numbers
res_df = pd.concat(yearly_frames, ignore_index = True)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
res_df.loc[res_df['geoid'].str.contains('10103')]

In [None]:
# Save the combined ACS table as CSV.
# NOTE(review): res_df is built from the B02001/B03001 variables requested in
# the ACS loop, yet the file is called year_moved.csv — confirm the file name.
res_df.to_csv('../data/year_moved.csv')

## Variables list

In [136]:
# Download the variable catalogue of the 2000 Decennial SF3 profile dataset
endpoint = 'https://api.census.gov/data/2000/dec/sf3profile/variables.json'
response = requests.get(endpoint)

# Turn the payload's 'variables' mapping into a table with one row per
# variable; reset_index() moves the variable code into the 'index' column
dec_catalogue = response.json()['variables']
variables_dec_list = pd.DataFrame(dec_catalogue).T.reset_index()

In [115]:
variables_dec_list.loc[variables_dec_list['index'].str.contains('H039')]

Unnamed: 0,index,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required,values
214,PCT068H039,Total!!Female!!21 to 64 years!!With a disabili...,SEX BY AGE BY DISABILITY STATUS BY EMPLOYMENT ...,int,PCT068H,0,,,,,
3946,P160H039,Total!!Income in 1999 at or above poverty leve...,POVERTY STATUS IN 1999 OF FAMILIES BY FAMILY T...,int,P160H,0,,,,,
4552,PCT075H039,Total!!Income in 1999 at or above poverty leve...,POVERTY STATUS IN 1999 BY SEX BY AGE (HISPANIC...,int,PCT075H,0,,,,,
6342,H039001,Median year householder moved into unit!!Total,MEDIAN YEAR HOUSEHOLDER MOVED INTO UNIT BY TEN...,int,H039,0,,,H039001A,,
6343,H039002,Median year householder moved into unit!!Owner...,MEDIAN YEAR HOUSEHOLDER MOVED INTO UNIT BY TEN...,int,H039,0,,,H039002A,,
6344,H039003,Median year householder moved into unit!!Rente...,MEDIAN YEAR HOUSEHOLDER MOVED INTO UNIT BY TEN...,int,H039,0,,,H039003A,,
8018,PCT071H039,Total!!Female!!Worked in 1999!!Usually worked ...,SEX BY WORK STATUS IN 1999 BY USUAL HOURS WORK...,int,PCT071H,0,,,,,
8653,P145H039,Total!!Female!!45 to 49 years,SEX BY AGE (HISPANIC OR LATINO) [49],int,P145H,0,,,,,
11044,PCT072H039,"Total!!Householder 35 to 44 years!!$15,000 to ...",AGE OF HOUSEHOLDER BY HOUSEHOLD INCOME IN 1999...,int,PCT072H,0,,,,,
14221,PCT073H039,"Total!!Male!!Other!!With earnings!!$25,000 to ...",SEX BY WORK EXPERIENCE IN 1999 BY EARNINGS IN ...,int,PCT073H,0,,,,,


In [96]:
variables_dec_list.loc[variables_dec_list['label'].str.contains('Move')]

Unnamed: 0,index,label,concept,predicateType,group,limit,predicateOnly,hasGeoCollectionSupport,attributes,required


In [97]:
# get variables.json file (variable catalogue of the 2021 ACS 5-year dataset)
endpoint = 'https://api.census.gov/data/2021/acs/acs5/variables.json'
response = requests.get(endpoint)

# convert json to df: one row per variable, variable code in the 'index' column
variables_acs5_list = response.json()
variables_acs5_list = pd.DataFrame(variables_acs5_list['variables']).transpose().reset_index()

# The cells below refer to this table as `variables_list`, a name that is not
# assigned anywhere else in the notebook, so they fail with NameError on a
# fresh kernel. Keep both names pointing at the same frame.
# NOTE(review): confirm `variables_list` is meant to be this ACS catalogue.
variables_list = variables_acs5_list

In [48]:
variables_list.loc[variables_list['index'].isin(['B02001_001E','B02001_002E','B02001_003E','B02001_004E','B02001_005E','B02001_006E','B02001_007E','B02001_008E','B02001_009E','B02001_010E','B03001_003E'])]

Unnamed: 0,group,index,label,concept,predicateType,limit,attributes
26644,B03,B03001_003E,Estimate!!Total:!!Hispanic or Latino:,HISPANIC OR LATINO ORIGIN BY SPECIFIC ORIGIN,int,0,"B03001_003EA,B03001_003M,B03001_003MA"
26807,B02,B02001_009E,Estimate!!Total:!!Two or more races:!!Two race...,RACE,int,0,"B02001_009EA,B02001_009M,B02001_009MA"
26811,B02,B02001_007E,Estimate!!Total:!!Some other race alone,RACE,int,0,"B02001_007EA,B02001_007M,B02001_007MA"
26812,B02,B02001_008E,Estimate!!Total:!!Two or more races:,RACE,int,0,"B02001_008EA,B02001_008M,B02001_008MA"
26862,B02,B02001_010E,Estimate!!Total:!!Two or more races:!!Two race...,RACE,int,0,"B02001_010EA,B02001_010M,B02001_010MA"
26962,B02,B02001_002E,Estimate!!Total:!!White alone,RACE,int,0,"B02001_002EA,B02001_002M,B02001_002MA"
26965,B02,B02001_001E,Estimate!!Total:,RACE,int,0,"B02001_001EA,B02001_001M,B02001_001MA"
26969,B02,B02001_006E,Estimate!!Total:!!Native Hawaiian and Other Pa...,RACE,int,0,"B02001_006EA,B02001_006M,B02001_006MA"
26971,B02,B02001_005E,Estimate!!Total:!!Asian alone,RACE,int,0,"B02001_005EA,B02001_005M,B02001_005MA"
26973,B02,B02001_004E,Estimate!!Total:!!American Indian and Alaska N...,RACE,int,0,"B02001_004EA,B02001_004M,B02001_004MA"


In [None]:
# First row of each group, after ordering the variables by code
variables_cat = variables_list\
    .sort_values('index')\
    .groupby('group').nth(0)

# A stray bare name `race` used to end this cell; no variable of that name is
# assigned in the cells above, so it raised NameError. Kept here as a note only.
# race

In [None]:
# Codes ('index') and labels of the variables whose group is 'B03'.
# NOTE(review): the name says "movedin", but the filter selects group 'B03'
# (the Hispanic-or-Latino variable shown above belongs to it) — confirm the
# intended group.
variables_list_movedin = variables_list\
    .loc[(variables_list['group'] == 'B03')]\
    [['index','label']]

In [None]:
# Write the selected codes/labels to a CSV in the working directory.
# NOTE(review): 'qwae.csv' looks like a throwaway file name — confirm it is needed.
variables_list_movedin.to_csv('qwae.csv')

In [None]:
variables_list.loc[variables_list['label'].str.contains('Latino')]

In [None]:
# Write the selected codes/labels, ordered by variable code, to a CSV in the
# working directory.
# NOTE(review): 'adff.csv' looks like a throwaway file name — confirm it is needed.
variables_list_movedin.sort_values('index').to_csv('adff.csv')

In [None]:
# Write the rows whose variable code contains 'B25038' to a CSV in the
# working directory.
# NOTE(review): variables_list_movedin was built from group 'B03' only, so
# this filter presumably matches nothing — confirm the source frame.
variables_list_movedin.loc[variables_list_movedin['index'].str.contains('B25038')].to_csv('asd.csv')

In [None]:
Housing units with a mortgage
Housing units without a mortgage

B25027_001E
B25027_002E
B25027_003E
B25027_004E
B25027_005E
B25027_006E
B25027_007E
B25027_008E
B25027_009E
B25027_010E
B25027_011E
B25027_012E
B25027_013E
B25027_014E
B25027_015E
B25027_016E
B25027_017E


In [None]:
Estimate!!Total population in occupied housing units:!!Owner occupied:
Estimate!!Total population in occupied housing units:!!Renter occupied:

B25026_001E
B25026_002E
B25026_003E
B25026_004E
B25026_005E
B25026_006E
B25026_007E
B25026_008E
B25026_009E
B25026_010E
B25026_011E
B25026_012E
B25026_013E
B25026_014E
B25026_015E


In [None]:
B25032_002E	Estimate!!Total:!!Owner-occupied housing units:
B25032_003E	Estimate!!Total:!!Owner-occupied housing units:!!1, detached
B25032_004E	Estimate!!Total:!!Owner-occupied housing units:!!1, attached
B25032_005E	Estimate!!Total:!!Owner-occupied housing units:!!2
B25032_006E	Estimate!!Total:!!Owner-occupied housing units:!!3 or 4
B25032_007E	Estimate!!Total:!!Owner-occupied housing units:!!5 to 9
B25032_008E	Estimate!!Total:!!Owner-occupied housing units:!!10 to 19
B25032_009E	Estimate!!Total:!!Owner-occupied housing units:!!20 to 49
B25032_010E	Estimate!!Total:!!Owner-occupied housing units:!!50 or more
B25032_011E	Estimate!!Total:!!Owner-occupied housing units:!!Mobile home
B25032_012E	Estimate!!Total:!!Owner-occupied housing units:!!Boat, RV, van, etc.
B25032_013E	Estimate!!Total:!!Renter-occupied housing units:
B25032_014E	Estimate!!Total:!!Renter-occupied housing units:!!1, detached
B25032_015E	Estimate!!Total:!!Renter-occupied housing units:!!1, attached
B25032_016E	Estimate!!Total:!!Renter-occupied housing units:!!2
B25032_017E	Estimate!!Total:!!Renter-occupied housing units:!!3 or 4
B25032_018E	Estimate!!Total:!!Renter-occupied housing units:!!5 to 9
B25032_019E	Estimate!!Total:!!Renter-occupied housing units:!!10 to 19
B25032_020E	Estimate!!Total:!!Renter-occupied housing units:!!20 to 49
B25032_021E	Estimate!!Total:!!Renter-occupied housing units:!!50 or more
B25032_022E	Estimate!!Total:!!Renter-occupied housing units:!!Mobile home
B25032_023E	Estimate!!Total:!!Renter-occupied housing units:!!Boat, RV, van, etc.

        
B25032_002E
B25032_003E
B25032_004E
B25032_005E
B25032_006E
B25032_007E
B25032_008E
B25032_009E
B25032_010E
B25032_011E
B25032_012E
B25032_013E
B25032_014E
B25032_015E
B25032_016E
B25032_017E
B25032_018E
B25032_019E
B25032_020E
B25032_021E
B25032_022E
B25032_023E


In [None]:
GEO_ID
P1_001N
P1_002N
P1_003N
P1_004N
P1_005N
P1_006N
P1_007N
P1_008N
P2_002N