# Get Vacancy Data

This file uses the Census API to get data on vacant housing units nationwide at the block group level from the 2020 Decennial Census.

In [15]:
import json
from pathlib import Path

import pandas as pd
import requests

from config import CENSUS_KEY

In [23]:
# Get state & territory FIPS codes from the 2020 Decennial Census dec/pl endpoint.
# Passing the query as `params` lets requests handle URL encoding instead of
# hand-concatenating the query string (and the API key) into the URL.
f = requests.get(
    "https://api.census.gov/data/2020/dec/pl",
    params={"get": "NAME", "for": "state:*", "key": CENSUS_KEY},
    timeout=60,  # requests has no default timeout; avoid hanging on a stalled connection
)
# Stop on an HTTP error status rather than trying to parse an error body as JSON.
f.raise_for_status()
# The payload is a list of rows; the first row holds the column names.
states = f.json()
states_df = pd.DataFrame(states[1:], columns=states[0])
states_df.head()

Unnamed: 0,NAME,state
0,Alabama,1
1,Alaska,2
2,Arizona,4
3,Arkansas,5
4,California,6


In [28]:
# Get county FIPS codes from state
def get_county_fips(state_fips):
    """Return the counties of one state as listed by the 2020 dec/pl API.

    Args:
        state_fips: FIPS code of the state to query, e.g. "01".

    Returns:
        A DataFrame with one row per county. Column names come from the
        header row of the API response (NAME, state, county for this query).

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    response = requests.get(
        "https://api.census.gov/data/2020/dec/pl",
        params={
            "get": "NAME",
            "for": "county:*",
            "in": f"state:{state_fips}",
            "key": CENSUS_KEY,
        },
        timeout=60,  # requests has no default timeout; avoid hanging forever
    )
    # Stop on an HTTP error status rather than parsing an error body as JSON.
    response.raise_for_status()
    # First row of the payload is the header; the remaining rows are the data.
    counties = response.json()
    return pd.DataFrame(counties[1:], columns=counties[0])

# Example call: display the county table for state FIPS "01".
get_county_fips("01")

Unnamed: 0,NAME,state,county
0,"Covington County, Alabama",01,039
1,"Dale County, Alabama",01,045
2,"Henry County, Alabama",01,067
3,"Elmore County, Alabama",01,051
4,"Etowah County, Alabama",01,055
...,...,...,...
62,"Sumter County, Alabama",01,119
63,"Tallapoosa County, Alabama",01,123
64,"Tuscaloosa County, Alabama",01,125
65,"Washington County, Alabama",01,129


In [27]:
# Variable key: https://api.census.gov/data/2020/dec/pl/variables.html
# Each entry is a (Census variable code, readable name) pair.
variables = [
    ("H1_001N", "total_units"),
    ("H1_002N", "occupied_units"),
    ("H1_003N", "vacant_units"),
]

In [37]:
# Download the requested variables for every block group, one request per
# county, and stack the results into a single DataFrame named `bgs`.

# The list of variable codes is the same for every request, so build it once.
variable_codes = ",".join(code for code, _ in variables)

# `rows` ends up holding the header row followed by the data rows of every county.
rows = None

# A Session reuses the underlying connection across the per-county requests.
with requests.Session() as session:
    for _, state in states_df.iterrows():
        state_code = state["state"]

        counties_df = get_county_fips(state_code)
        for _, county in counties_df.iterrows():
            county_code = county["county"]

            response = session.get(
                "https://api.census.gov/data/2020/dec/pl",
                params={
                    "key": CENSUS_KEY,
                    "get": variable_codes,
                    "for": "block group:*",
                    "in": f"state:{state_code} county:{county_code}",
                },
                timeout=60,  # requests has no default timeout; avoid hanging forever
            )
            # Stop on an HTTP error status rather than parsing an error body as JSON.
            response.raise_for_status()
            block_groups = response.json()

            if rows is None:
                # First response: keep its header row along with its data rows.
                rows = block_groups
            else:
                # Later responses: skip the repeated header row.
                rows += block_groups[1:]

if rows is None:
    # No state/county produced a request, so there is no header to build from.
    raise RuntimeError("No block-group data was returned by the Census API")

bgs = pd.DataFrame(rows[1:], columns=rows[0])
print(bgs.shape)
bgs.head()

(242335, 7)


Unnamed: 0,H1_001N,H1_002N,H1_003N,state,county,tract,block group
0,473,338,135,1,39,961600,2
1,378,325,53,1,39,961600,3
2,803,627,176,1,39,961700,1
3,338,293,45,1,39,961700,2
4,773,635,138,1,39,961700,3


In [38]:
# Write the block-group table to data/bgs.csv.
output_path = Path("data/bgs.csv")
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
# The default index=True is kept, so the row index is written as the first,
# unnamed column of the CSV.
bgs.to_csv(output_path)