# Get Vacancy Data

This file uses the Census API to get data on vacant housing units nationwide at the block-group level from the 2020 Decennial Census.

In [1]:
from config import CENSUS_KEY
import requests
import pandas as pd
import json

In [23]:
# Get state & territory FIPS codes
states_url = "https://api.census.gov/data/2020/dec/pl?get=NAME&for=state:*&key=" + CENSUS_KEY
f = requests.get(states_url)
states = json.loads(f.text)

# The first element of the payload supplies the column names; the remaining
# elements are the data records.
state_columns = states[0]
state_records = states[1:]
states_df = pd.DataFrame(state_records, columns=state_columns)
states_df.head()

Unnamed: 0,NAME,state
0,Alabama,1
1,Alaska,2
2,Arizona,4
3,Arkansas,5
4,California,6


In [28]:
# Get county FIPS codes from state
def get_county_fips(state_fips):
    """Return the counties of one state as a DataFrame.

    Queries the 2020 Decennial Census PL endpoint for every county in the
    given state.

    Args:
        state_fips: State FIPS code as a string (e.g. "01"). It is
            concatenated directly into the request URL.

    Returns:
        A pandas DataFrame whose column names come from the first element of
        the API response and whose rows are the remaining elements.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the API does not respond within the timeout.
    """
    url = (
        "https://api.census.gov/data/2020/dec/pl?get=NAME&for=county:*&in=state:"
        + state_fips
        + "&key="
        + CENSUS_KEY
    )
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, timeout=60)
    # Surface HTTP failures directly instead of as a JSON decoding error.
    response.raise_for_status()
    counties = json.loads(response.text)
    return pd.DataFrame(counties[1:], columns=counties[0])


get_county_fips("01")

Unnamed: 0,NAME,state,county
0,"Covington County, Alabama",01,039
1,"Dale County, Alabama",01,045
2,"Henry County, Alabama",01,067
3,"Elmore County, Alabama",01,051
4,"Etowah County, Alabama",01,055
...,...,...,...
62,"Sumter County, Alabama",01,119
63,"Tallapoosa County, Alabama",01,123
64,"Tuscaloosa County, Alabama",01,125
65,"Washington County, Alabama",01,129


In [27]:
# Variable key: https://api.census.gov/data/2020/dec/pl/variables.html
# Each entry pairs a Census variable code with a readable label.
variables = list(
    {
        "H1_001N": "total_units",
        "H1_002N": "occupied_units",
        "H1_003N": "vacant_units",
    }.items()
)

In [None]:
# Download the requested variables for every block group, one county at a
# time, and combine all responses into a single DataFrame.

# The requested variable codes are the same for every county, so join them once.
variable_codes = ",".join(code for code, _ in variables)

# Accumulates the API rows; the first element is the header row taken from the
# first response, everything after it is data.
rows = None

for state_code in states_df["state"]:
    counties_df = get_county_fips(state_code)
    for county_code in counties_df["county"]:
        url = (
            "https://api.census.gov/data/2020/dec/pl?key=" + CENSUS_KEY
            + "&get=" + variable_codes
            + "&for=block%20group:*&in=state:" + state_code
            + "+county:" + county_code
        )
        # Without a timeout a stalled connection would hang the whole run.
        response = requests.get(url, timeout=60)
        # Surface HTTP failures directly instead of as a JSON decoding error.
        response.raise_for_status()
        blocks = json.loads(response.text)

        if rows is None:
            # Keep the first response whole so its header row is retained.
            rows = blocks
        else:
            # Later responses repeat the header row; append only their data.
            rows += blocks[1:]

if rows is None:
    raise RuntimeError("no block group data was downloaded")

bgs = pd.DataFrame(rows[1:], columns=rows[0])
print(bgs.shape)
bgs.head()

In [38]:
# Save the combined block-group table to disk as CSV.
bgs_csv_path = "data/bgs.csv"
bgs.to_csv(bgs_csv_path)

## Get Block Groups

In [6]:
from bs4 import BeautifulSoup

In [24]:
# Download bgs from each state/territory
base_url = "https://www2.census.gov/geo/tiger/TIGER2020/BG/"
f = requests.get(base_url, timeout=60)
# Stop early if the directory listing itself could not be fetched.
f.raise_for_status()
soup = BeautifulSoup(f.text, 'html.parser')
table = soup.find("table")

# The first three rows and the last row of the listing table are skipped
# (presumably header/parent-directory rows and a trailing footer -- confirm
# against the live page).
for row in table.find_all("tr")[3:-1]:
    # The second cell of each row holds the file name.
    name = row.find_all("td")[1].get_text()
    url = base_url + name

    print("downloading", name)
    r = requests.get(url, allow_redirects=True, timeout=120)
    # Do not save an error page to disk under a .zip name.
    r.raise_for_status()
    # The context manager guarantees the file is flushed and closed.
    with open('data/bg_shapefiles/' + name, 'wb') as out_file:
        out_file.write(r.content)

downloading tl_2020_01_bg.zip
downloading tl_2020_02_bg.zip
downloading tl_2020_04_bg.zip
downloading tl_2020_05_bg.zip
downloading tl_2020_06_bg.zip
downloading tl_2020_08_bg.zip
downloading tl_2020_09_bg.zip
downloading tl_2020_10_bg.zip
downloading tl_2020_11_bg.zip
downloading tl_2020_12_bg.zip
downloading tl_2020_13_bg.zip
downloading tl_2020_15_bg.zip
downloading tl_2020_16_bg.zip
downloading tl_2020_17_bg.zip
downloading tl_2020_18_bg.zip
downloading tl_2020_19_bg.zip
downloading tl_2020_20_bg.zip
downloading tl_2020_21_bg.zip
downloading tl_2020_22_bg.zip
downloading tl_2020_23_bg.zip
downloading tl_2020_24_bg.zip
downloading tl_2020_25_bg.zip
downloading tl_2020_26_bg.zip
downloading tl_2020_27_bg.zip
downloading tl_2020_28_bg.zip
downloading tl_2020_29_bg.zip
downloading tl_2020_30_bg.zip
downloading tl_2020_31_bg.zip
downloading tl_2020_32_bg.zip
downloading tl_2020_33_bg.zip
downloading tl_2020_34_bg.zip
downloading tl_2020_35_bg.zip
downloading tl_2020_36_bg.zip
downloadin

In [29]:
# Unzipping all files
from os import listdir
import zipfile

for f in listdir("data/bg_shapefiles/"):
    # Match on the ".zip" extension; a substring test would also select any
    # entry that merely contains "zip" somewhere in its name.
    if not f.endswith(".zip"):
        continue
    print("unzipping", f)
    with zipfile.ZipFile("data/bg_shapefiles/" + f, 'r') as zip_ref:
        # Extract each archive into a sibling directory named after it.
        zip_ref.extractall("data/bg_shapefiles/" + f.replace(".zip", "/"))

unzipping tl_2020_12_bg.zip
unzipping tl_2020_36_bg.zip
unzipping tl_2020_55_bg.zip
unzipping tl_2020_28_bg.zip
unzipping tl_2020_24_bg.zip
unzipping tl_2020_47_bg.zip
unzipping tl_2020_02_bg.zip
unzipping tl_2020_10_bg.zip
unzipping tl_2020_45_bg.zip
unzipping tl_2020_38_bg.zip
unzipping tl_2020_26_bg.zip
unzipping tl_2020_49_bg.zip
unzipping tl_2020_34_bg.zip
unzipping tl_2020_53_bg.zip
unzipping tl_2020_30_bg.zip
unzipping tl_2020_41_bg.zip
unzipping tl_2020_22_bg.zip
unzipping tl_2020_69_bg.zip
unzipping tl_2020_18_bg.zip
unzipping tl_2020_06_bg.zip
unzipping tl_2020_20_bg.zip
unzipping tl_2020_32_bg.zip
unzipping tl_2020_51_bg.zip
unzipping tl_2020_04_bg.zip
unzipping tl_2020_16_bg.zip
unzipping tl_2020_08_bg.zip
unzipping tl_2020_29_bg.zip
unzipping tl_2020_54_bg.zip
unzipping tl_2020_37_bg.zip
unzipping tl_2020_46_bg.zip
unzipping tl_2020_25_bg.zip
unzipping tl_2020_13_bg.zip
unzipping tl_2020_01_bg.zip
unzipping tl_2020_27_bg.zip
unzipping tl_2020_39_bg.zip
unzipping tl_2020_44

In [None]:
# Merge all files
import geopandas as gpd
shapefiles = []

for f in listdir("data/bg_shapefiles/"):
    # Match on the ".zip" extension; a substring test would also select any
    # entry that merely contains "zip" somewhere in its name.
    if not f.endswith(".zip"):
        continue
    # Each archive is expected to have been extracted to a directory named
    # after it, containing a .shp file with the same base name.
    path = "data/bg_shapefiles/" + f.replace(".zip", "/") + f.replace(".zip", ".shp")
    print("reading", f)
    shapefiles.append(gpd.read_file(path))

# Stack the per-file tables into one GeoDataFrame.
gdf = gpd.GeoDataFrame(pd.concat(shapefiles))

In [35]:
# Write the merged block-group geometries to disk.
merged_shapefile_path = "data/bgs_2020/bgs_2020.shp"
gdf.to_file(merged_shapefile_path)