In [1]:
import json

import requests
import pandas as pd
from pathlib import Path

import vacancyrates
from censuscodes import county_lookup

In [2]:
# Set up the project resources directory, and the vacancy rates output directory
project_resources_dir = "resources"
project_resources_path = Path(project_resources_dir)

# Set to True to force download from the Census Bureau's API
force_download = False

In [3]:
# Range of years for the project
project_years = list(range(2012, 2023))

# All counties for the project
project_counties = list(map(county_lookup.by.full_name.get, [
    # Counties for NYC boroughs
    "Bronx County, New York",
    "Kings County, New York",  # Brooklyn
    "New York County, New York",  # Manhattan
    "Queens County, New York",
    "Richmond County, New York",  # Staten Island
    
    # Counties adjacent to NYC boroughs
    "Westchester County, New York",
    "Rockland County, New York",
    "Nassau County, New York",
    "Bergen County, New Jersey",
    "Essex County, New Jersey",
    "Hudson County, New Jersey",
    "Middlesex County, New Jersey",
    "Union County, New Jersey",
    "Fairfield County, Connecticut",
]))

In [4]:

# Display year and county information for all project years and counties
print(f"Project Years: {project_years}")
print("Project Counties:")
for county in project_counties:
    print(f"- {county.full_name} ("
          f"state_fips={county.state_fips}, "
          f"county_fips={county.fips})")

Project Years: [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
Project Counties:
- Bronx County, New York (state_fips=36, county_fips=005)
- Kings County, New York (state_fips=36, county_fips=047)
- New York County, New York (state_fips=36, county_fips=061)
- Queens County, New York (state_fips=36, county_fips=081)
- Richmond County, New York (state_fips=36, county_fips=085)
- Westchester County, New York (state_fips=36, county_fips=119)
- Rockland County, New York (state_fips=36, county_fips=087)
- Nassau County, New York (state_fips=36, county_fips=059)
- Bergen County, New Jersey (state_fips=34, county_fips=003)
- Essex County, New Jersey (state_fips=34, county_fips=013)
- Hudson County, New Jersey (state_fips=34, county_fips=017)
- Middlesex County, New Jersey (state_fips=34, county_fips=023)
- Union County, New Jersey (state_fips=34, county_fips=039)
- Fairfield County, Connecticut (state_fips=09, county_fips=001)


In [5]:
# If a csv file exists and we don't want to force a download, load the data
# from the csv file and return it.
csv_path = Path(project_resources_dir) / "vacancy_rates.csv"
if csv_path.exists() and not force_download:
    merged_df = pd.read_csv(csv_path)
else:
    # Otherwise, iterate over the years and counties to get the data, ...
    dfs = []
    for year in project_years:
        for county in project_counties:
            df = vacancyrates.get_acs_1y_vacancy_rates(year, county)
            dfs.append(df)

    # ... merge the data, rename and reorder the columns, ...
    merged_df = pd.concat(dfs).reset_index(drop=True)
    merged_df = merged_df.rename(columns={
        "DP04_0004E": "homeowner_vacancy_rate",
        "DP04_0005E": "rental_vacancy_rate",
    })
    merged_df = merged_df[["year", "state", "county",
                            "homeowner_vacancy_rate", "rental_vacancy_rate"]]

    # ... and save the data to a csv file.  Saving the CSV file allows us to
    # avoid making time-consuming API requests to the Census Bureau in the
    # future, as well as merging, column renaming and reordering, etc.
    merged_df.to_csv(csv_path, index=False)

# Display the first few rows of the merged data frame to review the data
merged_df.head()

GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A005&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A047&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A061&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A081&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A085&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A119&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?get=DP04_0004E%2CDP04_0005E&for=county%3A087&in=state%3A36&key=API_KEY_REDACTED
GET: https://api.census.gov/data/2012/acs/acs1/profile?

RuntimeError: Failed to get data: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>