In [42]:
import json

import requests
import pandas as pd
from pathlib import Path

import apifacade as api
from censuscodes import county_lookup

In [43]:
# Set up the project resources directory, and the vacancy rates output directory
project_resources_dir = "resources"
project_resources_path = Path(project_resources_dir)

# Set to True to force download from the Census Bureau's API
force_download = False

In [44]:
# Range of years for the project
project_years = list(range(2012, 2023))

# NYC borough counties
project_counties = list(map(county_lookup.by.full_name.get, [
    "Bronx County, New York",
    "Kings County, New York",  # Brooklyn
    "New York County, New York",  # Manhattan
    "Queens County, New York",
    "Richmond County, New York",  # Staten Island
]))

In [45]:
# Display year and county information for all project years and counties
print(f"Project Years: {project_years}")
print("Project Counties:")
for county in project_counties:
    print(f"- {county.full_name} ("
          f"state_fips={county.state_fips}, "
          f"county_fips={county.fips})")

Project Years: [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]
Project Counties:
- Bronx County, New York (state_fips=36, county_fips=005)
- Kings County, New York (state_fips=36, county_fips=047)
- New York County, New York (state_fips=36, county_fips=061)
- Queens County, New York (state_fips=36, county_fips=081)
- Richmond County, New York (state_fips=36, county_fips=085)


In [46]:
# If a csv file exists and we don't want to force a download, load the data
# from the csv file and return it.
csv_path = Path(project_resources_dir) / "vacancy_rates.csv"
if csv_path.exists() and not force_download:
    df = pd.read_csv(csv_path)
else:
    # Otherwise, iterate over the years and counties to get the data, ...
    dfs = []
    for year in project_years:
        for county in project_counties:
            try:
                df = api.get_vacancy_rate(year, county)
                dfs.append(df)
            except RuntimeError as e:
                print(f"Error: {e}")
                continue

    # ... merge the data, rename and reorder the columns, ...
    df = pd.concat(dfs).reset_index(drop=True)
    df = df[["year", "state_fips", "county_fips",
             "vacant_units", "total_units",
             "vacancy_rate"]]
    
    # ... reformat the state and county FIPS codes, ...
    df["state_fips"] = df["state_fips"].astype(str).str.zfill(2)
    df["county_fips"] = df["county_fips"].astype(str).str.zfill(3)
    
    # ... and save the data to a csv file.  Saving the CSV file allows us to
    # avoid making time-consuming API requests to the Census Bureau in the
    # future, as well as merging, column renaming and reordering, etc.
    df.to_csv(csv_path, index=False)

In [47]:
# Display the head and tail of the merged data frame to review the data.
print(df.head())
print(df.tail())

   year  state_fips  county_fips  vacant_units  total_units  vacancy_rate
0  2012          36            5         38073       514051      7.406464
1  2012          36           47         83781      1003114      8.352092
2  2012          36           61        103804       850490     12.205199
3  2012          36           81         59798       840147      7.117564
4  2012          36           85         14011       177479      7.894455
    year  state_fips  county_fips  vacant_units  total_units  vacancy_rate
50  2022          36            5         24950       557985      4.471446
51  2022          36           47         75068      1101429      6.815510
52  2022          36           61        119395       923239     12.932188
53  2022          36           81         72060       911913      7.902070
54  2022          36           85         14551       184497      7.886849
