In [33]:
import os

import geopandas as gpd
import numpy as np
import pandas as pd

from census import Census
from matplotlib import pyplot as plt
from tqdm import tqdm

# Notebook for Downloading Census Data

## Part 1: Decennial Census (2010 and 2020)

In [11]:
# Read the Census API key from the CENSUS_API_KEY environment variable so the
# credential is never stored in the notebook itself. When the variable is not
# set this falls back to the empty key that was previously hard-coded.
census = Census(os.environ.get("CENSUS_API_KEY", ""))

In [25]:
# Decennial census variable codes -> readable column names, keyed by census
# year. The download cell uses these both to pick the variables to request
# and to rename the returned columns.
dec_dict = {}

# variable codes used for the 2020 decennial census
dec_dict[2020] = {
    "P1_001N": "total",
    "P2_002N": "hispanic_latino",
    "P2_005N": "nh_white",
    "P2_006N": "nh_black",
    "P2_007N": "nh_aian",
    "P2_008N": "nh_asian",
    "P2_009N": "nh_nhpi",
    "P2_010N": "nh_other",
    "P2_011N": "nh_multi",
}

# variable codes used for the 2010 decennial census (same readable names)
dec_dict[2010] = {
    "P001001": "total",
    "P002002": "hispanic_latino",
    "P002005": "nh_white",
    "P002006": "nh_black",
    "P002007": "nh_aian",
    "P002008": "nh_asian",
    "P002009": "nh_nhpi",
    "P002010": "nh_other",
    "P002011": "nh_multi",
}

# state FIPS code for New York State
state_fips = 36

In [26]:
# Download tract-level decennial counts for 2020 and 2010, one request per year
dec_gdf_list = []
for yr in [2020, 2010]:
    print(f"Loading data for decennial census {yr} for state={state_fips}")

    # NAME and GEO_ID plus every variable code mapped for this census year
    yr_fields = ["NAME", "GEO_ID", *dec_dict[yr]]
    # every tract in every county of the selected state
    tract_geo = {"for": "tract:*", "in": f"state:{state_fips} county:*"}
    dec_gdf_yr = census.pl.get(yr_fields, geo=tract_geo, year=yr)

    # swap the raw variable codes for readable names and tag rows with the year
    yr_frame = pd.DataFrame(dec_gdf_yr)
    yr_frame = yr_frame.rename(columns=dec_dict[yr])
    dec_gdf_list.append(yr_frame.assign(year=yr))

# stack both years into one long table (each year's own row index is kept)
dec_gdf = pd.concat(dec_gdf_list)
dec_gdf.head()

Loading data for decennial census 2020 for state=36
Loading data for decennial census 2010 for state=36


Unnamed: 0,NAME,GEO_ID,total,hispanic_latino,nh_white,nh_black,nh_aian,nh_asian,nh_nhpi,nh_other,nh_multi,state,county,tract,year
0,"Census Tract 1, Albany County, New York",1400000US36001000100,2073.0,258.0,370.0,1236.0,4.0,42.0,0.0,13.0,150.0,36,1,100,2020
1,"Census Tract 2.01, Albany County, New York",1400000US36001000201,3125.0,345.0,566.0,1875.0,20.0,104.0,0.0,44.0,171.0,36,1,201,2020
2,"Census Tract 2.02, Albany County, New York",1400000US36001000202,2598.0,413.0,184.0,1805.0,3.0,61.0,0.0,12.0,120.0,36,1,202,2020
3,"Census Tract 3.01, Albany County, New York",1400000US36001000301,3190.0,524.0,497.0,1805.0,20.0,88.0,4.0,19.0,233.0,36,1,301,2020
4,"Census Tract 3.02, Albany County, New York",1400000US36001000302,3496.0,380.0,1916.0,646.0,7.0,358.0,0.0,26.0,163.0,36,1,302,2020


In [27]:
# sanity check: (rows, columns) of the combined 2020 + 2010 decennial table
dec_gdf.shape

(10330, 15)

## Part 2: American Community Survey Data

In [28]:
# Read the ACS data dictionary / crosswalk spreadsheet and reduce it to a
# plain dict that maps each `colname` entry to its `textname` entry.
acs_dict = (
    pd.read_excel("_input/acs_dict.xlsx")
    .set_index("colname")["textname"]
    .to_dict()
)

In [35]:
# Download ACS 5-year data (`acs5`) for the 2018, 2020 and 2023 vintages,
# covering all census tracts in NYS. The result is a list with one DataFrame
# per vintage, each tagged with its `year`.

# NAME and GEO_ID plus every variable code listed in the ACS crosswalk
acs_fields = ("NAME", "GEO_ID", *acs_dict)
# every tract in every county of the selected state
acs_geo = {"for": "tract:*", "in": f"state:{state_fips} county:*"}

acs_gdf = [
    pd.DataFrame(census.acs5.get(acs_fields, geo=acs_geo, year=yr)).assign(year=yr)
    for yr in tqdm([2018, 2020, 2023])
]

100%|██████████| 3/3 [02:15<00:00, 45.16s/it]


In [37]:
# Stack the per-year ACS frames into one table, then replace the raw ACS
# variable codes in the column labels with the crosswalk's readable names.
acs_gdf_clean = pd.concat(acs_gdf).rename(acs_dict, axis="columns")
acs_gdf_clean.head()

Unnamed: 0,NAME,GEO_ID,totalpop,totalpop_male,totalpop_male_u5,totalpop_male_5to9,totalpop_male_10to14,totalpop_male_15to17,totalpop_male_18to19,totalpop_male_20,...,aggregate_income,total_over75,total_occupation,management_business_occupation,service_occupation,sales_office_occupation,natural_res_construction_occupation,production_transp_occupation,military_occupation,year
0,"Census Tract 1, Albany County, New York",1400000US36001000100,2022.0,935.0,113.0,57.0,46.0,111.0,15.0,7.0,...,36923700.0,68.0,812.0,250.0,232.0,167.0,33.0,130.0,0.0,2018
1,"Census Tract 2, Albany County, New York",1400000US36001000200,4700.0,2022.0,85.0,132.0,82.0,108.0,23.0,73.0,...,123594400.0,149.0,2267.0,934.0,566.0,370.0,48.0,349.0,0.0,2018
2,"Census Tract 3, Albany County, New York",1400000US36001000300,5966.0,2801.0,167.0,127.0,217.0,156.0,0.0,34.0,...,139489700.0,501.0,2598.0,767.0,969.0,521.0,59.0,282.0,0.0,2018
3,"Census Tract 4.01, Albany County, New York",1400000US36001000401,2479.0,913.0,0.0,0.0,42.0,10.0,13.0,0.0,...,101118800.0,999.0,933.0,503.0,139.0,226.0,33.0,32.0,0.0,2018
4,"Census Tract 4.03, Albany County, New York",1400000US36001000403,4236.0,2130.0,81.0,55.0,113.0,0.0,76.0,0.0,...,192939200.0,285.0,2654.0,1633.0,329.0,496.0,119.0,77.0,0.0,2018
