# Retrieve Census Data

In [1]:
import fiona
import pandas as pd
import numpy as np
import geopandas as gpd
import requests
import pickle

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv('CENSUS_API_KEY')
print(api_key)

494f201c0638a5f1c615209eedee3711d608203d


In [3]:
nj_tract_table_picklefile_path = 'data/nj_tract_data.pkl'

In [4]:
#specify the data source by year and survey
year = '2022'
dsource = 'acs' #American Community Survey
dname = 'acs5' #5 year average from American Community Survey
base_url = f'https://api.census.gov/data/{year}/{dsource}/{dname}'


In [5]:
# this request
cols = 'NAME,B01003_001E,B02001_002E,B19013_001E' #NAME of geography as well as the variables I want to pull

In [6]:
# create picklefile nj_tract_table

def make_nj_tract_table():

    # load fips codes and iterate over counties to build up statewide dataframe
    fips_list = pd.read_csv("data/nj_county_fips.csv", dtype=object)

    county_dfs = []

    for index,county in fips_list.iterrows():

        geo = 'tract' #county geography level
        state = f'state:{county.fips_state}' 
        county = f'county:{county.fips_county}' 

        #add unique request features to the base_url
        data_url = f'{base_url}?get={cols}&for={geo}&in={state}&in={county}&key={api_key}'


        #go get the data + make df
        response = requests.get(data_url)
        data = response.json()

        df = (pd.DataFrame(data = data[1:], columns = data[0]) #first row is column names, everything else is data.
                .rename(columns = {'NAME' : 'tract_name',
                                   'B01003_001E' : 'population',
                                   'B02001_002E' : 'population_white_alone',
                                   'B19013_001E' : 'med_hh_inc',
                                   'state' : 'state_fips',
                                   'county' : 'county_fips',
                                   'tract' : 'tract_fips'}))

        #the fips for a tract is a concatenation of state, county, and tract fips
        df['fips'] = df['state_fips'] + df['county_fips'] + df['tract_fips'] #make sure these are strings so it concatenates and doesn't add. 

        #changing the data to be numeric, since everything starts as string
        df[['population', 'population_white_alone', 'med_hh_inc']] = df[['population', 'population_white_alone', 'med_hh_inc']].apply(pd.to_numeric)

        print(f"Fetched {df.size} tracts for {county}")
        county_dfs.append(df)

    # combine
    return pd.concat(county_dfs)



In [7]:
# load or create the nj_tract_table statewide tract-level demographics

# If the file exists, load the DataFrame
if os.path.exists(nj_tract_table_picklefile_path ):

    with open(nj_tract_table_picklefile_path , 'rb') as file:
        nj_tract_table = pickle.load(file)
        print(f"Loaded {nj_tract_table.size} tracts.")

# Else fetch the data and pickle it
else:
    print(f"The file '{nj_tract_table_picklefile_path}' does not exist. Fetching data")
    nj_tract_table = make_nj_tract_table()
    with open(nj_tract_table_picklefile_path, 'wb') as file:
        pickle.dump(nj_tract_table, file)
    



The file 'data/nj_tract_table_pickle.pkl' does not exist. Fetching data
Fetched 592 tracts for county:001
Fetched 1624 tracts for county:003
Fetched 936 tracts for county:005
Fetched 1032 tracts for county:007
Fetched 264 tracts for county:009
Fetched 336 tracts for county:011
Fetched 1688 tracts for county:013
Fetched 552 tracts for county:015
Fetched 1464 tracts for county:017
Fetched 240 tracts for county:019
Fetched 672 tracts for county:021
Fetched 1536 tracts for county:023
Fetched 1240 tracts for county:025
Fetched 880 tracts for county:027
Fetched 1160 tracts for county:029
Fetched 960 tracts for county:031
Fetched 200 tracts for county:033
Fetched 592 tracts for county:035
Fetched 336 tracts for county:037
Fetched 960 tracts for county:039
Fetched 184 tracts for county:041
