# ACS Data

This notebook uses the `pygris` package to download 2023 ACS 5-year census tract-level demographic and socioeconomic data for California and aggregates it into factors such as population (with tract land area retained so population density can be derived), citizenship status, age distribution, car access, and poverty levels. The aggregated data is joined to the tract geometries, which are returned in the NAD 1983 coordinate reference system (EPSG:4269).

- [`pygris` info](https://walker-data.com/pygris/)
- [ACS variables info](https://api.census.gov/data/2023/acs/acs5/variables.html)


In [None]:
# Install pygris, which is used below to download US Census Bureau ACS data
# and census tract geometries.
pip install pygris

In [None]:
import pygris
from pygris.data import get_census
from pygris import tracts 

import pandas as pd 
import geopandas as gpd

# Remove the column display limit so wide ACS tables are shown in full
pd.set_option('display.max_columns', None)


In [None]:
# Selecting required variables for the analysis 
census_data = get_census(dataset = "2023/acs/acs5",
                            variables = ["B01003_001E", "B09005_001E", "B16008_037E", "B01001B_001E",  "B01001I_001E", "B17001_002E", "B06012_001E", "B08014_002E", "B08201_002E", "B01001_003E", "B01001_004E",
                                        "B01001_005E", "B01001_006E", "B01001_007E", "B01001_008E", "B01001_009E", "B01001_010E", "B01001_020E", "B01001_021E", "B01001_022E", 
                                        "B01001_023E", "B01001_024E", "B01001_025E", "B01001_027E", "B01001_028E", "B01001_029E",  "B01001_030E", "B01001_031E", "B01001_032E",
                                        "B01001_033E", "B01001_034E", "B01001_044E", "B01001_045E", "B01001_046E", "B01001_047E", "B01001_048E", "B01001_049E", "B06010_004E", "B06010_005E",
                                        "B06010_006E", "B06010_007E", "B06010_008E", "B06010_009E", "B06010_010E" ],
                            params = {
                                "for": "tract:*",
                                "in": "state:06",
                            }, 
                            return_geoid = True, 
                            guess_dtypes = True
                        )


In [None]:
#Renaming variables 
census_data = census_data.rename(columns={
    'B01003_001E': 'total_pop', 'B09005_001E': 'households', 'B16008_037E': 'not_us_citizen_pop', 'B01001B_001E': 'black_pop', 'B01001I_001E': 'hispanic_pop', 'B17001_002E': 'poverty',
    'B06012_001E': 'pop_determined_poverty_status','B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars', 'B01001_003E': 'male_under_5', 
    'B01001_004E': 'male_5_to_9', 'B01001_005E': 'male_10_to_14', 'B01001_006E': 'male_15_to_17', 'B01001_007E': 'male_18_to_19', 'B01001_008E': 'male_20', 'B01001_009E': 'male_21', 
    'B01001_010E': 'male_22_to_24', 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', 'B01001_023E': 'male_75_to_79', 
    'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over', 'B01001_027E': 'female_under_5', 'B01001_028E': 'female_5_to_9', 'B01001_029E': 'female_10_to_14', 
    'B01001_030E': 'female_15_to_17', 'B01001_031E': 'female_18_to_19', 'B01001_032E': 'female_20', 'B01001_033E': 'female_21', 'B01001_034E': 'female_22_to_24', 
    'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', 'B01001_047E': 'female_75_to_79', 'B01001_048E': 'female_80_to_84', 
    'B01001_049E': 'female_85_and_over', 'B06010_004E': 'income_less_10000', 'B06010_005E': 'income_10000_14999', 'B06010_006E': 'income_15000_24999', 'B06010_007E': 'income_25000_34999', 
    'B06010_008E': 'income_35000_49999', 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999'

})

In [None]:
# Preview the first five rows to check the renamed columns
census_data.head(5)

In [None]:
# Collapse the ACS individual-income brackets into three custom bands:
#   inc_extremelylow: under $25,000
#   inc_verylow:      $25,000 to $49,999
#   inc_low:          $50,000 to $74,999
census_data['inc_extremelylow'] = (
    census_data['income_less_10000']
    .add(census_data['income_10000_14999'])
    .add(census_data['income_15000_24999'])
)
census_data['inc_verylow'] = (
    census_data['income_25000_34999']
    .add(census_data['income_35000_49999'])
)
census_data['inc_low'] = (
    census_data['income_50000_64999']
    .add(census_data['income_65000_74999'])
)

In [None]:
# Aggregate the sex-by-age brackets into youth (under 25) and senior (65 and
# over) counts for each sex. Columns are selected by explicit name rather than
# by a label range, so the totals do not depend on the order in which the
# columns happen to appear in the DataFrame.
age_bands_by_group = {
    'youth': ['under_5', '5_to_9', '10_to_14', '15_to_17',
              '18_to_19', '20', '21', '22_to_24'],
    'seniors': ['65_to_66', '67_to_69', '70_to_74',
                '75_to_79', '80_to_84', '85_and_over'],
}

for group, age_bands in age_bands_by_group.items():
    for sex in ('male', 'female'):
        bracket_columns = [f'{sex}_{band}' for band in age_bands]
        # Row-wise total across the brackets, e.g. 'male_youth'
        census_data[f'{sex}_{group}'] = census_data[bracket_columns].sum(axis=1)

In [None]:
# Combine the male and female subtotals into overall youth and senior counts
census_data['youth_pop'] = census_data['male_youth'].add(census_data['female_youth'])
census_data['seniors_pop'] = census_data['male_seniors'].add(census_data['female_seniors'])

In [None]:
# List all column names, including the derived income and age columns
census_data.columns

In [None]:
# Download the 2023 census tract boundaries for California.
# cb=True requests the generalized cartographic boundary file rather than the
# full TIGER/Line file; cache=True stores the download for reuse.
ca_tracts = tracts(
    state="CA",
    year=2023,
    cb=True,
    cache=True,
)

In [None]:
# Summarize the tract table: columns, dtypes and non-null counts
ca_tracts.info()

In [None]:
# Attach the ACS attributes to the tract geometries, matching rows on GEOID.
# The inner join keeps only tracts that appear in both tables.
tracts_ca_acs = ca_tracts.merge(
    census_data,
    on="GEOID",
    how="inner",
)

In [None]:
# Keep only the columns needed downstream: tract land area (ALAND), geometry,
# GEOID and the ACS indicators. 'poverty' (population below the poverty level)
# is retained together with its denominator 'pop_determined_poverty_status' so
# that a poverty rate can be computed; without it only the denominator
# survives this selection.
keep_columns = [
    'ALAND', 'geometry', 'GEOID',
    'total_pop', 'households', 'not_us_citizen_pop', 'black_pop', 'hispanic_pop',
    'inc_extremelylow', 'inc_verylow', 'inc_low',
    'poverty', 'pop_determined_poverty_status',
    'workers_with_no_car', 'households_with_no_cars',
    'male_youth', 'female_youth', 'male_seniors', 'female_seniors',
    'youth_pop', 'seniors_pop',
]
tracts_ca_acs = tracts_ca_acs[keep_columns]

In [None]:
# Summarize the final merged table: columns, dtypes and non-null counts
tracts_ca_acs.info()