# Demographics by Targeted Census Block Group

This notebook looks at the demographics of one or more targeted block groups. This can be used to explore one or more ACS datasets across one or more geographies. 

## User Input

In [17]:
# input required ACS tables into list below
requested_tables = ['B02001'] # race

# Each requested geography is a Census UCGID. The one requested below breaks
# down as follows:
#     150 00 00 US 48 029 181902 3
#
#     150 = Summary level for block group
#     00 = No geographic variant
#     00 = Not a geographic component
#     US = United States
#     48 = Texas
#     029 = Bexar County
#     181902 = Census Tract 1819.02
#     3 = Block Group 3
requested_geographies = ['1500000US480291819023']

In [18]:
import pandas as pd
import os
import sqlalchemy
import requests
import warnings

## ETL
This section extracts, transforms, and loads data for our analysis. Note: this requires local database and shapefiles to be available. 

### Database ETL
This code loads ACS data from local datasources.

In [19]:
# path (relative to the notebook's working directory) of the local SQLite
# database holding the ACS 5-year block-group tables
demographics_db_filepath = 'data/databases/census_acs5_2022_block_group.db'

# fail fast on a wrong path: create_engine() is lazy and does not check the
# file, so a missing database would otherwise only surface later as a
# confusing error when the first table is read
if not os.path.isfile(demographics_db_filepath):
    raise FileNotFoundError(
        f'ACS database not found at {demographics_db_filepath!r}; '
        'run this notebook from the project root or update the path.'
    )

# creates connection to SQLite database
sql_engine = sqlalchemy.create_engine('sqlite:///' + demographics_db_filepath)

### Table and Table Column Crosswalks
These crosswalks are used to map names to individual columns within ACS tables.

In [20]:
# imports crosswalk
surveys = ['acsse', 'acs5']
# creates dictionary to contain the crosswalk (and later the table list) for
# each survey in `surveys`, keyed as '<survey>_crosswalk_df'
acs_data_dict = {}

# browser-like User-Agent sent with every crosswalk request
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36'}

for survey in surveys:
    crosswalk_url = f'https://api.census.gov/data/2022/acs/{survey}/variables/'

    # a timeout keeps the cell from hanging forever on a stalled connection, and
    # raise_for_status() reports HTTP failures directly instead of letting an
    # empty DataFrame fail further down with an unrelated IndexError
    crosswalk_response = requests.get(crosswalk_url, headers=headers, timeout=60)
    crosswalk_response.raise_for_status()

    raw_crosswalk_df = pd.DataFrame(crosswalk_response.json())
    if raw_crosswalk_df.empty:
        raise ValueError(f'No variables returned for survey {survey!r} from {crosswalk_url}')

    # the first row of the response holds the column names (name, label, concept)
    crosswalk_df = raw_crosswalk_df.iloc[1:].copy()
    crosswalk_df.columns = raw_crosswalk_df.iloc[0]

    # removes rows not used for naming columns locally (keeps variables whose
    # name starts with 'K' or 'B')
    is_table_variable = crosswalk_df['name'].str.startswith('K') | crosswalk_df['name'].str.startswith('B')
    crosswalk_df = crosswalk_df[is_table_variable]

    if survey == 'acs5':
        # 'BLKGRP' passes the 'B' prefix filter above but is dropped here;
        # filtering (rather than an in-place drop on a slice) avoids
        # pandas' SettingWithCopyWarning
        crosswalk_df = crosswalk_df[crosswalk_df['name'] != 'BLKGRP']

    acs_data_dict[f'{survey}_crosswalk_df'] = crosswalk_df

In [21]:
# displays the cleaned acs5 crosswalk (one row per variable: name, label, concept)
acs_data_dict['acs5_crosswalk_df']

Unnamed: 0,name,label,concept
4,B24022_060E,Estimate!!Total:!!Female:!!Service occupations...,Sex by Occupation and Median Earnings in the P...
5,B19001B_014E,"Estimate!!Total:!!$100,000 to $124,999",Household Income in the Past 12 Months (in 202...
6,B07007PR_019E,Estimate!!Total:!!Moved from different municip...,Geographical Mobility in the Past Year by Citi...
7,B19101A_004E,"Estimate!!Total:!!$15,000 to $19,999",Family Income in the Past 12 Months (in 2022 I...
8,B24022_061E,Estimate!!Total:!!Female:!!Service occupations...,Sex by Occupation and Median Earnings in the P...
...,...,...,...
28189,B25124_022E,Estimate!!Total:!!Owner occupied:!!3-person ho...,Tenure by Household Size by Units in Structure
28190,B20005I_071E,"Estimate!!Total:!!Female:!!Worked full-time, y...",Sex by Work Experience in the Past 12 Months b...
28191,B08113_054E,Estimate!!Total:!!Worked from home:!!Speak oth...,Means of Transportation to Work by Language Sp...
28192,B06009_006E,Estimate!!Total:!!Graduate or professional degree,Place of Birth by Educational Attainment in th...


In [22]:
# builds one row per ACS table from each survey's crosswalk: `name` is cut down
# to the table 'group' id (the part before the first underscore), duplicate
# tables are collapsed, `label` is discarded, and `concept` is normalized to a
# lowercase / underscores-for-spaces format
for survey in surveys:
    variables_df = acs_data_dict[f'{survey}_crosswalk_df']
    tables_df = (
        variables_df
        .assign(name=variables_df['name'].str.split('_').str[0])
        .drop_duplicates(subset='name')
        .drop(columns='label')
        .assign(concept=lambda tables: tables['concept'].str.replace(' ', '_').str.lower())
    )

    acs_data_dict[f'{survey}_tables_df'] = tables_df

In [23]:
# previews the first rows of the acs5 table list (table name and normalized concept)
acs_data_dict['acs5_tables_df'].head()

Unnamed: 0,name,concept
4,B24022,sex_by_occupation_and_median_earnings_in_the_p...
5,B19001B,household_income_in_the_past_12_months_(in_202...
6,B07007PR,geographical_mobility_in_the_past_year_by_citi...
7,B19101A,family_income_in_the_past_12_months_(in_2022_i...
14,B01001B,sex_by_age_(black_or_african_american_alone)


### ACS Datasets
This loads the tables requested in the User Input section, chosen from the options presented in the table of tables above.

In [24]:
# loads the single requested ACS table from the local database
if len(requested_tables) != 1:
    # without this guard `df` would silently stay undefined (or keep a stale
    # value from an earlier run) and the next cell would fail or mislead
    raise ValueError(
        f'Exactly one table is currently supported in requested_tables; got {len(requested_tables)}: {requested_tables}'
    )

df = pd.read_sql_table(requested_tables[0], con=sql_engine)

In [25]:
# keeps the rows whose UCGID starts with ANY of the requested geographies
# (previously only the first entry of the list was honored)
geography_mask = pd.Series(False, index=df.index)
for requested_geography in requested_geographies:
    # na=False treats a missing ucgid as "no match" instead of producing NaN in the mask
    geography_mask |= df['ucgid'].str.startswith(requested_geography, na=False)

df = df[geography_mask]

In [26]:
# displays the rows kept for the requested geography
df

Unnamed: 0,index,B02001_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E,B02001_009E,B02001_010E,NAME,ucgid
816,816,4558,2354,77,48,363,0,45,1671,1301,370,Block Group 3; Census Tract 1819.02; Bexar Cou...,1500000US480291819023


In [27]:
# uses column name crosswalk to transform column names to human-readable format
def _readable_label(concept, label):
    """Return the normalized `concept__label` text used in readable column names.

    Spaces and the `!!` level separators become underscores; apostrophes,
    colons, square brackets and newlines are removed; the result is lowercased.
    Example: ('Race', 'Estimate!!Total:!!White alone') ->
    'race__estimate_total_white_alone'.
    """
    combined = f'{concept}__{label}'
    for old, new in ((' ', '_'), ('\'', ''), ('!!', '_'), (':', ''), ('[', ''), (']', ''), ('\n', '')):
        combined = combined.replace(old, new)
    return combined.lower()


# maps each raw data column (e.g. `B02001_001E`) to `<raw name>:<concept>__<label>`
column_renames = {}
for survey in surveys:
    crosswalk_by_name = (
        acs_data_dict[f'{survey}_crosswalk_df']
        .drop_duplicates(subset='name')
        .set_index('name')
    )
    for series_name in df.columns:
        # exact lookup: descriptor columns (index, NAME, ucgid) are not crosswalk
        # variables and are skipped; the first survey that knows a column wins
        if series_name in column_renames or series_name not in crosswalk_by_name.index:
            continue
        variable = crosswalk_by_name.loc[series_name]
        column_renames[series_name] = series_name + ':' + _readable_label(variable['concept'], variable['label'])

df = df.rename(columns=column_renames)

# converts the renamed data columns to int; this step was previously unreachable
# because a second rename of the already-renamed column always raised KeyError
for renamed_column in column_renames.values():
    try:
        df = df.astype({renamed_column: 'int'})
    except (TypeError, ValueError):
        # best effort: a column that cannot be cast (e.g. missing values) is left as loaded
        pass

# lowercases the descriptor column name (no-op if `NAME` is absent)
df = df.rename(columns={'NAME': 'name'})

In [28]:
# displays the frame with human-readable column names
df

Unnamed: 0,index,B02001_001E:race__estimate_total,B02001_002E:race__estimate_total_white_alone,B02001_003E:race__estimate_total_black_or_african_american_alone,B02001_004E:race__estimate_total_american_indian_and_alaska_native_alone,B02001_005E:race__estimate_total_asian_alone,B02001_006E:race__estimate_total_native_hawaiian_and_other_pacific_islander_alone,B02001_007E:race__estimate_total_some_other_race_alone,B02001_008E:race__estimate_total_two_or_more_races,B02001_009E:race__estimate_total_two_or_more_races_two_races_including_some_other_race,"B02001_010E:race__estimate_total_two_or_more_races_two_races_excluding_some_other_race,_and_three_or_more_races",name,ucgid
816,816,4558,2354,77,48,363,0,45,1671,1301,370,Block Group 3; Census Tract 1819.02; Bexar Cou...,1500000US480291819023


In [29]:
# flips the frame so each ACS variable becomes a row and each geography a column
df = df.transpose()

In [30]:
# uses the geography names as column headers, then removes the descriptor rows
# so only the ACS variables remain
geography_names = df.loc['name']
df.columns = geography_names
df = df.drop(index=['index', 'name', 'ucgid'])

## Exploratory Data Analysis

In [31]:
# displays the final table: one row per ACS variable, one column per geography
df

name,Block Group 3; Census Tract 1819.02; Bexar County; Texas
B02001_001E:race__estimate_total,4558
B02001_002E:race__estimate_total_white_alone,2354
B02001_003E:race__estimate_total_black_or_african_american_alone,77
B02001_004E:race__estimate_total_american_indian_and_alaska_native_alone,48
B02001_005E:race__estimate_total_asian_alone,363
B02001_006E:race__estimate_total_native_hawaiian_and_other_pacific_islander_alone,0
B02001_007E:race__estimate_total_some_other_race_alone,45
B02001_008E:race__estimate_total_two_or_more_races,1671
B02001_009E:race__estimate_total_two_or_more_races_two_races_including_some_other_race,1301
"B02001_010E:race__estimate_total_two_or_more_races_two_races_excluding_some_other_race,_and_three_or_more_races",370
