# Census Bureau ACS Table Crosswalk

Looks for correlated tables inside both Census Bureau ACS 5yr and ACS Supplemental Estimate 1yr datasets that can be compared against one another.

Note: requires ACS and ACSSE databases to be stored locally in `data/databases` directory before running.

In [1]:
import pandas as pd
import os
import sqlalchemy

## User Variables

In [2]:
acs_survey_year = '2022'
acsse_survey_year = '2022'

## Extract

In [3]:
# imports Census survey data
acs_db_filepath = os.path.join(f'data/databases/census_bureau/census_acs5_{acs_survey_year}_block_group.db')
acs_sql_engine = sqlalchemy.create_engine('sqlite:///' + acs_db_filepath)
acs_tables = pd.read_sql_table('tables', acs_sql_engine)
acs_tables.head()

Unnamed: 0,index,name,concept
0,4,B24022,sex_by_occupation_and_median_earnings_in_the_p...
1,5,B19001B,household_income_in_the_past_12_months_(in_202...
2,6,B07007PR,geographical_mobility_in_the_past_year_by_citi...
3,7,B19101A,family_income_in_the_past_12_months_(in_2022_i...
4,14,B01001B,sex_by_age_(black_or_african_american_alone)


In [4]:
acsse_db_filepath = os.path.join(f'data/databases/census_bureau/census_acsse_{acsse_survey_year}_congressional_district.db')
# creates connection to SQLite database
acsse_sql_engine = sqlalchemy.create_engine('sqlite:///' + acsse_db_filepath)
acsse_tables = pd.read_sql_table('tables', acsse_sql_engine)
acsse_tables.head()

Unnamed: 0,index,name,concept
0,4,K202101,veteran_status_for_the_civilian_population_18_...
1,5,K200201,race
2,6,K202505,year_structure_built
3,11,K201802,work_experience_by_disability_status
4,12,K200104,population_by_age


## Transform

In [5]:
matching_acs_index_list = []
matching_acsse_index_list = []
for acsse_index, acsse_concept in acsse_tables['concept'].items():
    for acs_index, acs_concept in acs_tables['concept'].items():    
        if acsse_concept == acs_concept:
            matching_acs_index_list.append(acs_index)
            matching_acsse_index_list.append(acsse_index)

In [6]:
matching_acs_tables = acs_tables.iloc[matching_acs_index_list]
matching_acs_tables['acs_5yr_table_name'] = matching_acs_tables['name']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_acs_tables['acs_5yr_table_name'] = matching_acs_tables['name']


In [7]:
matching_acsse_tables = acsse_tables.iloc[matching_acsse_index_list]
matching_acsse_tables['acs_1yr_table_name'] = matching_acsse_tables['name']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matching_acsse_tables['acs_1yr_table_name'] = matching_acsse_tables['name']


In [8]:
# merges our two tables
matching_tables = matching_acs_tables.merge(acsse_tables, how='left', on='concept')
matching_tables

Unnamed: 0,index_x,name_x,concept,acs_5yr_table_name,index_y,name_y
0,27074,B02001,race,B02001,5,K200201
1,3898,B25034,year_structure_built,B25034,6,K202505
2,10187,B14001,school_enrollment_by_level_of_school_for_the_p...,B14001,14,K201401
3,22226,B15003,educational_attainment_for_the_population_25_y...,B15003,49,K201501
4,26686,B26001,group_quarters_population,B26001,50,K202601
5,19081,B09001,population_under_18_years_by_age,B09001,67,K200102
6,16351,B19202,median_nonfamily_household_income_in_the_past_...,B19202,79,K201905
7,3185,B20017,median_earnings_in_the_past_12_months_(in_2022...,B20017,81,K202002
8,11939,B25008,total_population_in_occupied_housing_units_by_...,B25008,83,K202503
9,24809,B08303,travel_time_to_work,B08303,154,K200802


In [9]:
# removes `index` and old 'name' columns
matching_tables.drop(columns=['index_x', 'index_y', 'name_x'], inplace=True)
matching_tables.rename(columns={'name_y': 'acs_1yr_table_name'}, inplace=True)
matching_tables

Unnamed: 0,concept,acs_5yr_table_name,acs_1yr_table_name
0,race,B02001,K200201
1,year_structure_built,B25034,K202505
2,school_enrollment_by_level_of_school_for_the_p...,B14001,K201401
3,educational_attainment_for_the_population_25_y...,B15003,K201501
4,group_quarters_population,B26001,K202601
5,population_under_18_years_by_age,B09001,K200102
6,median_nonfamily_household_income_in_the_past_...,B19202,K201905
7,median_earnings_in_the_past_12_months_(in_2022...,B20017,K202002
8,total_population_in_occupied_housing_units_by_...,B25008,K202503
9,travel_time_to_work,B08303,K200802


## Load

In [10]:
databases_dirpath = os.path.join('data/databases/census_bureau')
crosswalk_db_filepath = os.path.join(databases_dirpath, f'census_acs-acsse_crosswalk.db')
crosswalk_sql_engine = sqlalchemy.create_engine('sqlite:///' + crosswalk_db_filepath)
matching_tables.to_sql(f'acs_{acs_survey_year}-acsse_{acsse_survey_year}_crosswalk', crosswalk_sql_engine, if_exists='replace')

30