# CMS Bilingual Needs and Bilingual Pay (BNBP)
CMS uses census data to examine the county-by-county distribution of Low English Proficiency (LEP) citizens. This script automates the process of obtaining that census data.

You can download any data that is available in American FactFinder. Use its search tool to find the table, and note the table name. Then go to api.census.gov and navigate to the available APIs. Choose the correct vintage of the survey and, under the 'Detailed Tables' section, click on the HTML version for your desired year. Once the page loads (it is quite large!), use Ctrl+F to find the table name. All of the data feeds that make up that table are grouped under that name. See the "ACS Table IDs Explained" page to understand how the variables get their names.

In [1]:
## Import
from census import Census
from us import states
import pandas as pd
import os

In [2]:
## Select Year
# Survey year for every query in this notebook; it is passed as `year` when
# the Census client is created below. Change this value to pull another year.
yr = 2016

In [3]:
## Set Up Census Key
# Prefer a key supplied through the CENSUS_API_KEY environment variable so the
# credential does not have to live in the notebook. The literal is kept only
# as a backward-compatible fallback for environments where it is not set.
# NOTE(review): this key is committed in source -- consider rotating it and
# dropping the fallback once CENSUS_API_KEY is configured everywhere.
census_api_key = os.environ.get(
    "CENSUS_API_KEY", "7626c35db5ebda42db3c3b7d0b59939b2489cbd7"
)
# Client used by every query below, bound to the survey year chosen in `yr`.
c = Census(census_api_key, year=yr)

# Language Spoken Data

In [9]:
# Total population age 5 and up: query B16004_001E for the state of Illinois
# with a wildcard ("*") county filter, then give the column a readable name.
total_pop_5up = c.acs5.state_county(
    ('NAME', 'B16004_001E'),
    states.IL.fips,
    "*",
)
total_pop_5up_pd = pd.DataFrame(total_pop_5up).rename(
    columns={'B16004_001E': 'Total_5andUp'}
)

In [17]:
# Spanish language speakers age 5 and up: pull the three age-band variables
# for Illinois (wildcard county), rename them, and total them per row.
spanish_all = c.acs5.state_county(
    ('NAME', 'B16004_004E', 'B16004_026E', 'B16004_048E'),
    states.IL.fips,
    "*",
)
spanish_all_pd = pd.DataFrame(spanish_all).rename(
    columns={
        'B16004_004E': 'Spanish_5_17',
        'B16004_026E': 'Spanish_18_64',
        'B16004_048E': 'Spanish_65',
    }
)
# Element-wise total of the age bands, evaluated left to right like `+`.
spanish_all_pd['Spanish_Speakers'] = (
    spanish_all_pd['Spanish_5_17']
    .add(spanish_all_pd['Spanish_18_64'])
    .add(spanish_all_pd['Spanish_65'])
)

In [19]:
# English-only speakers age 5 and up: pull the three age-band variables
# for Illinois (wildcard county), rename them, and total them per row.
english_all = c.acs5.state_county(
    ('NAME', 'B16004_003E', 'B16004_025E', 'B16004_047E'),
    states.IL.fips,
    "*",
)
english_all_pd = pd.DataFrame(english_all).rename(
    columns={
        'B16004_003E': 'English_5_17',
        'B16004_025E': 'English_18_64',
        'B16004_047E': 'English_65',
    }
)
# Element-wise total of the age bands, evaluated left to right like `+`.
english_all_pd['English_Speakers'] = (
    english_all_pd['English_5_17']
    .add(english_all_pd['English_18_64'])
    .add(english_all_pd['English_65'])
)

In [23]:
# Other Indo-European language speakers age 5 and up: pull the three age-band
# variables for Illinois (wildcard county), rename them, and total them per row.
IndoEuro_all = c.acs5.state_county(
    ('NAME', 'B16004_009E', 'B16004_031E', 'B16004_053E'),
    states.IL.fips,
    "*",
)
IndoEuro_all_pd = pd.DataFrame(IndoEuro_all).rename(
    columns={
        'B16004_009E': 'IndoEuro_5_17',
        'B16004_031E': 'IndoEuro_18_64',
        'B16004_053E': 'IndoEuro_65',
    }
)
# Element-wise total of the age bands, evaluated left to right like `+`.
IndoEuro_all_pd['IndoEuro_Speakers'] = (
    IndoEuro_all_pd['IndoEuro_5_17']
    .add(IndoEuro_all_pd['IndoEuro_18_64'])
    .add(IndoEuro_all_pd['IndoEuro_65'])
)

In [28]:
# Other Asian and Pacific Island language speakers age 5 and up: pull the three
# age-band variables for Illinois (wildcard county), rename them, and total
# them per row.
api_all = c.acs5.state_county(
    ('NAME', 'B16004_014E', 'B16004_036E', 'B16004_058E'),
    states.IL.fips,
    "*",
)
api_all_pd = pd.DataFrame(api_all).rename(
    columns={
        'B16004_014E': 'APIlang_5_17',
        'B16004_036E': 'APIlang_18_64',
        'B16004_058E': 'APIlang_65',
    }
)
# Element-wise total of the age bands, evaluated left to right like `+`.
api_all_pd['APIlang_Speakers'] = (
    api_all_pd['APIlang_5_17']
    .add(api_all_pd['APIlang_18_64'])
    .add(api_all_pd['APIlang_65'])
)

In [32]:
# Speakers of other languages age 5 and up: pull the three age-band variables
# for Illinois (wildcard county), rename them, and total them per row.
other_all = c.acs5.state_county(
    ('NAME', 'B16004_019E', 'B16004_041E', 'B16004_063E'),
    states.IL.fips,
    "*",
)
other_all_pd = pd.DataFrame(other_all).rename(
    columns={
        'B16004_019E': 'Other_5_17',
        'B16004_041E': 'Other_18_64',
        'B16004_063E': 'Other_65',
    }
)
# Element-wise total of the age bands, evaluated left to right like `+`.
other_all_pd['Other_Speakers'] = (
    other_all_pd['Other_5_17']
    .add(other_all_pd['Other_18_64'])
    .add(other_all_pd['Other_65'])
)

# LEP Data