# CMS Bilingual Needs and Bilingual Pay (BNBP)
CMS uses census data to examine the county-by-county distribution of Low English Proficency citizens. This script automates the process of obtaining that census data.

You can download any data that is available in American Fact Finder. Use their search tool to find the table, and note the name. Then, go to api.census.gov, and navigate to the available APIs. Choose the correct vintage of the survey, and click on the html version of your desired year, under the 'Detailed Tables' section. Once it loads (quite large!), you can Ctrl+F to find the table name. All of the data feeds that make that table will be grouped under that name. Check out the ACS Table IDs explained page to figure out where the variables get their names.

In [1]:
## Import
from census import Census
from us import states
import pandas as pd
import os

In [2]:
## Select Year
yr = 2016

In [3]:
## Set Up Census Key
c = Census("7626c35db5ebda42db3c3b7d0b59939b2489cbd7", year = yr)

# Total Population 5 and Up

In [4]:
# Gather total population age 5 and up
total_pop_5up = c.acs5.state_county(('NAME', 'B16004_001E'), states.IL.fips, "*")
total_pop_5up_pd = pd.DataFrame(total_pop_5up)
total_pop_5up_pd.rename(columns={'B16004_001E':'Total_5andUp'}, 
                 inplace=True)

# Data on Language Spoken

In [27]:
# Calculate Number of Spanish Language Speakers 5 and up
spanish_all = c.acs5.state_county(('NAME', 'B16004_004E', 'B16004_026E', 'B16004_048E'), states.IL.fips, "*")
spanish_all_pd = pd.DataFrame(spanish_all)
spanish_all_pd.rename(columns={'B16004_004E':'Spanish_5_17', 'B16004_026E':'Spanish_18_64', 'B16004_048E':'Spanish_65' }, 
                 inplace=True)
spanish_all_pd['Spanish_Speakers'] = spanish_all_pd['Spanish_5_17'] + spanish_all_pd['Spanish_18_64'] + spanish_all_pd['Spanish_65']

In [6]:
# Calculate Number of English Only Speakers 5 and up
english_all = c.acs5.state_county(('NAME', 'B16004_003E', 'B16004_025E', 'B16004_047E'), states.IL.fips, "*")
english_all_pd = pd.DataFrame(english_all)
english_all_pd.rename(columns={'B16004_003E':'English_5_17', 'B16004_025E':'English_18_64', 'B16004_047E':'English_65' }, 
                 inplace=True)
english_all_pd['English_Speakers'] = english_all_pd['English_5_17'] + english_all_pd['English_18_64'] + english_all_pd['English_65']

In [7]:
# Calculate Number of Other Indo-European Language Speakers 5 and up
IndoEuro_all = c.acs5.state_county(('NAME', 'B16004_009E', 'B16004_031E', 'B16004_053E'), states.IL.fips, "*")
IndoEuro_all_pd = pd.DataFrame(IndoEuro_all)
IndoEuro_all_pd.rename(columns={'B16004_009E':'IndoEuro_5_17', 'B16004_031E':'IndoEuro_18_64', 'B16004_053E':'IndoEuro_65' }, 
                 inplace=True)
IndoEuro_all_pd['IndoEuro_Speakers'] = IndoEuro_all_pd['IndoEuro_5_17'] + IndoEuro_all_pd['IndoEuro_18_64'] + IndoEuro_all_pd['IndoEuro_65']

In [8]:
# Calculate Number of Other Asian and Pacific Island Language Speakers 5 and up
api_all = c.acs5.state_county(('NAME', 'B16004_014E', 'B16004_036E', 'B16004_058E'), states.IL.fips, "*")
api_all_pd = pd.DataFrame(api_all)
api_all_pd.rename(columns={'B16004_014E':'APIlang_5_17', 'B16004_036E':'APIlang_18_64', 'B16004_058E':'APIlang_65' }, 
                 inplace=True)
api_all_pd['APIlang_Speakers'] = api_all_pd['APIlang_5_17'] + api_all_pd['APIlang_18_64'] + api_all_pd['APIlang_65']

In [9]:
# Calculate Number of Other Speakers 5 and up
other_all = c.acs5.state_county(('NAME', 'B16004_019E', 'B16004_041E', 'B16004_063E'), states.IL.fips, "*")
other_all_pd = pd.DataFrame(other_all)
other_all_pd.rename(columns={'B16004_019E':'Other_5_17', 'B16004_041E':'Other_18_64', 'B16004_063E':'Other_65' }, 
                 inplace=True)
other_all_pd['Other_Speakers'] = other_all_pd['Other_5_17'] + other_all_pd['Other_18_64'] + other_all_pd['Other_65']

# LEP Data

In [12]:
# Calculate Number of Spanish LEP Speakers 5 and up
spanish_LEP = c.acs5.state_county(('NAME', 'B16004_007E', 'B16004_008E', 'B16004_029E', 'B16004_030E', 'B16004_051E', 'B16004_052E'), states.IL.fips, "*")
spanish_LEP_pd = pd.DataFrame(spanish_LEP)
spanish_LEP_pd.rename(columns={'B16004_007E':'Spanish_5_17_nw', 'B16004_008E':'Spanish_18_64_nw', 'B16004_029E':'Spanish_65_nw', 
                 'B16004_030E':'Spanish_5_17_naa', 'B16004_051E':'Spanish_18_64_naa', 'B16004_052E':'Spanish_65_naa'}, inplace=True)
spanish_LEP_pd['Spanish_LEP'] = spanish_LEP_pd['Spanish_5_17_nw'] + spanish_LEP_pd['Spanish_18_64_nw'] + spanish_LEP_pd['Spanish_65_nw'] + spanish_LEP_pd['Spanish_5_17_naa'] + spanish_LEP_pd['Spanish_18_64_naa'] + spanish_LEP_pd['Spanish_65_naa']

In [14]:
# Calculate Number of Indo-European LEP Speakers 5 and up
IndoEuro_LEP = c.acs5.state_county(('NAME', 'B16004_012E', 'B16004_013E', 'B16004_034E', 'B16004_035E', 'B16004_056E', 'B16004_057E'), states.IL.fips, "*")
IndoEuro_LEP_pd = pd.DataFrame(IndoEuro_LEP)
IndoEuro_LEP_pd.rename(columns={'B16004_012E':'IndoEuro_5_17_nw', 'B16004_013E':'IndoEuro_18_64_nw', 'B16004_034E':'IndoEuro_65_nw', 
                 'B16004_035E':'IndoEuro_5_17_naa', 'B16004_056E':'IndoEuro_18_64_naa', 'B16004_057E':'IndoEuro_65_naa'}, inplace=True)
IndoEuro_LEP_pd['IndoEuro_LEP'] = IndoEuro_LEP_pd['IndoEuro_5_17_nw'] + IndoEuro_LEP_pd['IndoEuro_18_64_nw'] + IndoEuro_LEP_pd['IndoEuro_65_nw'] + IndoEuro_LEP_pd['IndoEuro_5_17_naa'] + IndoEuro_LEP_pd['IndoEuro_18_64_naa'] + IndoEuro_LEP_pd['IndoEuro_65_naa']

In [18]:
# Calculate Number of Asian and Pacific Islander LEP Speakers 5 and up
api_LEP = c.acs5.state_county(('NAME', 'B16004_017E', 'B16004_018E', 'B16004_039E', 'B16004_040E', 'B16004_061E', 'B16004_062E'), states.IL.fips, "*")
api_LEP_pd = pd.DataFrame(api_LEP)
api_LEP_pd.rename(columns={'B16004_017E':'API_5_17_nw', 'B16004_018E':'API_18_64_nw', 'B16004_039E':'API_65_nw', 
                 'B16004_040E':'API_5_17_naa', 'B16004_061E':'API_18_64_naa', 'B16004_062E':'API_65_naa'}, inplace=True)
api_LEP_pd['API_LEP'] = api_LEP_pd['API_5_17_nw'] + api_LEP_pd['API_18_64_nw'] + api_LEP_pd['API_65_nw'] + api_LEP_pd['API_5_17_naa'] + api_LEP_pd['API_18_64_naa'] + api_LEP_pd['API_65_naa']

In [22]:
# Calculate Number of Other LEP Speakers 5 and up
other_LEP = c.acs5.state_county(('NAME', 'B16004_022E', 'B16004_023E', 'B16004_044E', 'B16004_045E', 'B16004_066E', 'B16004_067E'), states.IL.fips, "*")
other_LEP_pd = pd.DataFrame(other_LEP)
other_LEP_pd.rename(columns={'B16004_022E':'Other_5_17_nw', 'B16004_023E':'Other_18_64_nw', 'B16004_044E':'Other_65_nw', 
                 'B16004_045E':'Other_5_17_naa', 'B16004_066E':'Other_18_64_naa', 'B16004_067E':'Other_65_naa'}, inplace=True)
other_LEP_pd['Other_LEP'] = other_LEP_pd['Other_5_17_nw'] + other_LEP_pd['Other_18_64_nw'] + other_LEP_pd['Other_65_nw'] + other_LEP_pd['Other_5_17_naa'] + other_LEP_pd['Other_18_64_naa'] + other_LEP_pd['Other_65_naa']

# Join Data

In [28]:
# Drop duplicative columns
spanish_all_pd = spanish_all_pd.drop(['NAME', 'county', 'state'], axis=1)
english_all_pd = english_all_pd.drop(['NAME', 'county', 'state'], axis=1)
IndoEuro_all_pd = IndoEuro_all_pd.drop(['NAME', 'county', 'state'], axis=1)
api_all_pd = api_all_pd.drop(['NAME', 'county', 'state'], axis=1)
other_all_pd = other_all_pd.drop(['NAME', 'county', 'state'], axis=1)
spanish_LEP_pd = spanish_LEP_pd.drop(['NAME', 'county', 'state'], axis=1)
IndoEuro_LEP_pd = IndoEuro_LEP_pd.drop(['NAME', 'county', 'state'], axis=1)
api_LEP_pd = api_LEP_pd.drop(['NAME', 'county', 'state'], axis=1)
other_LEP_pd = other_LEP_pd.drop(['NAME', 'county', 'state'], axis=1)

In [29]:
# Join Data
language_full = total_pop_5up_pd.join([spanish_all_pd, english_all_pd, IndoEuro_all_pd, api_all_pd, other_all_pd, spanish_LEP_pd, IndoEuro_LEP_pd, api_LEP_pd, other_LEP_pd])

In [30]:
language_full

Unnamed: 0,Total_5andUp,NAME,county,state,Spanish_5_17,Spanish_18_64,Spanish_65,Spanish_Speakers,English_5_17,English_18_64,...,API_18_64_naa,API_65_naa,API_LEP,Other_5_17_nw,Other_18_64_nw,Other_65_nw,Other_5_17_naa,Other_18_64_naa,Other_65_naa,Other_LEP
0,62879.0,"Adams County, Illinois",001,17,56.0,209.0,33.0,298.0,10883.0,38294.0,...,14.0,0.0,137.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0
1,6551.0,"Alexander County, Illinois",003,17,13.0,19.0,0.0,32.0,1146.0,4000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,16284.0,"Bond County, Illinois",005,17,28.0,330.0,63.0,421.0,2504.0,10308.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0
3,50525.0,"Boone County, Illinois",007,17,2246.0,5440.0,213.0,7899.0,8445.0,25616.0,...,0.0,0.0,76.0,0.0,0.0,0.0,0.0,5.0,0.0,5.0
4,6555.0,"Brown County, Illinois",009,17,0.0,247.0,7.0,254.0,764.0,4583.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,32059.0,"Bureau County, Illinois",011,17,473.0,1165.0,131.0,1769.0,5073.0,18028.0,...,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,4706.0,"Calhoun County, Illinois",013,17,14.0,12.0,3.0,29.0,774.0,2765.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,14033.0,"Carroll County, Illinois",015,17,22.0,248.0,22.0,292.0,2090.0,8093.0,...,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,12236.0,"Cass County, Illinois",017,17,448.0,1406.0,29.0,1883.0,1817.0,6143.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,194936.0,"Champaign County, Illinois",019,17,1475.0,6861.0,266.0,8602.0,24476.0,117283.0,...,160.0,111.0,1694.0,0.0,0.0,61.0,0.0,10.0,0.0,71.0


# Export Data

In [31]:
cwd = os.getcwd()
language_full.to_csv(os.path.join(cwd,'Language_LEP_' + str(yr) + '.csv'))