# [ACS API 2018](https://api.census.gov/data/2018/acs/acs5.html)

In [1]:
import os
import pandas as pd
import requests
import json
from bs4 import BeautifulSoup
import re

# 1. ACS Groups

In [2]:
# Fetch the HTML page listing all ACS 5-year groups and parse its tables.
# pd.read_html returns a list of DataFrames (one per table found), so at this
# point groups_df is a list; the next cell selects the first table from it.
groups_df = pd.read_html('https://api.census.gov/data/2018/acs/acs5/groups.html')

In [3]:
# Take the first parsed table and keep only its first two columns
# (Name and Description, as shown in the output below).
# NOTE: this rebinds groups_df from a list to a DataFrame, so re-running this
# cell without first re-running the read_html cell fails (groups_df[0] would
# then be a column lookup, and no column is named 0).
groups_df = groups_df[0].iloc[:, [0,1]]
groups_df.head(3)

Unnamed: 0,Name,Description
0,B00001,UNWEIGHTED SAMPLE COUNT OF THE POPULATION
1,B00002,UNWEIGHTED SAMPLE HOUSING UNITS
2,B01001,SEX BY AGE


## 1.1 Search groups

In [4]:
# Case-insensitive keyword search over the group descriptions.
# na=False makes rows with a missing description count as "no match"; without
# it the mask would contain NaN and could not be used as a boolean indexer.
mask = groups_df.Description.str.contains('Poverty', case=False, regex=True, na=False)
# Show the first few matching groups.
groups_df.loc[mask].head(3)

Unnamed: 0,Name,Description
64,B05010,RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 1...
104,B06012,PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 1...
105,B06012PR,PLACE OF BIRTH BY POVERTY STATUS IN THE PAST 1...


# 2. Create download class
- Replace `YOURAPIHERE` in the code below with your own Census API key.


In [5]:
class api_fetch:
    """Build and run a single query against the 2018 ACS Census API.

    Parameters
    ----------
    variable_name : str
        Text placed directly after ``?&get=`` in the URL: the comma-separated
        variable codes followed by the geography clause. It must end with
        ``&`` because the ``key=...`` parameter is appended right after it
        (e.g. ``'B01003_001E&for=county:*&'``).
    database : str
        Dataset path below ``https://api.census.gov/data/2018/acs/``,
        e.g. ``'acs5'`` or ``'acs5/cprofile'``.
    api_key : str, optional
        Census API key. When omitted, the ``CENSUS_API_KEY`` environment
        variable is used, falling back to the ``YOURAPIHERE`` placeholder so
        the key does not have to be written into the notebook.

    Attributes
    ----------
    base, api_base : str
        Root URL and root URL + ``database``.
    api_key : str
        The ``key=<value>`` query parameter.
    variable_name : str
        The ``?&get=...`` part of the query string.
    api_final : str
        The complete request URL. It contains the API key, so avoid
        displaying it in saved output.
    """

    # Placeholder key used when neither the argument nor the env var is set.
    DEFAULT_API_KEY = 'YOURAPIHERE'
    # Environment variable consulted for the key when none is passed in.
    API_KEY_ENV_VAR = 'CENSUS_API_KEY'
    # Seconds to wait for the server before giving up instead of hanging.
    REQUEST_TIMEOUT = 30

    def __init__(self, variable_name, database, api_key=None):
        self.base = 'https://api.census.gov/data/2018/acs/'
        self.api_base = self.base + database
        if api_key is None:
            api_key = os.environ.get(self.API_KEY_ENV_VAR, self.DEFAULT_API_KEY)
        self.api_key = f'key={api_key}'
        self.variable_name = "?&get=" + variable_name

        self.api_final = f'{self.api_base}{self.variable_name}{self.api_key}'

    def request(self):
        """Send the GET request and return the raw ``requests.Response``.

        The status code is not checked here, so callers can inspect failed
        responses themselves.
        """
        return requests.get(self.api_final, timeout=self.REQUEST_TIMEOUT)

    def download(self):
        """Run the query and return the result as an all-float DataFrame.

        The first row of the JSON payload is used as the column names and
        removed from the data, so the returned index starts at 1.

        Every column is cast to float, including the ``state`` and ``county``
        codes, which therefore lose any leading zeros. Negative placeholder
        values present in the payload (e.g. -888888888) are passed through
        unchanged; callers must handle them.

        Raises
        ------
        requests.HTTPError
            If the server answers with a 4xx/5xx status.
        """
        response = self.request()
        # Fail with a clear HTTP error instead of a confusing JSON/parse error.
        response.raise_for_status()
        rows = response.json()
        table = pd.DataFrame(rows)
        # Promote the header row to column names, then drop it from the data.
        table = table.rename(columns=table.iloc[0]).drop(table.index[0])
        return table.astype(float)

# 3. Demographic downloads

## 3.1 [Download Population stats](https://api.census.gov/data/2018/acs/acs5/examples.html)

In [6]:
# ACS variable code to request (renamed to 'Total_pop' further down).
population_code = 'B01003_001E'
# Geography clause: all counties. The leading and trailing '&' are required
# because api_fetch concatenates "<codes><this clause>key=..." with no
# separators of its own.
county = '&for=county:*&'

In [7]:
# Build the county-level population query (nothing is sent yet).
population = api_fetch(population_code+county, database='acs5')
# Display the request URL with the key masked, so a real API key is never
# stored in the notebook's saved output.
population.api_final.replace(population.api_key, 'key=YOURAPIHERE')

'https://api.census.gov/data/2018/acs/acs5?&get=B01003_001E&for=county:*&key=YOURAPIHERE'

In [8]:
# Send the request and load the JSON response into a DataFrame
# (api_fetch.download casts every column, including state/county, to float).
df = population.download()
# Preview the first rows.
df.head()

Unnamed: 0,B01003_001E,state,county
1,47086.0,28.0,151.0
2,12028.0,28.0,111.0
3,8321.0,28.0,19.0
4,23480.0,28.0,57.0
5,10129.0,28.0,15.0


### 3.1.1 Download multiple demographics

In [9]:
# Several variable codes can be requested at once as a comma-separated list.
# In order, these are renamed below to: Total_pop, White, Black, Asian.
# NOTE: this overwrites the single-variable population_code defined earlier.
population_code = 'B01003_001E,B02001_002E,B02001_003E,B02001_005E'
# Same all-counties geography clause as before (redefined so this cell is self-contained).
county = '&for=county:*&'

In [10]:
# Build the multi-variable county-level query (nothing is sent yet).
demographics = api_fetch(population_code+county, database='acs5')
# Display the request URL with the key masked, so a real API key is never
# stored in the notebook's saved output.
demographics.api_final.replace(demographics.api_key, 'key=YOURAPIHERE')

'https://api.census.gov/data/2018/acs/acs5?&get=B01003_001E,B02001_002E,B02001_003E,B02001_005E&for=county:*&key=YOURAPIHERE'

In [11]:
# Send the request; the result has one column per requested variable plus
# the state and county columns, all cast to float by api_fetch.download.
demographics_df = demographics.download()

In [12]:
# Replace the raw variable codes with readable names. The assignment is
# positional, so the list must follow the order of the requested codes
# (B01003_001E, B02001_002E, B02001_003E, B02001_005E) followed by state, county.
demographics_df.columns = ['Total_pop', 'White', 'Black', 'Asian','state','county']
# Display the frame (pandas truncates the middle rows of long frames).
demographics_df

Unnamed: 0,Total_pop,White,Black,Asian,state,county
1,47086.0,12097.0,33982.0,282.0,28.0,151.0
2,12028.0,9475.0,2368.0,17.0,28.0,111.0
3,8321.0,5631.0,2596.0,15.0,28.0,19.0
4,23480.0,21346.0,1681.0,57.0,28.0,57.0
5,10129.0,6523.0,3503.0,0.0,28.0,15.0
...,...,...,...,...,...,...
3216,17672.0,17215.0,181.0,79.0,19.0,43.0
3217,20260.0,15684.0,551.0,2155.0,19.0,21.0
3218,10674.0,10372.0,3.0,27.0,19.0,77.0
3219,9566.0,9315.0,25.0,40.0,19.0,91.0


## 3.2 [Poverty](https://api.census.gov/data/2018/acs/acs5/cprofile/variables/CP03_2014_2018_128E.json)

In [13]:
# Comparison-profile variable used as the poverty measure
# (its column is renamed to 'poverty_pct' after download).
poverty_code = 'CP03_2014_2018_128E'

In [14]:
# CP03 variables live in the 'acs5/cprofile' dataset rather than plain 'acs5'.
# Reuses the all-counties clause `county` defined in an earlier cell.
poverty_request = api_fetch(poverty_code+county, 'acs5/cprofile')

In [15]:
# Download the poverty variable for every county.
poverty_df = poverty_request.download()
# Positional rename: the single requested variable, then state and county.
poverty_df.columns = ['poverty_pct', 'state', 'county']

# Preview the first rows.
poverty_df.head(3)

Unnamed: 0,poverty_pct,state,county
1,33.3,28.0,151.0
2,15.4,28.0,57.0
3,14.8,28.0,15.0


## 3.3 Income

### 3.3.1 [Incorrect measure, but a good example of erasing an outlier](https://api.census.gov/data/2018/acs/acs5/cprofile/variables/CP03_2014_2018_059E.json)

2014-2018 Estimate!!INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS)!!Total households!!$100,000 to $149,999

In [16]:
# Per the variable description above: households with income of
# $100,000 to $149,999 (not the intended income measure; used here only
# to demonstrate outlier handling).
income_code = 'CP03_2014_2018_059E'

In [17]:
# Build the county-level query against the comparison-profile dataset.
income_request = api_fetch(income_code+county, 'acs5/cprofile')

In [18]:
# Download the data; the value column keeps its raw code name
# (CP03_2014_2018_059E), which the following cells use directly.
income_df = income_request.download()

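#### 3.3.1.1 Erase outlier
- A percentage of the population cannot be negative, so the large negative value found below (-888888888) is treated as an outlier and set to 0. It appears to be a placeholder for an unavailable estimate rather than a real measurement.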

In [19]:
# List the rows whose value is below zero (impossible for this measure).
income_df[income_df['CP03_2014_2018_059E'].lt(0)]

Unnamed: 0,CP03_2014_2018_059E,state,county
247,-888888888.0,35.0,39.0


In [20]:
# Overwrite every negative value in the column with 0.
# NOTE(review): the negative value looks like a "no estimate" placeholder;
# zero-filling it affects the mean shown by describe() - confirm 0 is intended.
income_df.loc[income_df['CP03_2014_2018_059E'].lt(0), 'CP03_2014_2018_059E'] = 0

In [21]:
# Summary statistics after the replacement; the minimum of the value column
# is no longer negative.
income_df.describe()

Unnamed: 0,CP03_2014_2018_059E,state,county
count,2906.0,2906.0,2906.0
mean,11.935857,31.15967,102.335169
std,4.307429,16.510675,107.067224
min,0.0,1.0,1.0
25%,9.1,18.0,35.0
50%,11.7,29.0,79.0
75%,14.5,46.75,133.0
max,31.7,72.0,840.0


### 3.3.2 Mean household income
2014-2018 Estimate!!INCOME AND BENEFITS (IN 2018 INFLATION-ADJUSTED DOLLARS)!!Total households!!Mean household income (dollars)	

In [22]:
# Mean household income per county. This cell deliberately overwrites
# income_code, income_request and income_df from the outlier example above.
income_code = 'CP03_2014_2018_063E'
income_request = api_fetch(income_code+county, 'acs5/cprofile')
income_df = income_request.download()
# Positional rename: the single requested variable, then state and county.
# NOTE(review): negative placeholder values are not cleaned in this frame;
# check whether the county flagged in the previous section is affected here too.
income_df.columns = ['income_mean', 'state', 'county']

# Preview the first rows.
income_df.head(3)

Unnamed: 0,income_mean,state,county
1,49433.0,28.0,151.0
2,53717.0,28.0,57.0
3,57919.0,28.0,15.0


## 3.4 [Gender](https://api.census.gov/data/2018/acs/acs5/groups/B01001.html)
Estimate!!Total!!Female	

In [23]:
# Variable code for the female population count (Estimate!!Total!!Female).
female_code = 'B01001_026E'
# Keep the request object under its own name instead of rebinding
# female_code from a string to an api_fetch instance; this matches the
# *_code / *_request naming used for poverty and income.
female_request = api_fetch(female_code+county, 'acs5')

# Download and give the columns readable names (positional: value, state, county).
female_df = female_request.download()
female_df.columns = ['female_count', 'state', 'county']
female_df.head()

Unnamed: 0,female_count,state,county
1,25068.0,28.0,151.0
2,6183.0,28.0,111.0
3,4147.0,28.0,19.0
4,11659.0,28.0,57.0
5,5013.0,28.0,15.0


## 3.5 [College degree](https://api.census.gov/data/2018/acs/acs5/cprofile/variables.html)
2014-2018 Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree	

In [24]:
# Comparison-profile variable for the share of the population 25 and over
# holding a bachelor's degree (per the description above).
college_code = 'CP02_2014_2018_064E'

In [25]:
# Store the request object under its own name. Rebinding college_code here
# (it is defined as a string in the previous cell) would make this cell fail
# on a second run, because an api_fetch object cannot be concatenated with
# the `county` string.
college_request = api_fetch(college_code+county, 'acs5/cprofile')

# Download and give the columns readable names (positional: value, state, county).
college_df = college_request.download()
college_df.columns = ['college', 'state', 'county']
college_df.head()

Unnamed: 0,college,state,county
1,11.5,28.0,151.0
2,8.5,28.0,57.0
3,9.5,28.0,15.0
4,12.4,28.0,43.0
5,5.4,28.0,63.0


## 3.6 [Age](https://api.census.gov/data/2018/acs/acs5/groups/B01002.html)

In [26]:
# Variable code for the age measure (renamed to 'age_median' below).
age_code = 'B01002_001E'
# Keep the request object under its own name instead of rebinding age_code
# from a string to an api_fetch instance; this matches the *_code /
# *_request naming used for poverty and income.
age_request = api_fetch(age_code+county, 'acs5')

# Download and give the columns readable names (positional: value, state, county).
age_df = age_request.download()
age_df.columns = ['age_median', 'state', 'county']
age_df.head()

Unnamed: 0,age_median,state,county
1,36.9,28.0,151.0
2,40.9,28.0,111.0
3,44.0,28.0,19.0
4,40.0,28.0,57.0
5,47.0,28.0,15.0


# 4. Merge data and export to csv

In [27]:
# Place the six frames side by side. Alignment is by row label only, not by
# (state, county), so rows from differently ordered downloads do not describe
# the same county - compare the county columns in the output below.
# The key-based merge in the next cell replaces this result.
merged = pd.concat(
    [
        income_df,
        poverty_df,
        college_df,
        female_df,
        demographics_df,
        age_df,
    ],
    axis='columns',
)
merged.head(3)

Unnamed: 0,income_mean,state,county,poverty_pct,state.1,county.1,college,state.2,county.2,female_count,...,county.3,Total_pop,White,Black,Asian,state.3,county.4,age_median,state.4,county.5
1,49433.0,28.0,151.0,33.3,28.0,151.0,11.5,28.0,151.0,25068.0,...,151.0,47086.0,12097.0,33982.0,282.0,28.0,151.0,36.9,28.0,151.0
2,53717.0,28.0,57.0,15.4,28.0,57.0,8.5,28.0,57.0,6183.0,...,111.0,12028.0,9475.0,2368.0,17.0,28.0,111.0,40.9,28.0,111.0
3,57919.0,28.0,15.0,14.8,28.0,15.0,9.5,28.0,15.0,4147.0,...,19.0,8321.0,5631.0,2596.0,15.0,28.0,19.0,44.0,28.0,19.0


In [28]:
# Join all frames on the (state, county) key, starting from income_df.
merged = income_df
dfs = [poverty_df, college_df, female_df, demographics_df, age_df]
# The loop variable is named `frame` so it does not overwrite the population
# DataFrame `df` created earlier in the notebook.
for frame in dfs:
    # how='outer' keeps counties that are missing from some datasets (their
    # absent values become NaN). validate='one_to_one' raises if a
    # (state, county) pair is duplicated on either side, which would
    # otherwise silently multiply rows.
    merged = pd.merge(merged, frame, on=['state', 'county'], how='outer',
                      validate='one_to_one')

In [35]:
# Move the (state, county) key columns into the row index.
# This cell can only run once per merge: afterwards 'state' and 'county' are
# no longer columns, so re-run the merge cell before running it again.
merged = merged.set_index(['state', 'county'])
merged

Unnamed: 0_level_0,Unnamed: 1_level_0,income_mean,poverty_pct,college,female_count,Total_pop,White,Black,Asian,age_median
state,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
28.0,151.0,49433.0,33.3,11.5,25068.0,47086.0,12097.0,33982.0,282.0,36.9
28.0,57.0,53717.0,15.4,8.5,11659.0,23480.0,21346.0,1681.0,57.0,40.0
28.0,15.0,57919.0,14.8,9.5,5013.0,10129.0,6523.0,3503.0,0.0,47.0
28.0,43.0,54163.0,24.5,12.4,11175.0,21278.0,11736.0,9238.0,23.0,39.8
28.0,63.0,34501.0,49.7,5.4,3704.0,7346.0,979.0,6310.0,33.0,39.3
...,...,...,...,...,...,...,...,...,...,...
13.0,61.0,,,,1528.0,3001.0,902.0,2036.0,41.0,45.1
16.0,71.0,,,,2243.0,4326.0,4206.0,4.0,0.0,37.9
16.0,37.0,,,,1976.0,4141.0,3834.0,4.0,0.0,53.3
17.0,151.0,,,,1900.0,4249.0,3910.0,223.0,27.0,53.7


In [37]:
# Write the merged table to Control_data.csv in the current working
# directory. The index is written too, so (assuming the set_index cell above
# has been run) state and county become the first two CSV columns.
merged.to_csv('Control_data.csv')