## acs_etl.acs



Pull data using the Census API and perform basic data cleaning.

### Source Code

In [None]:
# default_exp acs

In [None]:
#export
import os

# Step up one directory so later relative paths resolve from the parent folder
# (presumably the project root — confirm).
# NOTE(review): this cell is marked for export, so the working directory also
# changes whenever the exported module is imported.
os.chdir(os.pardir)

In [None]:
#export
from dataclasses import dataclass
from nbdev.showdoc import *
from us import states
import numpy as np
import pandas as pd
import requests
import json
import census


In [None]:
#export
@dataclass
class ACS_Blockgroup_Data_Pull:
    '''Block-group-level ACS 5-year data for one state, raw and cleaned.

    Constructing an instance immediately queries the Census API (see
    ``__post_init__``), so it needs network access and a working API key.

    Attributes:
        raw_data: API response as a DataFrame. Always overwritten during
            construction, even when a value is passed in.
        cleaned_data: ``raw_data`` with readable column names, a county name
            column, and Census missing-value codes replaced by NaN. Always
            overwritten during construction.
        var_dict: Never populated by this class; it keeps whatever the caller
            passed (``None`` by default).
        key: Census API key used for the request.
    '''

    raw_data: pd.DataFrame = None
    cleaned_data: pd.DataFrame = None
    # NOTE(review): nothing in this class fills var_dict. Earlier, now-removed
    # code loaded the Census variable definitions into it from
    # https://api.census.gov/data/2019/acs/acs5/variables.json
    var_dict: dict = None
    # NOTE(review): API credential committed to source; consider loading it
    # from the environment and rotating this key.
    key: str = 'c4753d52a740be73d2c0b6d950b75e5dd0f1c8f1'

    # Census variable code -> readable column name used in cleaned_data.
    _COLUMN_NAMES = {
        'B01003_001E': 'total_population',
        'B01002_001E': 'median_age',
        'B19301_001E': 'per_capita_income',
        'B25071_001E': 'median_rent_to_household_income',
        'B25010_001E': 'avg_household_size',
    }

    # Column order of cleaned_data.
    _OUTPUT_COLUMNS = (
        'state_id', 'state', 'county_id', 'county', 'block group', 'tract',
        'total_population', 'median_age', 'per_capita_income',
        'median_rent_to_household_income', 'avg_household_size', 'NAME',
    )

    # Codes the ACS API returns in place of an estimate it could not publish
    # (e.g. too few sample observations). They are not real measurements.
    _MISSING_SENTINELS = (
        -999999999, -888888888, -666666666,
        -555555555, -333333333, -222222222,
    )

    def __post_init__(self, year=2019, state='PA', variables=('NAME', 'B01003_001E', 'B01002_001E', 'B19301_001E', 'B25071_001E', 'B25010_001E')):
        '''Get raw data from Census API and execute basic data cleaning.

        The dataclass-generated ``__init__`` calls this hook without
        arguments, so the defaults below are what a plain constructor call
        uses; call the method directly to pull a different year or state.

        Args:
            year: ACS 5-year vintage passed to the Census client.
            state: Attribute name looked up on ``us.states`` (e.g. ``'PA'``)
                to obtain the state FIPS code; also stored in the ``state``
                column of ``cleaned_data``.
            variables: Census variable codes to request. The cleaning step
                expects ``NAME`` plus the five codes in ``_COLUMN_NAMES``.
        '''
        self.raw_data = self._pull_raw(year, state, variables)
        self.cleaned_data = self._clean(self.raw_data, state)

    def _pull_raw(self, year, state, variables):
        '''Request ``variables`` for every block group in ``state``.'''
        client = census.Census(key=self.key, year=year)
        state_fips = getattr(states, state).fips

        # Wildcards select every county, tract and block group in the state.
        geo = {'for': 'block group:*',
               'in': f'state:{state_fips} county:* tract:*'}

        return pd.DataFrame(client.acs5.get(variables, geo))

    @classmethod
    def _clean(cls, raw_data, state):
        '''Build the cleaned table from a raw API frame without modifying it.'''
        # rename() returns a new frame, so raw_data itself stays untouched.
        cleaned = raw_data.rename(
            columns={**cls._COLUMN_NAMES,
                     'state': 'state_id', 'county': 'county_id'})

        # Turn the Census "no estimate" codes into NaN so they cannot be
        # mistaken for real values in sums, means or plots.
        value_cols = list(cls._COLUMN_NAMES.values())
        values = cleaned[value_cols]
        cleaned[value_cols] = values.mask(
            values.isin(list(cls._MISSING_SENTINELS)))

        cleaned['state'] = state
        # NAME reads "Block Group N, Census Tract T, <County>, <State>";
        # the third comma-separated piece is the county name.
        cleaned['county'] = [name.split(',')[2].strip()
                             for name in raw_data['NAME']]

        return cleaned[list(cls._OUTPUT_COLUMNS)]
        

### Test Examples

In [None]:
# Instantiating runs __post_init__, which sends the Census API request
# (network access and a working key are required) and fills cleaned_data.
a = ACS_Blockgroup_Data_Pull()
a.cleaned_data.head()

Unnamed: 0,state_id,state,county_id,county,block group,tract,total_population,median_age,per_capita_income,median_rent_to_household_income,avg_household_size,NAME
0,42,PA,129,Westmoreland County,3,801300,1779.0,39.7,28437.0,25.4,2.43,"Block Group 3, Census Tract 8013, Westmoreland..."
1,42,PA,129,Westmoreland County,3,801900,2162.0,47.2,28352.0,-666666666.0,2.71,"Block Group 3, Census Tract 8019, Westmoreland..."
2,42,PA,129,Westmoreland County,2,801900,958.0,50.1,47710.0,-666666666.0,2.6,"Block Group 2, Census Tract 8019, Westmoreland..."
3,42,PA,129,Westmoreland County,1,801900,1387.0,49.7,20723.0,51.0,2.24,"Block Group 1, Census Tract 8019, Westmoreland..."
4,42,PA,129,Westmoreland County,4,801900,2001.0,56.8,35746.0,51.0,1.81,"Block Group 4, Census Tract 8019, Westmoreland..."
