## Update the ACS data dictionaries

In [1]:
import json
from urllib.request import urlopen, Request
import pandas as pd
from tqdm import tqdm

In [2]:
# Retrieve ACS disctionary files from the new URLS (provided by Emily)
request_14 = Request('https://api.census.gov/data/2014/acs/acs5/subject/variables.json')
request_19 = Request('https://api.census.gov/data/2019/acs/acs5/subject/variables.json')
response_14 = urlopen(request_14)
response_19 = urlopen(request_19)
elevations_14 = response_14.read()
elevations_19 = response_19.read()
acs_14_raw = json.loads(elevations_14)
acs_19_raw = json.loads(elevations_19)

In [4]:
# Reorganize results of json (returned in the prev step) in a pandas df
# ---Explore a vectorized alternative for the following function----

def reorganize_acs_dic(dic):
    dic_1 = pd.json_normalize(dic['variables'], max_level = 0)
    dic_cols = ['variable_code', 'label', 'concept', 'predicateType', 'group', 'limit', 'attributes', 'predicateOnly']
    dic_2 = pd.DataFrame(columns = dic_cols)
    l = dic_1.shape[1]
    i = 0
    for c in dic_1.columns:
        i+=1
        dic_3 = pd.json_normalize(dic_1.loc[0, c])
        dic_3['variable_code'] = c
        dic_2 = dic_2.append(dic_3, ignore_index = True)
        print('Processing {}%'.format((i*100)/l), end = '\r')
    return dic_2

In [5]:
acs_14_processed = reorganize_acs_dic(acs_14_raw)

Processing 100.0%21297678839%%%%

In [6]:
acs_19_processed = reorganize_acs_dic(acs_19_raw)

Processing 100.0%68396151188%%%%

In [7]:
acs_14_processed.head(5)

Unnamed: 0,variable_code,label,concept,predicateType,group,limit,attributes,predicateOnly,required,values.item.R1,...,values.item.H1,values.item.H2,values.item.J0,values.item.L0,values.item.M0,values.item.M1,values.item.M3,values.item.M5,values.item.M6,values.item.P0
0,for,Census API FIPS 'for' clause,Census API Geography Specification,fips-for,,0,,True,,,...,,,,,,,,,,
1,in,Census API FIPS 'in' clause,Census API Geography Specification,fips-in,,0,,True,,,...,,,,,,,,,,
2,ucgid,Uniform Census Geography Identifier clause,Census API Geography Specification,ucgid,,0,,True,,,...,,,,,,,,,,
3,S0804_C04_068E,Public transportation (excluding taxicab)!!Est...,MEANS OF TRANSPORTATION TO WORK BY SELECTED CH...,float,S0804,0,"S0804_C04_068EA,S0804_C04_068M,S0804_C04_068MA",,,,...,,,,,,,,,,
4,S0503_C02_078E,Foreign born; Born in Europe!!Estimate!!INDUST...,SELECTED CHARACTERISTICS OF THE FOREIGN-BORN P...,float,S0503,0,"S0503_C02_078EA,S0503_C02_078M,S0503_C02_078MA",,,,...,,,,,,,,,,


In [8]:
acs_14_processed_1.loc[~acs_14_processed_1['values.item.R1'].isna()]

NameError: name 'acs_14_processed_1' is not defined

In [None]:
# Data cleaning : 2014 ACS Dictionary
acs_14_processed_1 = (
    acs_14_processed
    .loc[3:, :]                                          # Drop first 3 rows
    .loc[acs_14_processed['variable_code'] != 'GEOCOMP'] # Drop GEOCOMP variable code
    .dropna(axis = 1, how = 'all')                       # Drop columns with all null entries 
    .drop('limit', axis = 1, errors = 'ignore')          # Limit column only contains 0s
    .reset_index(drop = True)
)

acs_14_processed_1['label'] = acs_14_processed_1['label'].str.upper() # Change labels to upper case

acs_14_processed_1.head(5)

In [None]:
acs_19_processed.head(5)

In [None]:
# Data cleaning : 2019 ACS Dictionary
acs_19_processed_1 = (
    acs_19_processed
    .loc[3:, :]                                          # Drop first 3 rows
    .loc[acs_19_processed['variable_code'] != 'GEOCOMP'] # Drop GEOCOMP variable code
    .dropna(axis = 1, how = 'all')                       # Drop columns with all null entries 
    .drop('limit', axis = 1, errors = 'ignore')          # Limit column only contains 0s
    .reset_index(drop = True)
)

acs_19_processed_1['label'] = acs_19_processed_1['label'].str.upper() # Change labels to upper case

acs_19_processed_1.head(5)

In [None]:
acs_14_processed_1.describe()

In [None]:
acs_19_processed_1.describe()

In [None]:
acs_14_processed_1.to_csv('data/acs_dictionary_2014_updated.csv')
acs_19_processed_1.to_csv('data/acs_dictionary_2019_updated.csv')