In [9]:
import sys
sys.path.append('..')
from ilm_ini.ilm_ini import ini
import os

import requests
import pandas as pd
import numpy as np

# References Google's Geocoding API
# https://developers.google.com/maps/documentation/geocoding/start
# Uses f-strings, Python 3.6+
def geocode_address(address):
    api_key = ilm_ini.read('Google APIs','geocoding')
    address = [x.replace(" ", "+") for x in address.split(",")]
    url = f'https://maps.googleapis.com/maps/api/geocode/json?address={address[0]},{address[1]},{address[2]}&key={api_key}'
    response = requests.get(url)
    response = response.json()
    try:
        if response['status'] == 'REQUEST_DENIED':
            print(response['status'])
            return(response['error_message'])
        elif response['status'] == 'OK':
            result = response['results'][0]
            output = {'address' : result['formatted_address'], 'location' : result['geometry']['location']}
    except:
        print("Error in geocoding. URL attempted: ", url)
        return(url)
    return(output)

# Expects an 'address' dictionary output by geocode_address
def address_parts(address, location):
    address_parts = [x.strip() for x in address.split(",")]
    output = {'address' : address, 'location' : location, 'address_parts' : {'street' : address_parts[0], 'city' : address_parts[1], 'state' : address_parts[2][:2], 'zip_code' : address_parts[2][-5:], 'country' : address_parts[3]}}
    return(output)

# Expects an 'address' dictionary output by geocode_address, with further modification by address_parts
def address_census_keys(address, location, address_parts, benchmark = 'Public_AR_Current', vintage = 'ACS2019_Current'):
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'
    layers = ['2010 Census Blocks', 'Secondary School Districts', '2019 State Legislative Districts - Upper', 'County Subdivisions', 'Elementary School Districts', 'Metropolitan Statistical Areas', 'Counties', '2019 State Legislative Districts - Lower', 'Census Block Groups', 'Combined Statistical Areas', '2010 Census ZIP Code Tabulation Areas', 'Census Tracts']
    url_params = {'x' : location['lng'], 'y' : location['lat'], 'benchmark' : benchmark, 'vintage' : vintage, 'layers' : layers, 'format' : 'json'}
    response = requests.get(url, params = url_params)
    result = response.json()['result']
    geographies = {k1: {k2: v2 for k2, v2 in next(iter(v1 or []), dict()).items() if (k2 in ['GEOID', 'CENTLAT', 'BASENAME', 'NAME', 'CENTLON'])} for k1, v1 in result['geographies'].items()}
    output = {'address' : address, 'location' : location, 'address_parts' : address_parts, 'geographies' : geographies}
    return(output)

# Sample call - datasets_census(vintage = 2017)
def datasets_census(dataset = False, vintage = False):
    # URLs with list of datasets
    urls = ['https://api.census.gov/data/2010.json','https://api.census.gov/data.json']

    # Get list of datasets from both endpoints
    datasets = pd.DataFrame()
    for url in urls:
        response = requests.get(url) # GET
        response = response.json()
        df = pd.DataFrame(response['dataset'])
        if 'c_isTimeseries' not in df.columns:
            df.insert(loc = len(df.columns), column = 'c_isTimeseries', value = np.nan)
            datasets = pd.concat([datasets, df], sort = False, ignore_index = True)
        else:
            datasets = pd.concat([datasets, df], sort = False, ignore_index = True)

    # vintage parameter
    if vintage:
        datasets = datasets[datasets['c_vintage'] == vintage]  

    # dataset parameter        
    if dataset:
        dataset_tags = dict(zip(datasets['identifier'], datasets['c_dataset']))
        df_ids = list(dict(filter(lambda elem: len(list(filter(lambda k: dataset in k, elem[1])))>0, dataset_tags.items())).keys())
        datasets = datasets[datasets['identifier'].isin(df_ids)]

    return(datasets)

In [10]:
ilm_ini = ini()

Existing INI file found. Loading credentials...


In [11]:
orange = geocode_address('2074 East Orange Street, Tempe, AZ')
orange = address_parts(**orange)
orange = address_census_keys(**orange)
print(orange)

REQUEST_DENIED


TypeError: address_parts() argument after ** must be a mapping, not str

In [3]:
datasets = datasets_census(dataset = 'acs5', vintage = 2018)
datasets.head()

Unnamed: 0,c_vintage,c_dataset,c_geographyLink,c_variablesLink,c_tagsLink,c_examplesLink,c_groupsLink,c_valuesLink,c_documentationLink,c_isAggregate,...,license,modified,programCode,references,spatial,temporal,publisher,accessLevel,bureauCode,c_isTimeseries
407,2018.0,"[acs, acs5]",https://api.census.gov/data/2018/acs/acs5/geog...,https://api.census.gov/data/2018/acs/acs5/vari...,https://api.census.gov/data/2018/acs/acs5/tags...,https://api.census.gov/data/2018/acs/acs5/exam...,https://api.census.gov/data/2018/acs/acs5/grou...,https://api.census.gov/data/2018/acs/acs5/valu...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 16:28:02.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
410,2018.0,"[acs, acs5, cprofile]",https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 14:54:09.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
412,2018.0,"[acs, acs5, profile]",https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 16:22:18.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
413,2018.0,"[acs, acs5, subject]",https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 15:36:29.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,


In [4]:
print(list(datasets))

['c_vintage', 'c_dataset', 'c_geographyLink', 'c_variablesLink', 'c_tagsLink', 'c_examplesLink', 'c_groupsLink', 'c_valuesLink', 'c_documentationLink', 'c_isAggregate', 'c_isCube', 'c_isAvailable', '@type', 'title', 'description', 'distribution', 'contactPoint', 'identifier', 'keyword', 'license', 'modified', 'programCode', 'references', 'spatial', 'temporal', 'publisher', 'accessLevel', 'bureauCode', 'c_isTimeseries']


In [5]:
for elem in datasets['c_geographyLink']:
    print(elem)

https://api.census.gov/data/2018/acs/acs5/geography.json
https://api.census.gov/data/2018/acs/acs5/cprofile/geography.json
https://api.census.gov/data/2018/acs/acs5/profile/geography.json
https://api.census.gov/data/2018/acs/acs5/subject/geography.json


In [6]:
var_url = datasets.iloc[0,3]

def parse_dataset_varlist(url):
    response = requests.get(url)
    response = response.json()
    response = response['variables']
    df = pd.DataFrame.from_dict(response, orient = 'index')
    return(df)
variables = parse_dataset_varlist(var_url)
list(variables)

['label',
 'concept',
 'predicateType',
 'group',
 'limit',
 'predicateOnly',
 'attributes',
 'required']

In [7]:
geo_url = datasets.iloc[0,2]

def parse_dataset_geography(url):
    response = requests.get(url)
    response = response.json()
    response = response['fips']
    df = pd.DataFrame(response)
    return(df)

geography = parse_dataset_geography(geo_url)
geography[geography['name'] == 'tract']

Unnamed: 0,name,geoLevelDisplay,referenceDate,requires,wildcard,optionalWithWCFor
8,tract,140,2018-01-01,"[state, county]",[county],county


In [12]:
def api_call_url(base, dataset_vars, geography, key = ''):
    get = f"?get={','.join(dataset_vars)}"
    geo_for = f"&for={geography['geo_for']}"
    geo_in = f"&in={geography['geo_in']}"
    key = f"&key={key}"

    url = base + get + geo_for + geo_in + key
    return(url)

def _api_call(url):
    response = requests.get(url)
    response = response.text
    #response = response.split("\n")
    return(response)

def api_call(base, dataset_vars, geography, key = ''):
    url = api_call_url(base, dataset_vars, geography, key)
    response = _api_call(url)
    return(response)

In [14]:
base = list(datasets['distribution'])[0][0]['accessURL']
var_list = ['NAME','B19013E_001E']
geography = {'geo_for':'tract:*', 'geo_in':'state:01'}
key = ''

api_call_url(base, var_list, geography, key)
data = api_call(base, var_list, geography, key)
for elem in data:
    print(elem)

sus Tract 417, Houston County, Alabama","-666666666","01","069","041700"],
["Census Tract 404, Houston County, Alabama","-666666666","01","069","040400"],
["Census Tract 402.01, Houston County, Alabama","-666666666","01","069","040201"],
["Census Tract 9611, DeKalb County, Alabama","-666666666","01","049","961100"],
["Census Tract 9604, DeKalb County, Alabama","-666666666","01","049","960400"],
["Census Tract 9605, DeKalb County, Alabama","-666666666","01","049","960500"],
["Census Tract 9608, DeKalb County, Alabama","-666666666","01","049","960800"],
["Census Tract 9612, DeKalb County, Alabama","-666666666","01","049","961200"],
["Census Tract 9614, DeKalb County, Alabama","-666666666","01","049","961400"],
["Census Tract 9610, DeKalb County, Alabama","-666666666","01","049","961000"],
["Census Tract 9603, DeKalb County, Alabama","-666666666","01","049","960300"],
["Census Tract 9609, DeKalb County, Alabama","-666666666","01","049","960900"],
["Census Tract 9613, DeKalb County, Alabam