In [12]:
import sys
sys.path.append('..')
from ilm_ini.ilm_ini import ini
import os
import ast

import requests
import pandas as pd
import numpy as np

# References Google's Geocoding API
# https://developers.google.com/maps/documentation/geocoding/start
# Uses f-strings, Python 3.6+
def geocode_address(address):
    api_key = ilm_ini.read('Google APIs','geocoding')
    address = [x.replace(" ", "+") for x in address.split(",")]
    url = f'https://maps.googleapis.com/maps/api/geocode/json?address={address[0]},{address[1]},{address[2]}&key={api_key}'
    response = requests.get(url)
    response = response.json()
    try:
        if response['status'] == 'REQUEST_DENIED':
            print(response['status'])
            return(response['error_message'])
        elif response['status'] == 'OK':
            result = response['results'][0]
            output = {'address' : result['formatted_address'], 'location' : result['geometry']['location']}
    except:
        print("Error in geocoding. URL attempted: ", url)
        return(url)
    return(output)

# Expects an 'address' dictionary output by geocode_address
def address_parts(address, location):
    address_parts = [x.strip() for x in address.split(",")]
    output = {'address' : address, 'location' : location, 'address_parts' : {'street' : address_parts[0], 'city' : address_parts[1], 'state' : address_parts[2][:2], 'zip_code' : address_parts[2][-5:], 'country' : address_parts[3]}}
    return(output)

# Expects an 'address' dictionary output by geocode_address, with further modification by address_parts
def address_census_keys(address, location, address_parts, benchmark = 'Public_AR_Current', vintage = 'ACS2019_Current'):
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'
    layers = ['2010 Census Blocks', 'Secondary School Districts', '2019 State Legislative Districts - Upper', 'County Subdivisions', 'Elementary School Districts', 'Metropolitan Statistical Areas', 'Counties', '2019 State Legislative Districts - Lower', 'Census Block Groups', 'Combined Statistical Areas', '2010 Census ZIP Code Tabulation Areas', 'Census Tracts']
    url_params = {'x' : location['lng'], 'y' : location['lat'], 'benchmark' : benchmark, 'vintage' : vintage, 'layers' : layers, 'format' : 'json'}
    response = requests.get(url, params = url_params)
    result = response.json()['result']
    geographies = {k1: {k2: v2 for k2, v2 in next(iter(v1 or []), dict()).items() if (k2 in ['GEOID', 'CENTLAT', 'BASENAME', 'NAME', 'CENTLON'])} for k1, v1 in result['geographies'].items()}
    output = {'address' : address, 'location' : location, 'address_parts' : address_parts, 'geographies' : geographies}
    return(output)

# Sample call - datasets_census(vintage = 2017)
def datasets_census(dataset = False, vintage = False):
    # URLs with list of datasets
    urls = ['https://api.census.gov/data/2010.json','https://api.census.gov/data.json']

    # Get list of datasets from both endpoints
    datasets = pd.DataFrame()
    for url in urls:
        response = requests.get(url) # GET
        response = response.json()
        df = pd.DataFrame(response['dataset'])
        if 'c_isTimeseries' not in df.columns:
            df.insert(loc = len(df.columns), column = 'c_isTimeseries', value = np.nan)
            datasets = pd.concat([datasets, df], sort = False, ignore_index = True)
        else:
            datasets = pd.concat([datasets, df], sort = False, ignore_index = True)

    # vintage parameter
    if vintage:
        datasets = datasets[datasets['c_vintage'] == vintage]  

    # dataset parameter        
    if dataset:
        dataset_tags = dict(zip(datasets['identifier'], datasets['c_dataset']))
        df_ids = list(dict(filter(lambda elem: len(list(filter(lambda k: dataset in k, elem[1])))>0, dataset_tags.items())).keys())
        datasets = datasets[datasets['identifier'].isin(df_ids)]

    return(datasets)

In [10]:
ilm_ini = ini()

Existing INI file found. Loading credentials...


In [11]:
orange = geocode_address('2074 East Orange Street, Tempe, AZ')
orange = address_parts(**orange)
orange = address_census_keys(**orange)
print(orange)

REQUEST_DENIED


TypeError: address_parts() argument after ** must be a mapping, not str

In [2]:
datasets = datasets_census(dataset = 'acs5', vintage = 2018)
datasets.head()

Unnamed: 0,c_vintage,c_dataset,c_geographyLink,c_variablesLink,c_tagsLink,c_examplesLink,c_groupsLink,c_valuesLink,c_documentationLink,c_isAggregate,...,license,modified,programCode,references,spatial,temporal,publisher,accessLevel,bureauCode,c_isTimeseries
408,2018.0,"[acs, acs5]",https://api.census.gov/data/2018/acs/acs5/geog...,https://api.census.gov/data/2018/acs/acs5/vari...,https://api.census.gov/data/2018/acs/acs5/tags...,https://api.census.gov/data/2018/acs/acs5/exam...,https://api.census.gov/data/2018/acs/acs5/grou...,https://api.census.gov/data/2018/acs/acs5/valu...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 16:28:02.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
411,2018.0,"[acs, acs5, cprofile]",https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://api.census.gov/data/2018/acs/acs5/cpro...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 14:54:09.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
413,2018.0,"[acs, acs5, profile]",https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://api.census.gov/data/2018/acs/acs5/prof...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 16:22:18.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,
414,2018.0,"[acs, acs5, subject]",https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://api.census.gov/data/2018/acs/acs5/subj...,https://www.census.gov/developer/,True,...,https://creativecommons.org/publicdomain/zero/...,2019-10-22 15:36:29.0,[006:004],[https://www.census.gov/developers/],,unidentified,"{'@type': 'org:Organization', 'name': 'U.S. Ce...",,,


In [3]:
print(list(datasets))

['c_vintage', 'c_dataset', 'c_geographyLink', 'c_variablesLink', 'c_tagsLink', 'c_examplesLink', 'c_groupsLink', 'c_valuesLink', 'c_documentationLink', 'c_isAggregate', 'c_isCube', 'c_isAvailable', '@type', 'title', 'description', 'distribution', 'contactPoint', 'identifier', 'keyword', 'license', 'modified', 'programCode', 'references', 'spatial', 'temporal', 'publisher', 'accessLevel', 'bureauCode', 'c_isTimeseries']


In [4]:
for elem in datasets['c_geographyLink']:
    print(elem)

https://api.census.gov/data/2018/acs/acs5/geography.json
https://api.census.gov/data/2018/acs/acs5/cprofile/geography.json
https://api.census.gov/data/2018/acs/acs5/profile/geography.json
https://api.census.gov/data/2018/acs/acs5/subject/geography.json


In [5]:
var_url = datasets.iloc[0,3]

def parse_dataset_varlist(url):
    response = requests.get(url)
    response = response.json()
    response = response['variables']
    df = pd.DataFrame.from_dict(response, orient = 'index')
    return(df)
variables = parse_dataset_varlist(var_url)
list(variables)

['label',
 'concept',
 'predicateType',
 'group',
 'limit',
 'predicateOnly',
 'attributes',
 'required']

In [6]:
geo_url = datasets.iloc[0,2]

def parse_dataset_geography(url):
    response = requests.get(url)
    response = response.json()
    response = response['fips']
    df = pd.DataFrame(response)
    return(df)

geography = parse_dataset_geography(geo_url)
geography[geography['name'] == 'tract']

Unnamed: 0,name,geoLevelDisplay,referenceDate,requires,wildcard,optionalWithWCFor
8,tract,140,2018-01-01,"[state, county]",[county],county


In [85]:
def api_call_url(base, dataset_vars, geography, key = ''):
    get = f"?get={','.join(dataset_vars)}"
    geo_for = f"&for={geography['geo_for']}"
    geo_in = f"&in={geography['geo_in']}"
    key = f"&key={key}"

    url = base + get + geo_for + geo_in + key
    return(url)

def _api_call(url):
    response = requests.get(url)
    response = response.text
    try:
        response = response.split("\n")
        response[0] = response[0][1:(len(response[0])-1)]
        response[1:] = [x[:-1] for x in response[1:]]
        response = [ast.literal_eval(x) for x in response]
        columns = response[0]
        response = pd.DataFrame.from_records(response[1:], columns = columns)
    except:
        pass
    return(response)

def api_call(base, dataset_vars, geography, key = ''):
    url = api_call_url(base, dataset_vars, geography, key)
    response = _api_call(url)
    return(response)

In [86]:
fips = [
    ['name_state', 'abbreviation_state', 'fips_state'],
    ['Alabama', 'AL', '01'],
    ['Alaska', 'AK', '02'],
    ['Arizona', 'AZ', '04'],
    ['Arkansas', 'AR', '05'],
    ['California', 'CA', '06'],
    ['Colorado', 'CO', '08'],
    ['Connecticut', 'CT', '09'],
    ['Delaware', 'DE', '10'],
    ['Florida', 'FL', '12'],
    ['Georgia', 'GA', '13'],
    ['Hawaii', 'HI', '15'],
    ['Idaho', 'ID', '16'],
    ['Illinois', 'IL', '17'],
    ['Indiana', 'IN', '18'],
    ['Iowa', 'IA', '19'],
    ['Kansas', 'KS', '20'],
    ['Kentucky', 'KY', '21'],
    ['Louisiana', 'LA', '22'],
    ['Maine', 'ME', '23'],
    ['Maryland', 'MD', '24'],
    ['Massachusetts', 'MA', '25'],
    ['Michigan', 'MI', '26'],
    ['Minnesota', 'MN', '27'],
    ['Mississippi', 'MS', '28'],
    ['Missouri', 'MO', '29'],
    ['Montana', 'MT', '30'],
    ['Nebraska', 'NE', '31'],
    ['Nevada', 'NV', '32'],
    ['New Hampshire', 'NH', '33'],
    ['New Jersey', 'NJ', '34'],
    ['New Mexico', 'NM', '35'],
    ['New York', 'NY', '36'],
    ['North Carolina', 'NC', '37'],
    ['North Dakota', 'ND', '38'],
    ['Ohio', 'OH', '39'],
    ['Oklahoma', 'OK', '40'],
    ['Oregon', 'OR', '41'],
    ['Pennsylvania', 'PA', '42'],
    ['Rhode Island', 'RI', '44'],
    ['South Carolina', 'SC', '45'],
    ['South Dakota', 'SD', '46'],
    ['Tennessee', 'TN', '47'],
    ['Texas', 'TX', '48'],
    ['Utah', 'UT', '49'],
    ['Vermont', 'VT', '50'],
    ['Virginia', 'VA', '51'],
    ['Washington', 'WA', '53'],
    ['West Virginia', 'WV', '54'],
    ['Wisconsin', 'WI', '55'],
    ['Wyoming', 'WY', '56'],
    ['American Samoa', 'AS', '60'],
    ['Guam', 'GU', '66'],
    ['Northern Mariana Islands', 'MP', '69'],
    ['Puerto Rico', 'PR', '72'],
    ['Virgin Islands', 'VI', '78']]
fips = pd.DataFrame.from_records(fips[1:],columns = fips[0])
fips.head()

Unnamed: 0,name_state,abbreviation_state,fips_state
0,Alabama,AL,1
1,Alaska,AK,2
2,Arizona,AZ,4
3,Arkansas,AR,5
4,California,CA,6


In [87]:
base = list(datasets['distribution'])[0][0]['accessURL']
demographic_variables = {
    "NAME" : 'Name',
    "B01003_001E" : 'Total Population',
    "B23025_001E" : 'Population aged 16 and over',
    "B23025_003E" : 'Population aged 16 and over in civilian labor force',
    "B23025_004E" : 'Population aged 16 and over in civilian labor force, employed',
    "B19013_001E" : 'Median Household Income',
    "B25077_001E" : 'Median Home Price',
    "B25064_001E" : 'Median Gross Rent',
    "B25058_001E" : 'Median Contract Rent'}
geography = {'geo_for':'tract:*', 'geo_in':'state:35'}
key = '7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3'

print(api_call_url(base, demographic_variables.keys(), geography, key))
data = api_call(base, demographic_variables.keys(), geography, key)
data
#data.head()
#data.to_csv('/Users/hamzaamjad/Box/Data/Demographic/Census Bureau/Census Tract/35.csv')

https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:35&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3


4067","3494","1975","1893","41964","172600","699","669","35","001","000129"]',
 '["Census Tract 4.01, Bernalillo County, New Mexico","4285","3542","2251","2183","52730","246600","876","733","35","001","000401"]',
 '["Census Tract 2.08, Bernalillo County, New Mexico","3013","2542","1696","1634","41536","128300","1100","889","35","001","000208"]',
 '["Census Tract 1.19, Bernalillo County, New Mexico","1887","1546","960","938","88438","270300","847","729","35","001","000119"]',
 '["Census Tract 1.20, Bernalillo County, New Mexico","2991","2267","1572","1507","50250","186700","989","841","35","001","000120"]',
 '["Census Tract 2.05, Bernalillo County, New Mexico","3355","2731","1954","1841","33878","126200","820","722","35","001","000205"]',
 '["Census Tract 6.04, Bernalillo County, New Mexico","3885","3096","1795","1658","26612","120400","636","579","35","001","000604"]',
 '["Census Tract 37.14, Bernalillo County, New Mexico","7500","6502","4594","4006","36742","195400","823","749","35","

In [88]:
_api_call('https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:35&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3')

4067","3494","1975","1893","41964","172600","699","669","35","001","000129"]',
 '["Census Tract 4.01, Bernalillo County, New Mexico","4285","3542","2251","2183","52730","246600","876","733","35","001","000401"]',
 '["Census Tract 2.08, Bernalillo County, New Mexico","3013","2542","1696","1634","41536","128300","1100","889","35","001","000208"]',
 '["Census Tract 1.19, Bernalillo County, New Mexico","1887","1546","960","938","88438","270300","847","729","35","001","000119"]',
 '["Census Tract 1.20, Bernalillo County, New Mexico","2991","2267","1572","1507","50250","186700","989","841","35","001","000120"]',
 '["Census Tract 2.05, Bernalillo County, New Mexico","3355","2731","1954","1841","33878","126200","820","722","35","001","000205"]',
 '["Census Tract 6.04, Bernalillo County, New Mexico","3885","3096","1795","1658","26612","120400","636","579","35","001","000604"]',
 '["Census Tract 37.14, Bernalillo County, New Mexico","7500","6502","4594","4006","36742","195400","823","749","35","

In [80]:
geo_package = []
for fip in fips['fips_state']:
    geo_package.append({'geo_for':'tract:*', 'geo_in':'state:' + fip, 'fips_state' : fip})
geo_package

[{'geo_for': 'tract:*', 'geo_in': 'state:01', 'fips_state': '01'},
 {'geo_for': 'tract:*', 'geo_in': 'state:02', 'fips_state': '02'},
 {'geo_for': 'tract:*', 'geo_in': 'state:04', 'fips_state': '04'},
 {'geo_for': 'tract:*', 'geo_in': 'state:05', 'fips_state': '05'},
 {'geo_for': 'tract:*', 'geo_in': 'state:06', 'fips_state': '06'},
 {'geo_for': 'tract:*', 'geo_in': 'state:08', 'fips_state': '08'},
 {'geo_for': 'tract:*', 'geo_in': 'state:09', 'fips_state': '09'},
 {'geo_for': 'tract:*', 'geo_in': 'state:10', 'fips_state': '10'},
 {'geo_for': 'tract:*', 'geo_in': 'state:12', 'fips_state': '12'},
 {'geo_for': 'tract:*', 'geo_in': 'state:13', 'fips_state': '13'},
 {'geo_for': 'tract:*', 'geo_in': 'state:15', 'fips_state': '15'},
 {'geo_for': 'tract:*', 'geo_in': 'state:16', 'fips_state': '16'},
 {'geo_for': 'tract:*', 'geo_in': 'state:17', 'fips_state': '17'},
 {'geo_for': 'tract:*', 'geo_in': 'state:18', 'fips_state': '18'},
 {'geo_for': 'tract:*', 'geo_in': 'state:19', 'fips_state': '1

In [81]:
for geo in geo_package:
    data = api_call(base, demographic_variables.keys(), geo, key)
    try:
        data.to_csv('/Users/hamzaamjad/Box/Data/Demographic/Census Bureau/Census Tract/' + geo['fips_state'] + '.csv')
    except:
        print("Error with fips",geo['fips_state'])
        try:
            print(api_call_url(base, demographic_variables.keys(), geo, key))
        except:
            pass

Error with fips 35
https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:35&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3
Error with fips 60
https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:60&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3
Error with fips 66
https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:66&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3
Error with fips 69
https://api.census.gov/data/2018/acs/acs5?get=NAME,B01003_001E,B23025_001E,B23025_003E,B23025_004E,B19013_001E,B25077_001E,B25064_001E,B25058_001E&for=tract:*&in=state:69&key=7bff9e82cfa3f3e36a17f50f82663a2ab16e00e3
Error with fips 78
https://api.census.gov/data/2018/acs/acs5

In [48]:
# https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch07.pdf

demographic_variables = [
    "B01003_001E", # Total Population
    "B23025_001E", # Population aged 16 and over
    "B23025_003E", # Population aged 16 and over in civilian labor force
    "B23025_004E", # Population aged 16 and over in civilian labor force, employed"
    "B19013_001E", # Median Household Income
    "B25077_001E", # Median Home Price
    "B25064_001E", # Median Gross Rent
    "B25058_001E"] # Median Contract Rent
variables.loc[demographic_variables,:]
test = [
variables.loc[test,:]

Unnamed: 0,label,concept,predicateType,group,limit,predicateOnly,attributes,required
B01003_001E,Estimate!!Total,TOTAL POPULATION,int,B01003,0,,"B01003_001EA,B01003_001M,B01003_001MA",
B23025_001E,Estimate!!Total,EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS ...,int,B23025,0,,"B23025_001M,B23025_001MA,B23025_001EA",
B23025_003E,Estimate!!Total!!In labor force!!Civilian labo...,EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS ...,int,B23025,0,,"B23025_003M,B23025_003MA,B23025_003EA",
B23025_004E,Estimate!!Total!!In labor force!!Civilian labo...,EMPLOYMENT STATUS FOR THE POPULATION 16 YEARS ...,int,B23025,0,,"B23025_004M,B23025_004MA,B23025_004EA",
B19013_001E,Estimate!!Median household income in the past ...,MEDIAN HOUSEHOLD INCOME IN THE PAST 12 MONTHS ...,int,B19013,0,,"B19013_001M,B19013_001MA,B19013_001EA",
B25077_001E,Estimate!!Median value (dollars),MEDIAN VALUE (DOLLARS),int,B25077,0,,"B25077_001M,B25077_001MA,B25077_001EA",
B25064_001E,Estimate!!Median gross rent,MEDIAN GROSS RENT (DOLLARS),int,B25064,0,,"B25064_001M,B25064_001MA,B25064_001EA",
B25058_001E,Estimate!!Median contract rent,MEDIAN CONTRACT RENT (DOLLARS),int,B25058,0,,"B25058_001M,B25058_001MA,B25058_001EA",


In [40]:
var_dict = dict(zip(list(variables.index),variables.label))

filters = ['Median household income', 'Estimate!!Total!!In the labor force!!']
var_dict_filtered = []
for filter_string in filters:
    var_dict_filtered.append({key:value for (key,value) in var_dict.items() if filter_string in value})
var_dict_filtered

[{'B19013E_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
  'B19013D_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
  'B19013I_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
  'B19013A_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
  'B19013F_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars)',
  'B25119_002E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars) --!!Owner occupied (dollars)',
  'B25119_001E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars) --!!Total',
  'B25119_003E': 'Estimate!!Median household income in the past 12 months (in 2018 inflation-adjusted dollars) --!!Renter occupied (dollars)',
  'B19013G_001E': 'Est