    # Data for Cleveland Healthline ZIP Codes

    This notebook retrieves data for the following ZIP Codes near the Cleveland Healthline BRT: `44112, 44104, 44103, 44106, 44114`.

    Data Sources from the [American Community Survey](https://data.census.gov/advanced), 2013-2020 (same range as *National Transit Database Tables*)
    - Income: `Table S1901`
    - Population and Median Age: `Table S0101`
    - Household Type (e.g. married couple): `Table S2501`
    - Car Ownership: `Table S0802`

    Data for the Number of Business Establishments were found in the ECNSVY Business Patterns Survey (table accessible in the same link above).

In [33]:
import requests
import json
import pandas as pd

Helper functions. TODO: these will also be needed by notebooks in other directories, so eventually move them into a shared module.

In [34]:
def fetch_api_data(url: str, timeout: float = 30) -> dict:
    """
    Fetches data from an API endpoint and returns the response in JSON format.

    Parameters:
    url (str): The URL of the API endpoint to fetch data from.
    timeout (float): Seconds to wait for the server before giving up.
        Defaults to 30.

    Returns:
    dict: A dictionary containing the JSON response from the API endpoint.

    Raises:
    requests.exceptions.HTTPError: If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout: If no response arrives within `timeout` seconds.
    """
    # requests applies no timeout by default, so a stalled connection would
    # otherwise hang the calling loop indefinitely.
    response = requests.get(url, timeout=timeout)

    # Fail loudly on an error status rather than trying to parse an error body
    # and hitting a confusing KeyError/JSON error further down.
    response.raise_for_status()

    return response.json()

# A more comprehensive function - haven't called yet - might actually be too complex
def fetch_census_data(years, zipcodes, url, df, value_index=161, fetch=None):
    """
    Fills a year-by-ZIP-code table with one value per API response.

    Parameters:
    years (iterable of str): Row labels of `df`; each one is substituted for
        `{year}` in `url`.
    zipcodes (iterable of str): Column labels of `df`; each one is substituted
        for `{zipcode}` in `url`.
    url (str): URL template. `{year}` and `{zipcode}` placeholders are filled
        in with `str.format` for every request; a URL without placeholders is
        requested unchanged.
    df (pd.DataFrame): Table that is filled in place, indexed by year with one
        column per ZIP code.
    value_index (int): Position of the wanted value inside the second entry of
        the response's `data` list. Defaults to 161, the position the income
        loop in this notebook reads.
    fetch (callable): Function taking a URL and returning the decoded JSON
        dict. Defaults to a plain `requests.get(...).json()` call.

    Returns:
    pd.DataFrame: The same `df` object, after it has been filled.
    """
    if fetch is None:
        def fetch(request_url):
            return requests.get(request_url).json()

    for year in years:
        for zipcode in zipcodes:
            # Build the per-cell URL; previously the same unformatted URL was
            # requested on every iteration.
            res = fetch(url.format(year=year, zipcode=zipcode))

            # NOTE(review): entry 0 of `data` is presumably a header row and
            # entry 1 the values - confirm against a live response.
            value = res['response']['data'][1][value_index]

            # One .loc[row, col] write on the caller's table. The old code
            # rebound `df` to the response and used chained indexing, so the
            # table passed in was never updated.
            df.loc[year, zipcode] = value

    return df


In [35]:
# Constants shared by every section below.
# Both lists hold strings because the values are used as DataFrame labels
# and are also substituted into the request URLs.

years = [str(year) for year in range(2013, 2021)]
print(years)

zipcodes = [
    '44112',
    '44104',
    '44103',
    '44106',
    '44114',
]

['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']


## Income

In [6]:
# Empty income table: one row per survey year, one column per ZIP code.
# No data is passed, so every cell starts out as NaN.
df_income = pd.DataFrame(columns=zipcodes, index=years)

# Print the empty frame to check its layout.
print(df_income)

     44112 44104 44103 44106 44114
2013   NaN   NaN   NaN   NaN   NaN
2014   NaN   NaN   NaN   NaN   NaN
2015   NaN   NaN   NaN   NaN   NaN
2016   NaN   NaN   NaN   NaN   NaN
2017   NaN   NaN   NaN   NaN   NaN
2018   NaN   NaN   NaN   NaN   NaN
2019   NaN   NaN   NaN   NaN   NaN
2020   NaN   NaN   NaN   NaN   NaN


In [31]:
# Fill df_income from ACS table S1901: one request per (year, ZIP code).
# NOTE(review): position 161 of the second `data` entry is taken to be the
# income figure - confirm against the header row of a live response.
for year in years:
    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S1901'.format(zipcode=zipcode, year=year)
        res = fetch_api_data(url)

        # The JSON dict can be indexed directly; no DataFrame wrapper needed.
        income = res['response']['data'][1][161]

        # Use a single .loc[row, col] write. The chained form
        # df_income.loc[year][zipcode] = ... may assign into a temporary copy
        # and silently leave df_income unchanged.
        df_income.loc[year, zipcode] = income


In [32]:
# Show the filled income table.
df_income # takes ~15s to create

# CSV export of the income table is left commented out.
# df_income.to_csv('../data/raw/cle/income.csv', index=True)


Unnamed: 0,44112,44104,44103,44106,44114
2013,22220,14603,18411,22602,22180
2014,22548,15204,17616,24065,23013
2015,21608,14009,17166,23222,21324
2016,21028,14414,18531,24578,20823
2017,22147,14646,17867,25892,22304
2018,22586,15813,19540,26310,25757
2019,22412,16999,19730,29225,36182
2020,26236,16650,21468,30669,43159


## Population and Median Age

In [12]:
# Two separate empty tables (population and median age), each with one row
# per survey year and one column per ZIP code.
df_pop, df_age = (pd.DataFrame(columns=zipcodes, index=years) for _ in range(2))

In [13]:
# Fill df_pop from ACS table S0101: one request per (year, ZIP code).
# NOTE(review): 270 / 277 are positions within the second `data` entry and
# differ between the 2013-2016 and later responses - confirm against the
# header row of a live response.
for year in years:
    # The position depends only on the year, so choose it once per year.
    if year in ('2013', '2014', '2015', '2016'):
        index = 270
    else:
        index = 277

    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S0101'.format(zipcode=zipcode, year=year)
        res = fetch_api_data(url)

        # The JSON dict can be indexed directly; no DataFrame wrapper needed.
        pop = res['response']['data'][1][index]

        # Single .loc[row, col] write; the chained df_pop.loc[year][zipcode]
        # form may assign into a temporary copy and leave df_pop unchanged.
        df_pop.loc[year, zipcode] = pop


In [15]:
# Show the filled population table.
df_pop

Unnamed: 0,44112,44104,44103,44106,44114
2013,22593,23307,17990,26373,5130
2014,22193,22479,17291,26837,5714
2015,22465,22165,16456,25805,6474
2016,22283,21351,16813,25557,6447
2017,21788,20541,16486,25836,6714
2018,21909,19640,16519,25752,6567
2019,21666,19814,15938,25309,6896
2020,21251,19295,15711,25658,6671


In [14]:
# 37.2s (timing noted by the author; presumably the population fetch loop)

# Save the population table; index=True writes the year labels as the first column.
df_pop.to_csv('../data/raw/cle/pop.csv', index=True)

In [16]:
# Fill df_age (median age) from ACS table S0101: one request per (year, ZIP code).
# NOTE(review): 140 / 111 are positions within the second `data` entry and
# differ between the 2013-2016 and later responses - confirm against the
# header row of a live response.
for year in years:
    # The position depends only on the year, so choose it once per year.
    if year in ('2013', '2014', '2015', '2016'):
        index = 140
    else:
        index = 111

    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S0101'.format(zipcode=zipcode, year=year)
        res = fetch_api_data(url)

        # The JSON dict can be indexed directly; no DataFrame wrapper needed.
        age = res['response']['data'][1][index]

        # Single .loc[row, col] write; the chained df_age.loc[year][zipcode]
        # form may assign into a temporary copy and leave df_age unchanged.
        df_age.loc[year, zipcode] = age


In [17]:
# Show the filled median-age table.
df_age

Unnamed: 0,44112,44104,44103,44106,44114
2013,41.8,28.6,41.7,28.7,32.9
2014,42.8,28.0,41.1,28.2,32.4
2015,42.7,28.2,41.6,28.3,34.2
2016,43.1,29.2,40.4,28.3,33.4
2017,43.0,28.5,41.4,28.6,33.5
2018,42.8,29.2,40.4,28.3,34.3
2019,42.8,30.7,42.3,28.5,33.9
2020,41.8,30.7,40.1,30.6,33.0


In [18]:
# Save the median-age table; index=True writes the year labels as the first column.
df_age.to_csv('../data/raw/cle/age.csv', index=True)

## Household Type
e.g. Married, Single

In [20]:
# One empty year-by-ZIP-code table per household category. The generator
# builds four independent frames, so filling one never touches the others.
df_house_married, df_house_nonfam, df_house_m_single, df_house_f_single = (
    pd.DataFrame(columns=zipcodes, index=years) for _ in range(4)
)

# Response positions for each category; 0 is only a placeholder that the
# fetch loop overwrites before reading.
house_index = dict.fromkeys(['Married', 'Nonfamily', 'SingleMale', 'SingleFemale'], 0)

In [21]:
# Fill the four household-type tables from ACS table S2501: one request per
# (year, ZIP code), four values read from each response.
# NOTE(review): the numbers below are positions within the second `data`
# entry and differ between the 2013-2016 and later responses - confirm them
# against the header row of a live response.
for year in years:
    # The positions depend only on the year, so set them once per year
    # instead of once per request.
    if year in ('2013', '2014', '2015', '2016'):
        house_index['Married'] = 31
        house_index['Nonfamily'] = 11
        house_index['SingleMale'] = 174
        house_index['SingleFemale'] = 118
    else:
        house_index['Married'] = 863
        house_index['Nonfamily'] = 346
        house_index['SingleMale'] = 877
        house_index['SingleFemale'] = 13

    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?id=ACSST5Y{year}.S2501&g=860XX00US{zipcode}'.format(zipcode=zipcode, year=year)
        res = fetch_api_data(url)

        # The JSON dict can be indexed directly; no DataFrame wrapper needed.
        values = res['response']['data'][1]

        # Single .loc[row, col] writes; the chained frame.loc[year][zipcode]
        # form may assign into a temporary copy and leave the table unchanged.
        df_house_married.loc[year, zipcode] = values[house_index['Married']]
        df_house_nonfam.loc[year, zipcode] = values[house_index['Nonfamily']]
        df_house_m_single.loc[year, zipcode] = values[house_index['SingleMale']]
        df_house_f_single.loc[year, zipcode] = values[house_index['SingleFemale']]

In [23]:
# Show one household-type table at a time; swap which line is uncommented
# to inspect a different one.
# df_house_married
# df_house_nonfam
df_house_m_single
# df_house_f_single

Unnamed: 0,44112,44104,44103,44106,44114
2013,5.8,6.5,5.0,2.7,2.2
2014,5.7,7.1,4.0,2.5,1.6
2015,5.5,6.5,3.2,2.0,4.4
2016,5.2,6.7,4.7,1.8,4.8
2017,5.1,5.7,4.7,2.0,3.9
2018,6.3,5.1,5.8,2.2,4.2
2019,6.4,4.4,5.0,2.3,4.0
2020,9.8,4.3,4.9,2.4,1.0


In [24]:
# Save each household-type table to its own CSV; index=True writes the year
# labels as the first column.
df_house_married.to_csv('../data/raw/cle/house_married.csv', index=True)
df_house_nonfam.to_csv('../data/raw/cle/house_nonfam.csv', index=True)
df_house_m_single.to_csv('../data/raw/cle/house_m_single.csv', index=True)
df_house_f_single.to_csv('../data/raw/cle/house_f_single.csv', index=True)

## Car Ownership

In [36]:
# Empty car-ownership table: one row per survey year, one column per ZIP code.
df_car = pd.DataFrame(index=years, columns=zipcodes)

# Fill df_car from ACS table S0802: one request per (year, ZIP code).
# NOTE(review): position 114 of the second `data` entry is taken to be the
# car-ownership figure - confirm against the header row of a live response.
for year in years:
    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S0802'.format(zipcode=zipcode, year=year)
        res = fetch_api_data(url)

        # Index the JSON dict directly and write with a single
        # .loc[row, col]; the chained df_car.loc[year][zipcode] form may
        # assign into a temporary copy and leave df_car unchanged.
        df_car.loc[year, zipcode] = res['response']['data'][1][114]

In [38]:
# 1 min (timing noted by the author; presumably the car-ownership fetch loop)
# Show the filled car-ownership table.
df_car

Unnamed: 0,44112,44104,44103,44106,44114
2013,53.9,68.0,51.3,71.2,60.7
2014,60.9,72.2,54.7,62.2,72.9
2015,54.5,67.9,52.2,59.4,64.9
2016,54.1,61.5,47.4,59.2,72.0
2017,47.8,61.1,46.8,61.3,77.8
2018,51.1,66.5,48.1,61.8,73.6
2019,58.2,61.8,44.2,71.5,79.3
2020,56.6,68.3,48.4,73.7,69.9


In [27]:
# Save the car-ownership table; index=True writes the year labels as the first column.
df_car.to_csv('../data/raw/cle/car.csv', index=True)

## Number of businesses

In [28]:
# Empty business-count table: one row per survey year, one column per ZIP
# code. Rows after 2018 stay NaN because the loop below does not reach them.
df_biz = pd.DataFrame(index=years, columns=zipcodes)

# data for this table only until 2018
# years[:6] is 2013-2018 given that `years` starts at 2013.
for year in years[:6]:
    for zipcode in zipcodes:
        # The table id needs both the full year and its last two digits.
        url = 'https://data.census.gov/api/access/data/table?id=ZBP{year}.CB{year_short}00ZBP&g=860XX00US{zipcode}'.format(zipcode=zipcode, year=year, year_short=year[2:])
        res = fetch_api_data(url)

        # NOTE(review): position 15 of the second `data` entry is taken to be
        # the establishment count - confirm against a live response.
        # Single .loc[row, col] write; the chained df_biz.loc[year][zipcode]
        # form may assign into a temporary copy and leave df_biz unchanged.
        df_biz.loc[year, zipcode] = res['response']['data'][1][15]

In [30]:
# NOTE(review): this bare expression is not the last statement of the cell,
# so it presumably renders nothing in the notebook - confirm.
df_biz

# Save the business-count table; index=True writes the year labels as the first column.
df_biz.to_csv('../data/raw/cle/biz.csv', index=True)

## Rough work below

In [3]:
# Scratch: build one S1901 request URL for a single ZIP code and year.
# This rebinds the notebook-level `zipcode` and `year` names.
zipcode, year = '44112', '2011'

cleveland_income = (
    'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S1901'
).format(year=year, zipcode=zipcode)


In [11]:
# Scratch: fetch the single URL held in `cleveland_income`.
res = fetch_api_data(cleveland_income)

# Wrap the JSON dict in a DataFrame so the 'response' payload can be read by key.
df = pd.DataFrame.from_dict(res)

# Value at position 161 of the second `data` entry - the same position the
# income loop reads.
df['response']['data'][1][161]

'24312'