# Data for Cleveland Healthline ZIP Codes

This notebook retrieves data for the following ZIP Codes near the Cleveland Healthline BRT: `44112, 44104, 44103, 44106, 44114`.

Data Sources: 
- Income: `Table S1901` of the *American Community Survey*, 2013-2020 (same range as *National Transit Database Tables*)
- Population: `Table S0101` of the *American Community Survey*, 2013-2020

In [3]:
import requests
import json
import pandas as pd

Helper functions. **TODO:** these will be needed from other directories, so eventually move them out of this notebook into a shared module.

In [10]:
def fetch_api_data(url: str, timeout: float = 30.0) -> dict:
    """
    Fetches data from an API endpoint and returns the response in JSON format.

    Parameters:
    url (str): The URL of the API endpoint to fetch data from.
    timeout (float): Seconds to wait for the server before giving up.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns:
    dict: A dictionary containing the JSON response from the API endpoint.

    Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status code.
    requests.Timeout: If the server does not answer within ``timeout`` seconds.
    """
    # make the API request
    response = requests.get(url, timeout=timeout)

    # Fail here, with the status code in the message, rather than later when
    # an error payload is indexed as if it were table data.
    response.raise_for_status()

    return response.json()

# A more comprehensive function - haven't called yet - might actually be too complex
def fetch_census_data(years, zipcodes, url, df, value_index=161):
    """
    Fills ``df`` with one API value per (year, ZIP Code) combination.

    Parameters:
    years (iterable of str): Row labels of ``df``; each one is substituted
        for ``{year}`` in ``url``.
    zipcodes (iterable of str): Column labels of ``df``; each one is
        substituted for ``{zipcode}`` in ``url``.
    url (str): URL template containing ``{zipcode}`` and ``{year}``
        placeholders. A URL without placeholders is requested unchanged.
    df (pd.DataFrame): Frame indexed by year with one column per ZIP Code.
        It is updated in place.
    value_index (int): Position of the wanted value inside
        ``response['data'][1]``. The default, 161, is the position the
        income cells of this notebook read.

    Returns:
    pd.DataFrame: The same ``df`` object, for convenience.
    """
    for year in years:
        for zipcode in zipcodes:
            # Build the request for this specific year / ZIP Code pair.
            res = fetch_api_data(url.format(zipcode=zipcode, year=year))

            # Read the value straight from the JSON payload; converting the
            # payload to a DataFrame first is not needed for this lookup.
            value = res['response']['data'][1][value_index]

            # One .loc[row, col] call writes into ``df`` itself. A chained
            # df.loc[row][col] = ... may only modify a temporary copy.
            df.loc[year, zipcode] = value

    return df


In [5]:
# Constants shared by the cells below

# Years to query, stored as strings because they are used both as URL
# fragments and as DataFrame row labels.
years = [str(year_number) for year_number in range(2013, 2021)]
print(years)

# ZIP Codes covered by this notebook
zipcodes = ['44112', '44104', '44103', '44106', '44114']

['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020']


## Income

In [6]:
# Empty year-by-ZIP grid: one row per year, one column per ZIP Code
df_income = pd.DataFrame(columns=zipcodes, index=years)

# Show the still-empty frame
print(df_income)

     44112 44104 44103 44106 44114
2013   NaN   NaN   NaN   NaN   NaN
2014   NaN   NaN   NaN   NaN   NaN
2015   NaN   NaN   NaN   NaN   NaN
2016   NaN   NaN   NaN   NaN   NaN
2017   NaN   NaN   NaN   NaN   NaN
2018   NaN   NaN   NaN   NaN   NaN
2019   NaN   NaN   NaN   NaN   NaN
2020   NaN   NaN   NaN   NaN   NaN


In [7]:
# Fill df_income with one value per (year, ZIP Code) from ACS table S1901.
for year in years:
    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S1901'.format(zipcode = zipcode, year = year)
        res = fetch_api_data(url)

        df = pd.DataFrame.from_dict(res)
        # 161 is the position of the income figure inside the response row
        # (presumably found by inspecting a response; confirm which S1901
        # measure it corresponds to).
        income = df['response']['data'][1][161]

        # Assign with a single .loc[row, col] call. The chained form
        # df_income.loc[year][zipcode] = ... may write to a temporary copy
        # and leave df_income unchanged (always so under Copy-on-Write).
        df_income.loc[year, zipcode] = income


In [9]:
# Filling this table takes ~15s
print(df_income)

# Persist the table; the year labels are written as the first CSV column
income_csv_path = '../data/processed/cle/income.csv'
df_income.to_csv(income_csv_path, index=True)


      44112  44104  44103  44106  44114
2013  22220  14603  18411  22602  22180
2014  22548  15204  17616  24065  23013
2015  21608  14009  17166  23222  21324
2016  21028  14414  18531  24578  20823
2017  22147  14646  17867  25892  22304
2018  22586  15813  19540  26310  25757
2019  22412  16999  19730  29225  36182
2020  26236  16650  21468  30669  43159


## Population and Median Age

In [12]:
# Two independent, empty year-by-ZIP grids (rows = years, columns = ZIP Codes):
# one for population, one for median age.
df_pop, df_age = (pd.DataFrame(columns=zipcodes, index=years) for _ in range(2))

In [13]:
# Fill df_pop with one value per (year, ZIP Code) from ACS table S0101.
for year in years:
    # The position of the population value in the response row differs
    # between the 2013-2016 responses and the later ones. It depends only on
    # the year, so it is chosen once per year rather than once per request.
    if year in ['2013', '2014', '2015', '2016']:
        index = 270
    else:
        index = 277

    for zipcode in zipcodes:
        url = 'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S0101'.format(zipcode = zipcode, year = year)
        res = fetch_api_data(url)

        df = pd.DataFrame.from_dict(res)

        pop = df['response']['data'][1][index]

        # Assign with a single .loc[row, col] call. The chained form
        # df_pop.loc[year][zipcode] = ... may write to a temporary copy and
        # leave df_pop unchanged (always so under Copy-on-Write).
        df_pop.loc[year, zipcode] = pop


In [14]:
# Timing note from the original run: 37.2s
# (presumably the duration of the population fetch loop; confirm)

# Persist the population table; the year labels become the first CSV column
pop_csv_path = '../data/processed/cle/pop.csv'
df_pop.to_csv(pop_csv_path, index=True)

## Rough work below

In [3]:
# Scratch values for a single request: one ZIP Code and one year
zipcode, year = '44112', '2011'

# S1901 (income) table URL for that ZIP Code / year pair
cleveland_income = f'https://data.census.gov/api/access/data/table?g=860XX00US{zipcode}&id=ACSST5Y{year}.S1901'


In [11]:
# Fetch the scratch URL and load the JSON payload into a DataFrame
res = fetch_api_data(cleveland_income)
df = pd.DataFrame.from_dict(res)

# Pull the 'data' entry of the 'response' column, then show element 161 of
# its row 1 (the same position the income loop reads).
data_rows = df['response']['data']
data_rows[1][161]

'24312'