# HFB - Python Assessment

The following data is accessed for this exercise:

[S1701 | Poverty Status in the Past 12 Months](https://data.census.gov/table/ACSST1Y2018.S1701?q=Income%20and%20Poverty&g=050XX00US48201&y=2018) 

[S1903 | Median Income in the Past 12 Months (In 2018 Inflation Adjusted Dollars)](https://data.census.gov/table/ACSST1Y2018.S1903?q=Income%20and%20Poverty&g=050XX00US48201&y=2018)

API Data Links:

[S1701 API Data Link](https://api.census.gov/data/2018/acs/acs1/subject?get=group(S1701)&ucgid=0500000US48201)

[S1903 API Data Link](https://api.census.gov/data/2018/acs/acs1/subject?get=group(S1903)&ucgid=0500000US48201)

In [116]:
# Import Libraries
from requests.auth import HTTPBasicAuth
import requests
import pandas as pd

In [152]:
# Access Data Tables webpage 
url = 'https://api.census.gov/data/2018/acs/acs1/subject?get=group(S1701)&ucgid=0500000US48201'
api_key = '2be6e8a1806dd9ffa2015ac7162111590ea4ed86'
auth = HTTPBasicAuth('apikey', api_key)
response = requests.get(url, auth=auth)
response.raise_for_status() # Raise an exception for bad status code

In [154]:
# Assign json data to a list
poverty_value_list = response.json()[1]

In [156]:
all_data_list = []
for value in poverty_value_list:
    if value != '-888888888' or value != '-888888888.0' or value != '-999999999.0' or value != '-999999999' or value != None:
        all_data_list.append(value)

In [120]:
# Subset list for Estimat and Margin of Error Values
all_data_list = all_data_list[2::2]
all_data_list = all_data_list[:-1]
# Quick check of list values
# print(all_data_list)

In [121]:
# Quick Check of Length of each column
# len(all_data_list) / 6
# Table can be parsed using the following indices
# Total: 0 - 121
# Below Poverty Level: 122-243
# Percent Below Poverty Level: 244 - 365

In [122]:
# Create list to store data for Total, Below Poverty Level, and Percent Below Poverty Level
total_estimate = []
total_moe = []
for i in range(0,122):
    if i%2 == 0:
        total_estimate.append(all_data_list[i])
    else:
        total_moe.append(all_data_list[i])

bpl_estimate = []
bpl_moe = []
for i in range(122,244):
    if i%2 == 0:
        bpl_estimate.append(all_data_list[i])
    else:
        bpl_moe.append(all_data_list[i])

pbpl_estimate = []
pbpl_moe = []
for i in range(244,366):
    if i%2 == 0:
        pbpl_estimate.append(all_data_list[i])
    else:
        pbpl_moe.append(all_data_list[i])

In [123]:
# Quick extraction check: For this exercise, only te first 10 rows are required. 
# print(len(total_estimate[:10]))
# print(len(total_moe[:10]))
# print(len(bpl_estimate[:10]))
# print(len(bpl_moe[:10]))
# print(len(pbpl_estimate[:10]))
# print(len(pbpl_moe[:10]))

In [124]:
# Create a dataframe to house the extracted data.  
index_column_names = ["Population for whom poverty status is determined",
                      "AGE - Under 18 years",
                      "AGE -- Under 5 years",
                      "AGE -- 5 to 17 years",
                      "AGE -- Related children of householder under 18 years",
                      "AGE - 18 to 64 years",
                      "AGE --18 to 34 years",
                      "AGE--35 to 64 years",
                      "AGE - 60 years and over",
                      "AGE - 65 years and over"
                     ]
poverty_data_dict = {'Total - Estimate': total_estimate[:10], 'Total - Margin of Error': total_moe[:10],
                     "Below poverty level - Estimate": bpl_estimate[:10], "Below poverty level - Margin of Error": bpl_moe[:10],
                     "Percent Below poverty level - Estimate": pbpl_estimate[:10], "Percent Below poverty level - Margin of Error": pbpl_moe[:10]}

In [125]:
poverty_data_age_df = pd.DataFrame(poverty_data_dict,
                  index = index_column_names
                 )

In [126]:
poverty_data_age_df

Unnamed: 0,Total - Estimate,Total - Margin of Error,Below poverty level - Estimate,Below poverty level - Margin of Error,Percent Below poverty level - Estimate,Percent Below poverty level - Margin of Error
Population for whom poverty status is determined,4650812,8856,771892,31744,16.6,0.7
AGE - Under 18 years,1238708,3628,314646,18133,25.4,1.5
AGE -- Under 5 years,349669,2530,94560,8741,27.0,2.5
AGE -- 5 to 17 years,889039,2244,220086,12975,24.8,1.5
AGE -- Related children of householder under 18 years,1235313,3648,311714,18228,25.2,1.5
AGE - 18 to 64 years,2924003,7964,397219,16715,13.6,0.6
AGE --18 to 34 years,1169989,6213,181806,11084,15.5,1.0
AGE--35 to 64 years,1754014,2418,215413,10377,12.3,0.6
AGE - 60 years and over,732702,6310,90660,5819,12.4,0.8
AGE - 65 years and over,488101,917,60027,4940,12.3,1.0


In [127]:
# Convert values from string to integer
poverty_data_age_df['Total - Estimate'] = poverty_data_age_df['Total - Estimate'].astype(int)
poverty_data_age_df['Total - Margin of Error'] = poverty_data_age_df['Total - Margin of Error']. astype(int)
poverty_data_age_df['Below poverty level - Estimate'] = poverty_data_age_df['Below poverty level - Estimate'].astype(int)
poverty_data_age_df['Below poverty level - Margin of Error'] = poverty_data_age_df['Below poverty level - Margin of Error']. astype(int)
poverty_data_age_df['Percent Below poverty level - Estimate'] = poverty_data_age_df['Percent Below poverty level - Estimate'].astype(float)
poverty_data_age_df['Percent Below poverty level - Margin of Error'] = poverty_data_age_df['Percent Below poverty level - Margin of Error']. astype(float)

In [128]:
# Estimate of the number of people under the age of 18 in poverty
poverty_under_18 = poverty_data_age_df['Below poverty level - Estimate'].iloc[1]
poverty_any_age = poverty_data_age_df['Below poverty level - Estimate'].iloc[1] + poverty_data_age_df['Below poverty level - Estimate'].iloc[4]
print("Harris County Poverty Data:")
print("Estimated number of people under the age of 18 in povery: {}".format(poverty_under_18))
print("Estimated number of any people of any age in povery: {}".format(poverty_any_age))

Harris County Poverty Data:
Estimated number of people under the age of 18 in povery: 314646
Estimated number of any people of any age in povery: 626360


The same steps applied above can be applied for determining the estimate of the median. 


In [142]:
# Access Data Tables webpage 
url = 'https://api.census.gov/data/2018/acs/acs1/subject?get=group(S1903)&ucgid=0500000US48201'
api_key = '2be6e8a1806dd9ffa2015ac7162111590ea4ed86'
auth = HTTPBasicAuth('apikey', api_key)
response = requests.get(url, auth=auth)
response.raise_for_status() # Raise an exception for bad status code

In [178]:
# Assign json data to a list
median_value_list = response.json()[1]

In [180]:
all_data_list = []
for value in median_value_list:
    if value != '-888888888' or value != '-888888888.0' or value != '-999999999.0' or value != '-999999999':
        all_data_list.append(value)

## NOTE: 
Currently there are string values of -888888888, -888888888.0, -999999999, -999999999.0, which appear to represent the spaces between values on the table. In the previous exercise, this was able to be parsed out with the above string, however, at this time it remains even after the code is run. Adjusting the code here, should allow for continutation towards building the table to collect the estimate. 

In [99]:
# Subset list for Estimat and Margin of Error Values
# all_data_list = all_data_list[2::2]
# all_data_list = all_data_list[:-1]
# Quick check of list values
# print(all_data_list)

## NOTE: 
The index values below should be recalculated for theis specific table before going forward.

In [65]:
# Quick Check of Length of each column
# len(all_data_list) / 6
# Table can be parsed using the following indices
# Total: 0 - 121
# Below Poverty Level: 122-243
# Percent Below Poverty Level: 244 - 365

In [None]:
# Create list to store data for Total, Below Poverty Level, and Percent Below Poverty Level
# number_estimate = []
# number_moe = []
# for i in range(0,122):
#     if i%2 == 0:
#         number_estimate.append(all_data_list[i])
#     else:
#         number__moe.append(all_data_list[i])

# pdist_estimate = []
# pdist_moe = []
# for i in range(122,244):
#     if i%2 == 0:
#         pdist_estimate.append(all_data_list[i])
#     else:
#         pdist_moe.append(all_data_list[i])

# median_income_estimate = []
# median_income_moe = []
# for i in range(244,366):
#     if i%2 == 0:
#        median_income_estimate.append(all_data_list[i])
#     else:
#         median_income_moe.append(all_data_list[i])