# Analyzing Census data using Census Reporter API
## By: Austin Kaliher
### This piece of code was originally created for APRD 6342 (Digital Advertising) at the University of Colorado Boulder. The assignment involves analyzing census data to find optimal target markets for a local coffee roaster. To do this, we called the Census Reporter API to get the raw data to analyze. From preliminary research, we found that people aged 18-34, households that earn 50k - 75k per year, and Latino people were the optimal targeting characteristics for the client. Our assignment was then to identify the metro areas in the United States that have the highest population of people in those groups. Data for this code is available in my repository.

In [1]:
import pandas as pd
import requests
import time

# Import the CBSA data (geographical region identifiers)
alldata = pd.read_csv('msas.csv')

# The age, income, and ethnicity data we are looking for are located in these tables.
# The table ids are joined into one comma-separated string so that a single API call
# per metro area requests all three tables at once.
tableids = ['B01001', 'B03002', 'B19001']
tableidstring = ','.join(tableids)
geoid = alldata['CBSA']

# Destination dictionary for the API responses, keyed by row position in msas.csv
recdata = {}

# One API call per metro area. time.sleep(1) pauses one second between calls; without
# it the API will usually fail due to too many requests.
for row, cbsa in enumerate(geoid):
    requesturl = 'http://api.censusreporter.org/1.0/data/show/latest?table_ids=%s&geo_ids=31000US%s' % (tableidstring, cbsa)
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(requesturl, timeout=30)
    # Fail here on an HTTP error instead of storing an error payload that would only
    # surface later as a confusing KeyError when the estimates are looked up.
    response.raise_for_status()
    recdata[row] = response.json()
    time.sleep(1)

# Getting count of people aged 18 - 34 by geoid. Age groups reported in table B01001
# only span a few years each and are listed separately for males and females, so every
# one of these categories must be summed to get the count of people aged 18-34.
# The previous version summed only B01001031-B01001036 (the female brackets per the
# Census B01001 table layout), which left males out of the total.
alldata = pd.read_csv('msas.csv')
geoid = alldata['CBSA']
agecolumns = [
    # Males: 18-19, 20, 21, 22-24, 25-29, 30-34
    'B01001007', 'B01001008', 'B01001009', 'B01001010', 'B01001011', 'B01001012',
    # Females: 18-19, 20, 21, 22-24, 25-29, 30-34
    'B01001031', 'B01001032', 'B01001033', 'B01001034', 'B01001035', 'B01001036',
]

# Collect the totals in a separate list instead of writing them over the CBSA column:
# the ids stay intact, so the CSV no longer has to be re-read for every age bracket.
agecounts = []
for row, cbsa in enumerate(geoid):
    estimates = recdata[row]['data']['31000US' + str(int(cbsa))]['B01001']['estimate']
    agecounts.append(sum(estimates[column] for column in agecolumns))

# One value per metro area, aligned with the rows of msas.csv
age = pd.Series(agecounts, index=geoid.index)

# Getting the count with household income between $50k and $75k by geoid. Two columns
# of table B19001 are summed to cover that range.
# NOTE(review): B19001 appears to be a household-level table, so these are presumably
# household counts rather than person counts - confirm against the table definition.
alldata = pd.read_csv('msas.csv')
geoid = alldata['CBSA']
incomecolumns = ['B19001011', 'B19001012']

# Collect the totals in a separate list instead of writing them over the CBSA column,
# so the ids stay intact and the CSV does not have to be re-read per column.
incomecounts = []
for row, cbsa in enumerate(geoid):
    estimates = recdata[row]['data']['31000US' + str(int(cbsa))]['B19001']['estimate']
    incomecounts.append(sum(estimates[column] for column in incomecolumns))

# One value per metro area, aligned with the rows of msas.csv
income = pd.Series(incomecounts, index=geoid.index)

# Getting count of people that identify as hispanic by geoid
alldata = pd.read_csv('msas.csv')
geoid = alldata['CBSA']

# Build the counts as a new Series rather than writing them over the CBSA column.
# alldata and geoid therefore still hold the original ids afterwards, and no further
# re-read of the CSV is needed to restore them.
hispcounts = []
for row, cbsa in enumerate(geoid):
    estimates = recdata[row]['data']['31000US' + str(int(cbsa))]['B03002']['estimate']
    hispcounts.append(estimates['B03002012'])

# One value per metro area, aligned with the rows of msas.csv
hisp = pd.Series(hispcounts, index=geoid.index)

# Questions 1 - 3
def _print_answer(heading_lines, counts, city):
    """Print one question: heading, the largest value in `counts`, and the city.

    `city` is the hand-written answer taken from the reference tables; it is not
    derived from `counts`. Spacer lines are a single space, as in the original report.
    """
    for _ in range(5):
        print(' ')
    for heading in heading_lines:
        print(heading)
    for _ in range(3):
        print(' ')
    print('When we analyze the data, we see that the highest person count for a city is:')
    print(' ')
    print(max(counts))
    print(' ')
    print('When we reference back to the tables, we see that the city with this population is:')
    print(' ')
    print(city)


_print_answer(
    ['Question 1. The city with the highest population of people with household',
     'income of 50k to 75k'],
    income,
    'The New York area',
)
_print_answer(
    ['Question 2. The city with the highest population with people aged 18 to 34.'],
    age,
    'The New York area',
)
_print_answer(
    ['Question 3. The city with the largest latino population'],
    hisp,
    'Los Angeles/Long Beach',
)


# Question 4

# Express each measure as a fraction of its largest value across all metro areas,
# then average the three fractions into one score per metro area.
perincome = income / max(income)
perage = age / max(age)
perhisp = hisp / max(hisp)
totalper = (perincome + perage + perhisp) / 3

# Lines printed before the score; spacer lines are a single space.
leadlines = (
    [' '] * 5
    + ['Question 4. The city with the largest average percentage across categories']
    + [' '] * 3
    + ['When we analyze the data, we see that the highest percentage is:', ' ']
)
for line in leadlines:
    print(line)
print(max(totalper))
# The city below is the hand-written answer, not a value computed from totalper.
for line in (
    ' ',
    'When we reference back to the tables, we see that the city with this population is:',
    ' ',
    'New York area',
):
    print(line)

FileNotFoundError: File b'msas.csv' does not exist