### Import Modules and Dependencies ###

In [21]:
import pandas as pd
import numpy as np

from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as bs
import requests

import json
import requests
from pprint import pprint

from api_file import api_key

### Read CSV File and Create a Dataframe of Thirty Cities ###

In [22]:
cities_list_df = pd.read_csv("CensusDataCities.csv")

cities_list_df.head()

Unnamed: 0,Ranking,Metro_Area,City,Metropollitan_Population
0,1,"New York-Newark, NY-NJ-CT-PA","New York, NY",23689255
1,2,"Los Angeles-Long Beach, CA","Los Angeles, CA",18688022
2,3,"Chicago-Naperville, IL-IN-WI","Chicago, IL",9882634
3,4,"Washington-Baltimore-Arlington, DC-MD-VA-WV-PA","Washington, CD",9665892
4,5,"San Jose-San Francisco-Oakland, CA","San Francisco, CA",8751807


### Create Lists of Cities and States for Possible Input in API Call ###

In [23]:
cities = []

for city in cities_list_df["City"].str.split(",").str[0]:
    cities.append(city)
    
print(cities)

['New York', 'Los Angeles', 'Chicago', 'Washington', 'San Francisco', 'Boston', 'Dallas', 'Philadelphia', 'Houston', 'Miami', 'Atlanta', 'Detroit', 'Seattle', 'Minneapolis', 'Cleveland', 'Denver', 'Orlando', 'Portland', 'St. Louis', 'Pittsburgh', 'Charlotte', 'Sacramento', 'Salt Lake City', 'Kansas City', 'Columbus', 'Las Vegas', 'Indianapolis', 'Cincinnati', 'Raleigh', 'Milwaukee']


In [24]:
states = []

for state in cities_list_df["City"].str.split(", ").str[1]:
    states.append(state)
    
print(states)

['NY', 'CA', 'IL', 'CD', 'CA', 'MA', 'TX', 'PA', 'TX', 'FL', 'GA', 'MI', 'WA', 'MN', 'OH', 'CO', 'FL', 'OR', 'MO', 'PA', 'NC', 'CA', 'UT', 'MO', 'OH', 'NV', 'IN', 'OH', 'NC', 'WI']


### Use Beautiful Soup to Scrape an HTML Document with State and FIPS Codes ###

In [25]:
req = Request("https://www.mcc.co.mercer.pa.us/dps/state_fips_code_listing.htm", headers={'User-Agent': 'Mozilla/5.0'})

webpage = urlopen(req).read()

webpage

b'<html>\r\n\r\n<head>\r\n<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">\r\n<meta name="GENERATOR" content="Microsoft FrontPage 4.0">\r\n<meta name="ProgId" content="FrontPage.Editor.Document">\r\n<title>State FIPS Code Listing</title>\r\n</head>\r\n\r\n<body>\r\n\r\n<p align="center"><br>\r\n<b><font size="+3">State FIPS Code Listing</font></b>\r\n\r\n<p align="center"><img border="0" src="images/state_6.gif" width="733" height="172">\r\n<hr>\r\n<h3 align="center">Click on the highlighted State abbreviation for a list of\r\nCounties and County FIPS codes.</h3>\r\n<div align="center">\r\n  <center>\r\n<table border="1" width="687">\r\n  <tbody>\r\n    <tr>\r\n      <td align="center" width="85"><strong>State Abbreviation</strong></td>\r\n      <td align="center" width="46"><strong>FIPS Code</strong></td>\r\n      <td align="center" width="182"><strong>State Name</strong></td>\r\n      <td align="center" width="86"><strong>State Abbreviation</strong></td>\r\n

In [26]:
soup = bs(webpage, "html.parser")
type(soup)

bs4.BeautifulSoup

### Use Beautiful Soup Object to Create an HTML File for Conversion to a Dataframe ###

In [27]:
with open("StateFIPSCodes.html", "w", encoding='utf-8') as file:
    file.write(str(soup))

In [28]:
state_fips = pd.read_html("StateFIPSCodes.html")[0]

state_fips

Unnamed: 0,0,1,2,3,4,5
0,State Abbreviation,FIPS Code,State Name,State Abbreviation,FIPS Code,State Name
1,AK,02,ALASKA,MS,28,MISSISSIPPI
2,AL,01,ALABAMA,MT,30,MONTANA
3,AR,05,ARKANSAS,NC,37,NORTH CAROLINA
4,AS,60,AMERICAN SAMOA,ND,38,NORTH DAKOTA
5,AZ,04,ARIZONA,NE,31,NEBRASKA
6,CA,06,CALIFORNIA,NH,33,NEW HAMPSHIRE
7,CO,08,COLORADO,NJ,34,NEW JERSEY
8,CT,09,CONNECTICUT,NM,35,NEW MEXICO
9,DC,11,DISTRICT OF COLUMBIA,NV,32,NEVADA


### Conversion of Two Sets of Columns from Dataframe into One Set ###

In [29]:
state_fips_df1 = state_fips.drop(state_fips.columns[[3, 4, 5]], axis=1)

state_fips_df1

Unnamed: 0,0,1,2
0,State Abbreviation,FIPS Code,State Name
1,AK,02,ALASKA
2,AL,01,ALABAMA
3,AR,05,ARKANSAS
4,AS,60,AMERICAN SAMOA
5,AZ,04,ARIZONA
6,CA,06,CALIFORNIA
7,CO,08,COLORADO
8,CT,09,CONNECTICUT
9,DC,11,DISTRICT OF COLUMBIA


In [30]:
state_fips_df1a = state_fips_df1.drop(state_fips_df1.index[28])

state_fips_df1a

Unnamed: 0,0,1,2
0,State Abbreviation,FIPS Code,State Name
1,AK,02,ALASKA
2,AL,01,ALABAMA
3,AR,05,ARKANSAS
4,AS,60,AMERICAN SAMOA
5,AZ,04,ARIZONA
6,CA,06,CALIFORNIA
7,CO,08,COLORADO
8,CT,09,CONNECTICUT
9,DC,11,DISTRICT OF COLUMBIA


In [31]:
state_fips_df1b = state_fips_df1a.drop(state_fips_df1a.index[0])

state_fips_df1b

Unnamed: 0,0,1,2
1,AK,2,ALASKA
2,AL,1,ALABAMA
3,AR,5,ARKANSAS
4,AS,60,AMERICAN SAMOA
5,AZ,4,ARIZONA
6,CA,6,CALIFORNIA
7,CO,8,COLORADO
8,CT,9,CONNECTICUT
9,DC,11,DISTRICT OF COLUMBIA
10,DE,10,DELAWARE


In [32]:
state_fips_df2 = state_fips.drop(state_fips.columns[[0, 1, 2]], axis=1)

state_fips_df2

Unnamed: 0,3,4,5
0,State Abbreviation,FIPS Code,State Name
1,MS,28,MISSISSIPPI
2,MT,30,MONTANA
3,NC,37,NORTH CAROLINA
4,ND,38,NORTH DAKOTA
5,NE,31,NEBRASKA
6,NH,33,NEW HAMPSHIRE
7,NJ,34,NEW JERSEY
8,NM,35,NEW MEXICO
9,NV,32,NEVADA


In [33]:
state_fips_df2a = state_fips_df2.drop(state_fips_df2.index[0])

state_fips_df2a

Unnamed: 0,3,4,5
1,MS,28,MISSISSIPPI
2,MT,30,MONTANA
3,NC,37,NORTH CAROLINA
4,ND,38,NORTH DAKOTA
5,NE,31,NEBRASKA
6,NH,33,NEW HAMPSHIRE
7,NJ,34,NEW JERSEY
8,NM,35,NEW MEXICO
9,NV,32,NEVADA
10,NY,36,NEW YORK


In [43]:
all_dfs = [state_fips_df1b, state_fips_df2a]

for df in all_dfs:
    df.columns = ["State Abbreviation", "FIPS Code", "State Name"]

state_fips_df = pd.concat(all_dfs).reset_index(drop=True)

#print(type(state_fips_df["State Abbreviation"][0]))
state_fips_df

<class 'str'>


Unnamed: 0,State Abbreviation,FIPS Code,State Name
0,AK,2,ALASKA
1,AL,1,ALABAMA
2,AR,5,ARKANSAS
3,AS,60,AMERICAN SAMOA
4,AZ,4,ARIZONA
5,CA,6,CALIFORNIA
6,CO,8,COLORADO
7,CT,9,CONNECTICUT
8,DC,11,DISTRICT OF COLUMBIA
9,DE,10,DELAWARE


### Create a List of all FIPS Codes from a Column of the Dataframe ###

In [35]:
state_fips_list = state_fips_df["FIPS Code"].tolist()

print(state_fips_list)

['02', '01', '05', '60', '04', '06', '08', '09', '11', '10', '12', '13', '66', '15', '19', '16', '17', '18', '20', '21', '22', '25', '24', '23', '26', '27', '29', '28', '30', '37', '38', '31', '33', '34', '35', '32', '36', '39', '40', '41', '42', '72', '44', '45', '46', '47', '48', '49', '51', '78', '50', '53', '55', '54', '56']


In [44]:
#state_fips_list_select = []

#for state in states:
#    if state in state_fips_df["State Abbreviation"]:
#        state_fips_list_select.append(state_fips_df["State Abbreviation"])

#print(state_fips_list_select)

### API Call to Census.gov ###

In [37]:
#Census Data API User Guide (Link): https://www.census.gov/content/dam/Census/data/developers/api-user-guide/api-guide.pdf
#American Community Survey Data Variables (Link): https://api.census.gov/data/2013/acs/acs1/variables.html
#("https://api.census.gov/data/2018/acs/acs1?&get=NAME,B02015_009E,B02015_009M&for=state:*&key=your key here")

states_response = []

for state in states:
    url = ("https://api.census.gov/data/2018/acs/acs1?&get=NAME,B20005I_048E&for=state:*&key=")
    state_fips_data = requests.get(url).json()
    states_response.append(state_fips_data)
print(states_response)

[[['NAME', 'B20005I_048E', 'state'], ['Colorado', '1616', '08'], ['Indiana', '217', '18'], ['Kentucky', '75', '21'], ['Louisiana', '396', '22'], ['Illinois', '3232', '17'], ['Iowa', '75', '19'], ['New Hampshire', None, '33'], ['Arkansas', '23', '05'], ['Delaware', '0', '10'], ['Minnesota', '58', '27'], ['Montana', None, '30'], ['Maine', None, '23'], ['North Carolina', '652', '37'], ['Georgia', '1545', '13'], ['Alaska', '452', '02'], ['Alabama', '152', '01'], ['Vermont', None, '50'], ['Nevada', '1155', '32'], ['West Virginia', None, '54'], ['Oklahoma', '265', '40'], ['Wisconsin', '423', '55'], ['Puerto Rico', '1356', '72'], ['Virginia', '1040', '51'], ['North Dakota', None, '38'], ['South Carolina', '392', '45'], ['Oregon', '464', '41'], ['Wyoming', None, '56'], ['California', '20214', '06'], ['Mississippi', '64', '28'], ['Connecticut', '910', '09'], ['Texas', '9635', '48'], ['Maryland', '874', '24'], ['Florida', '6904', '12'], ['Massachusetts', '1571', '25'], ['District of Columbia', N

In [38]:
pprint(states_response)

[[['NAME', 'B20005I_048E', 'state'],
  ['Colorado', '1616', '08'],
  ['Indiana', '217', '18'],
  ['Kentucky', '75', '21'],
  ['Louisiana', '396', '22'],
  ['Illinois', '3232', '17'],
  ['Iowa', '75', '19'],
  ['New Hampshire', None, '33'],
  ['Arkansas', '23', '05'],
  ['Delaware', '0', '10'],
  ['Minnesota', '58', '27'],
  ['Montana', None, '30'],
  ['Maine', None, '23'],
  ['North Carolina', '652', '37'],
  ['Georgia', '1545', '13'],
  ['Alaska', '452', '02'],
  ['Alabama', '152', '01'],
  ['Vermont', None, '50'],
  ['Nevada', '1155', '32'],
  ['West Virginia', None, '54'],
  ['Oklahoma', '265', '40'],
  ['Wisconsin', '423', '55'],
  ['Puerto Rico', '1356', '72'],
  ['Virginia', '1040', '51'],
  ['North Dakota', None, '38'],
  ['South Carolina', '392', '45'],
  ['Oregon', '464', '41'],
  ['Wyoming', None, '56'],
  ['California', '20214', '06'],
  ['Mississippi', '64', '28'],
  ['Connecticut', '910', '09'],
  ['Texas', '9635', '48'],
  ['Maryland', '874', '24'],
  ['Florida', '6904', 

  ['Virginia', '1040', '51'],
  ['North Dakota', None, '38'],
  ['South Carolina', '392', '45'],
  ['Oregon', '464', '41'],
  ['Wyoming', None, '56'],
  ['California', '20214', '06'],
  ['Mississippi', '64', '28'],
  ['Connecticut', '910', '09'],
  ['Texas', '9635', '48'],
  ['Maryland', '874', '24'],
  ['Florida', '6904', '12'],
  ['Massachusetts', '1571', '25'],
  ['District of Columbia', None, '11'],
  ['Utah', '348', '49'],
  ['New York', '6123', '36'],
  ['New Jersey', '2139', '34'],
  ['Ohio', '200', '39'],
  ['Missouri', '161', '29'],
  ['Pennsylvania', '1002', '42'],
  ['Michigan', '547', '26'],
  ['Nebraska', '487', '31'],
  ['Idaho', '93', '16'],
  ['New Mexico', '622', '35'],
  ['Hawaii', '194', '15'],
  ['South Dakota', None, '46'],
  ['Washington', '1743', '53'],
  ['Rhode Island', '290', '44'],
  ['Arizona', '2124', '04'],
  ['Tennessee', '74', '47'],
  ['Kansas', '33', '20']],
 [['NAME', 'B20005I_048E', 'state'],
  ['Colorado', '1616', '08'],
  ['Indiana', '217', '18'],
