In [1]:
# Import the dependencies: pandas, requests, Beautiful Soup, and pprint
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from pprint import pprint

In [2]:
# URL of the ZipAtlas page that lists Houston, TX zip codes together with
# their median household income; this is the page scraped in the cells below.
zip_population_income_url = 'http://zipatlas.com/us/tx/houston/zip-code-comparison/median-household-income.htm'


In [3]:
# Retrieve the Houston zip code statistics page with the requests module.
# The timeout keeps the cell from hanging indefinitely if the site does not respond.
zip_population_income_response = requests.get(zip_population_income_url, timeout=30)
# Fail loudly on an HTTP error status (4xx/5xx) instead of silently parsing an error page.
zip_population_income_response.raise_for_status()
# Build the BeautifulSoup object from the response text using 'html.parser'.
soup = bs(zip_population_income_response.text, 'html.parser')
# Print the indented HTML so the page structure can be inspected.
print(soup.prettify())

<html>
 <head>
  <title>
   Median Household Income in Houston, TX by Zip Code
  </title>
  <meta content="Median Household Income in Houston, TX with a color coded Zip Code Heat Map." name="description"/>
  <script src="/js/map/city/?e=4FTll8hutHwc7y4DZOfS4oQwlNRYVuiG4MUTO90DJZrhR84Tom1KE5NHRjQlitu0Lg3y4XrswsQFyFD7xk0BXojcFCDWeF4BhM1cCqRac33HKq9yiJNhKUhNUnPpjFqb" type="text/javascript">
  </script>
  <script src="http://maps.google.com/maps/api/js?sensor=false" type="text/javascript">
  </script>
  <meta content="all,index,follow" name="robots"/>
  <meta content="general" name="rating"/>
  <meta content="ZipAtlas.com Development Team" name="author"/>
  <meta content="en-us" name="language"/>
  <meta content="Copyright 2011 ZipAtlas.com" name="copyright"/>
  <meta content="7 Days" name="revisit-after"/>
  <meta content="-1" http-equiv="Expires"/>
  <meta content="Global" http-equiv="Distribution"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="

In [4]:
# Parse every HTML table found at the URL into a list of DataFrames,
# then show how many tables the page contains.
Htown_facts_table = pd.read_html(io=zip_population_income_url)
len(Htown_facts_table)

14

In [5]:
# The page yielded 14 tables (valid positions 0-13). The zip code table appeared
# to be near the end of the page, so the list was inspected from the last entry
# backwards; the table we need sits at position 11.
# NOTE(review): this positional index depends on the page's current layout --
# re-check it if the site changes.
H_Town_facts_table = Htown_facts_table[11]

In [6]:
# Wrap the selected table in a DataFrame under its own name.
Htown_facts_table_df = pd.DataFrame(data=H_Town_facts_table)
# Leave the frame as the cell's last expression so the notebook renders it.
Htown_facts_table_df

Unnamed: 0,0,1,2,3,4,5,6
0,#,Zip Code,Location,City,Population,Avg. Income/H/hold,National Rank
1,1.,77010,"29.754310, -95.361109","Houston, Texas",76,"$200,000.00",#1
2,2.,77094,"29.769285, -95.681292","Houston, Texas",7779,"$123,244.00",#78
3,3.,77046,"29.733084, -95.430659","Houston, Texas",471,"$105,863.00",#181
4,4.,77059,"29.615219, -95.134960","Houston, Texas",16690,"$104,844.00",#197
5,5.,77005,"29.718435, -95.423555","Houston, Texas",23338,"$104,035.00",#208
6,6.,77024,"29.771991, -95.515453","Houston, Texas",32746,"$82,620.00",#706
7,7.,77068,"30.008830, -95.487234","Houston, Texas",9505,"$77,724.00",#948
8,8.,77095,"29.916055, -95.663077","Houston, Texas",39275,"$76,814.00",#992
9,9.,77062,"29.575781, -95.134334","Houston, Texas",26978,"$75,689.00","#1,066"


In [7]:
# The first row of the scraped table holds the real column titles: remove that
# row from the data and use its values as the column labels.
Houston_Facts_Table = (
    Htown_facts_table_df
    .drop(index=Htown_facts_table_df.index[0])
    .rename(columns=Htown_facts_table_df.iloc[0])
)

In [8]:
# Rename the first column from "#" to "Number" so it has a readable label.
# DataFrame.rename returns a new frame, so the result must be assigned back;
# otherwise the rename is discarded and the saved CSV keeps the "#" header.
# Re-running this cell is safe: once "#" is gone the mapping simply matches nothing.
Houston_Facts_Table = Houston_Facts_Table.rename(columns={'#': 'Number'})
# Display the renamed table.
Houston_Facts_Table

Unnamed: 0,Number,Zip Code,Location,City,Population,Avg. Income/H/hold,National Rank
1,1.,77010,"29.754310, -95.361109","Houston, Texas",76,"$200,000.00",#1
2,2.,77094,"29.769285, -95.681292","Houston, Texas",7779,"$123,244.00",#78
3,3.,77046,"29.733084, -95.430659","Houston, Texas",471,"$105,863.00",#181
4,4.,77059,"29.615219, -95.134960","Houston, Texas",16690,"$104,844.00",#197
5,5.,77005,"29.718435, -95.423555","Houston, Texas",23338,"$104,035.00",#208
6,6.,77024,"29.771991, -95.515453","Houston, Texas",32746,"$82,620.00",#706
7,7.,77068,"30.008830, -95.487234","Houston, Texas",9505,"$77,724.00",#948
8,8.,77095,"29.916055, -95.663077","Houston, Texas",39275,"$76,814.00",#992
9,9.,77062,"29.575781, -95.134334","Houston, Texas",26978,"$75,689.00","#1,066"
10,10.,77056,"29.749035, -95.469021","Houston, Texas",14031,"$71,926.00","#1,336"


In [9]:
# Write the table to a CSV file in the working directory, leaving out the
# DataFrame index column.
Houston_Facts_Table.to_csv(path_or_buf="Houston_Facts_Table.csv", index=False)