import urllib
import urllib2
from bs4 import BeautifulSoup
# Module-level string settings. All three are empty strings in this file.
# NOTE(review): presumably the site root (base), the path to the statistics
# pages (stats) and the address of the index page (url) -- confirm the
# intended values before running.
base = ''
stats = ''
url = ''
def link_scrape(url,list):
    """Collect link targets from the ``id='content'`` element of a page.

    Fetches ``url``, parses the response with BeautifulSoup, locates the
    element whose id is ``content`` and appends the ``href`` of every ``<a>``
    inside it -- except the first one -- to ``list``.

    Parameters:
        url: address of the page to fetch (passed to ``urllib2.urlopen``).
        list: a mutable list that receives the href strings. It is modified
            in place. The name shadows the builtin but is kept so existing
            callers keep working.

    Returns:
        None. Results are delivered through ``list``.

    Raises:
        urllib2.URLError: if the page cannot be fetched.
        AttributeError: if the page has no element with ``id='content'``
            (``soup.find`` returns None in that case).
    """
    connect = urllib2.urlopen(url)
    try:
        html = connect.read()
    finally:
        # Always release the connection, even if the read fails.
        connect.close()

    soup = BeautifulSoup(html)
    content = soup.find(id='content')
    links = content.find_all('a')

    # Skip the first link, which is an anchor for navigation
    for i in links[1:]:
        # NOTE(review): the loop body was missing from the file; appending
        # the href string is reconstructed from the description of this
        # function and from callers that treat the items as strings.
        href = i.get('href')
        if href is not None:
            # <a> tags without an href carry no target worth recording.
            list.append(href)
# Driver: collect the per-year pages, collect the monthly PDF links from each
# of them, then download every PDF into the current directory.

# Create the list before sending it to the function
years = []
# Get links to all the years the Secretary of State has available
# NOTE(review): this call was missing from the file; `url` is assumed to be
# the index page that lists the years -- confirm.
link_scrape(url, years)

# Create the list before sending it to the function
pdfs = []
# For each year, scrape for the link to each month's pdf of voter registration numbers
for year in years:
    # NOTE(review): loop body was missing; the year links are assumed to be
    # relative to base + stats -- confirm against the real page.
    link_scrape(base + stats + year, pdfs)

# Now that we have the link to the pdf, replace the relative paths, pull out
# the name of the file then download it to the current directory
for i in pdfs:
    # Site-relative path of the PDF (leading '../../..' removed).
    clean = i.replace('../../..','')
    # Bare file name used for the local copy.
    name = i.replace('../../../doc/voterresources/registration/','')
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement.
    print("Downloading %s" % name)
    # NOTE(review): the download call was missing; `clean` was otherwise
    # unused, so it is assumed to be joined onto `base` -- confirm.
    urllib.urlretrieve(base + clean, name)