Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
chadskelton committed Jun 26, 2019
1 parent e85f961 commit b1901e3
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions scraper.py
Expand Up @@ -145,34 +145,35 @@ def scrape_bcca(url):

def scrape_bcpc(url):

'''
html = requests.get(url, verify=False, headers={'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'})
# verify=False because getting 502 errors due to invalid certificate
htmlpage = html.content

soup = BeautifulSoup(htmlpage)

print soup
'''

'''
# attempt to do it with mechanize instead of requests; still errored
br = mechanize.Browser()
br.set_handle_robots(False)
# br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36')]
html = br.open(url)
htmlpage = html.content
'''

soup = BeautifulSoup(htmlpage)

print soup

# table = soup.find ("div", {"class" : "view-content"})
table = soup.find ("div", {"class" : "view-content"})

# print table
print table

# decisions = table.findAll ("a")
decisions = table.findAll ("a")

decisions = soup.findAll ("div", {"class":"views-field views-field-text"})
# decisions = soup.findAll ("div", {"class":"views-field views-field-text"})

print decisions

Expand Down

0 comments on commit b1901e3

Please sign in to comment.