Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
chadskelton committed Jun 26, 2019
1 parent ae2e8f9 commit e85f961
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions scraper.py
Expand Up @@ -15,6 +15,7 @@
from BeautifulSoup import BeautifulSoup
# new for secret variables
import os
import mechanize

# Establish Twitter authorization. These codes are specific to @BCCourtBot
# It is probably better to set up authorization once at the beginning instead of resetting it within the tweet function, but this is not confirmed
Expand Down Expand Up @@ -143,12 +144,26 @@ def scrape_bcca(url):
tweetit(record)

def scrape_bcpc(url):

'''
html = requests.get(url, verify=False, headers={'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'})
# verify=False because getting 502 errors due to invalid certificate
htmlpage = html.content
soup = BeautifulSoup(htmlpage)
print soup
'''

br = mechanize.Browser()
br.set_handle_robots(False)
# br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36')]
html = br.open(url)
htmlpage = html.content

soup = BeautifulSoup(htmlpage)

print soup

# table = soup.find ("div", {"class" : "view-content"})
Expand Down

0 comments on commit e85f961

Please sign in to comment.