Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
chadskelton committed Jun 26, 2019
1 parent b1901e3 commit 9951627
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions scraper.py
Expand Up @@ -145,7 +145,8 @@ def scrape_bcca(url):

def scrape_bcpc(url):

html = requests.get(url, verify=False, headers={'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'})
# html = requests.get(url, verify=False, headers={'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'})
html = requests.get(url)
# verify=False (see the commented-out request above) was used because of 502 errors attributed to an invalid certificate
htmlpage = html.content

Expand All @@ -161,19 +162,25 @@ def scrape_bcpc(url):
br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36')]
html = br.open(url)
htmlpage = html.content
'''

soup = BeautifulSoup(htmlpage)

print soup

table = soup.find ("div", {"class" : "view-content"})
print table
decisions = table.findAll ("a")
# decisions = soup.findAll ("div", {"class":"views-field views-field-text"})
'''

# new instructions for canlii site

soup = BeautifulSoup(htmlpage)

print soup

section = soup.find ("div", {"id" : "recentJudg"})

decisions = section.findAll ("a")

print decisions

Expand All @@ -196,6 +203,7 @@ def scrape_bcpc(url):
print 'Difficulty scraping BCCA'

#try:
scrape_bcpc("http://www.provincialcourt.bc.ca/judgments-decisions")
#scrape_bcpc("http://www.provincialcourt.bc.ca/judgments-decisions")
scrape_bcpc("https://www.canlii.org/en/bc/bcpc/")
#except:
# print 'Difficulty scraping BCPC'

0 comments on commit 9951627

Please sign in to comment.