Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
blablupcom committed Mar 2, 2018
1 parent c9a30df commit ad7477b
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions scraper.py
Expand Up @@ -93,15 +93,15 @@ def convert_mth_strings ( mth_string ):

#### READ HTML 1.1 - no "lxml"

- html = requests.get(url)
- soup = BeautifulSoup(html.text, "html.parser")
+ html = urllib2.urlopen(url)
+ soup = BeautifulSoup(html, "html.parser")

#### SCRAPE DATA


for i in range(1, 12):
- html = requests.get(start_url+str(i))
- soup = BeautifulSoup(html.text, "html.parser")
+ html = urllib2.urlopen(start_url+str(i))
+ soup = BeautifulSoup(html, "html.parser")
block = soup.find('table')
links = block.findAll('a', href=True)
for link in links:
Expand All @@ -110,7 +110,7 @@ def convert_mth_strings ( mth_string ):
suburl = 'http://apps.warwickshire.gov.uk/' + link['href']
if 'payments-to-suppliers' in suburl:
html2 = urllib2.urlopen(suburl)
- soup2 = BeautifulSoup(html2, 'lxml')
+ soup2 = BeautifulSoup(html2, "html.parser")
block = soup2.find('dt', text=re.compile("CSV")).find_next('dd').find('a')
if block:
sublink = block['href']
Expand Down

0 comments on commit ad7477b

Please sign in to comment.