Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
blablupcom committed Mar 12, 2018
1 parent e054c6d commit 4d8930f
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion scraper.py
Expand Up
@@ -96,7 +96,7 @@ def convert_mth_strings ( mth_string ):

#### SCRAPE DATA

-blocks = soup.find('article', 'post-380 page type-page status-publish hentry').find_all('a')
+blocks = soup.find('h2', text="Spend over £25,000").find_next('ul').find_all('a')
for block in blocks:
if '.csv' in block['href'] or '.xls' in block['href'] or '.xlsx' in block['href'] or '.pdf' in block['href']:
if 'http' not in block['href']:
Expand All
@@ -108,6 +108,9 @@ def convert_mth_strings ( mth_string ):
csvYr = title[-1][-4:]
if 'l to' in block.text:
csvMth = 'Q0'
+if 'November 2012 to January 2017' in block.text:
+csvMth = 'Q0'
+csvYr = '2017'
csvMth = convert_mth_strings(csvMth.upper())
data.append([csvYr, csvMth, link])

Expand Down

0 comments on commit 4d8930f

Please sign in to comment.