Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
blablupcom committed Mar 28, 2017
1 parent 57c4359 commit db0767e
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions scraper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,22 +96,22 @@ def convert_mth_strings ( mth_string ):

#### SCRAPE DATA

-financial = soup.find(text=re.compile('2015/2016 financial year'))
+financial = soup.find(text=re.compile('2016/2017 financial year'))
links = financial.find_all_next('a', href=True)
for link in links:
url = 'http://www.nottinghamshire.gov.uk' + link['href']
title = link.encode_contents(formatter='html').replace('&nbsp;',' ')
-if '.csv' in link['href']:
+if '.csv' in link['href'] and link['href']:
title = title.upper().strip()
if '20' not in title:
-csvYr = '2013'
-csvMth = title.split(' ')[-2][:3]
+csvYr = '2016'
+csvMth = title.split(' ')[0][:3]
else:
csvYr = title.split(' ')[1][:4]
csvMth = title.split(' ')[0][:3]
-
-csvMth = convert_mth_strings(csvMth.upper())
-data.append([csvYr, csvMth, url])
+if csvMth:
+csvMth = convert_mth_strings(csvMth.upper())
+data.append([csvYr, csvMth, url])

#### STORE DATA 1.0

Expand All @@ -132,4 +132,5 @@ def convert_mth_strings ( mth_string ):
if errors > 0:
raise Exception("%d errors occurred during scrape." % errors)

+
#### EOF

0 comments on commit db0767e

Please sign in to comment.