Permalink
Browse files

Update scraper.py

  • Loading branch information...
blablupcom committed Apr 18, 2017
1 parent 50ff582 commit e6613ba7de0b3c4d51347eaee1e3e3c29c8301ff
Showing with 19 additions and 6 deletions.
  1. +19 −6 scraper.py
View
@@ -84,7 +84,7 @@ def convert_mth_strings ( mth_string ):
#### VARIABLES 1.0
entity_id = "E3902_WUA_gov"
-url = "http://www.wiltshire.gov.uk/council/howthecouncilworks/budgetsandspending/paymentssalariesandexpenses.htm"
+url = "http://www.wiltshire.gov.uk/paymentssalariesandexpenses/councilpayments.htmm"
errors = 0
data = []
@@ -97,16 +97,29 @@ def convert_mth_strings ( mth_string ):
#### SCRAPE DATA
block = soup.find('div', attrs = {'id':'article'})
-links_block = block.find('table', attrs = {'cellspacing': '0'})
-links = links_block.find_all('a')
+links_block_1 = block.find('table', attrs = {'cellspacing': '0'})
+links = links_block_1.find_all('a')[1:]
for link in links:
if '.csv' in link['href']:
url = 'http://www.wiltshire.gov.uk/' + link['href']
-csvMth = url.split('-')[-2].strip()
-csvYr = url.split('-')[-3].strip()
+csvMth = url.split('-')[1].strip()
+csvYr = url.split('-')[0].split('/')[-1].strip()
csvMth = convert_mth_strings(csvMth.upper())
data.append([csvYr, csvMth, url])
+# print(block)
+links_block = block.find_all('table', attrs = {'cellspacing': '0'})[1:]
+for link_block in links_block:
+links = link_block.find_all('a')[1:]
+for link in links:
+if '.csv' in link['href']:
+url = 'http://www.wiltshire.gov.uk/' + link['href']
+csvMth = url.split('-')[-2].strip()
+csvYr = url.split('-')[-3].strip()
+csvMth = convert_mth_strings(csvMth.upper())
+data.append([csvYr, csvMth, url])
#### STORE DATA 1.0
@@ -128,4 +141,4 @@ def convert_mth_strings ( mth_string ):
raise Exception("%d errors occurred during scrape." % errors)
-#### EOF
+#### EOF

0 comments on commit e6613ba

Please sign in to comment.