Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
blablupcom committed Apr 18, 2017
1 parent 50ff582 commit e6613ba
Showing 1 changed file with 19 additions and 6 deletions.
25 changes: 19 additions & 6 deletions scraper.py
Expand Up @@ -84,7 +84,7 @@ def convert_mth_strings ( mth_string ):
#### VARIABLES 1.0 #### VARIABLES 1.0


entity_id = "E3902_WUA_gov" entity_id = "E3902_WUA_gov"
url = "http://www.wiltshire.gov.uk/council/howthecouncilworks/budgetsandspending/paymentssalariesandexpenses.htm" url = "http://www.wiltshire.gov.uk/paymentssalariesandexpenses/councilpayments.htmm"
errors = 0 errors = 0
data = [] data = []


Expand All @@ -97,16 +97,29 @@ def convert_mth_strings ( mth_string ):
#### SCRAPE DATA #### SCRAPE DATA


block = soup.find('div', attrs = {'id':'article'}) block = soup.find('div', attrs = {'id':'article'})
links_block = block.find('table', attrs = {'cellspacing': '0'}) links_block_1 = block.find('table', attrs = {'cellspacing': '0'})
links = links_block.find_all('a') links = links_block_1.find_all('a')[1:]
for link in links: for link in links:
if '.csv' in link['href']: if '.csv' in link['href']:
url = 'http://www.wiltshire.gov.uk/' + link['href'] url = 'http://www.wiltshire.gov.uk/' + link['href']
csvMth = url.split('-')[-2].strip() csvMth = url.split('-')[1].strip()
csvYr = url.split('-')[-3].strip() csvYr = url.split('-')[0].split('/')[-1].strip()
csvMth = convert_mth_strings(csvMth.upper()) csvMth = convert_mth_strings(csvMth.upper())
data.append([csvYr, csvMth, url]) data.append([csvYr, csvMth, url])


# print(block)
links_block = block.find_all('table', attrs = {'cellspacing': '0'})[1:]
for link_block in links_block:
links = link_block.find_all('a')[1:]
for link in links:
if '.csv' in link['href']:
url = 'http://www.wiltshire.gov.uk/' + link['href']
csvMth = url.split('-')[-2].strip()
csvYr = url.split('-')[-3].strip()
csvMth = convert_mth_strings(csvMth.upper())
data.append([csvYr, csvMth, url])




#### STORE DATA 1.0 #### STORE DATA 1.0


Expand All @@ -128,4 +141,4 @@ def convert_mth_strings ( mth_string ):
raise Exception("%d errors occurred during scrape." % errors) raise Exception("%d errors occurred during scrape." % errors)




#### EOF #### EOF

0 comments on commit e6613ba

Please sign in to comment.