Patch summary (free text ahead of the first "diff --git" header):
  * Point the scraper's start URL at the council-payments page.
  * Parse the first cellspacing="0" table on its own: skip its first link and
    take the year from the first '-'-separated token of the file name and the
    month from the second token.
  * Parse every remaining cellspacing="0" table with the previous rule
    (year = third-from-last token, month = second-from-last token), again
    skipping the first link of each table.
  * Terminate the file with a newline.
  NOTE(review): the committed URL ended in ".htmm"; it is corrected to ".htm"
  here on the assumption that it was a typo -- confirm against the live site.

diff --git a/scraper.py b/scraper.py
index 0f1d5de..27adb72 100644
--- a/scraper.py
+++ b/scraper.py
@@ -84,7 +84,7 @@ def convert_mth_strings ( mth_string ):
 #### VARIABLES 1.0
 
 entity_id = "E3902_WUA_gov"
-url = "http://www.wiltshire.gov.uk/council/howthecouncilworks/budgetsandspending/paymentssalariesandexpenses.htm"
+url = "http://www.wiltshire.gov.uk/paymentssalariesandexpenses/councilpayments.htm"
 errors = 0
 data = []
 
@@ -97,16 +97,29 @@ def convert_mth_strings ( mth_string ):
 #### SCRAPE DATA
 
 block = soup.find('div', attrs = {'id':'article'})
-links_block = block.find('table', attrs = {'cellspacing': '0'})
-links = links_block.find_all('a')
+links_block_1 = block.find('table', attrs = {'cellspacing': '0'})
+links = links_block_1.find_all('a')[1:]
 for link in links:
     if '.csv' in link['href']:
         url = 'http://www.wiltshire.gov.uk/' + link['href']
-        csvMth = url.split('-')[-2].strip()
-        csvYr = url.split('-')[-3].strip()
+        csvMth = url.split('-')[1].strip()
+        csvYr = url.split('-')[0].split('/')[-1].strip()
         csvMth = convert_mth_strings(csvMth.upper())
         data.append([csvYr, csvMth, url])
 
+# print(block)
+links_block = block.find_all('table', attrs = {'cellspacing': '0'})[1:]
+for link_block in links_block:
+    links = link_block.find_all('a')[1:]
+    for link in links:
+        if '.csv' in link['href']:
+            url = 'http://www.wiltshire.gov.uk/' + link['href']
+            csvMth = url.split('-')[-2].strip()
+            csvYr = url.split('-')[-3].strip()
+            csvMth = convert_mth_strings(csvMth.upper())
+            data.append([csvYr, csvMth, url])
+
+
 #### STORE DATA 1.0
 
 
@@ -128,4 +141,4 @@ def convert_mth_strings ( mth_string ):
     raise Exception("%d errors occurred during scrape." % errors)
 
 
-#### EOF
\ No newline at end of file
+#### EOF