Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
blablupcom committed May 31, 2018
1 parent cefe65c commit 3aba1c2
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions scraper.py
Expand Up @@ -85,26 +85,29 @@ def convert_mth_strings ( mth_string ):

 #### VARIABLES 1.0

-entity_id = "E0931_ABC_gov"
-url = "https://www.allerdale.gov.uk/en/about-council/budget-and-spending/spending-council/"
+entity_id = "E0531_CCC_gov"
+url = "https://www.cambridge.gov.uk/payments-to-suppliers"
 errors = 0
 data = []


 #### READ HTML 1.0

 html = urllib2.urlopen(url)
-soup = BeautifulSoup(html, "lxml")
+soup = BeautifulSoup(html, 'lxml')

 #### SCRAPE DATA

-links = soup.find_all('a')
+links = soup.find('h2', text=re.compile("Payments")).find_all_next('a', href=True)
 for link in links:
-    file_name = link.text
-    if 'Spending' in file_name and '.csv' in link['href']:
-        url = link['href']
-        csvYr = file_name.replace('Spending ', '').strip()[-4:]
-        csvMth = file_name.replace('Spending ', '').strip()[:3]
+    if 'CSV' in link.text:
+        if 'http' not in link['href']:
+            url = 'https://www.cambridge.gov.uk' + link['href']
+        else:
+            url = link['href']
+        file_name = link.text
+        csvMth = file_name[:3]
+        csvYr = file_name.split()[1].replace('p', '')
         csvMth = convert_mth_strings(csvMth.upper())
         data.append([csvYr, csvMth, url])

Expand Down

0 comments on commit 3aba1c2

Please sign in to comment.