From 4d8930f57272f3811263bba94b6cf2a8225bca49 Mon Sep 17 00:00:00 2001 From: blablupcom Date: Mon, 12 Mar 2018 23:37:20 +0300 Subject: [PATCH] Update scraper.py --- scraper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scraper.py b/scraper.py index eec1dde..0d058ef 100644 --- a/scraper.py +++ b/scraper.py @@ -96,7 +96,7 @@ def convert_mth_strings ( mth_string ): #### SCRAPE DATA -blocks = soup.find('article', 'post-380 page type-page status-publish hentry').find_all('a') +blocks = soup.find('h2', text="Spend over £25,000").find_next('ul').find_all('a') for block in blocks: if '.csv' in block['href'] or '.xls' in block['href'] or '.xlsx' in block['href'] or '.pdf' in block['href']: if 'http' not in block['href']: @@ -108,6 +108,9 @@ def convert_mth_strings ( mth_string ): csvYr = title[-1][-4:] if 'l to' in block.text: csvMth = 'Q0' + if 'November 2012 to January 2017' in block.text: + csvMth = 'Q0' + csvYr = '2017' csvMth = convert_mth_strings(csvMth.upper()) data.append([csvYr, csvMth, link])