Skip to content

Commit

Permalink
Updated URL; the previous URL now leads to a 404.
Browse files Browse the repository at this point in the history
  • Loading branch information
dcalde committed Sep 19, 2021
1 parent fb52ccb commit 1081299
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion scraper.py
Expand Up @@ -21,7 +21,7 @@
jre_dir = jdk.install('11', jre=True, path='/tmp/.jre')
tabula.io._run = partial(tabula_custom._run, java_path=jre_dir + '/bin/java')

-URL = "https://www.perth.wa.gov.au/develop/planning-and-building-applications/building-and-development-applications"
+URL = "https://perth.wa.gov.au/en/building-and-planning/planning-and-building-applications/building-and-development-applications"
DATABASE = "data.sqlite"
DATA_TABLE = "data"
PROCESSED_FILES_TABLE = "files_processed"
Expand Down Expand Up @@ -61,6 +61,7 @@ def clean_description(description: str) -> str:
with Browser('chrome', headless=True, options=options) as browser:
browser.visit(URL)
links = browser.find_by_css(".list-item > a")
+print(f"Found {len(links)} links")
for link in links:
title = link.html
pdf_url = link["href"]
Expand Down

0 comments on commit 1081299

Please sign in to comment.