### Scrape booth locations
This notebook goes through the AEC website and gets the exact address of each polling booth. The results are then saved to a two-column CSV file (`polling_places.csv`).

In [37]:
# Libraries
import requests
from bs4 import BeautifulSoup
import csv
from collections import defaultdict

In [40]:
# Get all the seats in a state
def get_seats_in_state(state="NSW", timeout=30):
    """Return the results-page URL of every House division listed for a state.

    Downloads the AEC "HouseDivisionMenu" page for ``state`` and collects every
    anchor whose ``href`` starts with ``HouseDivisionPage-27966``.

    Args:
        state: State code inserted into the menu page file name. Defaults to
            "NSW", the value that used to be hard-coded.
            NOTE(review): other codes (e.g. "VIC") presumably follow the same
            file-name pattern on the AEC site - confirm before relying on it.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A list of absolute division-page URLs, in the order they appear on the
        menu page.

    Raises:
        requests.exceptions.HTTPError: if the menu page answers with a 4xx/5xx
            status (previously the error page was parsed and an empty list was
            returned silently).
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    base_url = "https://results.aec.gov.au/27966/website/"
    start_url = f"{base_url}HouseDivisionMenu-27966-{state}.htm"

    response = requests.get(start_url, timeout=timeout)
    response.raise_for_status()  # fail loudly instead of scraping an error page
    soup = BeautifulSoup(response.text, "html.parser")

    # Division links are relative hrefs, so prefix them with the site base URL.
    division_links = [
        base_url + anchor["href"]
        for anchor in soup.find_all("a", href=True)
        if anchor["href"].startswith("HouseDivisionPage-27966")
    ]
    return division_links

# Get polling booth names and addresses from one division page (no entries are filtered out here)
def get_booth_name_location(url, timeout=30):
    """Scrape (polling place name, address) pairs from one division page.

    The page is searched for the first table whose caption contains
    "Polling places for". The first row of that table is skipped as the header,
    and the first two cells of each remaining row are taken as name and address.

    Args:
        url: Absolute URL of a division results page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A list of ``(place_name, address)`` tuples with surrounding whitespace
        stripped. The list is empty when the page has no matching table.

    Raises:
        requests.exceptions.HTTPError: if the page answers with a 4xx/5xx
            status (previously such a page silently produced no booths).
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # a failed page must not look like "no booths"
    soup = BeautifulSoup(response.text, "html.parser")

    # Locate the polling-place table by its caption text.
    target_table = None
    for table in soup.find_all("table"):
        caption = table.find("caption")
        if caption and "Polling places for" in caption.text:
            target_table = table
            break

    # Always hand back a list, even when the table is missing.
    if target_table is None:
        return []

    table_data = []
    for row in target_table.find_all("tr")[1:]:  # Skip header row
        columns = row.find_all("td")
        # Rows with fewer than two <td> cells carry no name/address pair.
        if len(columns) >= 2:
            place_name = columns[0].text.strip()
            address = columns[1].text.strip()
            table_data.append((place_name, address))

    return table_data

# Save results to CSV
def save_to_csv(table_data, output_file):
    """Write the scraped rows to a CSV file and report where it was saved.

    Args:
        table_data: Iterable of row sequences; each row is written as one line.
        output_file: Path of the CSV file to create (overwritten if present).
    """
    header = ["PollingPlace", "Address"]

    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        for record in table_data:
            out.writerow(record)

    print(f"Data saved to {output_file}")

# duplicate place names are given a number on the end
def amend_place_names(table_data):
    """Make polling place names unique by numbering repeated names.

    The first occurrence of a name is kept unchanged; the n-th occurrence
    (n > 1) becomes ``"<name> (n)"``. If that label is already taken - for
    example because the input itself contains a place literally called
    ``"<name> (2)"`` - the counter is advanced until an unused label is found,
    so every name in the result is distinct.

    Args:
        table_data: Iterable of ``(polling_place, address)`` pairs.

    Returns:
        A new list of ``(polling_place, address)`` tuples in the original
        order. The input is not modified.
    """
    # How many times each original name has been seen (or the last counter
    # value used for it, when collisions forced the counter forward).
    seen_places = defaultdict(int)
    # Every label already emitted, used to detect collisions.
    used_names = set()

    updated_table_data = []
    for polling_place, address in table_data:
        seen_places[polling_place] += 1
        counter = seen_places[polling_place]

        label = polling_place if counter == 1 else f"{polling_place} ({counter})"
        # Skip ahead past labels that already exist in the output.
        while label in used_names:
            counter += 1
            label = f"{polling_place} ({counter})"

        seen_places[polling_place] = counter
        used_names.add(label)
        updated_table_data.append((label, address))

    return updated_table_data

In [41]:
# Run the script: scrape every division page, clean the rows, export to CSV
division_links = get_seats_in_state()

# Each division page yields a list of (name, address) rows; merge them into one flat list
table_data = []
for link in division_links:
    new_data = get_booth_name_location(link)
    table_data += new_data

# Drop rows whose address is the "Multiple sites" placeholder, then
# number any repeated polling place names before writing the file
table_data = amend_place_names(
    [row for row in table_data if "Multiple sites" not in row[1]]
)
save_to_csv(table_data, "polling_places.csv")

Data saved to polling_places.csv


In [42]:
# Inspect the data: collect rows whose name contains "Team"
# or whose address contains "Special Hospital 1"
matches = [
    row
    for row in table_data
    if ("Team" in row[0]) or ("Special Hospital 1" in row[1])
]

# Show every matching row, or a notice when nothing matched
if not matches:
    print("No entries found for 'Special Hospital 1'.")
else:
    for match in matches:
        print(match)

('Special Hospital Team 1', 'Dalton Gardens, 678 Victoria Rd, RYDE NSW 2112')
('Special Hospital Team 1 (2)', 'Cherrybrook Christian Care Centre, 3 Kitchener Rd, CHERRYBROOK NSW 2126')
('Special Hospital Team 1 (3)', 'UPA Sydney North Aged Care, 1614-1634 Pacific Hwy, WAHROONGA NSW 2076')
('Special Hospital Team 1 (4)', 'Opal Coffs Harbour, 50 Lakes Dr, NORTH BOAMBEE VALLEY NSW 2450')
('Other Mobile Team 1', 'Griffith Aboriginal Medical Service, 38-42 Jondaryan Ave, GRIFFITH NSW 2680')
('Special Hospital Team 1 (5)', 'St Sergius Nursing Home, 1 Gilbert St, CABRAMATTA NSW 2166')
('Special Hospital Team 1 (6)', 'Carrington Nursing Home, 90 Werombi Rd, GRASMERE NSW 2570')
('Special Hospital Team 1 (7)', 'Wesley Gardens, 2B Morgan Rd, BELROSE NSW 2085')
('Special Hospital Team 1 (8)', 'Woodberry Village, 129 Lanhams Rd, WINSTON HILLS NSW 2153')
('Special Hospital Team 1 (9)', 'McLean Care, 67 Killean St, INVERELL NSW 2360')
('Special Hospital Team 1 (10)', 'Harbourside Haven Hostels and Nu

In [43]:
from collections import Counter

# Sanity check: after renaming, no polling place name should appear twice
polling_place_counts = Counter(row[0] for row in table_data)

# Keep only the names that occur more than once
duplicates = {
    name: total
    for name, total in polling_place_counts.items()
    if total > 1
}

# Report the outcome
if not duplicates:
    print("No duplicate polling places found.")
else:
    print("Duplicate Polling Places Found:")
    for place, count in duplicates.items():
        print(f"{place}: {count} times")

No duplicate polling places found.
