In [9]:
import requests
import os
import pandas as pd
from urllib.request import urlretrieve
from urllib.error import HTTPError, URLError

In [11]:
# Ensure the folders are set up - from data download notebook
def create_data_folder(output_dir):
    """
    Create the base data directory and one sub-folder per ETL stage.

    The stage folders created are ``landing``, ``raw``, ``curated`` and
    ``analysis``. Calling this again on an existing layout changes nothing.

    :param output_dir: The base directory where the folders will be created
    :raises FileExistsError: if ``output_dir`` or a stage path already exists
        but is not a directory
    """
    # exist_ok=True keeps the call idempotent and avoids the check-then-create
    # race of testing os.path.exists() before calling os.makedirs()
    os.makedirs(output_dir, exist_ok=True)

    # create folders for each stage of the ETL pipeline
    for stage in ('landing', 'raw', 'curated', 'analysis'):
        os.makedirs(os.path.join(output_dir, stage), exist_ok=True)


In [13]:
# build the landing / raw / curated / analysis folder layout under ../data
create_data_folder(output_dir='../data')

In [15]:
# landing folder for the demographic (population by suburb) data;
# exist_ok=True leaves an already-present folder untouched
population_by_suburb_directory = '../data/landing/population_by_suburb'
os.makedirs(name=population_by_suburb_directory, exist_ok=True)


In [17]:
# read the vic_suburbs_postcodes CSV into a DataFrame
postcodes_csv_path = "../data/geo/vic_suburbs_postcodes.csv"
postcodes_df = pd.read_csv(postcodes_csv_path)


In [None]:
# Download the 2021 census community-profile workbook (GCP_<area code>.xlsx)
# from abs.gov.au for every postcode area (POA) listed in postcodes_df.
# The URL has the same shape for every postcode; only the area code varies.
URL_TEMPLATE = (
    "https://abs.gov.au/census/find-census-data/community-profiles/2021/"
    "{area_code}/download/GCP_{area_code}.xlsx"
)

# Request each postcode once, even if it appears on several rows.
# Coercing to int drops missing/non-numeric entries and stops a float column
# (e.g. 3000.0) from leaking ".0" into the area code.
unique_postcodes = (
    pd.to_numeric(postcodes_df["postcode"], errors="coerce")
    .dropna()
    .astype(int)
    .unique()
    .tolist()
)

for postcode in unique_postcodes:
    # POA codes are four digits, so zero-pad postcodes that start with 0
    AREA_CODE = f"POA{postcode:04d}"
    url = URL_TEMPLATE.format(area_code=AREA_CODE)

    # generate output file path
    output_file_path = f"{population_by_suburb_directory}/{AREA_CODE}_population.xlsx"

    # skip postcodes already fetched by an earlier run
    if os.path.exists(output_file_path):
        print(f"File already exists at {output_file_path}")
        continue

    # Download under a temporary name and rename only on success, so that an
    # interrupted or truncated transfer is never mistaken for a finished file
    # by the exists-check above on the next run.
    partial_file_path = f"{output_file_path}.part"
    try:
        urlretrieve(url, partial_file_path)
        os.replace(partial_file_path, output_file_path)
        print(f"✅ File downloaded and saved to {output_file_path}")

    except HTTPError as e:
        # the server answered with an error status (e.g. 404 for an unknown area)
        print(f"❌ HTTP error {e.code} for postcode {postcode}: {e.reason}")

    except URLError as e:
        # no usable response: DNS failure, refused connection, short read, ...
        print(f"❌ Network error for postcode {postcode}: {e.reason}")

    except Exception as e:
        # best-effort loop: report anything else and carry on with the next postcode
        print(f"❌ Unexpected error for postcode {postcode}: {e}")

    finally:
        # remove any leftover partial download (also runs on KeyboardInterrupt)
        if os.path.exists(partial_file_path):
            os.remove(partial_file_path)