# Example notebook: Australian census data

Example notebook to show how to download, process and display geopackaged Australian census data.

Australian census data downloaded from:
 - https://www.abs.gov.au/census/find-census-data/geopackages

Using the following settings:
 - 2021 census year
 - All states and territories
 - G01 tables (population counts)
 - 2020 GDA (geo data specifications)

## Import Libraries & Set Globals

In [1]:
# Import libraries
import os
import package.support as support

# Set globals
# WORK_DIR comes from the environment; the local path below is only a
# fallback for when the variable is unset, so a configured environment is
# no longer silently overridden.
WORK_DIR = (
    os.getenv('WORK_DIR')
    or 'C:/Users/zlatt/Documents/data/australian-census-data'
)

# Connection string for blob storage access (None when the variable is unset)
CONN_STR = os.getenv('CONN_STR')

## Set Variables & Load Data

In [3]:
# Census extract settings handed to support.load_raw; the year and GDA type
# are also passed on to support.process_data
data_config = dict(
    data_year=2021,
    data_topic='G01',
    geo_area='AUST',
    gda_spec='GDA2020',
    gda_type='SA4',
)

# Column settings handed to support.process_data (and later to
# support.create_figure)
gdf_column = dict(name='Tot_P_P', rename='Population', type='int')

# Load the raw data, then derive the processed frame from it
raw_gdf = support.load_raw(WORK_DIR, data_config, False, False)
pro_gdf = support.process_data(
    raw_gdf,
    gdf_column,
    data_config['data_year'],
    data_config['gda_type'],
)

In [4]:
# Display the processed data returned by support.process_data
pro_gdf

Unnamed: 0,Location,Population,geometry
0,Capital Region,238810,"MULTIPOLYGON (((150.05261 -37.26253, 150.05251..."
1,Central Coast,346596,"MULTIPOLYGON (((151.31497 -33.55578, 151.31496..."
2,Central West,212962,"MULTIPOLYGON (((150.14236 -32.34153, 150.14255..."
3,Coffs Harbour - Grafton,146127,"MULTIPOLYGON (((153.07639 -30.42982, 153.07645..."
4,Far West and Orana,115566,"MULTIPOLYGON (((148.67619 -29.50976, 148.67662..."
...,...,...,...
84,West and North West,116156,"MULTIPOLYGON (((144.60439 -41.01001, 144.60443..."
85,Darwin,139902,"MULTIPOLYGON (((130.99838 -12.16118, 130.99608..."
86,Northern Territory - Outback,89103,"MULTIPOLYGON (((132.78392 -11.31694, 132.78315..."
87,Australian Capital Territory,453890,"MULTIPOLYGON (((149.06239 -35.1591, 149.09134 ..."


In [4]:
# Set file name
# The census settings exist only as keys of data_config (there are no
# standalone data_topic / gda_type / ... variables), so the template is
# filled from the dict; this keeps the cell runnable on a fresh kernel.
filename = "{data_topic}_{gda_type}_{data_year}_{geo_area}_{gda_spec}".format(
    **data_config
)

# Persist the processed data as parquet under WORK_DIR/pro
pro_gdf.to_parquet(
    f"{WORK_DIR}/pro/{filename}.parquet"
)

In [6]:
import geopandas as gpd

# Read back the parquet file stored under WORK_DIR/pro and display it
parquet_path = f"{WORK_DIR}/pro/{filename}.parquet"
df = gpd.read_parquet(parquet_path)

df

Unnamed: 0,Location,Population,geometry
0,Capital Region,238810,"MULTIPOLYGON (((150.05261 -37.26253, 150.05251..."
1,Central Coast,346596,"MULTIPOLYGON (((151.31497 -33.55578, 151.31496..."
2,Central West,212962,"MULTIPOLYGON (((150.14236 -32.34153, 150.14255..."
3,Coffs Harbour - Grafton,146127,"MULTIPOLYGON (((153.07639 -30.42982, 153.07645..."
4,Far West and Orana,115566,"MULTIPOLYGON (((148.67619 -29.50976, 148.67662..."
...,...,...,...
84,West and North West,116156,"MULTIPOLYGON (((144.60439 -41.01001, 144.60443..."
85,Darwin,139902,"MULTIPOLYGON (((130.99838 -12.16118, 130.99608..."
86,Northern Territory - Outback,89103,"MULTIPOLYGON (((132.78392 -11.31694, 132.78315..."
87,Australian Capital Territory,453890,"MULTIPOLYGON (((149.06239 -35.1591, 149.09134 ..."


## Create Figure & Save Figure

In [5]:
# Create figure
figure = support.create_figure(
    pro_gdf,
    gdf_column
)

# Save figure once per output format.
# The census settings are read from data_config: they are defined only as
# keys of that dict, so bare names such as data_year would raise NameError
# on a fresh kernel.
for figure_format in ('json', 'html'):
    support.save_figure(
        WORK_DIR,
        figure,
        figure_format,
        data_config['data_year'],
        data_config['data_topic'],
        data_config['geo_area'],
        data_config['gda_spec'],
        data_config['gda_type']
    )

## Load Figure & Display Figure

In [6]:
# Load figure
# The census settings are read from data_config: they are defined only as
# keys of that dict, so bare names such as data_year would raise NameError
# on a fresh kernel.
figure = support.read_figure(
    WORK_DIR,
    data_config['data_year'],
    data_config['data_topic'],
    data_config['geo_area'],
    data_config['gda_spec'],
    data_config['gda_type']
)

# Display figure (left disabled; uncomment to render)
#figure.show()

In [3]:
from azure.storage.blob import BlobServiceClient
import os

def download_folder_from_blob(container_name, folder_name, download_path, connection_string):
    """
    Downloads all blobs from a specified folder in an Azure Blob Storage container.

    The folder's internal structure is recreated below ``download_path``.
    Only blobs located inside ``folder_name`` are downloaded: a folder called
    ``raw`` does not match a sibling such as ``raw_old``. Zero-name "directory
    marker" blobs (the folder itself, or names ending in ``/``) are skipped.

    :param container_name: Name of the Azure Blob Storage container.
    :param folder_name: Name of the folder (virtual directory) within the container.
        An empty string downloads the whole container.
    :param download_path: Local directory where the blobs should be downloaded.
    :param connection_string: Azure Blob Storage connection string.
    :raises ValueError: If ``connection_string`` is empty or ``None``.
    """

    if not connection_string:
        raise ValueError("connection_string must be a non-empty Azure Storage connection string")

    # Normalise the folder to a "folder/" prefix so that listing cannot pick
    # up blobs that merely share the leading characters (e.g. "raw_old/...").
    folder = folder_name.rstrip("/")
    prefix = f"{folder}/" if folder else ""

    # Ensure download path exists
    os.makedirs(download_path, exist_ok=True)

    # Create the BlobServiceClient object
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)

    # Get the container client
    container_client = blob_service_client.get_container_client(container_name)

    # List all blobs in the specified folder (no prefix means the whole container)
    blobs = container_client.list_blobs(name_starts_with=prefix or None)

    for blob in blobs:
        blob_name = blob.name

        # Path of the blob relative to the requested folder
        relative_name = blob_name[len(prefix):]

        # Skip directory markers: they have no file name to write to
        if not relative_name or relative_name.endswith("/"):
            continue

        # Blob names always use "/" as separator; rebuild with the OS separator
        file_path = os.path.join(download_path, *relative_name.split("/"))

        # Ensure the local directory exists
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        print(f"Downloading {blob_name} to {file_path}...")

        # Stream the blob straight into the file instead of buffering the
        # whole payload in memory first
        blob_client = container_client.get_blob_client(blob_name)
        with open(file_path, "wb") as file:
            blob_client.download_blob().readinto(file)

    print("Download complete!")

# Download the "raw" folder of the census container into WORK_DIR/download.
# The target is derived from WORK_DIR rather than repeating the absolute
# path, so the notebook keeps a single place to configure the data location.
container_name = "australian-census-data"
folder_name = "raw"
download_path = f"{WORK_DIR}/download"
download_folder_from_blob(container_name, folder_name, download_path, CONN_STR)

Downloading raw/Geopackage_2021_G01_AUST_GDA2020/G01_AUST_GDA2020.gpkg to C:/Users/zlatt/Documents/data/australian-census-data/download\Geopackage_2021_G01_AUST_GDA2020\G01_AUST_GDA2020.gpkg...
Downloading raw/Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/2021Census_geog_desc_1st_2nd_3rd_release.xlsx to C:/Users/zlatt/Documents/data/australian-census-data/download\Geopackage_2021_G01_AUST_GDA2020\Release3_Metadata\GDA2020\2021Census_geog_desc_1st_2nd_3rd_release.xlsx...
Downloading raw/Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/2021_GCP_Sequential_Template_R2.xlsx to C:/Users/zlatt/Documents/data/australian-census-data/download\Geopackage_2021_G01_AUST_GDA2020\Release3_Metadata\GDA2020\2021_GCP_Sequential_Template_R2.xlsx...
Downloading raw/Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/ADD_2021_AUST_GDA2020.xml to C:/Users/zlatt/Documents/data/australian-census-data/download\Geopackage_2021_G01_AUST_GDA2020\Release3_Metadata\GDA2020\ADD_2021_AU