# Example notebook: Australian census data - cloud

Example notebook showing how to download, process and display geopackaged Australian census data stored in the cloud (Azure Blob Storage).

Australian census data downloaded from:
 - https://www.abs.gov.au/census/find-census-data/geopackages

Using the following settings:
 - 2021 census year
 - All states and territories
 - G01 tables (population counts)
 - GDA2020 (Geocentric Datum of Australia 2020)

## Import Libraries & Set Globals

In [2]:
# Import libraries
import os
from azure.storage.blob import BlobServiceClient
import geopandas as gpd
import plotly.graph_objects as go
import plotly.io as pio

# Set globals
# Root folder for the notebook's local data files.
# Defaults to the original local path; set the CENSUS_DATA_DIR environment
# variable to point the notebook at a different location.
WORKING_DIRECTORY = os.getenv(
    'CENSUS_DATA_DIR',
    'C:/Users/zlatt/Documents/data/australian-census-data'
)
# Alias so that code referring to either name resolves to the same directory
# (without it, any reference to BASE_DIRECTORY raises NameError).
BASE_DIRECTORY = WORKING_DIRECTORY

In [3]:
# Connection string taken from the CONN_STR environment variable
# (None when the variable is not set).
conn_str = os.environ.get('CONN_STR')

In [3]:
# Define your connection string and container name
connection_string = conn_str
container_name = "australian-census-data"

# Fail early with an actionable message instead of handing an empty value
# to the SDK, where the failure would be harder to interpret.
if not connection_string:
    raise ValueError(
        "Azure Storage connection string is empty; "
        "set the CONN_STR environment variable before running this cell."
    )

# Create a BlobServiceClient object using the connection string
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Get the container client object
container_client = blob_service_client.get_container_client(container_name)

# List all blobs in the container
blobs = container_client.list_blobs()

# Print the names of all blobs
print(f"Blobs in the container '{container_name}':")
for blob in blobs:
    print(blob.name)

Blobs in the container 'australian-census-data':
Geopackage_2021_G01_AUST_GDA2020/G01_AUST_GDA2020.gpkg
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/2021Census_geog_desc_1st_2nd_3rd_release.xlsx
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/2021_GCP_Sequential_Template_R2.xlsx
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/ADD_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/AUS_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/CED_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/DZN_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/GCCSA_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/IARE_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/ILOC_2021_AUST_GDA2020.xml
Geopackage_2021_G01_AUST_GDA2020/Release3_Metadata/GDA2020/IREG_2021_AUST_GDA2020.xml
Geopackage

## Set Variables & Load Data

In [2]:
# Set variables
# These values are combined below into the geopackage folder, file and layer names.
census_year = '2021'  # census year
data_topic = 'G01'    # census table code
geo_type = 'AUST'     # geographic coverage of the download
gda_type = '2020'     # GDA version of the geometry
map_type = 'SA4'      # presumably the geography level to map -- confirm against the layer list

# Load data
# Folder and file names are built from the variables above,
# e.g. Geopackage_2021_G01_AUST_GDA2020/G01_AUST_GDA2020.gpkg
geo_file_directory = f'Geopackage_{census_year}_{data_topic}_{geo_type}_GDA{gda_type}'
geo_file_name = f'{data_topic}_{geo_type}_GDA{gda_type}.gpkg'

# WORKING_DIRECTORY is the data root set in the globals cell.
file_path = f'{WORKING_DIRECTORY}/{geo_file_directory}/{geo_file_name}'
# Layer to read from the geopackage, e.g. G01_SA4_2021_AUST
geo_layer = f'{data_topic}_{map_type}_{census_year}_{geo_type}'

census_gdf = gpd.read_file(
    filename=file_path,
    layer=geo_layer
)

# Process data
# Keep only the region name, population count and geometry columns, give them
# short display names, drop rows without a geometry and fix the column dtypes.
name_column = f'{map_type}_NAME_{census_year}'
columns = [name_column, 'Tot_P_P', 'geometry']
rename_columns = {
    name_column: 'Name',
    'Tot_P_P': 'Population',
}
column_types = {
    'Name': 'str',
    'Population': 'int',
    'geometry': 'geometry'
}

census_gdf = (
    census_gdf[columns]
    .rename(columns=rename_columns)
    # notna() is the explicit missing-value check; comparing against None
    # with != relies on how the array type implements element-wise comparison.
    .loc[lambda gdf: gdf['geometry'].notna()]
    # Renumber rows 0..n-1 after the filter.
    .reset_index(drop=True)
    .astype(column_types)
)

## Create Figure & Save Figure

In [3]:
# Create figure data
# Indexed by region name; the same index is passed as `locations` below
# (presumably it also becomes the GeoJSON feature id -- verify).
figure_df = census_gdf.copy().set_index('Name')
figure_geojson = figure_df.__geo_interface__

# Hover text: location plus population, with the default trace-name box hidden
hover_template = (
    '<b>Location</b>: %{location}<br>'
    '<b>Population</b>: %{z:.2s}<extra></extra>'
)

# Create choropleth trace coloured by population
trace = go.Choroplethmapbox(
    name='Census Data',
    geojson=figure_geojson,
    locations=figure_df.index,
    z=figure_df['Population'],
    marker_opacity=0.5,
    hovertemplate=hover_template
)

# Create map layout: fixed-size map centred on lat -25 / lon 130,
# restricted to the given bounds
layout = go.Layout(
    mapbox={
        'style': "carto-positron",
        'center': {'lat': -25, 'lon': 130},
        'zoom': 2,
        'bounds': {'west': 85, 'east': 185, 'north': 0, 'south': -50},
    },
    autosize=True,
    margin={'l': 0, 'r': 0, 't': 0, 'b': 0},
    height=650,
    width=1300
)

# Create figure, then add the trace and the layout
# (results are assigned so the cell does not render the figure as its output)
figure = go.Figure()
figure = figure.add_trace(trace)
figure = figure.update_layout(layout)

In [None]:
# Write figure
file_name = f"{data_topic}_{map_type}_{census_year}_{geo_type}_GDA{gda_type}"
# WORKING_DIRECTORY is the data root set in the globals cell.
figures_directory = f"{WORKING_DIRECTORY}/figures"
# Make sure the output folder exists so the writes below do not fail on a
# missing directory; exist_ok keeps the cell safe to re-run.
os.makedirs(figures_directory, exist_ok=True)

# Save an HTML copy and a JSON copy that can be reloaded with pio.read_json.
pio.write_html(
    figure,
    f"{figures_directory}/{file_name}.html"
)
pio.write_json(
    figure,
    f"{figures_directory}/{file_name}.json",
)

## Load Figure & Display Figure

In [None]:
# Load figure
# Rebuild the file name from the notebook variables and read the saved JSON
# figure from the figures folder under WORKING_DIRECTORY (the data root set
# in the globals cell).
file_name = f"{data_topic}_{map_type}_{census_year}_{geo_type}_GDA{gda_type}"
figure = pio.read_json(
    f"{WORKING_DIRECTORY}/figures/{file_name}.json",
)

# Show figure
figure.show()