In [13]:
# Setup imports and display options
import sys
from pathlib import Path
import pandas as pd

# Put the parent of the current working directory on sys.path so the
# `src.*` imports in this cell resolve.
# NOTE(review): assumes the kernel's working directory sits one level below
# the project root - confirm for your launch setup.
project_root = Path.cwd().parent
root_entry = str(project_root)
if root_entry not in sys.path:
    # Prepend rather than append so this checkout is searched first.
    sys.path.insert(0, root_entry)

from src.api_client import ABSClient
from src.explorer import CensusExplorer

# Display settings for wide tables: up to 100 rows, every column, and a
# 160-character line width.
for option_name, option_value in {
    'display.max_rows': 100,
    'display.max_columns': None,
    'display.width': 160,
}.items():
    pd.set_option(option_name, option_value)

# One API client, shared with the explorer used by the cells below.
client = ABSClient()
explorer = CensusExplorer(client=client)
print('Clients ready')

Clients ready


In [14]:
# Search the catalogue for G21 dataflows, show the matches, and select the
# first one whose id mentions SA2.
g21 = explorer.search_dataflows('G21')
display(g21[['id', 'name']].head(30))

# na=False treats missing ids as non-matches instead of propagating NaN
# into the boolean mask.
is_sa2 = g21['id'].str.contains('SA2', na=False)
sa2_rows = g21.loc[is_sa2]
if sa2_rows.empty:
    raise RuntimeError('No SA2 G21 dataflow found in search results')

# Several ids may match; the first one in search order is used.
dataflow_id = sa2_rows['id'].iloc[0]
print('Using dataflow:', dataflow_id)

Unnamed: 0,id,name
654,C21_G21_CED,"Census 2021, G21 Type of long-term health cond..."
655,C21_G21_LGA,"Census 2021, G21 Type of long-term health cond..."
656,C21_G21_POA,"Census 2021, G21 Type of long-term health cond..."
657,C21_G21_RA,"Census 2021, G21 Type of long-term health cond..."
658,C21_G21_SA2,"Census 2021, G21 Type of long-term health cond..."
659,C21_G21_SAL,"Census 2021, G21 Type of long-term health cond..."
660,C21_G21_SED,"Census 2021, G21 Type of long-term health cond..."
661,C21_G21_SUA,"Census 2021, G21 Type of long-term health cond..."
662,C21_G21_UCL,"Census 2021, G21 Type of long-term health cond..."


Using dataflow: C21_G21_SA2


In [16]:
# Inspect the selected dataflow: list its dimensions, then show every
# codelist that is small enough (at most 50 codes) to read in full.
details = explorer.get_dataflow_details(dataflow_id)

print('Dimensions:')
display(details['dimensions'])

print('\nCodelists (<=50 codes):')
for codelist_id, codelist_table in details['codelists'].items():
    # Skip large codelists; displaying them would flood the output.
    if len(codelist_table) > 50:
        continue
    print(f"\n{codelist_id} ({len(codelist_table)} codes)")
    display(codelist_table)


Dimensions:


Unnamed: 0,id,position,codelist,concept
0,PCHAR,0,urn:sdmx:org.sdmx.infomodel.codelist.Codelist=...,urn:sdmx:org.sdmx.infomodel.conceptscheme.Conc...
1,LTHP,1,urn:sdmx:org.sdmx.infomodel.codelist.Codelist=...,urn:sdmx:org.sdmx.infomodel.conceptscheme.Conc...
2,REGION,2,urn:sdmx:org.sdmx.infomodel.codelist.Codelist=...,urn:sdmx:org.sdmx.infomodel.conceptscheme.Conc...
3,REGION_TYPE,3,urn:sdmx:org.sdmx.infomodel.codelist.Codelist=...,urn:sdmx:org.sdmx.infomodel.conceptscheme.Conc...
4,STATE,4,urn:sdmx:org.sdmx.infomodel.codelist.Codelist=...,urn:sdmx:org.sdmx.infomodel.conceptscheme.Conc...



Codelists (<=50 codes):

CL_C21_LTHP01 (14 codes)


Unnamed: 0,code,name
0,_T,Total (Persons)
1,11,Arthritis
2,21,Asthma
3,31,Cancer (including remission)
4,41,Dementia (including Alzheimer's)
5,51,Diabetes (excluding gestational diabetes)
6,61,Heart disease (including heart attack or angina)
7,71,Kidney disease
8,81,Lung condition (including COPD or emphysema)
9,91,Mental health condition (including depression ...



CL_C21_PCHAR02 (38 codes)


Unnamed: 0,code,name
0,CB_T,Country of birth: Total
1,CB_11,Country of birth: Australia
2,CB_S_T,Country of birth: Total born overseas
3,CB_1_O,Country of birth: Other Oceania and Antarctica
4,CB_21,"Country of birth: United Kingdom, Channel Isla..."
5,CB_2_O,Country of birth: Other North-West Europe
6,CB_3,Country of birth: Southern and Eastern Europe
7,CB_4,Country of birth: North Africa and the Middle ...
8,CB_5,Country of birth: South-East Asia
9,CB_6,Country of birth: North-East Asia



CL_REGION_TYPE (43 codes)


Unnamed: 0,code,name
0,AUS,Australia
1,STE,States and Territories
2,SA4,Statistical Area Level 4
3,SA3,Statistical Area Level 3
4,SA2,Statistical Area Level 2
5,SA1,Statistical Area Level 1
6,RA,Remoteness Area
7,SOS,Section of State
8,SOSR,Section of State Ranges
9,UC,Urban Centres



CL_STATE (10 codes)


Unnamed: 0,code,name
0,AUS,Australia
1,1,New South Wales
2,2,Victoria
3,3,Queensland
4,4,South Australia
5,5,Western Australia
6,6,Tasmania
7,7,Northern Territory
8,8,Australian Capital Territory
9,9,Other Territories


In [17]:
# Fetch labelled 2021 data for the chosen dataflow to learn what the columns
# and values look like.
# NOTE(review): data_key='all' is unfiltered - the recorded run returned
# 1,585,892 rows spanning six region types (not only SA2). Consider
# narrowing the key if a small sample is all that is needed.
sample_query = {
    'dataflow_id': dataflow_id,
    'data_key': 'all',
    'start_period': '2021',
    'end_period': '2021',
    'response_format': 'csv_labels',
}
sample = client.get_data(**sample_query)

display(sample.head(10))
print('\nColumns:', list(sample.columns))
print('Shape:', sample.shape)


Unnamed: 0,DATAFLOW,PCHAR: Selected person characteristic,LTHP: Type of long-term health condition,REGION: Region,REGION_TYPE: Region Type,STATE: State,TIME_PERIOD: Time Period,OBS_VALUE
0,ABS:C21_G21_SA2(1.0.0),CB_T: Country of birth: Total,21: Asthma,20201: Bendigo,SA3: Statistical Area Level 3,2: Victoria,2021,11043
1,ABS:C21_G21_SA2(1.0.0),CB_7: Country of birth: Southern and Central Asia,61: Heart disease (including heart attack or a...,31102: Beenleigh,SA3: Statistical Area Level 3,3: Queensland,2021,13
2,ABS:C21_G21_SA2(1.0.0),CB_N: Country of birth: Country of birth not s...,81: Lung condition (including COPD or emphysema),31106: Springwood - Kingston,SA3: Statistical Area Level 3,3: Queensland,2021,29
3,ABS:C21_G21_SA2(1.0.0),CB_3: Country of birth: Southern and Eastern E...,101: Stroke,31603: Maroochy,SA3: Statistical Area Level 3,3: Queensland,2021,24
4,ABS:C21_G21_SA2(1.0.0),CB_9: Country of birth: Sub-Saharan Africa,81: Lung condition (including COPD or emphysema),50702: Fremantle,SA3: Statistical Area Level 3,5: Western Australia,2021,10
5,ABS:C21_G21_SA2(1.0.0),CB_8: Country of birth: Americas,31: Cancer (including remission),90103: Jervis Bay,SA3: Statistical Area Level 3,9: Other Territories,2021,0
6,ABS:C21_G21_SA2(1.0.0),CB_6: Country of birth: North-East Asia,_N: Not stated,101061542: Yass Surrounds,SA2: Statistical Area Level 2,1: New South Wales,2021,0
7,ABS:C21_G21_SA2(1.0.0),CB_5: Country of birth: South-East Asia,121: No long-term health condition(s),103031072: Mudgee,SA2: Statistical Area Level 2,1: New South Wales,2021,105
8,ABS:C21_G21_SA2(1.0.0),CB_11: Country of birth: Australia,41: Dementia (including Alzheimer's),104021088: Korora - Emerald Beach,SA2: Statistical Area Level 2,1: New South Wales,2021,40
9,ABS:C21_G21_SA2(1.0.0),CB_6: Country of birth: North-East Asia,121: No long-term health condition(s),108011153: Forster-Tuncurry Surrounds,SA2: Statistical Area Level 2,1: New South Wales,2021,12



Columns: ['DATAFLOW', 'PCHAR: Selected person characteristic', 'LTHP: Type of long-term health condition', 'REGION: Region', 'REGION_TYPE: Region Type', 'STATE: State', 'TIME_PERIOD: Time Period', 'OBS_VALUE']
Shape: (1585892, 8)


In [18]:
# Summarise each column: list the distinct values when there are at most 20,
# otherwise report only how many there are.
for column_name in sample.columns:
    column_values = sample[column_name]
    distinct_count = column_values.nunique()

    if distinct_count > 20:
        print(f"\n{column_name}: {distinct_count} unique values")
        continue

    print(f"\n{column_name} ({distinct_count} unique):")
    print(column_values.unique())



DATAFLOW (1 unique):
['ABS:C21_G21_SA2(1.0.0)']

PCHAR: Selected person characteristic: 38 unique values

LTHP: Type of long-term health condition (14 unique):
['21: Asthma' '61: Heart disease (including heart attack or angina)'
 '81: Lung condition (including COPD or emphysema)' '101: Stroke'
 '31: Cancer (including remission)' '_N: Not stated'
 '121: No long-term health condition(s)'
 "41: Dementia (including Alzheimer's)"
 '111: Any other long-term health condition(s)' '11: Arthritis'
 '71: Kidney disease' '51: Diabetes (excluding gestational diabetes)'
 '_T: Total (Persons)'
 '91: Mental health condition (including depression or anxiety)']

REGION: Region: 2981 unique values

REGION_TYPE: Region Type (6 unique):
['SA3: Statistical Area Level 3' 'SA2: Statistical Area Level 2'
 'SA4: Statistical Area Level 4'
 'GCCSA: Greater Capital City Statistical Areas'
 'STE: States and Territories' 'AUS: Australia']

STATE: State (10 unique):
['2: Victoria' '3: Queensland' '5: Western Austral