# ASGS GeoNames Pipeline

## Required Imports

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import pathlib
from urllib.request import urlretrieve
from zipfile import ZipFile
import pyogrio

## Required Files

### Create required directories

In [2]:
geonames_path = pathlib.Path('geonames/')
geonames_path.mkdir(exist_ok=True)

asgs_path = pathlib.Path('asgs/')
asgs_path.mkdir(exist_ok=True)

outputs_path = pathlib.Path('outputs/')
outputs_path.mkdir(exist_ok=True)

### Get GeoNames Australian Postcodes & Placenames Files

In [3]:
gnames_au_postcodes = 'https://download.geonames.org/export/zip/AU.zip'
urlretrieve(gnames_au_postcodes,'geonames_au_postcodes.zip')
with ZipFile('geonames_au_postcodes.zip','r') as zip:
    zip.extract('AU.txt', path='geonames')
    
pathlib.Path(geonames_path / 'AU.txt').rename(geonames_path / 'au_postcodes.txt')
pathlib.Path('geonames_au_postcodes.zip').unlink()

gnames_au_placenames = 'https://download.geonames.org/export/dump/AU.zip'
urlretrieve(gnames_au_placenames, 'geonames_au_placenames.zip')
with ZipFile('geonames_au_placenames.zip','r') as zip:
    zip.extract('AU.txt', path='geonames')

pathlib.Path(geonames_path / 'AU.txt').rename(geonames_path / 'au_placenames.txt')
pathlib.Path('geonames_au_placenames.zip').unlink()

### Get ASGS Files

In [4]:
asgs_geopackages = {'asgs_main.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_2021_MAIN_STRUCTURE_GPKG_GDA2020.zip',
                    'asgs_indigenous.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_2021_Indigenous_Structure_GDA2020_GPKG.zip',
                    'asgs_non_abs.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_Non_ABS_Structures_GDA2020_updated_2024.zip',
                    'asgs_urban_section_state.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_2021_SUA_UCL_SOS_SOSR_GPKG_GDA2020.zip',
                    'asgs_remoteness.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_2021_RA_GPKG_GDA2020.zip'
                   }

asgs_paths = []

for zip_name, link in asgs_geopackages.items():
    urlretrieve(link, zip_name)

    with ZipFile(zip_name,'r') as zip:
        for file in zip.namelist():
            if '.gpkg' in file:
                zip.extract(file, path='asgs')
                asgs_paths.append(pathlib.Path(asgs_path / file))

    pathlib.Path(zip_name).unlink()

## Create GeoNames Australian Postcodes & Placenames Shapefiles

In [5]:
au_postcodes_cols = ['country_code','postal_code','place_name','admin_name1','admin_code1','admin_name2','admin_code2','admin_name3','admin_code3','latitude','longitude','accuracy']

au_postcodes_df = (pd.read_csv(geonames_path / 'au_postcodes.txt', delimiter='\t', header=None, names=au_postcodes_cols)
                   [['postal_code', 'place_name', 'latitude', 'longitude']]
                   .drop_duplicates(ignore_index=True)
                  )

au_postcodes_df

Unnamed: 0,postal_code,place_name,latitude,longitude
0,200,Australian National University,-35.2777,149.1189
1,221,Barton,-35.3049,149.1412
2,2540,Jervis Bay,-35.1499,150.6969
3,2540,Hmas Creswell,-35.0280,150.5501
4,2540,Wreck Bay,-35.1648,150.6908
...,...,...,...,...
16867,6989,Maddington,-32.0500,115.9833
16868,6990,Gosnells,-32.0810,116.0054
16869,6991,Kelmscott,-32.1243,116.0259
16870,6992,Armadale,-32.1461,116.0093


In [6]:
au_postcodes_gdf = (gpd.GeoDataFrame(au_postcodes_df, geometry=gpd.points_from_xy(au_postcodes_df['longitude'], au_postcodes_df['latitude']), crs="EPSG:7844")
                    [['postal_code', 'place_name', 'geometry']]
                    .drop_duplicates(ignore_index=True)
                   )

au_postcodes_path = pathlib.Path(outputs_path / 'au_postcodes')

au_postcodes_path.mkdir(exist_ok=True)

au_postcodes_gdf.to_file(au_postcodes_path / 'au_postcodes.shp')

del au_postcodes_gdf

with ZipFile(outputs_path / 'au_postcodes.zip', mode='w') as archive:
    for file in au_postcodes_path.iterdir():
        archive.write(file, arcname=file.name)
        file.unlink()

au_postcodes_path.rmdir()

del au_postcodes_path

pathlib.Path(geonames_path / 'au_postcodes.txt').unlink()

  au_postcodes_gdf.to_file(au_postcodes_path / 'au_postcodes.shp')


In [7]:
au_placenames_cols = ['geonameid','name','asciiname','alternatenames','latitude','longitude','feature_class','feature_code','country_code','cc2','admin1_code','admin2_code','admin3_code','admin4_code','population','elevation','dem','timezone','modification_date']

au_placenames_df = (pd.read_csv(pathlib.Path.cwd() / 'geonames' / 'au_placenames.txt', delimiter='\t', header=None, names=au_placenames_cols)
                    [['geonameid', 'name', 'latitude', 'longitude', 'feature_class', 'feature_code']]
                    .drop_duplicates(ignore_index=True)
                   )

au_placenames_df

  au_placenames_df = (pd.read_csv(pathlib.Path.cwd() / 'geonames' / 'au_placenames.txt', delimiter='\t', header=None, names=au_placenames_cols)


Unnamed: 0,geonameid,name,latitude,longitude,feature_class,feature_code
0,1818181,Flinders Shoal,-9.88333,129.28333,H,SHOL
1,2057175,Zuytdorp Point,-26.40321,113.29991,T,PT
2,2057176,Zuytdorp Cliffs,-27.10165,113.88189,T,CLF
3,2057177,Mount Zion,-33.19381,138.20853,T,MT
4,2057178,Mount Zephyr,-28.38701,121.79360,T,MT
...,...,...,...,...,...,...
214172,12952964,Charles Allen Reserve,-37.79191,145.30293,L,PRK
214173,13060436,Davis Hill,-27.78833,153.21444,T,MT
214174,13061027,Centre of the Universe,-33.47621,150.28941,T,HLL
214175,13100187,Flat Top,-33.65607,150.36924,T,MT


In [8]:
au_placenames_gdf = (gpd.GeoDataFrame(au_placenames_df, geometry=gpd.points_from_xy(au_placenames_df['longitude'], au_placenames_df['latitude']), crs="EPSG:7844")
                    [['geonameid', 'name', 'feature_class', 'feature_code', 'geometry']]
                    .drop_duplicates(ignore_index=True)
                   )

au_placenames_path = pathlib.Path(outputs_path / 'au_placenames')

au_placenames_path.mkdir(exist_ok=True)

au_placenames_gdf.to_file(au_placenames_path / 'au_placenames.shp')

del au_placenames_gdf

with ZipFile(outputs_path / 'au_placenames.zip', mode='w') as archive:
    for file in au_placenames_path.iterdir():
        archive.write(file, arcname=file.name)
        file.unlink()

au_placenames_path.rmdir()

del au_placenames_path

pathlib.Path(geonames_path / 'au_placenames.txt').unlink()

  au_placenames_gdf.to_file(au_placenames_path / 'au_placenames.shp')


## Create ASGS Shapefiles & Final Cleanup

In [9]:
def create_asgs_shapefile(path: pathlib.PosixPath) -> None:
    for layer in pyogrio.list_layers(path):
        asgs_gdf = (gpd.read_file(path, layer=layer[0])
                    .query('not(geometry.isna())')
                    .drop_duplicates(ignore_index=True)
                   )
    
        asgs_gdf = (asgs_gdf
                    [[asgs_gdf.columns[0], asgs_gdf.columns[1], 'geometry']]
                    .rename(columns={asgs_gdf.columns[0]:asgs_gdf.columns[0].lower(),
                                     asgs_gdf.columns[1]: asgs_gdf.columns[1].lower()
                                    }
                           )
                    .drop_duplicates(ignore_index=True)
                   )
        
        asgs_path = pathlib.Path(outputs_path / f'{layer[0].lower()}')
        
        asgs_path.mkdir(exist_ok=True)
        
        asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')
        
        del asgs_gdf
        
        with ZipFile(outputs_path / f'{layer[0].lower()}.zip', mode='w') as archive:
            for file in asgs_path.iterdir():
                archive.write(file, arcname=file.name)
                file.unlink()
        
        asgs_path.rmdir()
        
        del asgs_path
        
        print(f'Saved {layer[0]} to Shapefile!')

    return None

In [10]:
for path in asgs_paths:
    create_asgs_shapefile(path)
    pathlib.Path(path).unlink()

pathlib.Path(geonames_path).rmdir()
pathlib.Path(asgs_path).rmdir()
print("Finished!")

  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved MB_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SA2_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SA1_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SA3_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SA4_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved GCCSA_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved STE_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved AUS_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved IARE_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved ILOC_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved IREG_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SAL_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved ADD_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved TR_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SED_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved CED_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved LGA_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SED_2022_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved DZN_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved LGA_2022_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved POA_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved LGA_2023_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SED_2024_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved LGA_2024_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SOS_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SOSR_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved UCL_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved SUA_2021_AUST_GDA2020 to Shapefile!


  asgs_gdf.to_file(asgs_path / f'{layer[0].lower()}.shp')


Saved RA_2021_AUST_GDA2020 to Shapefile!
Finished!
