# ASGS GeoNames Pipeline

## Required Imports

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import pathlib
from urllib.request import urlretrieve
from zipfile import ZipFile
import pyogrio

## Required Files

### Create required directories

In [2]:
pathlib.Path('geonames/').mkdir(exist_ok=True)

pathlib.Path('asgs/').mkdir(exist_ok=True)

### GeoNames Australian Postcodes

In [3]:
gnames_au_postcodes = 'https://download.geonames.org/export/zip/AU.zip'
urlretrieve(gnames_au_postcodes,'geonames_au_postcodes.zip')

with ZipFile('geonames_au_postcodes.zip','r') as zip:
    zip.extract('AU.txt', path='geonames')
    
pathlib.Path(pathlib.Path.cwd() / 'geonames' / 'AU.txt').rename(pathlib.Path.cwd() / 'geonames' / 'au_postcodes.txt')

pathlib.Path(pathlib.Path.cwd() / 'geonames_au_postcodes.zip').unlink()

### ASGS Files

In [4]:
asgs_geopackages = {'asgs_main.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_2021_MAIN_STRUCTURE_GPKG_GDA2020.zip',
                    'asgs_indigenous.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_2021_Indigenous_Structure_GDA2020_GPKG.zip',
                    'asgs_non_abs.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_Non_ABS_Structures_GDA2020_updated_2024.zip',
                    'asgs_urban_section_state.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_2021_SUA_UCL_SOS_SOSR_GPKG_GDA2020.zip',
                    'asgs_remoteness.zip': 'https://www.abs.gov.au/statistics/standards/australian-statistical-geography-standard-asgs-edition-3/jul2021-jun2026/access-and-downloads/digital-boundary-files/ASGS_Ed3_2021_RA_GPKG_GDA2020.zip'
                   }

asgs_paths = []

for zip_name, link in asgs_geopackages.items():
    urlretrieve(link, zip_name)

    with ZipFile(zip_name,'r') as zip:
        for file in zip.namelist():
            if '.gpkg' in file:
                zip.extract(file, path='asgs')
                asgs_paths.append(pathlib.Path(pathlib.Path.cwd() / 'asgs' / file))

    pathlib.Path(pathlib.Path.cwd() / zip_name).unlink()

## Code

### GeoNames Postcodes

In [5]:
au_postcodes_cols = ['country_code','postal_code','place_name','admin_name1','admin_code1','admin_name2','admin_code2','admin_name3','admin_code3','latitude','longitude','accuracy']

au_postcodes_df = (pd.read_csv(pathlib.Path.cwd() / 'geonames' / 'au_postcodes.txt', delimiter='\t', header=None, names=au_postcodes_cols)
                   [['postal_code', 'place_name', 'latitude', 'longitude']])

au_postcodes_df

Unnamed: 0,postal_code,place_name,latitude,longitude
0,200,Australian National University,-35.2777,149.1189
1,221,Barton,-35.3049,149.1412
2,2540,Jervis Bay,-35.1499,150.6969
3,2540,Hmas Creswell,-35.0280,150.5501
4,2540,Wreck Bay,-35.1648,150.6908
...,...,...,...,...
16868,6989,Maddington,-32.0500,115.9833
16869,6990,Gosnells,-32.0810,116.0054
16870,6991,Kelmscott,-32.1243,116.0259
16871,6992,Armadale,-32.1461,116.0093


In [6]:
au_postcodes_gdf = (gpd.GeoDataFrame(au_postcodes_df, geometry=gpd.points_from_xy(au_postcodes_df['longitude'], au_postcodes_df['latitude']), crs="EPSG:7844")
                    [['postal_code', 'place_name', 'geometry']]
                   )

au_postcodes_gdf.to_file('asgs_geonames.gpkg', driver='GPKG', layer='au_postcodes', mode='w', index=False)

In [7]:
def asgs_geonames_merge(asgs_path: pathlib.PosixPath, au_postcodes_gdf: gpd.GeoDataFrame) -> None:
    for layer in pyogrio.list_layers(asgs_path):
        asgs_gdf = (gpd.read_file(asgs_path, layer=layer[0])
                    .query('not(geometry.isna())')
                   )
    
        asgs_gdf = (asgs_gdf
                    [[asgs_gdf.columns[0], asgs_gdf.columns[1], asgs_gdf.columns[-1]]]
                    .rename(columns={asgs_gdf.columns[0]:asgs_gdf.columns[0].lower(),
                                     asgs_gdf.columns[1]: asgs_gdf.columns[1].lower()
                                    }
                           )
                   )
    
        asgs_gdf['asgs_geometry'] = asgs_gdf.geometry
    
        initial_merge_gdf = (au_postcodes_gdf
                          .to_crs(asgs_gdf.crs)
                          .sjoin(asgs_gdf, how='left', lsuffix='geonames', rsuffix='asgs', predicate='within')
                         )
    
        initial_merge_success_gdf = initial_merge_gdf.query('not(index_asgs.isna())')
        
        initial_merge_nearest_gdf = (initial_merge_gdf
                                     .query('index_asgs.isna()')
                                     [au_postcodes_gdf.columns]
                                     .to_crs(asgs_gdf.crs)
                                     .sjoin_nearest(asgs_gdf, how='left', lsuffix='geonames', rsuffix='asgs')
                                    )

        full_merge_gdf = (pd.concat([initial_merge_success_gdf, initial_merge_nearest_gdf],ignore_index=True)
                          .drop(columns=['index_asgs', 'geometry'])
                          .rename(columns={'asgs_geometry':'geometry'})
                          [['postal_code',asgs_gdf.columns[0], asgs_gdf.columns[1], asgs_gdf.columns[2]]]
                         )

        del initial_merge_gdf, initial_merge_success_gdf, initial_merge_nearest_gdf, asgs_gdf
        
        full_merge_gdf.to_file('asgs_geonames.gpkg', driver='GPKG', layer=layer[0].lower(), index=False)
        print(f'Saved {layer[0]} to GeoPackage!') 

        del full_merge_gdf

    return None

In [None]:
for path in asgs_paths:
    asgs_geonames_merge(path, au_postcodes_gdf)
    pathlib.Path(path).unlink()

pathlib.Path(pathlib.Path.cwd() / 'asgs').rmdir()

pathlib.Path(pathlib.Path.cwd() / 'geonames' / 'au_postcodes.txt').unlink()

pathlib.Path(pathlib.Path.cwd() / 'geonames').rmdir()




Saved MB_2021_AUST_GDA2020 to GeoPackage!





Saved SA2_2021_AUST_GDA2020 to GeoPackage!





Saved SA1_2021_AUST_GDA2020 to GeoPackage!





Saved SA3_2021_AUST_GDA2020 to GeoPackage!





Saved SA4_2021_AUST_GDA2020 to GeoPackage!





Saved GCCSA_2021_AUST_GDA2020 to GeoPackage!





Saved STE_2021_AUST_GDA2020 to GeoPackage!





Saved AUS_2021_AUST_GDA2020 to GeoPackage!





Saved IARE_2021_AUST_GDA2020 to GeoPackage!





Saved ILOC_2021_AUST_GDA2020 to GeoPackage!





Saved IREG_2021_AUST_GDA2020 to GeoPackage!





Saved SAL_2021_AUST_GDA2020 to GeoPackage!





Saved ADD_2021_AUST_GDA2020 to GeoPackage!





Saved TR_2021_AUST_GDA2020 to GeoPackage!





Saved SED_2021_AUST_GDA2020 to GeoPackage!





Saved CED_2021_AUST_GDA2020 to GeoPackage!





Saved LGA_2021_AUST_GDA2020 to GeoPackage!





Saved SED_2022_AUST_GDA2020 to GeoPackage!





Saved DZN_2021_AUST_GDA2020 to GeoPackage!





Saved LGA_2022_AUST_GDA2020 to GeoPackage!





Saved POA_2021_AUST_GDA2020 to GeoPackage!





Saved LGA_2023_AUST_GDA2020 to GeoPackage!





Saved SED_2024_AUST_GDA2020 to GeoPackage!





Saved LGA_2024_AUST_GDA2020 to GeoPackage!





In [None]:
print('It worked!!! Somehow...')