In [234]:
import json
from urllib.parse import quote_plus

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
import psycopg2.extras
from geoalchemy2 import Geometry, WKTElement
from shapely.geometry import Point, Polygon, MultiPolygon
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [235]:
# Load every raw dataset used in the report. Nothing is filtered in this cell;
# the *_raw frames are kept untouched so later cells can always start from them.

# Spatial layers (shapefiles read with geopandas)
sa2_bounds_raw = gpd.read_file("space_data/SA2.shp")
schools_prima_raw, schools_secon_raw, schools_futur_raw = map(
    gpd.read_file,
    (
        "space_data/catchments/catchments_primary.shp",
        "space_data/catchments/catchments_secondary.shp",
        "space_data/catchments/catchments_future.shp",
    ),
)

# Tabular layers (delimited text read with pandas)
businesses_raw, stops_raw, polls_raw, populations_raw, incomes_raw = map(
    pd.read_csv,
    (
        "other_data/Businesses.csv",
        "other_data/Stops.txt",
        "other_data/PollingPlaces2019.csv",
        "other_data/Population.csv",
        "other_data/Income.csv",
    ),
)

In [236]:
# Column summary (dtypes and non-null counts) followed by a preview of the raw SA2 boundaries.
# In the saved output, AREASQKM21 and geometry are the only columns with missing
# values (2454 of 2473 rows are non-null).
sa2_bounds_raw.info()
sa2_bounds_raw

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 2473 entries, 0 to 2472
Data columns (total 17 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   SA2_CODE21  2473 non-null   object  
 1   SA2_NAME21  2473 non-null   object  
 2   CHG_FLAG21  2473 non-null   object  
 3   CHG_LBL21   2473 non-null   object  
 4   SA3_CODE21  2473 non-null   object  
 5   SA3_NAME21  2473 non-null   object  
 6   SA4_CODE21  2473 non-null   object  
 7   SA4_NAME21  2473 non-null   object  
 8   GCC_CODE21  2473 non-null   object  
 9   GCC_NAME21  2473 non-null   object  
 10  STE_CODE21  2473 non-null   object  
 11  STE_NAME21  2473 non-null   object  
 12  AUS_CODE21  2473 non-null   object  
 13  AUS_NAME21  2473 non-null   object  
 14  AREASQKM21  2454 non-null   float64 
 15  LOCI_URI21  2473 non-null   object  
 16  geometry    2454 non-null   geometry
dtypes: float64(1), geometry(1), object(15)
memory usage: 328.6+ KB


Unnamed: 0,SA2_CODE21,SA2_NAME21,CHG_FLAG21,CHG_LBL21,SA3_CODE21,SA3_NAME21,SA4_CODE21,SA4_NAME21,GCC_CODE21,GCC_NAME21,STE_CODE21,STE_NAME21,AUS_CODE21,AUS_NAME21,AREASQKM21,LOCI_URI21,geometry
0,101021007,Braidwood,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,3418.3525,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.58424 -35.44426, 149.58444 -35.4..."
1,101021008,Karabar,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,6.9825,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.21899 -35.36738, 149.21800 -35.3..."
2,101021009,Queanbeyan,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,4.7620,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.21326 -35.34325, 149.21619 -35.3..."
3,101021010,Queanbeyan - East,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,13.0032,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.24034 -35.34781, 149.24024 -35.3..."
4,101021012,Queanbeyan West - Jerrabomberra,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,13.6748,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.19572 -35.36126, 149.19970 -35.3..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2468,901031003,Jervis Bay,0,No change,90103,Jervis Bay,901,Other Territories,9OTER,Other Territories,9,Other Territories,AUS,Australia,67.2296,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"MULTIPOLYGON (((150.69567 -35.18295, 150.69556..."
2469,901041004,Norfolk Island,0,No change,90104,Norfolk Island,901,Other Territories,9OTER,Other Territories,9,Other Territories,AUS,Australia,38.6510,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"MULTIPOLYGON (((167.96325 -29.07212, 167.96326..."
2470,997979799,Migratory - Offshore - Shipping (OT),0,No change,99797,Migratory - Offshore - Shipping (OT),997,Migratory - Offshore - Shipping (OT),99799,Migratory - Offshore - Shipping (OT),9,Other Territories,AUS,Australia,,http://linked.data.gov.au/dataset/asgsed3/SA2/...,
2471,999999499,No usual address (OT),0,No change,99999,No usual address (OT),999,No usual address (OT),99499,No usual address (OT),9,Other Territories,AUS,Australia,,http://linked.data.gov.au/dataset/asgsed3/SA2/...,


In [237]:
# Database connection and querying helpers

# Path of the JSON file holding the database credentials (read by pgconnect).
credentials = "Credentials.json"

def pgconnect(credential_filepath, db_schema="public"):
    """Create a SQLAlchemy engine and open a connection to PostgreSQL.

    Args:
        credential_filepath: Path to a JSON file containing the keys
            'host', 'user', 'password' and 'database'.
        db_schema: Kept for backward compatibility; it is not referenced
            anywhere in this function.

    Returns:
        A ``(engine, connection)`` tuple, or ``(None, None)`` when the
        engine cannot be created or the connection cannot be opened (the
        error is printed rather than raised).

    Raises:
        OSError: If the credentials file cannot be opened.
        KeyError: If one of the expected keys is missing from the file.
    """
    # Read the credentials first so the file is closed before any network I/O.
    with open(credential_filepath) as f:
        db_conn_dict = json.load(f)

    host       = db_conn_dict['host']
    default_db = db_conn_dict['database']
    # User and password are URL-escaped: characters such as '@', ':' or '/'
    # would otherwise be parsed as part of the URL structure.
    db_user    = quote_plus(str(db_conn_dict['user']))
    db_pw      = quote_plus(str(db_conn_dict['password']))

    try:
        db = create_engine(
            f'postgresql+psycopg2://{db_user}:{db_pw}@{host}/{default_db}',
            echo=False,
        )
        conn = db.connect()
        print('Connected successfully.')
    except Exception as e:
        # Best effort: report the failure and let the caller check for None.
        print("Unable to connect to the database.")
        print(e)
        db, conn = None, None
    return db, conn

def query(conn, sqlcmd, args=None, df=True):
    """Run a SQL statement on ``conn`` and return its result.

    Args:
        conn: Open database connection (as returned by ``pgconnect``).
        sqlcmd: SQL statement to execute.
        args: Optional parameters for the statement.
        df: When True the result is read into a pandas DataFrame; when False
            the fetched rows are returned instead.

    Returns:
        With ``df=True``: a DataFrame (empty if the query failed).
        With ``df=False``: the single row when exactly one row was fetched,
        otherwise the list of rows; ``None`` if the query failed.
        Errors are printed, not raised.
    """
    result = pd.DataFrame() if df else None
    try:
        if df:
            result = pd.read_sql_query(sqlcmd, conn, params=args)
        else:
            # Forward parameters only when they were supplied; otherwise None
            # would be passed on as if it were a parameter value.
            if args is None:
                cursor = conn.execute(sqlcmd)
            else:
                cursor = conn.execute(sqlcmd, args)
            rows = cursor.fetchall()
            # A lone row is unwrapped for convenience (existing contract).
            result = rows[0] if len(rows) == 1 else rows
    except Exception as e:
        print("Error encountered: ", e, sep='\n')
    return result

In [238]:
# pd -> PostGIS suitable type conversion helper function
def create_wkt_element(geom, srid):
    """Wrap a geometry as a WKTElement so it can be written to PostGIS.

    Args:
        geom: Geometry exposing ``geom_type`` and ``wkt``, or None for a
            missing geometry.
        srid: Spatial reference identifier attached to the element.

    Returns:
        A WKTElement built from the geometry's WKT, or None when ``geom`` is
        None. A single Polygon is promoted to a one-part MultiPolygon first,
        so polygon and multipolygon rows share one column type.
    """
    # Missing geometries stay missing instead of raising AttributeError.
    if geom is None:
        return None
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)

# SRID attached to every geometry written to PostGIS.
# NOTE(review): no reprojection happens in this notebook — confirm the
# shapefiles are already in the CRS this code refers to.
srid = 4326

In [239]:
# Open the engine and connection used by the database cells that follow.
# pgconnect returns (None, None) if the connection attempt fails.
db, conn = pgconnect(credentials)

Connected successfully.


In [240]:
# List the schemas on this database, point the session's search path at
# `public`, then print the PostGIS version reported by the server.
# (`inspect` is imported in the notebook's import cell.)
print(inspect(db).get_schema_names())
conn.execute("SET search_path TO public;")
print(query(conn, "SELECT PostGIS_version();"))


['information_schema', 'public', 'sa2_regions']
                         postgis_version
0  3.3 USE_GEOS=1 USE_PROJ=1 USE_STATS=1


In [243]:
# Initial filtering (SA2 regions)
# 1. We're only interested in the "Greater Sydney" GCC.
print(sa2_bounds_raw["GCC_NAME21"].value_counts().head())
print("... more regions (truncated)\n")
sa2_bounds = sa2_bounds_raw[sa2_bounds_raw["GCC_NAME21"] == "Greater Sydney"]
print(sa2_bounds["GCC_NAME21"].value_counts(), "\n")

# 2. The analysis is at SA2 level only, so the columns describing the broader
#    enclosing regions (SA3, SA4, state, ...) are dropped.
# 3. Rename the remaining columns.
# 4. Cast them to appropriate types.
sa2_bounds = (
    sa2_bounds[["SA2_CODE21", "SA2_NAME21", "AREASQKM21", "geometry"]]
    .rename(columns={"SA2_CODE21": "code", "SA2_NAME21": "name", "AREASQKM21": "area_sq_km"})
    .astype({"code": int, "name": str})
)

# 5. Replace the shapely geometry column with WKT elements for PostGIS.
sa2_bounds = (
    sa2_bounds
    .assign(geom=sa2_bounds["geometry"].apply(lambda shape: create_wkt_element(geom=shape, srid=srid)))
    .drop(columns="geometry")
)


Greater Sydney       373
Greater Melbourne    361
Rest of Qld          300
Rest of NSW          269
Greater Brisbane     246
Name: GCC_NAME21, dtype: int64
... more regions (truncated)

Greater Sydney    373
Name: GCC_NAME21, dtype: int64 



In [245]:
# Glimpse into SA2 regions.
# info() prints its own report and returns None, so it is called directly;
# wrapping it in print() adds a stray "None" line to the output.
sa2_bounds.info()

# SQL table definition for SA2 regions.
# The name column is sized to the longest region name plus one spare character.
max_sa2_name_len = sa2_bounds.name.str.len().max()
# The geometry column takes its SRID from `srid`, the same value passed to
# to_sql below, so the DDL and the inserted rows cannot drift apart.
table = f'''
DROP TABLE IF EXISTS sa2_bounds;
CREATE TABLE sa2_bounds(
    code INT PRIMARY KEY,
    name VARCHAR({max_sa2_name_len+1}) NOT NULL,
    area_sq_km NUMERIC NOT NULL,
    geom GEOMETRY(MULTIPOLYGON,{srid}) NOT NULL
);
'''
r = conn.execute(table)
# Append into the table created above so its constraints are preserved.
sa2_bounds.to_sql('sa2_bounds', conn, if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid)})

<class 'pandas.core.frame.DataFrame'>
Int64Index: 373 entries, 28 to 641
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   code        373 non-null    int32  
 1   name        373 non-null    object 
 2   area_sq_km  373 non-null    float64
 3   geom        373 non-null    object 
dtypes: float64(1), int32(1), object(2)
memory usage: 13.1+ KB
None


373

In [247]:
# Read the freshly loaded table back; the trailing semicolon suppresses the
# cell's output, so the returned DataFrame is not displayed.
query(conn, "SELECT * FROM sa2_bounds");