# 1. Setup

Import librarys and load database into juypiter

### Connect to data base

In [171]:
!pip install geoalchemy2
!pip install matplotlib



In [172]:
import psycopg2
import psycopg2.extras
import json
import os
import pandas as pd
import geopandas as gpd
from datetime import datetime
from sqlalchemy import text
from sqlalchemy import inspect
from sqlalchemy import create_engine
from shapely.geometry import Point, Polygon, MultiPolygon
from geoalchemy2 import Geometry, WKTElement
import matplotlib.pyplot as plt

credentials = "Credentials.json"

def pgconnect(credential_filepath, db_schema="sydney"):
    with open(credential_filepath) as f:
        db_conn_dict = json.load(f)
        host       = db_conn_dict['host']
        db_user    = db_conn_dict['user']
        db_pw      = db_conn_dict['password']
        default_db = db_conn_dict['user']
        try:
            db = create_engine('postgresql+psycopg2://'+db_user+':'+db_pw+'@'+host+'/'+default_db, echo=False)
            conn = db.connect()
            print('Connected successfully.')
        except Exception as e:
            print("Unable to connect to the database.")
            print(e)
            db, conn = None, None
        return db,conn
    
def query(conn, sqlcmd, args=None, df=True):
    result = pd.DataFrame() if df else None
    try:
        if df:
            result = pd.read_sql_query(sqlcmd, conn, params=args)
        else:
            result = conn.execute(text(sqlcmd), args).fetchall()
            result = result[0] if len(result) == 1 else result
    except Exception as e:
        print("Error encountered: ", e, sep='\n')
    return result

srid = 4283

In [173]:
db, conn = pgconnect(credentials)

Connected successfully.


### Import data into python and data cleaning

Loading all datasets into python first using **Geopandas** module. 

Removed any empty primary keys, transform geospatial into formats recognizable by SQL, and renamed some of the columns.

Diff between polygons and multipolygons: https://gis.stackexchange.com/questions/225368/understanding-difference-between-polygon-and-multipolygon-for-shapefiles-in-qgis 

Some are polygons and some are multi, if we ...

In [174]:
SA2_Regions = gpd.read_file('SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp')
# SA2_shapely = SA2_Regions
SA2_Regions = SA2_Regions[SA2_Regions['GCC_CODE21'] == "1GSYD"]

def create_wkt_element(geom, srid):
    if geom.geom_type == 'Polygon':
        geom = MultiPolygon([geom])
    return WKTElement(geom.wkt, srid)
SA2_Regionsog = SA2_Regions.copy() 
SA2_regions_geometry = SA2_Regions
SA2_Regions['geom'] = SA2_Regions['geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid)) 
SA2_Regions.columns = map(str.lower, SA2_Regions.columns)

Removing unwanted columns. Note we can only run this again.

In [175]:
SA2_Regions = SA2_Regions.drop(columns=['geometry', 'gcc_code21', 'gcc_name21', 'ste_code21', 'ste_name21', 'aus_code21', 'aus_name21'])
SA2_Regions.head(10)

Unnamed: 0,sa2_code21,sa2_name21,chg_flag21,chg_lbl21,sa3_code21,sa3_name21,sa4_code21,sa4_name21,areasqkm21,loci_uri21,geom
28,102011028,Avoca Beach - Copacabana,0,No change,10201,Gosford,102,Central Coast,6.4376,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.413733024921 -33.465580583...
29,102011029,Box Head - MacMasters Beach,0,No change,10201,Gosford,102,Central Coast,32.0802,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.37484081570685 -33.5005199...
30,102011030,Calga - Kulnura,0,No change,10201,Gosford,102,Central Coast,767.9512,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.20449037540152 -33.5328022...
31,102011031,Erina - Green Point,0,No change,10201,Gosford,102,Central Coast,33.7934,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.37193611462118 -33.4369790...
32,102011032,Gosford - Springfield,0,No change,10201,Gosford,102,Central Coast,16.9123,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.32348639265098 -33.4277852...
33,102011033,Kariong,0,No change,10201,Gosford,102,Central Coast,8.3063,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.2856983473498 -33.43233822...
34,102011034,Kincumber - Picketts Valley,0,No change,10201,Gosford,102,Central Coast,11.7169,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.37292545042803 -33.4672773...
35,102011035,Narara,0,No change,10201,Gosford,102,Central Coast,7.7021,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.32173446737218 -33.3942363...
36,102011036,Niagara Park - Lisarow,0,No change,10201,Gosford,102,Central Coast,16.7316,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.34438983583274 -33.3819733...
37,102011037,Point Clare - Koolewong,0,No change,10201,Gosford,102,Central Coast,6.7794,http://linked.data.gov.au/dataset/asgsed3/SA2/...,MULTIPOLYGON (((151.3084624827523 -33.44283732...


Businesses dataset:

In SQL column names cannot start with numbers 

set `sa2_code` as primary key, check if any row has empty primary key

In [176]:
businesses = pd.read_csv('Businesses.csv')

column_mapping = {
    '0_to_50k_businesses': 'businesses_0_50k',
    '50k_to_200k_businesses': 'businesses_50k_200k',
    '200k_to_2m_businesses': 'businesses_200k_2m',
    '2m_to_5m_businesses': 'businesses_2m_5m',
    '5m_to_10m_businesses': 'businesses_5m_10m',
    '10m_or_more_businesses': 'businesses_10m_or_more',
    'total_businesses': 'total_businesses'
}
businesses.rename(columns=column_mapping, inplace=True)

# Check for missing values in the 'sa2_code' column, as this is the primary key
missing_sa2_code = businesses['sa2_code'].isnull()
print (f"{businesses['sa2_code'].isnull().sum()} rows with empty primary key have been deleted")
businesses = businesses[~missing_sa2_code]
businesses.columns = map(str.lower, businesses.columns)

businesses.head(20)




0 rows with empty primary key have been deleted


Unnamed: 0,industry_code,industry_name,sa2_code,sa2_name,businesses_0_50k,businesses_50k_200k,businesses_200k_2m,businesses_2m_5m,businesses_5m_10m,businesses_10m_or_more,total_businesses
0,A,"Agriculture, Forestry and Fishing",101021007,Braidwood,136,92,63,4,0,0,296
1,A,"Agriculture, Forestry and Fishing",101021008,Karabar,6,3,0,0,0,0,9
2,A,"Agriculture, Forestry and Fishing",101021009,Queanbeyan,6,4,3,0,0,3,15
3,A,"Agriculture, Forestry and Fishing",101021010,Queanbeyan - East,0,3,0,0,0,0,3
4,A,"Agriculture, Forestry and Fishing",101021012,Queanbeyan West - Jerrabomberra,7,4,5,0,0,0,16
5,A,"Agriculture, Forestry and Fishing",101021610,Googong,0,3,0,0,0,0,3
6,A,"Agriculture, Forestry and Fishing",101021611,Queanbeyan Surrounds,182,80,60,5,0,0,327
7,A,"Agriculture, Forestry and Fishing",101031013,Bombala,65,92,81,6,0,0,246
8,A,"Agriculture, Forestry and Fishing",101031014,Cooma,28,30,36,3,0,3,96
9,A,"Agriculture, Forestry and Fishing",101031015,Cooma Surrounds,91,84,73,0,0,0,251


A summary of our data so far

In [177]:
businesses.dtypes

industry_code             object
industry_name             object
sa2_code                   int64
sa2_name                  object
businesses_0_50k           int64
businesses_50k_200k        int64
businesses_200k_2m         int64
businesses_2m_5m           int64
businesses_5m_10m          int64
businesses_10m_or_more     int64
total_businesses           int64
dtype: object

Income dataset:
We remove those rows without a SA2 code

In [178]:
income = pd.read_csv('Income.csv')

missing_sa2_code = income['sa2_code21'].isnull()
print (f"{income['sa2_code21'].isnull().sum()} rows with empty primary key have been deleted")
income = income[~missing_sa2_code]

income.dtypes


0 rows with empty primary key have been deleted


sa2_code21        int64
sa2_name         object
earners          object
median_age       object
median_income    object
mean_income      object
dtype: object

Population dataset:
Similar as above. In SQL column names cannot start with numbers

In [179]:
population = pd.read_csv('Population.csv')

column_mapping = {
    '0-4_people': 'people_0_4',
    '5-9_people': 'people_5_9',
    '10-14_people': 'people_10_14',
    '15-19_people': 'people_15_19',
    '20-24_people': 'people_20_24',
    '25-29_people': 'people_25_29',
    '30-34_people': 'people_30_34',
    '35-39_people': 'people_35_39',
    '40-44_people': 'people_40_44',
    '45-49_people': 'people_45_49',
    '50-54_people': 'people_50_54',
    '55-59_people': 'people_55_59',
    '60-64_people': 'people_60_64',
    '65-69_people': 'people_65_69',
    '70-74_people': 'people_70_74',
    '75-79_people': 'people_75_79',
    '80-84_people': 'people_80_84',
    '85-and-over_people': 'people_85_and_over',
    'total_people': 'total_people'
}
population.rename(columns=column_mapping, inplace=True)
population.dtypes


sa2_code               int64
sa2_name              object
people_0_4             int64
people_5_9             int64
people_10_14           int64
people_15_19           int64
people_20_24           int64
people_25_29           int64
people_30_34           int64
people_35_39           int64
people_40_44           int64
people_45_49           int64
people_50_54           int64
people_55_59           int64
people_60_64           int64
people_65_69           int64
people_70_74           int64
people_75_79           int64
people_80_84           int64
people_85_and_over     int64
total_people           int64
dtype: object

School Future:
Create a copy of the original data in case there is anything wrong

Converting Geometry to WKT: WKT is a text-based format used to represent geometric shapes like points, lines, and polygons. We then drop geometry column...

Convert datetime.
Lowercasing column names

In [180]:
schools_future = gpd.read_file('catchments/catchments_future.shp')

schools_futureog = schools_future.copy() 
schools_future['geom'] = schools_future['geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid)) 
schools_future = schools_future.drop(columns="geometry")
schools_future['ADD_DATE'] = schools_future['ADD_DATE'].apply(lambda x: datetime.strptime(str(x), "%Y%m%d") if x is not None else None)
schools_future.columns = map(str.lower, schools_future.columns)
schools_future.head(1)

Unnamed: 0,use_id,catch_type,use_desc,add_date,kindergart,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10,year11,year12,geom
0,8416,HIGH_COED,Ku-ring-gai HS,2023-01-14,0,0,0,0,0,0,0,2024,2024,2024,2024,2024,2024,MULTIPOLYGON (((151.19848917708944 -33.5398987...


Basicially the same as above:

In [181]:
schools_primary = gpd.read_file('catchments/catchments_primary.shp')

schools_primaryog = schools_primary.copy() 
schools_primary['geom'] = schools_primary['geometry'].apply(lambda x: create_wkt_element(geom=x, srid=srid)) 
schools_primary = schools_primary.drop(columns="geometry")

schools_primary['ADD_DATE'] = schools_primary['ADD_DATE'].apply(lambda x: datetime.strptime(str(x), "%Y%m%d") if x is not None else None)

schools_primary.columns = map(str.lower, schools_primary.columns)
schools_primary.head()

Unnamed: 0,use_id,catch_type,use_desc,add_date,kindergart,year1,year2,year3,year4,year5,year6,year7,year8,year9,year10,year11,year12,priority,geom
0,2838,PRIMARY,Parklea PS,2018-12-10,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.93563850416004 -33.7161211...
1,2404,PRIMARY,Lindfield EPS,2021-12-19,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1833640465581 -33.74748398...
2,4393,PRIMARY,Carlingford WPS,2022-02-23,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.0451821055135 -33.77303212...
3,4615,PRIMARY,Caddies Ck PS,2018-12-10,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((150.92567327976582 -33.7296030...
4,3918,PRIMARY,Killara PS,2021-12-19,Y,Y,Y,Y,Y,Y,Y,N,N,N,N,N,N,,MULTIPOLYGON (((151.1537883781186 -33.75586174...


In [182]:
schools_secondary = gpd.read_file('catchments/catchments_secondary.shp')

schools_secondaryog = schools_secondary.copy() 
schools_secondary['geom'] = schools_secondary['geometry'].apply(lambda x: create_wkt_element(geom=x,srid=srid)) 
schools_secondary = schools_secondary.drop(columns="geometry")
schools_secondary['ADD_DATE'] = schools_secondary['ADD_DATE'].apply(lambda x: datetime.strptime(str(x), "%Y%m%d") if x is not None else None)

schools_secondary.columns = map(str.lower, schools_secondary.columns)
schools_secondary.dtypes

use_id                object
catch_type            object
use_desc              object
add_date      datetime64[ns]
kindergart            object
year1                 object
year2                 object
year3                 object
year4                 object
year5                 object
year6                 object
year7                 object
year8                 object
year9                 object
year10                object
year11                object
year12                object
priority              object
geom                  object
dtype: object

Converting geodetic coordinate into x y points. 
WKT element then. 

In [183]:
stops = pd.read_csv('Stops.csv')

# Check for missing values in the 'stop_id' column, as this is the primary key
missing_stop_id = stops['stop_id'].isnull()
print (f"{stops['stop_id'].isnull().sum()} rows with empty primary key have been deleted")
stops = stops[~missing_stop_id]
stops['geom'] = gpd.points_from_xy(stops.stop_lon, stops.stop_lat)
stops = stops.drop(columns=['stop_lon', 'stop_lat'])
stops['geom'] = stops['geom'].apply(lambda x: WKTElement(x.wkt, srid=srid))
stops['geom'] = stops['geom'].apply(lambda x: str(x))
stops.head()

0 rows with empty primary key have been deleted


Unnamed: 0,stop_id,stop_code,stop_name,location_type,parent_station,wheelchair_boarding,platform_code,geom
0,200039,200039.0,"Central Station, Eddy Av, Stand A",,200060.0,0,,POINT (151.20666465471 -33.8822064874687)
1,200054,200054.0,"Central Station, Eddy Av, Stand D",,200060.0,0,,POINT (151.20699145565 -33.8820421431408)
2,200060,,Central Station,1.0,,0,,POINT (151.206292455081 -33.8840842535493)
3,201510,,Redfern Station,1.0,,0,,POINT (151.198866071817 -33.8916900512711)
4,201646,201646.0,"Redfern Station, Gibbons St, Stand B",,201510.0,0,,POINT (151.198881722942 -33.8933293130144)


Removing empty FID and geom. 
Convert postcode into integer type

In [184]:
polling = pd.read_csv('PollingPlaces2019.csv')

missing_FID = polling['FID'].isnull()
print(f"{missing_FID.sum()} rows with empty primary key have been deleted")
polling = polling[~missing_FID]

missing_geom = polling['the_geom'].isnull()
print(f"{missing_geom.sum()} rows with empty geom have been deleted")
polling = polling[~missing_geom]

polling['the_geom'] = polling['the_geom'].apply(lambda x: Point(float(x.split()[2].strip(")")), float(x.split()[1].strip("("))))

polling['the_geom'] = polling['the_geom'].apply(lambda x: WKTElement(x.wkt, srid=srid))

polling = polling.drop(columns=['latitude', 'longitude'])

polling['premises_post_code'] = polling['premises_post_code'].astype(int)
polling.columns = map(str.lower, polling.columns)

polling.head(5)

0 rows with empty primary key have been deleted
140 rows with empty geom have been deleted


Unnamed: 0,fid,state,division_id,division_name,polling_place_id,polling_place_type_id,polling_place_name,premises_name,premises_address_1,premises_address_2,premises_address_3,premises_suburb,premises_state_abbreviation,premises_post_code,the_geom
13,aec_federal_election_polling_places_2019.fid-4...,NSW,103,Banks,58,1,Oatley,Oatley Public School,51 Letitia St,,,OATLEY,NSW,2223,POINT (151.081 -33.9847)
15,aec_federal_election_polling_places_2019.fid-4...,NSW,111,Chifley,392,1,Dharruk,Dawson Public School,7 Stuart Rd,,,DHARRUK,NSW,2770,POINT (150.817 -33.7475)
16,aec_federal_election_polling_places_2019.fid-4...,NSW,103,Banks,31,1,Allawah,PJ Ferry Reserve Community Hall,147B Bellevue Pde,,,ALLAWAH,NSW,2218,POINT (151.1148974 -33.9767897)
17,aec_federal_election_polling_places_2019.fid-4...,NSW,103,Banks,67,1,Allawah South,St Raphael's Church Hall,84 George St,,,SOUTH HURSTVILLE,NSW,2221,POINT (151.111 -33.9756)
18,aec_federal_election_polling_places_2019.fid-4...,NSW,103,Banks,56500,1,Beverly Hills North (Banks),Beverly Hills North Public School,1-3 Shorter Ave,,,BEVERLY HILLS,NSW,2209,POINT (151.075 -33.9413)


### load dataset into SQL data base

create schema for each table
One good idea is to run them two separately 
Another point is to use `text()` because SQLAlchemy's `execute()` method doesn't directly support executing SQL commands as strings.

In [185]:
# Connect to the database and specify the schema
# Create schema if it doesn't exist
conn.execute(text('CREATE SCHEMA IF NOT EXISTS sydney'))

# Set search path to the sydney schema
#conn.execute(text('SET search_path TO sydney'))

conn.execute(text('SET search_path TO sydney, public;'))



<sqlalchemy.engine.cursor.CursorResult at 0x1f66e5174d0>

In [186]:
conn.execute(text('''
DROP TABLE IF EXISTS sydney.sa2_regions;
CREATE TABLE sydney.sa2_regions (
    sa2_code21 VARCHAR(20),
    sa2_name21 VARCHAR(100),
    chg_flag21 VARCHAR(2),
    chg_lbl21 VARCHAR(100),
    sa3_code21 VARCHAR(20),
    sa3_name21 VARCHAR(100),
    sa4_code21 VARCHAR(20),
    sa4_name21 VARCHAR(100),
    areasqkm21 NUMERIC,
    loci_uri21 VARCHAR(500),
    geom GEOMETRY(MULTIPOLYGON, 4283)
);
'''))

conn.execute(text('''
DROP TABLE IF EXISTS sydney.businesses;
CREATE TABLE sydney.businesses (
    industry_code VARCHAR(20),
    industry_name VARCHAR(100),
    sa2_code INTEGER,
    sa2_name VARCHAR(100),
    businesses_0_50k INTEGER,
    businesses_50k_200k INTEGER,
    businesses_200k_2m INTEGER,
    businesses_2m_5m INTEGER,
    businesses_5m_10m INTEGER,
    businesses_10m_or_more INTEGER,
    total_businesses INTEGER
);
'''))

conn.execute(text('''
DROP TABLE IF EXISTS sydney.income;
CREATE TABLE sydney.income (
    sa2_code21 INTEGER,
    sa2_name VARCHAR(100),
    earners VARCHAR(100),
    median_age VARCHAR(100),
    median_income VARCHAR(100),
    mean_income VARCHAR(100)
);
'''))

conn.execute(text('''
DROP TABLE IF EXISTS sydney.population;
CREATE TABLE sydney.population (
    sa2_code INTEGER,
    sa2_name VARCHAR(100),
    people_0_4 INTEGER,
    people_5_9 INTEGER,
    people_10_14 INTEGER,
    people_15_19 INTEGER,
    people_20_24 INTEGER,
    people_25_29 INTEGER,
    people_30_34 INTEGER,
    people_35_39 INTEGER,
    people_40_44 INTEGER,
    people_45_49 INTEGER,
    people_50_54 INTEGER,
    people_55_59 INTEGER,
    people_60_64 INTEGER,
    people_65_69 INTEGER,
    people_70_74 INTEGER,
    people_75_79 INTEGER,
    people_80_84 INTEGER,
    people_85_and_over INTEGER,
    total_people INTEGER
);
'''))

conn.execute(text('''
DROP TABLE IF EXISTS sydney.stops;
CREATE TABLE sydney.stops (
    stop_id VARCHAR(100),
    stop_code FLOAT,
    stop_name VARCHAR(100),
    location_type FLOAT,
    parent_station VARCHAR(100),
    wheelchair_boarding INTEGER,
    platform_code VARCHAR(100),
    geom GEOMETRY(POINT,4283)
);
'''))

conn.execute(text("""
DROP TABLE IF EXISTS sydney.schools_future;
CREATE TABLE sydney.schools_future (
    USE_ID VARCHAR(100),
    CATCH_TYPE VARCHAR(100),
    USE_DESC VARCHAR(100),
    ADD_DATE TIMESTAMP,
    KINDERGART INTEGER,
    YEAR1 INTEGER,
    YEAR2 INTEGER,
    YEAR3 INTEGER,
    YEAR4 INTEGER,
    YEAR5 INTEGER,
    YEAR6 INTEGER,
    YEAR7 INTEGER,
    YEAR8 INTEGER,
    YEAR9 INTEGER,
    YEAR10 INTEGER,
    YEAR11 INTEGER,
    YEAR12 INTEGER,
    geom GEOMETRY(MULTIPOLYGON,4283)
);
"""))

conn.execute(text("""
DROP TABLE IF EXISTS sydney.schools_primary;
CREATE TABLE sydney.schools_primary (
    USE_ID VARCHAR(100),
    CATCH_TYPE VARCHAR(100),
    USE_DESC VARCHAR(100),
    ADD_DATE TIMESTAMP,
    KINDERGART VARCHAR(100),
    YEAR1 VARCHAR(100),
    YEAR2 VARCHAR(100),
    YEAR3 VARCHAR(100),
    YEAR4 VARCHAR(100),
    YEAR5 VARCHAR(100),
    YEAR6 VARCHAR(100),
    YEAR7 VARCHAR(100),
    YEAR8 VARCHAR(100),
    YEAR9 VARCHAR(100),
    YEAR10 VARCHAR(100),
    YEAR11 VARCHAR(100),
    YEAR12 VARCHAR(100),
    PRIORITY VARCHAR(100),
    geom GEOMETRY(MULTIPOLYGON,4283)
);
"""))

conn.execute(text("""
DROP TABLE IF EXISTS sydney.schools_secondary;
CREATE TABLE sydney.schools_secondary (
    USE_ID VARCHAR(100),
    CATCH_TYPE VARCHAR(100),
    USE_DESC VARCHAR(100),
    ADD_DATE TIMESTAMP,
    KINDERGART VARCHAR(100),
    YEAR1 VARCHAR(100),
    YEAR2 VARCHAR(100),
    YEAR3 VARCHAR(100),
    YEAR4 VARCHAR(100),
    YEAR5 VARCHAR(100),
    YEAR6 VARCHAR(100),
    YEAR7 VARCHAR(100),
    YEAR8 VARCHAR(100),
    YEAR9 VARCHAR(100),
    YEAR10 VARCHAR(100),
    YEAR11 VARCHAR(100),
    YEAR12 VARCHAR(100),
    PRIORITY VARCHAR(100),
    geom GEOMETRY(MULTIPOLYGON,4283)
);
"""))


conn.execute(text("""
CREATE SCHEMA IF NOT EXISTS sydney;
DROP TABLE IF EXISTS sydney.polling;
CREATE TABLE sydney.polling (
    FID VARCHAR(100),
    state VARCHAR(100),
    division_id INTEGER,
    division_name VARCHAR(100),
    polling_place_id INTEGER,
    polling_place_type_id INTEGER,
    polling_place_name VARCHAR(100),
    premises_name VARCHAR(100),
    premises_address_1 VARCHAR(100),
    premises_address_2 VARCHAR(100),
    premises_address_3 VARCHAR(100),
    premises_suburb VARCHAR(100),
    premises_state_abbreviation VARCHAR(100),
    premises_post_code INTEGER,
    the_geom GEOMETRY(POINT,4283)
);
"""))



<sqlalchemy.engine.cursor.CursorResult at 0x1f66e517690>

Loading dataframe into SQL

In [187]:
businesses.to_sql("businesses", con=conn, schema="sydney", if_exists='append', index=False)
income.to_sql("income", con=conn, schema="sydney", if_exists='append', index=False)
population.to_sql("population", con=conn, schema="sydney", if_exists='append', index=False)
stops.to_sql("stops", con=conn, schema="sydney", if_exists='append', index=False, dtype={'geom': Geometry('POINT', srid)})


718

In [188]:

SA2_Regions.to_sql("sa2_regions", con=conn, schema="sydney", if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid)})
schools_future.to_sql("schools_future", con=conn, schema="sydney", if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid)})
schools_primary.to_sql("schools_primary", con=conn, schema="sydney", if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid)})
schools_secondary.to_sql("schools_secondary", con=conn, schema="sydney", if_exists='append', index=False, dtype={'geom': Geometry('MULTIPOLYGON', srid)})
polling.to_sql('polling', con=conn, schema="sydney", if_exists='append', index=False, dtype={'the_geom': Geometry('POINT', srid)})


790

In [189]:
sql = text("""
SELECT *
FROM population
""")
query(conn, sql)

Unnamed: 0,sa2_code,sa2_name,people_0_4,people_5_9,people_10_14,people_15_19,people_20_24,people_25_29,people_30_34,people_35_39,...,people_45_49,people_50_54,people_55_59,people_60_64,people_65_69,people_70_74,people_75_79,people_80_84,people_85_and_over,total_people
0,102011028,Avoca Beach - Copacabana,424,522,623,552,386,222,306,416,...,572,602,570,520,464,369,226,142,70,7530
1,102011029,Box Head - MacMasters Beach,511,666,702,592,461,347,420,535,...,749,749,794,895,863,925,603,331,264,11052
2,102011030,Calga - Kulnura,200,225,258,278,274,227,214,286,...,325,436,422,397,327,264,190,100,75,4748
3,102011031,Erina - Green Point,683,804,880,838,661,502,587,757,...,859,882,901,930,917,1065,976,773,1028,14803
4,102011032,Gosford - Springfield,1164,1044,1084,1072,1499,1864,1750,1520,...,1330,1241,1377,1285,1166,949,664,476,537,21346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,128021537,Royal National Park,2,4,10,4,9,7,1,2,...,4,0,0,0,1,0,0,0,0,45
369,128021538,Sutherland - Kirrawee,1519,1292,1150,1117,1335,1852,2120,1945,...,1569,1391,1285,1157,909,909,781,582,807,23369
370,128021607,Engadine,1157,1283,1469,1209,891,675,928,1229,...,1315,1086,909,764,707,886,748,389,327,17379
371,128021608,Loftus - Yarrawarrah,503,487,575,508,380,293,426,493,...,564,477,450,387,418,335,263,192,109,7354


# 2. Z-score

## Modify geospacial tables

To make it run faster, we create indexes. 
Remember `text()`

In [190]:
def create_index(conn, table_name, index_name, column_name, using=None):
    """
    Create an index if it does not exist.
    Arguments:
    - conn: database connection
    - table_name: the name of the table
    - index_name: the name of the index to create
    - column_name: the column(s) to index
    - using: (optional) the index method (e.g., GIST for spatial columns)
    """
    # table_name = text(table_name)
    # index_name = text(index_name)
    # column_
    using_str = f"USING {using}" if using else ""
    query = text(f"""
    DO $$
    BEGIN
        IF NOT EXISTS (
            SELECT 1 FROM pg_indexes
            WHERE schemaname = 'sydney' AND indexname = '{index_name}'
        ) THEN
            EXECUTE 'CREATE INDEX {index_name} ON sydney.{table_name} {using_str} ({column_name})';
        END IF;
    END
    $$;
    """)
    conn.execute(query)
    print(f"Checked and created index '{index_name}' if necessary.")

### stops

- Create index to improve runtime
- Add `sa2_code21` to stops by `ST_Contains`, noting the long and lat table.
- Group by `sa2_code21`, and count number of stops in each region

In [191]:
create_index(conn, 'sa2_regions', 'idx_sa2_code21', 'sa2_code21')
create_index(conn, 'sa2_regions', 'idx_sa2_regions_geom', 'geom', 'GIST')
create_index(conn, 'stops', 'idx_stops_geom', 'geom', 'GIST')
create_index(conn, 'stops', 'idx_stop_id', 'stop_id')

Checked and created index 'idx_sa2_code21' if necessary.
Checked and created index 'idx_sa2_regions_geom' if necessary.
Checked and created index 'idx_stops_geom' if necessary.
Checked and created index 'idx_stop_id' if necessary.


Note: this query doesn't return anything so an error is expected. 

In [192]:
sql = """
DROP TABLE IF EXISTS sydney.region_stop_counts;
CREATE TABLE IF NOT EXISTS sydney.region_stop_counts AS
    SELECT
        r.sa2_code21,
        COUNT(s.stop_id) AS number_of_stops
    FROM
        sydney.sa2_regions AS r
    JOIN
        sydney.stops AS s
    ON
        ST_Contains(r.geom, s.geom)
    GROUP BY
        r.sa2_code21
"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


In [193]:
sql = """
SELECT * from region_stop_counts
"""
query(conn, sql)

Unnamed: 0,sa2_code21,number_of_stops
0,102011028,127
1,102011029,215
2,102011030,172
3,102011031,285
4,102011032,355
...,...,...
367,128021537,29
368,128021538,195
369,128021607,221
370,128021608,144


### polling

- Create index to improve runtime
- Add sa2_code21 to polling by ST_Contains
- Group by sa2_code21, and count number of pollings in each region

In [194]:
create_index(conn, 'polling', 'idx_polling_geom', 'the_geom', 'GIST')

Checked and created index 'idx_polling_geom' if necessary.


In [195]:
sql = """
DROP TABLE IF EXISTS sydney.region_polling_counts;
CREATE TABLE IF NOT EXISTS sydney.region_polling_counts AS
    SELECT
        r.sa2_code21,
        COUNT(s.fid) AS number_of_pollings
    FROM
        sydney.sa2_regions AS r
    JOIN
        sydney.polling AS s 
    ON
        ST_Contains(r.geom, s.the_geom)
    GROUP BY
        r.sa2_code21;
"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


In [196]:
sql = """
SELECT * from region_polling_counts
"""
query(conn, sql)

Unnamed: 0,sa2_code21,number_of_pollings
0,117031643,4
1,128011606,3
2,126021501,3
3,123011700,2
4,125041588,7
...,...,...
348,125031484,3
349,127031522,2
350,125031480,4
351,122031432,1


### Schools
- due to limited time, only chose primary school
- Create index to improve runtime
- Add sa2_code21 to schools_primary by ST_Contains
- Group by sa2_code21, and count number of primary school in each region

In [197]:
create_index(conn, 'schools_primary', 'idx_schools_primary_geom', 'geom', 'GIST')
create_index(conn, 'schools_primary', 'idx_s_use_id', 'use_id')

Checked and created index 'idx_schools_primary_geom' if necessary.
Checked and created index 'idx_s_use_id' if necessary.


In [198]:
sql = """
DROP TABLE IF EXISTS sydney.region_primary_school_counts;
CREATE TABLE IF NOT EXISTS sydney.region_primary_school_counts AS
    SELECT
        r.sa2_code21,
        COUNT(p.use_id) AS number_of_primary_schools
    FROM
        sydney.sa2_regions AS r
    JOIN
        sydney.schools_primary AS p
    ON
        ST_Intersects(r.geom, p.geom)
    GROUP BY
        r.sa2_code21

"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


In [199]:
sql = """
SELECT * from region_primary_school_counts
"""
query(conn, sql)

Unnamed: 0,sa2_code21,number_of_primary_schools
0,102011028,4
1,102011029,4
2,102011030,17
3,102011031,10
4,102011032,10
...,...,...
368,128021537,11
369,128021538,12
370,128021607,7
371,128021608,6


### Businesses
- choose Retail Trade

In [200]:
create_index(conn, 'businesses', 'idx_sa2_code', 'sa2_code')
create_index(conn, 'businesses', 'idx_industry_name', 'industry_name')

Checked and created index 'idx_sa2_code' if necessary.
Checked and created index 'idx_industry_name' if necessary.


In [201]:
sql = """
DROP TABLE IF EXISTS sydney.region_businesses_counts;
CREATE TABLE IF NOT EXISTS sydney.region_businesses_counts AS
    SELECT
        sa2_code,
        SUM(total_businesses) AS total_retail_businesses
    FROM
        sydney.businesses
    JOIN
        sydney.sa2_regions on sa2_regions.sa2_code21 = CAST(businesses.sa2_code AS VARCHAR)
    WHERE
        industry_name = 'Retail Trade'
    GROUP BY
        sa2_code;
"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


In [202]:
sql = """
SELECT * FROM region_businesses_counts
"""
query(conn, sql)

Unnamed: 0,sa2_code,total_retail_businesses
0,128021535,98
1,121021406,88
2,120031394,86
3,125011587,45
4,125011586,199
...,...,...
368,124011453,42
369,119011354,126
370,119041669,76
371,125041489,167


### Population
- count the total population of people age 0 - 19
- make that into a new table

In [203]:
create_index(conn, 'businesses', 'idx_sa2_code', 'sa2_code')
create_index(conn, 'businesses', 'idx_industry_name', 'industry_name')

Checked and created index 'idx_sa2_code' if necessary.
Checked and created index 'idx_industry_name' if necessary.


In [204]:
sql = """
DROP TABLE IF EXISTS sydney.region_population_counts;
CREATE TABLE sydney.region_population_counts AS
SELECT
    sa2_code,
    (COALESCE(people_0_4, 0) + COALESCE(people_5_9, 0) + COALESCE(people_10_14, 0) + COALESCE(people_15_19, 0)) AS total_population_0_19
FROM
    sydney.population

"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


In [205]:
sql = """
SELECT * FROM region_population_counts
"""
query(conn, sql)

Unnamed: 0,sa2_code,total_population_0_19
0,102011028,2121
1,102011029,2471
2,102011030,961
3,102011031,3205
4,102011032,4364
...,...,...
368,128021537,20
369,128021538,5078
370,128021607,5118
371,128021608,2073


### Z score
Join all five tables into one with the following fields:
-    sa2_code
-   total_population_0_19
-   total_retail_businesses
-   number_of_primary_schools
-   number_of_pollings
-   number_of_stops
- the total_population_0_19 must be more than 100

In [206]:
sql = """
DROP TABLE IF EXISTS sydney.sa2_aggregated_data;
CREATE TABLE sydney.sa2_aggregated_data AS
SELECT
    pc.sa2_code,
    COALESCE(bc.total_retail_businesses, 0) AS total_retail_businesses,
    COALESCE(sc.number_of_primary_schools, 0) AS number_of_primary_schools,
    COALESCE(pc2.number_of_pollings, 0) AS number_of_pollings,
    COALESCE(sc2.number_of_stops, 0) AS number_of_stops
FROM
    sydney.region_population_counts pc
LEFT JOIN
    sydney.region_businesses_counts bc ON pc.sa2_code = bc.sa2_code
LEFT JOIN
    sydney.region_primary_school_counts sc ON pc.sa2_code = CAST(sc.sa2_code21 AS INTEGER)
LEFT JOIN
    sydney.region_polling_counts pc2 ON pc.sa2_code = CAST(pc2.sa2_code21 AS INTEGER)
LEFT JOIN
    sydney.region_stop_counts sc2 ON pc.sa2_code = CAST(sc2.sa2_code21 AS INTEGER)
WHERE
    pc.total_population_0_19 >= 100
"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


We are just using the original formula:
$S(z_{business}+z_{stops}+z_{polls}+z_{schools})$

In [207]:
sql = """
DROP TABLE IF EXISTS sydney.sa2_s_scores;
CREATE TABLE sydney.sa2_s_scores AS
    SELECT
        sa2_code,
        
        -- Calculate the sigmoid function for the sum of z-scores
        1 / (1 + EXP(-(
            z_score_total_retail_businesses +
            z_score_number_of_primary_schools +
            z_score_number_of_pollings +
            z_score_number_of_stops
        ))) AS s_score

    FROM (
        SELECT
            sa2_code,
            total_retail_businesses,
            number_of_primary_schools,
            number_of_pollings,
            number_of_stops,
            
            -- Calculating Z-score for total_retail_businesses
            (total_retail_businesses - AVG(total_retail_businesses) OVER()) / STDDEV(total_retail_businesses) OVER() AS z_score_total_retail_businesses,
            
            -- Calculating Z-score for number_of_primary_schools
            (number_of_primary_schools - AVG(number_of_primary_schools) OVER()) / STDDEV(number_of_primary_schools) OVER() AS z_score_number_of_primary_schools,
            
            -- Calculating Z-score for number_of_pollings
            (number_of_pollings - AVG(number_of_pollings) OVER()) / STDDEV(number_of_pollings) OVER() AS z_score_number_of_pollings,
            
            -- Calculating Z-score for number_of_stops
            (number_of_stops - AVG(number_of_stops) OVER()) / STDDEV(number_of_stops) OVER() AS z_score_number_of_stops
            
        FROM sydney.sa2_aggregated_data
    ) AS z_scores;

"""
query(conn, sql)

Error encountered: 
This result object does not return rows. It has been closed automatically.


### This is S score of each SA2 region

In [208]:
sql = """
SELECT * FROM sa2_s_scores
order by s_score DESC
"""
query(conn, sql)

Unnamed: 0,sa2_code,s_score
0,117031644,1.000000
1,115021297,0.999999
2,117031645,0.999717
3,115031300,0.999222
4,123021437,0.998189
...,...,...
354,126021590,0.011465
355,102021052,0.009472
356,118021654,0.009229
357,116021562,0.008409


In [209]:
# drop all intermediate tables
sql = """
DROP TABLE IF EXISTS sydney.region_stop_counts;
DROP TABLE IF EXISTS sydney.region_polling_counts;
DROP TABLE IF EXISTS sydney.region_primary_school_counts;
DROP TABLE IF EXISTS sydney.region_businesses_counts;
DROP TABLE IF EXISTS sydney.region_population_counts;
"""
query(conn, sql)

# close connection
conn.close()
db.dispose()

Error encountered: 
This result object does not return rows. It has been closed automatically.


# Extra dataset 1: Walk count site
This dataset records the location of walk counting site in Sydney. 

In [210]:
import geopandas as gpd
import psycopg2
walk_dataframe = gpd.read_file('Walking_count_sites.geojson') #GeoDataFrame
walk_dataframe.head(1)

SA2_shapely = gpd.read_file('SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp')
SA2_shapely

Unnamed: 0,SA2_CODE21,SA2_NAME21,CHG_FLAG21,CHG_LBL21,SA3_CODE21,SA3_NAME21,SA4_CODE21,SA4_NAME21,GCC_CODE21,GCC_NAME21,STE_CODE21,STE_NAME21,AUS_CODE21,AUS_NAME21,AREASQKM21,LOCI_URI21,geometry
0,101021007,Braidwood,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,3418.3525,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.58424 -35.44426, 149.58444 -35.4..."
1,101021008,Karabar,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,6.9825,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.21899 -35.36738, 149.21800 -35.3..."
2,101021009,Queanbeyan,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,4.7620,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.21326 -35.34325, 149.21619 -35.3..."
3,101021010,Queanbeyan - East,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,13.0032,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.24034 -35.34781, 149.24024 -35.3..."
4,101021012,Queanbeyan West - Jerrabomberra,0,No change,10102,Queanbeyan,101,Capital Region,1RNSW,Rest of NSW,1,New South Wales,AUS,Australia,13.6748,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"POLYGON ((149.19572 -35.36126, 149.19970 -35.3..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2468,901031003,Jervis Bay,0,No change,90103,Jervis Bay,901,Other Territories,9OTER,Other Territories,9,Other Territories,AUS,Australia,67.2296,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"MULTIPOLYGON (((150.69567 -35.18295, 150.69556..."
2469,901041004,Norfolk Island,0,No change,90104,Norfolk Island,901,Other Territories,9OTER,Other Territories,9,Other Territories,AUS,Australia,38.6510,http://linked.data.gov.au/dataset/asgsed3/SA2/...,"MULTIPOLYGON (((167.96325 -29.07212, 167.96326..."
2470,997979799,Migratory - Offshore - Shipping (OT),0,No change,99797,Migratory - Offshore - Shipping (OT),997,Migratory - Offshore - Shipping (OT),99799,Migratory - Offshore - Shipping (OT),9,Other Territories,AUS,Australia,,http://linked.data.gov.au/dataset/asgsed3/SA2/...,
2471,999999499,No usual address (OT),0,No change,99999,No usual address (OT),999,No usual address (OT),99499,No usual address (OT),9,Other Territories,AUS,Australia,,http://linked.data.gov.au/dataset/asgsed3/SA2/...,


In [211]:
import geopandas as gpd
#Convert polygons into multi polygons
# Read your data containing points and MultiPolygon geometries
points_gdf = gpd.read_file('Walking_count_sites.geojson')
multipolygon_gdf = gpd.read_file('SA2_2021_AUST_SHP_GDA2020/SA2_2021_AUST_GDA2020.shp')

if multipolygon_gdf.crs != points_gdf.crs:
    multipolygon_gdf = multipolygon_gdf.to_crs(points_gdf.crs)


# Perform a spatial join between the points and the MultiPolygon
points_within_multipolygon = gpd.sjoin(points_gdf, multipolygon_gdf, how="left", predicate='within')

# Group the points by polygon and count the number of points within each polygon
points_within_multipolygon_count = points_within_multipolygon.groupby('index_right').size()

# Optionally, merge the counts with the MultiPolygon GeoDataFrame
multipolygon_gdf = multipolygon_gdf.merge(points_within_multipolygon_count.rename('point_count'), left_index=True, right_index=True)

multipolygon_gdf = multipolygon_gdf.drop(columns=['CHG_FLAG21', 'CHG_LBL21', 'GCC_CODE21', 'GCC_NAME21', 'AUS_CODE21','AUS_NAME21', 'STE_CODE21', 'STE_NAME21', 'LOCI_URI21'])
multipolygon_gdf


Unnamed: 0,SA2_CODE21,SA2_NAME21,SA3_CODE21,SA3_NAME21,SA4_CODE21,SA4_NAME21,AREASQKM21,geometry,point_count
354,117031329,Darlinghurst,11703,Sydney Inner City,117,Sydney - City and Inner South,0.8569,"POLYGON ((151.21227 -33.87631, 151.21232 -33.8...",10
355,117031330,Erskineville - Alexandria,11703,Sydney Inner City,117,Sydney - City and Inner South,4.3199,"POLYGON ((151.18200 -33.90064, 151.18231 -33.8...",3
356,117031331,Glebe - Forest Lodge,11703,Sydney Inner City,117,Sydney - City and Inner South,2.3018,"POLYGON ((151.17710 -33.87386, 151.17720 -33.8...",11
357,117031333,Potts Point - Woolloomooloo,11703,Sydney Inner City,117,Sydney - City and Inner South,1.4636,"POLYGON ((151.21887 -33.86861, 151.21897 -33.8...",7
358,117031336,Surry Hills,11703,Sydney Inner City,117,Sydney - City and Inner South,1.3164,"POLYGON ((151.20831 -33.88343, 151.20842 -33.8...",7
359,117031638,Camperdown - Darlington,11703,Sydney Inner City,117,Sydney - City and Inner South,1.7106,"POLYGON ((151.17268 -33.89045, 151.17285 -33.8...",2
360,117031639,Chippendale,11703,Sydney Inner City,117,Sydney - City and Inner South,0.4642,"POLYGON ((151.19399 -33.88655, 151.19441 -33.8...",3
361,117031640,Newtown (NSW),11703,Sydney Inner City,117,Sydney - City and Inner South,1.5707,"POLYGON ((151.17352 -33.89636, 151.17305 -33.8...",5
362,117031641,Pyrmont,11703,Sydney Inner City,117,Sydney - City and Inner South,0.9299,"POLYGON ((151.18869 -33.86635, 151.18908 -33.8...",5
363,117031642,Redfern,11703,Sydney Inner City,117,Sydney - City and Inner South,1.6989,"POLYGON ((151.19682 -33.89072, 151.19687 -33.8...",9


In [212]:
# Convert Geometry to WKT
#walk_dataframe['geom'] = walk_dataframe['geometry'].apply(lambda geom: geom.wkt)
walk_dataframe.head(1)

Unnamed: 0,OBJECTID,Site_ID,Location,SiteDescription,geometry
0,1,2,Botany Road,Between Bourke Street and Hansard Street,POINT (151.20287 -33.90732)


In [213]:
# Remove possibly NULL items
walk_dataframe = walk_dataframe.dropna(subset=["OBJECTID"])
walk_dataframe


Unnamed: 0,OBJECTID,Site_ID,Location,SiteDescription,geometry
0,1,2,Botany Road,Between Bourke Street and Hansard Street,POINT (151.20287 -33.90732)
1,2,3,King Street,Between Whitehorse Street and Newman Street,POINT (151.17795 -33.89934)
2,3,4,William Street,Between Crown Street and Palmer Street,POINT (151.21666 -33.87438)
3,4,5,City Road,Between Carillon Avenue and Forbes Street,POINT (151.18803 -33.89170)
4,5,6,Broadway,Between Buckland Street and Abercrombie Street,POINT (151.19771 -33.88434)
...,...,...,...,...,...
115,116,118,Bridge Street,Between Loftus Street and Pitt Street,POINT (151.20968 -33.86356)
116,117,119,Margaret Street,Between George Street and Carrington Street,POINT (151.20689 -33.86512)
117,118,120,Margaret Street,Between Clarence Street and Kent Street,POINT (151.20452 -33.86516)
118,119,121,York Street,Between Barrack Street and King Street,POINT (151.20595 -33.86805)


No empty rows. This dataset is quite clean.
Next, we group the counting objects by SA2 regions. Note that we have to make sure they are of the same SRID.

In [214]:
#
if SA2_regions_geometry.crs != walk_dataframe.crs:
    SA2_shapely = SA2_shapely.to_crs(walk_dataframe.crs)

points_in_multipolygon = gpd.sjoin(walk_dataframe, SA2_shapely, how='left', predicate='within')
point_count_per_multipolygon = points_in_multipolygon.groupby('geometry').size()

point_count_per_multipolygon

geometry
POINT (151.18118 -33.90619)    1
POINT (151.18133 -33.90818)    1
POINT (151.18252 -33.90846)    1
POINT (151.17980 -33.90344)    1
POINT (151.17795 -33.89934)    1
                              ..
POINT (151.21248 -33.89220)    1
POINT (151.21342 -33.88843)    1
POINT (151.21761 -33.88904)    1
POINT (151.21555 -33.89194)    1
POINT (151.21597 -33.89642)    1
Length: 120, dtype: int64