# Assign Suburbs to POIs

Use the SA2 level from the ABS to find suburbs. This uses a file downloaded from [ABS](http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/1270.0.55.001July%202011), specifically the [ZIP link](http://www.abs.gov.au/ausstats/subscriber.nsf/log?openagent&1270055001_sa2_2011_aust_shape.zip&1270.0.55.001&Data%20Cubes&7130A5514535C5FCCA257801000D3FBD&0&July%202011&23.12.2010&Latest) called Statistical Area Level 2 (SA2) ASGS Ed 2011 Digital Boundaries in ESRI Shapefile Format. The files below are obtained after unzipping the file.

*ABS has a habit of changing their links, so the above links are likely to be broken.*

In [1]:
from osgeo import ogr, osr
import pandas as pd
import numpy as np
import collections

## Extracting SA2 region names

In [2]:
data_dir = '../data/'
driver = ogr.GetDriverByName('ESRI Shapefile')

# Open the SA2 boundary shapefile; the second argument (0) requests
# read-only access.
node_source = driver.Open(data_dir + 'SA2_2011_AUST.shp', 0)
if node_source is None:
    # Open() may return None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than with an
    # AttributeError on the next line.
    raise OSError('Unable to open ' + data_dir + 'SA2_2011_AUST.shp')
layer = node_source.GetLayer()
# Last expression of the cell: shows the number of features in the layer.
layer.GetFeatureCount()

2214

In [3]:
# Rewind the layer and take its first feature, then list the attribute
# field names that feature carries.
layer.ResetReading()
feature = layer.GetNextFeature()
feature.keys()

['SA2_MAIN11',
 'SA2_5DIG11',
 'SA2_NAME11',
 'SA3_CODE11',
 'SA3_NAME11',
 'SA4_CODE11',
 'SA4_NAME11',
 'GCC_CODE11',
 'GCC_NAME11',
 'STE_CODE11',
 'STE_NAME11',
 'ALBERS_SQM']

In [4]:
# Walk the whole layer once, keeping the SA2 code and name of every
# Victorian region in two parallel lists.
sla2, name = [], []
layer.ResetReading()
feature = layer.GetNextFeature()
while feature:
    state_code = feature.GetField('STE_CODE11')
    # Victoria is state number 2
    if state_code == '2':
        sla2.append(feature.GetField('SA2_MAIN11'))
        name.append(feature.GetField('SA2_NAME11'))
    feature = layer.GetNextFeature()
print(len(sla2))

435


In [5]:
# Map each SA2 code to its region name (the two lists are index-aligned).
sla_name_dict = dict(zip(sla2, name))


## Find SLA region name, and add to POIs

In [6]:
def find_points(locations, sla_name, pt, polygon):
    """
    Label every location that lies inside one SLA region.

    Each row of `locations` whose point is within `polygon` gets its
    'suburb' column set to `sla_name`. The dataframe is modified in place
    and nothing is returned.

    locations : the dataframe containing the locations; read through its
        'poiLat' and 'poiLon' columns, written through 'suburb'
    sla_name : the name of the SLA region
    pt : a handle that determines coordinates (reused for every row)
    polygon : the polygon defining the SLA, or None if there is none
    """
    if polygon is None:
        # Nothing to test against (e.g. a feature without geometry):
        # report it once up front instead of failing on the first row.
        print('Unable to solve inside polygon')
        return
    for idx, row in locations.iterrows():
        lat, long = row['poiLat'], row['poiLon']
        # Skip rows whose coordinates are missing (NaN) or infinite.
        if not (np.isfinite(lat) and np.isfinite(long)):
            continue
        pt.SetPoint(0, long, lat)
        try:
            inside = pt.Within(polygon)
        except ValueError:
            # Report the offending point but keep checking the remaining
            # rows, so one failure does not drop the rest of the region.
            print(long, lat)
            print('Unable to solve inside polygon')
            continue
        if inside:
            locations.loc[idx, 'suburb'] = sla_name


In [7]:
# A single point geometry, created once so it can be reused for every lookup.
pt = ogr.Geometry(ogr.wkbPoint)

# Tag the point with the well-known "WGS84" geographic coordinate system.
spatial_ref = osr.SpatialReference()
spatial_ref.SetWellKnownGeogCS("WGS84")
pt.AssignSpatialReference(spatial_ref)


In [8]:
# Load the POIs and start every row with an empty suburb label.
poi = pd.read_csv(data_dir + 'poi-Melb-all.csv')
poi['suburb'] = ''

num_sla = 1  # counts Victorian regions only; drives the progress output
layer.ResetReading()
sla = layer.GetNextFeature()
while sla:
    state_code = sla.GetField('STE_CODE11')
    # Victoria is state number 2
    if state_code == '2':
        sla_id = sla.GetField('SA2_MAIN11')
        polygon = sla.GetGeometryRef()
        sla_name = sla_name_dict[sla_id]
        # Label every POI that falls inside this region.
        find_points(poi, sla_name, pt, polygon)

        # progress bar
        if not num_sla % 100:
            print(num_sla)
        num_sla += 1

    sla = layer.GetNextFeature()

poi.head()

100
200
300
400
144.96778 -37.821670000000005
Unable to solve inside polygon
144.96778 -37.821670000000005
Unable to solve inside polygon


Unnamed: 0,poiID,poiName,poiTheme,poiLat,poiLon,poiURL,suburb
0,0,Arts Precinct,City precincts,-37.82167,144.96778,https://en.wikipedia.org/wiki/Melbourne_Arts_P...,Southbank
1,1,Docklands,City precincts,-37.817,144.946,"https://en.wikipedia.org/wiki/Docklands,_Victoria",Docklands
2,2,Government Precinct,City precincts,-37.8119,144.973,"https://en.wikipedia.org/wiki/Spring_Street,_M...",Melbourne
3,3,Little Italy,City precincts,-37.79972,144.96694,"https://en.wikipedia.org/wiki/Little_Italy,_Me...",Carlton
4,4,RMIT City,City precincts,-37.80778,144.96333,https://en.wikipedia.org/wiki/City_campus_of_t...,Melbourne


In [9]:
# Write the POI table (including its 'suburb' column) to a new CSV,
# leaving out the dataframe index.
poi.to_csv(data_dir + 'poi-Melb-all-suburb.csv', index=False)

## Some statistics

In [10]:
# Number of POIs, followed by the ten most frequent themes and suburbs.
print(poi.shape[0])
c_theme = collections.Counter(poi['poiTheme'])
c_suburb = collections.Counter(poi['suburb'])
for counter in (c_theme, c_suburb):
    print(counter.most_common(10))

88
[('Shopping', 17), ('Sports stadiums', 14), ('Parks and spaces', 14), ('Institutions', 12), ('City precincts', 8), ('Transport', 8), ('Structures', 8), ('Entertainment', 4), ('Public galleries', 3)]
[('Melbourne', 26), ('Southbank', 16), ('East Melbourne', 15), ('Docklands', 6), ('Parkville', 4), ('Carlton', 4), ('Albert Park', 4), ('Beaumaris', 1), ('', 1), ('Carlton North - Princes Hill', 1)]
