# Calculate distance
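This notebook calculates the distance between each property and nearby amenities (e.g. public transport stations, the nearest shopping center, schools, parks and the Melbourne CBD). The distance to the CBD is requested as a driving distance from a routing web API; the distance columns added afterwards are straight-line (haversine) distances in km.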

The driving distance is obtained by self-hosting the routing web API on localhost in Docker.

The first step was to download the Australia OpenStreetMap extract to be used by openrouteservice.

Link to the map:
wget http://download.geofabrik.de/australia-oceania/australia-latest.osm.pbf

In [4]:
import pandas as pd
import numpy as np
import json
import requests
from ast import literal_eval
import time
import re
import sys
import os

In [156]:
import math

In [1]:
# read the files
domain_path = "../data/curated/listings_suburbs_SA2.csv"

In [153]:
domain_abs_path = "../data/curated/ABS_data/ABS_domain.csv"

In [154]:
domain_abs = pd.read_csv(domain_abs_path)
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Two persons in family,Three persons in family,Four persons in family,Five persons in family,Six or more persons in family,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,2445.0,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,2445.0,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,2445.0,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,2445.0,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,2445.0,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0


In [155]:
# pack each listing's position into one [latitude, longitude] list so it can be passed around as a single value
domain_abs['coordinate'] = domain_abs[['latitude', 'longitude']].apply(lambda x: [x['latitude'], x['longitude']], axis=1)
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Three persons in family,Four persons in family,Five persons in family,Six or more persons in family,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]"
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]"
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]"
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]"
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,396.0,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]"


## Calculate distance to Melbourne CBD

#### Define a function to calculate distance to CBD

In [119]:
# osrm_url = "http://localhost:5000/ors/v2/directions"

In [106]:
# base URL of the locally hosted routing service; cbd_distances() appends the coordinates directly to this string
# NOTE(review): the variable is named after OSRM but the path is `/ors` - confirm which routing API is served here
osrm_url = "http://localhost:8080/ors"

In [120]:
def cbd_distances(property_geo, timeout=30):
    '''
    Ask the local routing server for a route from a property to Melbourne CBD
    and return the length of the first route in km.

    :param property_geo: [latitude, longitude] of the property
    :param timeout: seconds to wait for the routing server before giving up
    :return: distance between the property and the CBD in km
    :raises requests.RequestException: if the server cannot be reached, times
        out or answers with an HTTP error status
    :raises KeyError, IndexError: if the response JSON contains no route
    '''
    # CBD coordinates as [latitude, longitude]
    cbd_geo = [-37.815207, 144.963937]

    # The path lists longitude before latitude for both points.
    # NOTE(review): `osrm_url` is joined to the coordinates without any
    # separator and the response is read in the `routes/legs/distance` layout;
    # confirm both against the routing API that is actually hosted.
    request_url = (
        f"{osrm_url}{property_geo[1]},{property_geo[0]};"
        f"{cbd_geo[1]},{cbd_geo[0]}?steps=true&alternatives=true"
    )

    # without a timeout a stalled server would block the notebook forever
    journey = requests.get(request_url, timeout=timeout)
    # fail with a clear HTTP error instead of a KeyError on an error payload
    journey.raise_for_status()

    # extract the distance (in meters) from the nested trip information
    distance_meters = journey.json()['routes'][0]['legs'][0]['distance']

    return distance_meters / 1000

#### Define a function to get a list of distance for the domain property data

In [121]:
def calculate_cbd_distance(domain_data, distance_fn=None):
    '''
    Compute the distance to the CBD for every entry of domain_data['coordinate'].

    :param domain_data: DataFrame (or mapping) whose 'coordinate' entries are
        [latitude, longitude] pairs, either as a sequence or as its text form
        such as "[-37.8, 144.9]"
    :param distance_fn: callable that takes one coordinate pair and returns the
        distance in km; defaults to the module-level cbd_distances function
    :return: list with one distance per coordinate, np.nan where the lookup failed
    '''
    import warnings

    if distance_fn is None:
        distance_fn = cbd_distances

    # Deliberately NOT named `cbd_distances`: a local list with that name hides
    # the function, every call then raises TypeError and the result is all NaN.
    distances_km = []
    failed = 0
    for raw_coord in domain_data['coordinate']:
        try:
            # text coordinates are parsed, sequences are used as they are
            coord = literal_eval(raw_coord) if isinstance(raw_coord, str) else raw_coord
            distances_km.append(distance_fn(coord))
        except Exception:
            # best effort: one bad coordinate or failed request must not abort
            # the whole run, so the entry is recorded as missing
            distances_km.append(np.nan)
            failed += 1

    if failed:
        # make the failures visible instead of silently returning NaN
        warnings.warn(f"CBD distance could not be computed for {failed} of {len(distances_km)} coordinates")

    return distances_km

In [122]:
# NOTE: this rebinds the global name `cbd_distances` from the function defined above to the
# returned list, so that function must be re-defined before this cell can be run a second time
cbd_distances = calculate_cbd_distance(domain_abs)

In [123]:
# store the routed distances as a column; here `cbd_distances` is the list produced by the previous cell
domain_abs.loc[:,'CBD_Distance'] = cbd_distances

### Define a function to calculate distance in km

In [178]:
# define the function to calculate the distance between two locations in km
def haversine_distance(lat1, lon1, lat2, lon2, radius=6371.0):
    '''
    Great-circle (haversine) distance between two points given in decimal degrees.

    :param lat1: latitude for the first location
    :param lon1: longitude for the first location
    :param lat2: latitude for the second location
    :param lon2: longitude for the second location
    :param radius: radius of the sphere; the result is in the same unit
        (default 6371.0, i.e. the result is in km)
    :return: the distance along the sphere's surface; NaN if any input is NaN
    '''
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)

    # differences between the two locations in radians
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    # haversine term
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2

    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt(1 - a) raise. Comparisons are used
    # instead of min/max so that a NaN input still yields NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    # central angle between the two points
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c


#### Calculate distance to Melbourne CBD

In [158]:
melbourne_cbd = (-37.815207, 144.963937)

In [159]:
# straight-line (haversine) distance from every listing to the CBD reference point
domain_abs['distance_to_melbourne_cbd_km'] = domain_abs['coordinate'].map(
    lambda coord: haversine_distance(coord[0], coord[1], melbourne_cbd[0], melbourne_cbd[1])
)


In [160]:
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Four persons in family,Five persons in family,Six or more persons in family,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,100.0,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106


#### Calculate distance (km) to the nearest shopping center

In [161]:
shopping_center_path = "../data/raw/shopping_centers.csv"

In [162]:
# read the shopping center file
shopping_center = pd.read_csv(shopping_center_path)
shopping_center.head()

Unnamed: 0,name,state,city,longitude,latitude,num_store,geometry,SA2_NAME,SA2_CODE
0,206 Bourke Street,Victoria,Melbourne,144.966947,-37.812733,14.0,POINT (144.966947 -37.812733),Melbourne CBD - East,206041503
1,670 Chapel,Victoria,Melbourne,144.996158,-37.837395,28.0,POINT (144.996158 -37.837395),South Yarra - North,206061515
2,Acland Court Shopping Centre,Victoria,St Kilda,144.980617,-37.868967,11.0,POINT (144.980617 -37.868967),St Kilda - West,206051514
3,Altona Gate Shopping Centre,Victoria,Melbourne,144.84627,-37.828989,63.0,POINT (144.84627 -37.828989),Altona North,213021343
4,Arena Shopping Centre,Victoria,Officer,145.435175,-38.064493,30.0,POINT (145.43517539621 -38.064493171914),Beaconsfield - Officer,212011546


In [163]:
# combine each shopping center's position into one [latitude, longitude] list,
# the same layout used for the property coordinates
shopping_center['coordinate'] = shopping_center[['latitude', 'longitude']].apply(lambda x: [x['latitude'], x['longitude']], axis=1)
shopping_center.head()

Unnamed: 0,name,state,city,longitude,latitude,num_store,geometry,SA2_NAME,SA2_CODE,coordinate
0,206 Bourke Street,Victoria,Melbourne,144.966947,-37.812733,14.0,POINT (144.966947 -37.812733),Melbourne CBD - East,206041503,"[-37.812733, 144.966947]"
1,670 Chapel,Victoria,Melbourne,144.996158,-37.837395,28.0,POINT (144.996158 -37.837395),South Yarra - North,206061515,"[-37.837395, 144.996158]"
2,Acland Court Shopping Centre,Victoria,St Kilda,144.980617,-37.868967,11.0,POINT (144.980617 -37.868967),St Kilda - West,206051514,"[-37.868967, 144.980617]"
3,Altona Gate Shopping Centre,Victoria,Melbourne,144.84627,-37.828989,63.0,POINT (144.84627 -37.828989),Altona North,213021343,"[-37.828989, 144.84627]"
4,Arena Shopping Centre,Victoria,Officer,145.435175,-38.064493,30.0,POINT (145.43517539621 -38.064493171914),Beaconsfield - Officer,212011546,"[-38.064493171914, 145.43517539621]"


In [164]:
def find_nearest_shopping_center(property_coord):
    '''
    Haversine distance from one property to its closest shopping center.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any entry of shopping_center['coordinate'];
             float('inf') when there are no shopping centers
    '''
    # start from infinity so an empty table still produces a result
    candidates = [float('inf')]
    candidates.extend(
        haversine_distance(property_coord[0], property_coord[1], centre[0], centre[1])
        for centre in shopping_center['coordinate']
    )
    return min(candidates)



In [165]:
# new column: distance in km from each listing to its closest shopping center
domain_abs['nearest_shopping_center_distance_km'] = domain_abs['coordinate'].map(find_nearest_shopping_center)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Five persons in family,Six or more persons in family,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,6.0,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756


### Calculate distance to the nearest train station

In [181]:
import pandas as pd
import re

In [167]:
# load dataset
train_station_path = "../data/raw/train_stops.csv"
train_station = pd.read_csv(train_station_path)
train_station.head()

Unnamed: 0,TRAIN_STOP_ID,STOP_NAME,geometry,type
0,19970,Royal Park Railway Station (Parkville),POINT (144.95230120600002 -37.781192972999975),METRO_TRAIN
1,19971,Flemington Bridge Railway Station (North Melbo...,POINT (144.9393232120001 -37.78813998399994),METRO_TRAIN
2,19972,Macaulay Railway Station (North Melbourne),POINT (144.93616600400003 -37.79426700499994),METRO_TRAIN
3,19973,North Melbourne Railway Station (West Melbourne),POINT (144.94257002900008 -37.80741897399997),METRO_TRAIN
4,19974,Clifton Hill Railway Station (Clifton Hill),POINT (144.99541696000006 -37.78865703399998),METRO_TRAIN


In [185]:
# define a function to extract coordinates
def extract_coordinates(point_str):
    '''
    Parse a geometry text of the form 'POINT (x y)' into a coordinate tuple.

    The first number in the text is taken as the longitude and the second as
    the latitude; the tuple is returned in (latitude, longitude) order, which
    is the order the distance functions in this notebook expect.

    :param point_str: geometry text, e.g. 'POINT (144.9523 -37.7811)'
    :return: (latitude, longitude) as floats, or None if point_str is not a
             string or contains no parenthesised pair of numbers
    '''
    # missing values (NaN/None) are not text and cannot be searched
    if not isinstance(point_str, str):
        return None

    # a number may be an integer, a decimal, or use an exponent
    number = r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?'
    match = re.search(r'\(\s*(' + number + r')\s+(' + number + r')\s*\)', point_str)
    if match is None:
        return None

    longitude = float(match.group(1))
    latitude = float(match.group(2))
    return (latitude, longitude)

In [186]:

# parse each station's geometry text into a (latitude, longitude) tuple
# (None where the text does not match the expected pattern)
train_station['extracted_coordinates'] = train_station['geometry'].apply(extract_coordinates)

# print
train_station.head()


Unnamed: 0,TRAIN_STOP_ID,STOP_NAME,geometry,type,extracted_coordinates
0,19970,Royal Park Railway Station (Parkville),POINT (144.95230120600002 -37.781192972999975),METRO_TRAIN,"(-37.781192972999975, 144.95230120600002)"
1,19971,Flemington Bridge Railway Station (North Melbo...,POINT (144.9393232120001 -37.78813998399994),METRO_TRAIN,"(-37.78813998399994, 144.9393232120001)"
2,19972,Macaulay Railway Station (North Melbourne),POINT (144.93616600400003 -37.79426700499994),METRO_TRAIN,"(-37.79426700499994, 144.93616600400003)"
3,19973,North Melbourne Railway Station (West Melbourne),POINT (144.94257002900008 -37.80741897399997),METRO_TRAIN,"(-37.80741897399997, 144.94257002900008)"
4,19974,Clifton Hill Railway Station (Clifton Hill),POINT (144.99541696000006 -37.78865703399998),METRO_TRAIN,"(-37.78865703399998, 144.99541696000006)"


In [187]:
def find_nearest_train_statin(property_coord):
    '''
    Haversine distance from one property to its closest train station.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any usable entry of
             train_station['extracted_coordinates']; float('inf') if there is none
    '''
    min_distance = float('inf')
    for train_coord in train_station['extracted_coordinates']:
        # extract_coordinates returns None for geometry text it cannot parse;
        # skip those stations instead of failing on train_coord[0]
        if train_coord is None:
            continue
        distance = haversine_distance(property_coord[0], property_coord[1], train_coord[0], train_coord[1])
        min_distance = min(min_distance, distance)
    return min_distance

In [188]:
# new column: distance in km from each listing to its closest train station
domain_abs['nearest_train_station_distance_km'] = domain_abs['coordinate'].map(find_nearest_train_statin)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415,0.370453
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096,0.273663
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239,0.336648
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919,0.613539
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055,0.324232


### Calculate distance to tram stop

In [172]:
# load the data
tram_station_path = "../data/raw/tram_stops.csv"
tram_station = pd.read_csv(tram_station_path)
tram_station.head()

Unnamed: 0,TRAM_STOP_ID,LATITUDE,STOP_NAME,LONGITUDE,TICKETZONE,ROUTES_USING_STOP,geometry
0,18730,-37.744359,134-Merribell Ave/Nicholson St (Coburg),144.977728,1,1,POINT (144.97772797000005 -37.74435902699997)
1,18732,-37.811375,44-Deepdene Park/Whitehorse Rd (Balwyn),145.068671,1,109,POINT (145.06867116800004 -37.81137501999996)
2,18733,-37.81175,45-Hardwicke St/Whitehorse Rd (Balwyn),145.071785,1,109,POINT (145.071785169 -37.811750049999944)
3,18734,-37.812242,46-Balwyn Cinema/Whitehorse Rd (Balwyn),145.07593,1,109,POINT (145.0759298480001 -37.81224199099995)
4,18735,-37.812919,47-Balwyn Rd/Whitehorse Rd (Balwyn),145.081524,12,109,POINT (145.08152388700012 -37.81291897099993)


In [175]:
# combine each tram stop's LATITUDE/LONGITUDE columns into one [latitude, longitude] list
tram_station['coordinate'] = tram_station[['LATITUDE', 'LONGITUDE']].apply(lambda x: [x['LATITUDE'], x['LONGITUDE']], axis=1)
tram_station.head()

Unnamed: 0,TRAM_STOP_ID,LATITUDE,STOP_NAME,LONGITUDE,TICKETZONE,ROUTES_USING_STOP,geometry,coordinate
0,18730,-37.744359,134-Merribell Ave/Nicholson St (Coburg),144.977728,1,1,POINT (144.97772797000005 -37.74435902699997),"[-37.744359, 144.977728]"
1,18732,-37.811375,44-Deepdene Park/Whitehorse Rd (Balwyn),145.068671,1,109,POINT (145.06867116800004 -37.81137501999996),"[-37.811375, 145.068671]"
2,18733,-37.81175,45-Hardwicke St/Whitehorse Rd (Balwyn),145.071785,1,109,POINT (145.071785169 -37.811750049999944),"[-37.81175, 145.071785]"
3,18734,-37.812242,46-Balwyn Cinema/Whitehorse Rd (Balwyn),145.07593,1,109,POINT (145.0759298480001 -37.81224199099995),"[-37.812242, 145.07593]"
4,18735,-37.812919,47-Balwyn Rd/Whitehorse Rd (Balwyn),145.081524,12,109,POINT (145.08152388700012 -37.81291897099993),"[-37.812919, 145.081524]"


In [179]:
def find_nearest_tram_statin(property_coord):
    '''
    Haversine distance from one property to its closest tram stop.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any entry of tram_station['coordinate'];
             float('inf') when there are no tram stops
    '''
    stop_distances = (
        haversine_distance(property_coord[0], property_coord[1], stop[0], stop[1])
        for stop in tram_station['coordinate']
    )
    # infinity comes first so the result is defined even without any stop
    return min([float('inf'), *stop_distances])

In [180]:
# new column: distance in km from each listing to its closest tram stop
domain_abs['nearest_tram_station_distance_km'] = domain_abs['coordinate'].map(find_nearest_tram_statin)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,14.0,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,14.0,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,14.0,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,14.0,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,14.0,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055


### Calculate distance to bus stop

In [189]:
# load the data
bus_station_path = "../data/raw/bus_stops.csv"
bus_station = pd.read_csv(bus_station_path)
bus_station.head()

Unnamed: 0,BUS_STOP_ID,STOP_NAME,geometry,type
0,23195,Andrew St/Union St (Kilmore),POINT (144.95227992500008 -37.29049199899998),REGIONAL_BUS
1,23197,Wellington Square SC/Queen St (Wallan),POINT (144.9780188630001 -37.41299000999993),REGIONAL_BUS
2,23202,Aquamoves/Tom Collins Dr (Shepparton),POINT (145.3944449050001 -36.38602702399993),REGIONAL_BUS
3,23224,West End Caravan Park/Murray Valley Hwy (Yarra...,POINT (145.9770469550001 -36.02217601499996),REGIONAL_BUS
4,23225,Jane Rd/Fiona Dr (Yarrawonga),POINT (146.0259579330001 -36.006706975999975),REGIONAL_BUS


In [190]:
# parse each bus stop's geometry text into a (latitude, longitude) tuple
# (None where the text does not match the expected pattern)

bus_station['extracted_coordinates'] = bus_station['geometry'].apply(extract_coordinates)

# print
bus_station.head()

Unnamed: 0,BUS_STOP_ID,STOP_NAME,geometry,type,extracted_coordinates
0,23195,Andrew St/Union St (Kilmore),POINT (144.95227992500008 -37.29049199899998),REGIONAL_BUS,"(-37.29049199899998, 144.95227992500008)"
1,23197,Wellington Square SC/Queen St (Wallan),POINT (144.9780188630001 -37.41299000999993),REGIONAL_BUS,"(-37.41299000999993, 144.9780188630001)"
2,23202,Aquamoves/Tom Collins Dr (Shepparton),POINT (145.3944449050001 -36.38602702399993),REGIONAL_BUS,"(-36.38602702399993, 145.3944449050001)"
3,23224,West End Caravan Park/Murray Valley Hwy (Yarra...,POINT (145.9770469550001 -36.02217601499996),REGIONAL_BUS,"(-36.02217601499996, 145.9770469550001)"
4,23225,Jane Rd/Fiona Dr (Yarrawonga),POINT (146.0259579330001 -36.006706975999975),REGIONAL_BUS,"(-36.006706975999975, 146.0259579330001)"


In [191]:
def find_nearest_bus_stop(property_coord):
    '''
    Haversine distance from one property to its closest bus stop.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any usable entry of
             bus_station['extracted_coordinates']; float('inf') if there is none
    '''
    min_distance = float('inf')
    for bus_coord in bus_station['extracted_coordinates']:
        # extract_coordinates returns None for geometry text it cannot parse;
        # skip those stops instead of failing on bus_coord[0]
        if bus_coord is None:
            continue
        distance = haversine_distance(property_coord[0], property_coord[1], bus_coord[0], bus_coord[1])
        min_distance = min(min_distance, distance)
    return min_distance


In [192]:
# new column: distance in km from each listing to its closest bus stop
domain_abs['nearest_bus_stop_distance_km'] = domain_abs['coordinate'].map(find_nearest_bus_stop)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415,0.370453,0.135597
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096,0.273663,0.087685
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239,0.336648,0.126732
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919,0.613539,0.112485
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055,0.324232,0.080214


### calculate distance to park

In [168]:
# load the dataset
park_path = "../data/raw/finally_park.csv"
park_df = pd.read_csv(park_path)
park_df.head()

Unnamed: 0,name,latitude,longitude,address,area,suburb_name
0,Brookvale Close Reserve,-38.057068,145.368004,50-52I Brookvale Close BEACONSFIELD,3819.0,BEACONSFIELD
1,Ryelands Drive and Hillcrest Avenue Tree Reserve,-38.01729,145.318246,Ryelands Drive BERWICK,4679.0,BERWICK
2,Hinrichsen Drive Reserve,-38.01424,145.283707,127-129 Hinrichsen Drive HALLAM,1403.0,HALLAM
3,Lawson Way Reserve,-37.976977,145.266181,78-92I Heatherton Road ENDEAVOUR HILLS,11524.0,ENDEAVOUR HILLS
4,Western Way Tree Reserve,-38.022186,145.296446,401I Princes Highway NARRE WARREN,8534.0,NARRE WARREN


In [169]:
# combine each park's latitude/longitude columns into one [latitude, longitude] list
park_df['coordinate'] = park_df[['latitude', 'longitude']].apply(lambda x: [x['latitude'], x['longitude']], axis=1)
park_df.head()

Unnamed: 0,name,latitude,longitude,address,area,suburb_name,coordinate
0,Brookvale Close Reserve,-38.057068,145.368004,50-52I Brookvale Close BEACONSFIELD,3819.0,BEACONSFIELD,"[-38.05706828292311, 145.368004149094]"
1,Ryelands Drive and Hillcrest Avenue Tree Reserve,-38.01729,145.318246,Ryelands Drive BERWICK,4679.0,BERWICK,"[-38.0172903001506, 145.31824611567]"
2,Hinrichsen Drive Reserve,-38.01424,145.283707,127-129 Hinrichsen Drive HALLAM,1403.0,HALLAM,"[-38.0142400895596, 145.283707218519]"
3,Lawson Way Reserve,-37.976977,145.266181,78-92I Heatherton Road ENDEAVOUR HILLS,11524.0,ENDEAVOUR HILLS,"[-37.9769772942694, 145.26618127186]"
4,Western Way Tree Reserve,-38.022186,145.296446,401I Princes Highway NARRE WARREN,8534.0,NARRE WARREN,"[-38.0221864905952, 145.29644586981]"


In [170]:
def find_nearest_park(property_coord):
    '''
    Haversine distance from one property to its closest park.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any entry of park_df['coordinate'];
             float('inf') when there are no parks
    '''
    nearest = float('inf')
    for park_lat_lon in park_df['coordinate']:
        candidate = haversine_distance(property_coord[0], property_coord[1], park_lat_lon[0], park_lat_lon[1])
        # keep the running minimum
        if candidate < nearest:
            nearest = candidate
    return nearest

In [171]:
# new column: distance in km from each listing to its closest park
domain_abs['nearest_park_distance_km'] = domain_abs['coordinate'].map(find_nearest_park)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Six or more persons in family,Separate house,Semi-detached house or Townhouse,Flat or Apartment,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,6.0,14.0,21.0,13278.0,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873


In [None]:
# preview the first rows with the notebook's rich table display instead of a plain-text dump
domain_abs.head()

#### Calculate distance to schools

In [5]:
# load the dataset
school_path = "../data/raw/finally_school.csv"
school_df = pd.read_csv(school_path)
school_df.head()

Unnamed: 0,Education_Sector,School_Name,School_Type,Address_Line_1,Address_Line_2,Address_Town,Address_State,Address_Postcode,X,Y
0,Government,Alberton Primary School,Primary,21 Thomson Street,,Alberton,VIC,3971,146.6666,-38.61771
1,Government,Allansford and District Primary School,Primary,Frank Street,,Allansford,VIC,3277,142.59039,-38.38628
2,Government,Avoca Primary School,Primary,118 Barnett Street,,Avoca,VIC,3467,143.47565,-37.0845
3,Government,Avenel Primary School,Primary,40 Anderson Street,,Avenel,VIC,3664,145.23472,-36.90137
4,Government,Warrandyte Primary School,Primary,5-11 Forbes Street,,Warrandyte,VIC,3113,145.21398,-37.74268


In [6]:
# combine each school's position into one list with Y first and X second,
# i.e. Y is used as the latitude and X as the longitude by the distance functions
school_df['coordinate'] = school_df[['Y', 'X']].apply(lambda x: [x['Y'], x['X']], axis=1)
school_df.head()

Unnamed: 0,Education_Sector,School_Name,School_Type,Address_Line_1,Address_Line_2,Address_Town,Address_State,Address_Postcode,X,Y,coordinate
0,Government,Alberton Primary School,Primary,21 Thomson Street,,Alberton,VIC,3971,146.6666,-38.61771,"[-38.61771, 146.6666]"
1,Government,Allansford and District Primary School,Primary,Frank Street,,Allansford,VIC,3277,142.59039,-38.38628,"[-38.38628, 142.59039]"
2,Government,Avoca Primary School,Primary,118 Barnett Street,,Avoca,VIC,3467,143.47565,-37.0845,"[-37.0845, 143.47565]"
3,Government,Avenel Primary School,Primary,40 Anderson Street,,Avenel,VIC,3664,145.23472,-36.90137,"[-36.90137, 145.23472]"
4,Government,Warrandyte Primary School,Primary,5-11 Forbes Street,,Warrandyte,VIC,3113,145.21398,-37.74268,"[-37.74268, 145.21398]"


In [7]:
# persist the school table together with the new coordinate column;
# index=False is not passed, so the row index is written as an extra unnamed first column
school_df.to_csv('../data/raw/school_geo.csv')

In [196]:
def find_nearest_school(property_coord):
    '''
    Haversine distance from one property to its closest school.

    :param property_coord: (latitude, longitude) of the property
    :return: smallest distance in km to any entry of school_df['coordinate'];
             float('inf') when there are no schools
    '''
    school_distances = [
        haversine_distance(property_coord[0], property_coord[1], school[0], school[1])
        for school in school_df['coordinate']
    ]
    # the leading infinity keeps the result defined for an empty school table
    return min([float('inf')] + school_distances)


In [197]:
# new column: distance in km from each listing to its closest school
domain_abs['nearest_school_distance_km'] = domain_abs['coordinate'].map(find_nearest_school)

# print
domain_abs.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Non-residential properties,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km,nearest_school_distance_km
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,123.0,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415,0.370453,0.135597,0.239541
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,123.0,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096,0.273663,0.087685,0.296386
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,123.0,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239,0.336648,0.126732,0.193919
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,123.0,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919,0.613539,0.112485,0.356671
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,123.0,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055,0.324232,0.080214,0.242857


#### Save the file

In [138]:
# domain_abs.to_csv('../data/raw/tmp_with_cbd_dist.csv')

In [198]:
# persist the listings with all distance columns added above;
# index=False is not passed, so the row index is written as an extra unnamed first column
domain_abs.to_csv('../data/raw/property_to_dist.csv')

### Join with income

In [205]:
# load the income data
income_path = "../data/raw/ABS_data/income_suburb.csv"
income_df = pd.read_csv(income_path)
income_df.head()

Unnamed: 0,suburb,Weekly Income($)
0,Canterbury,2352.0
1,Park Orchards,2329.0
2,Wonga Park,2221.0
3,Brighton,2200.0
4,Camberwell,2122.0


In [206]:
income_df = income_df.rename(columns={'suburb': 'SAL_NAME21'})
income_df.head()

Unnamed: 0,SAL_NAME21,Weekly Income($)
0,Canterbury,2352.0
1,Park Orchards,2329.0
2,Wonga Park,2221.0
3,Brighton,2200.0
4,Camberwell,2122.0


In [207]:
# Left join: keep exactly the property rows and attach the weekly income of each suburb.
# An outer join would also append all-NaN property rows for suburbs that only appear in income_df.
final_df = pd.merge(domain_abs, income_df, on='SAL_NAME21', how='left')
final_df.head()

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,Total Suburb Dwellings,coordinate,distance_to_melbourne_cbd_km,nearest_shopping_center_distance_km,nearest_park_distance_km,nearest_tram_station_distance_km,nearest_train_station_distance_km,nearest_bus_stop_distance_km,nearest_school_distance_km,Weekly Income($)
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,13443.0,"[-37.8150001, 144.9538708]",0.884547,0.202829,29.641073,0.135415,0.370453,0.135597,0.239541,994.0
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,13443.0,"[-37.815781, 144.9529156]",0.970252,0.082822,29.661964,0.070096,0.273663,0.087685,0.296386,994.0
2,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,13443.0,"[-37.8144537, 144.9534426]",0.925663,0.226635,29.706333,0.114239,0.336648,0.126732,0.193919,994.0
3,521/422 Collins St Melbourne VIC 3000,Apartment / Unit / Flat,$500 weekly,18.0,28.0,39.0,VIC,1.0,1.0,0.0,...,13443.0,"[-37.8170971, 144.9601487]",0.393584,0.340323,29.054073,0.036919,0.613539,0.112485,0.356671,994.0
4,603/199 William Street Melbourne VIC 3000,Apartment / Unit / Flat,$700,22.0,29.0,24.0,VIC,2.0,2.0,0.0,...,13443.0,"[-37.8145716, 144.9573479]",0.583106,0.474756,29.415873,0.120055,0.324232,0.080214,0.242857,994.0


### Save the file


In [208]:
# persist the listings joined with suburb income;
# index=False is not passed, so the row index is written as an extra unnamed first column
final_df.to_csv('../data/raw/final_data_1.csv')
