# Complete geocode

In [138]:
# imports
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import glob
import json
import datetime

%matplotlib inline
plt.rcParams['figure.figsize'] = (40, 20)

In [139]:
# geocode script import
import sys
sys.path.insert(0, '../python')
import geocode

## Test

In [141]:
# read the combined sales csv and keep only a small slice of rows for testing
test_csv = pd.read_csv('../combined_data/sydney_sales_combined.csv')[279:301]

# distinct raw addresses present in the test slice
test_total_address = test_csv['ADDRESS'].unique()

print("Test total number of addresses:" + str(len(test_total_address)))

test_csv.head(n=1)

Test total number of addresses:21


Unnamed: 0.1,Unnamed: 0,ADDRESS,SALE PRICE,SALE DATE,AREA,STRATA/NON STRATA,MULTI-PROPERTY SALE (Y/N),PROPERTY NUMBER,DEALING NUMBER,EXTRACTION DATE,SUBURB,SALE DAY,SALE MONTH,SALE YEAR
279,291,"154 UNWINS BRIDGE ROAD, ST PETERS NSW 2044",710000,2012-06-01,240.3,NON STRATA,No,1969069,AH125573,30 April 2017,St Peters,1,6,2012


## Real CSV

In [143]:
# read the complete combined sales csv
combined_csv = pd.read_csv('../combined_data/sydney_sales_combined.csv')

# every address (one per sale) and the distinct raw address strings
total_address = combined_csv['ADDRESS']
total_unique_address = total_address.unique()

print("Total number of addresses:" + str(len(total_address)))
print("Total number of unique addresses:" + str(len(total_unique_address)))

combined_csv.head(n=1)

Total number of addresses:41955
Total number of unique addresses:37437


Unnamed: 0.1,Unnamed: 0,ADDRESS,SALE PRICE,SALE DATE,AREA,STRATA/NON STRATA,MULTI-PROPERTY SALE (Y/N),PROPERTY NUMBER,DEALING NUMBER,EXTRACTION DATE,SUBURB,SALE DAY,SALE MONTH,SALE YEAR
0,0,"45 ALBERT STREET, ST PETERS NSW 2044",1081625,2016-04-14,309.9,NON STRATA,No,1942072,AK853538,30 April 2017,St Peters,14,4,2016


In [144]:
# function returns address separated into unit and street address

def break_address(row):
    """Split a row's ADDRESS into a unit prefix and a street address.

    An address of the form "<unit>/<street address>" is split at the slash;
    an address without any "/" has no unit part.

    Parameters
    ----------
    row : object exposing an ``ADDRESS`` string attribute (e.g. a DataFrame row).

    Returns
    -------
    pd.Series with keys ``unit`` (None when the address contains no "/")
    and ``street_address``.
    """
    # Split on the LAST slash only: with a multi-part prefix such as
    # "A/3/45 X ST" a plain split('/') would keep only the middle token ("3")
    # as the street address and drop the real street part.
    unit, slash, street_address = row.ADDRESS.rpartition('/')
    if not slash:
        # no "/" at all: rpartition leaves the whole string in the last slot
        return pd.Series({'unit': None, 'street_address': street_address})
    return pd.Series({'unit': unit, 'street_address': street_address})

In [145]:
# split every sale's ADDRESS into unit / street_address columns (one output row per sale)

address_list = combined_csv.apply(lambda sale: break_address(sale), axis=1)
address_list.head(n=1)

Unnamed: 0,street_address,unit
0,"45 ALBERT STREET, ST PETERS NSW 2044",


In [146]:
# get unique addresses: keep the first row seen for each street_address,
# then discard the unit column
unique_address_list = (
    address_list
    .drop_duplicates(subset=["street_address"])
    .drop('unit', axis=1)
)

print("unique number of addresses: " + str(len(unique_address_list)))

unique_address_list.head(n=6)

unique number of addresses: 10321


Unnamed: 0,street_address
0,"45 ALBERT STREET, ST PETERS NSW 2044"
1,"41 ALBERT STREET, ST PETERS NSW 2044"
2,"37 ALBERT STREET, ST PETERS NSW 2044"
3,"35 ALBERT STREET, ST PETERS NSW 2044"
4,"33 ALBERT STREET, ST PETERS NSW 2044"
5,"31 ALBERT STREET, ST PETERS NSW 2044"


### List reduction stats
Started with {{len(total_address)}} addresses (one per sale) and reduced the list to {{len(unique_address_list)}} unique street addresses to geocode.

### Geocoding operation

In [147]:
# geocode and separate data definition

def _component_short_name(components, component_type):
    """Return the short_name of the first component tagged with component_type, or None."""
    for component in components:
        if component_type in component.get("types", []):
            return component.get("short_name")
    return None


def _redact_api_key(text):
    """Mask the value of any ``key=...`` query parameter that appears in text."""
    import re  # local import: only needed on the error path
    return re.sub(r'(\bkey=)[^&\s]+', r'\1<redacted>', text)


def geocode_save(row):
    """Geocode ``row.street_address`` and flatten the first result into a Series.

    Parameters
    ----------
    row : object exposing a ``street_address`` attribute (e.g. a DataFrame row).

    Returns
    -------
    pd.Series with the keys lat, lng, street_number, street_name, g_suburb,
    council_short, g_postcode, gmaps_id, the four "view ..." viewport corners,
    formatted_address, location_type and address_type. An address component
    that is absent from the result is stored as None.
    Returns None (after printing an "Error on: ..." message) when the lookup
    fails or yields no result.

    Notes
    -----
    ``geocode.geocode`` is assumed to return the decoded JSON of a Google
    Geocoding API response -- TODO confirm against ../python/geocode.py.
    """
    address = row.street_address

    try:
        geo_address = geocode.geocode(address)

        results = geo_address["results"]
        if not results:
            # Report the response status rather than an opaque IndexError.
            raise ValueError("no geocoding results (status: {})".format(
                geo_address.get("status")))

        best = results[0]
        location = best["geometry"]["location"]
        components = best["address_components"]
        viewport = best["geometry"]["viewport"]

        # Components are looked up by their type tag, not by list position:
        # the list shifts when a component is missing or an extra one (e.g. a
        # unit/subpremise) is present, which would silently mislabel columns.
        # NOTE(review): council_short is assumed to be the
        # administrative_area_level_2 component -- confirm.
        return pd.Series({"lat": location["lat"],
                          "lng": location["lng"],
                          "street_number": _component_short_name(components, "street_number"),
                          "street_name": _component_short_name(components, "route"),
                          "g_suburb": _component_short_name(components, "locality"),
                          "council_short": _component_short_name(components, "administrative_area_level_2"),
                          "g_postcode": _component_short_name(components, "postal_code"),
                          "gmaps_id": best["place_id"],
                          "view NE lat": viewport["northeast"]["lat"],
                          "view NE lng": viewport["northeast"]["lng"],
                          "view SW lat": viewport["southwest"]["lat"],
                          "view SW lng": viewport["southwest"]["lng"],
                          "formatted_address": best["formatted_address"],
                          "location_type": best["geometry"]["location_type"],
                          "address_type": best["types"][0]
                          })

    except Exception as e:
        # Best effort: one bad address must not abort the whole batch.
        # Connection errors embed the request URL (including the API key) in
        # their text, so mask the key before it lands in the notebook output.
        message = "Error on: {}\n{}".format(address, _redact_api_key("{}".format(e)))
        print(message)
        return None

In [148]:
# timestamp that gives each geocoding run its own output file

run_started = datetime.datetime.now()
timestamp = '{:%Y-%m-%d_%H-%M-%S}'.format(run_started)
print(timestamp)

# geocode every unique street address, then attach the queried address to each result row
geocoded_address_list = unique_address_list.apply(geocode_save, axis=1)
geocoded_address_list['street_address'] = unique_address_list['street_address']

# save the results under the timestamped file name
geocoded_address_list.to_csv(
    '../geocoded_data/geocoded_address_list_{}.csv'.format(timestamp)
)

geocoded_address_list.head(n=1)

Timestamp: 2017-05-29_23-00-18
Error on: 93 SUTHERLAND STREET, ST PETERS NSW 2044
HTTPSConnectionPool(host='maps.googleapis.com', port=443): Max retries exceeded with url: /maps/api/geocode/json?address=93+SUTHERLAND+STREET,+ST+PETERS+NSW+2044&key=REDACTED (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f9261e69650>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
Error on: 45 APPLEBEE STREET, ST PETERS NSW 2044
HTTPSConnectionPool(host='maps.googleapis.com', port=443): Max retries exceeded with url: /maps/api/geocode/json?address=45+APPLEBEE+STREET,+ST+PETERS+NSW+2044&key=REDACTED (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f92650ad350>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))
Error on: 33 PARK STREET, ERSKINEVILLE 

Unnamed: 0,address_type,council_short,formatted_address,g_postcode,g_suburb,gmaps_id,lat,lng,location_type,street_name,street_number,view NE lat,view NE lng,view SW lat,view SW lng,street_address
0,street_address,Marrickville,"45 Albert St, St Peters NSW 2044, Australia",2044,St Peters,ChIJl5Rr3VGwEmsRcE6JlBkOIg4,-33.912874,151.179515,ROOFTOP,Albert St,45,-33.911525,151.180864,-33.914223,151.178166,"45 ALBERT STREET, ST PETERS NSW 2044"
