# Calculate closest street to geopoint using GNAF

### Notes:
- Finds closest property to a geopoint and returns the name and type of street/road etc.
- Should be fairly accurate, assuming crashes occur on roads.
- Uses GNAF May 2018 release.

## Import relevant modules

In [1]:
import pandas
import datetime
import numpy
import math
from scipy import spatial
import random
from multiprocessing import Pool

## Import Paths

In [2]:
gnaf_path = "/Users/danielcorcoran/Desktop/GNAF VIC MAY18/OUTPUT/GNAF_MERGED_SUBSET.csv"

In [3]:
nodes_dpc = "/Users/danielcorcoran/Desktop/Tableau (mac)/Tableau Public Projects/Vic Crash Stats (Extracted 4-5-2018)/Vicroads Data/NODES_DPC.csv"

## Create Dataframes

In [4]:
gnaf_data = pandas.read_csv(gnaf_path)
gnaf_data.columns

Index(['ROW_INDEX', 'ADDRESS_DETAIL_PID', 'POSTCODE', 'ADDRESS_SITE_PID',
       'STREET_LOCALITY_PID', 'GNAF_PROPERTY_PID',
       'ADDRESS_DEFAULT_GEOCODE_PID', 'LONGITUDE', 'LATITUDE', 'STREET_NAME',
       'STREET_TYPE_CODE'],
      dtype='object')

In [5]:
node_data = pandas.read_csv(nodes_dpc)
node_data.columns

Index(['NODE_ROW_INDEX', 'ACCIDENT_NO', 'NODE_ID', 'NODE_TYPE', 'AMG_X',
       'AMG_Y', 'LGA_NAME', 'Lga Name All', 'Region Name', 'Deg Urban Name',
       'Lat', 'Long', 'Postcode No', 'node_row_index', 'polygon_index',
       'lga_code', 'lga_name'],
      dtype='object')

## Create comparison points list

In [6]:
comparison_points = []

for index in range(len(gnaf_data)):
    
    comparison_points.append((gnaf_data.loc[index,"LONGITUDE"], gnaf_data.loc[index, "LATITUDE"]))
    
len(comparison_points)

3714347

In [10]:
comparison_points = numpy.array(comparison_points)
base_points = numpy.array(base_points)

## Create base points list

In [7]:
base_points = []

for index in range(len(node_data)):
    
    base_points.append((node_data.loc[index, "Long"],node_data.loc[index,"Lat"]))
    
len(base_points)

163913

## Create Spatial KDTree Object

In [12]:
import sys


In [15]:
sys.setrecursionlimit(10000)

In [16]:
tree = spatial.KDTree(comparison_points)

## Function to return shortest distance and index using KDTree defined above

In [17]:
def return_distance_index_closest_point(point):
    
    distance_index_tuple = tree.query(point)
    return distance_index_tuple

## Run and time process

In [19]:
start_time  = datetime.datetime.now() 

pool = Pool(6)
results = list(pool.map(return_distance_index_closest_point, base_points))

print("\n\n\n\n", datetime.datetime.now() - start_time)





 0:00:17.192224


In [21]:
len(results) == len(base_points)

True

In [22]:
results

[(0.00033405388787777364, 748115),
 (0.0010043843168738437, 3316705),
 (0.000253980319317985, 69424),
 (0.0007741779387959148, 1672264),
 (0.0001763413309555592, 3507205),
 (0.0002667852583984449, 1208074),
 (0.0004530604053607204, 3473685),
 (0.0003596845840522413, 1648621),
 (0.0006312162862255983, 179735),
 (0.00016835973390553395, 1576722),
 (0.00033924917096094415, 482029),
 (0.00043603784239101714, 2122716),
 (0.0006865801479768095, 1440078),
 (0.0004789411789671457, 2383824),
 (0.0004549767154329235, 294289),
 (0.00015885353789713897, 2349470),
 (0.0008432429617387178, 125669),
 (0.0009462314727217048, 940554),
 (0.0003677839039432309, 2724762),
 (0.0006882503882315602, 3331052),
 (0.0002901465147370874, 3000006),
 (0.0002762477686698246, 123311),
 (0.0005408380423877457, 2224170),
 (0.0186621207356644, 3397378),
 (0.001174192536746108, 1222084),
 (0.0003166654385913898, 663089),
 (0.00022374629138193958, 3473201),
 (0.00026183399418335787, 2709740),
 (0.0005316780489231778, 344

In [23]:
node_data2 = pandas.concat([node_data, pandas.Series(results).rename("script_results")], axis = 1)

In [24]:
node_data2

Unnamed: 0,NODE_ROW_INDEX,ACCIDENT_NO,NODE_ID,NODE_TYPE,AMG_X,AMG_Y,LGA_NAME,Lga Name All,Region Name,Deg Urban Name,Lat,Long,Postcode No,node_row_index,polygon_index,lga_code,lga_name,script_results
0,0,T20060000010,43078,I,2519154.655,2390265.154,DANDENONG,DANDENONG,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.98862,145.21805,3175,0,25,22670,Greater Dandenong (C),"(0.00033405388787777364, 748115)"
1,1,T20060000018,29720,N,2524272.743,2389996.817,CASEY,CASEY,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.99092,145.27632,3804,1,13,21610,Casey (C),"(0.0010043843168738437, 3316705)"
2,2,T20060000022,203074,N,2487321.693,2345019.925,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,RURAL_VICTORIA,-38.39632,144.85489,3939,2,52,25340,Mornington Peninsula (S),"(0.000253980319317985, 69424)"
3,3,T20060000023,55462,I,2512734.120,2390214.959,DANDENONG,"KINGSTON,DANDENONG",METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.98918,145.14496,3173,3,34,23430,Kingston (C) (Vic.),"(0.0007741779387959148, 1672264)"
4,4,T20060000026,202988,N,2488777.662,2347612.069,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.37299,144.87159,3940,4,52,25340,Mornington Peninsula (S),"(0.0001763413309555592, 3507205)"
5,5,T20060000028,277431,I,2508117.147,2363427.424,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.23056,145.09270,3931,5,52,25340,Mornington Peninsula (S),"(0.0002667852583984449, 1208074)"
6,6,T20060000035,203045,N,2579241.745,2372065.357,BAW BAW,BAW BAW,EASTERN REGION,RURAL_VICTORIA,-38.14939,145.90400,3820,6,5,20830,Baw Baw (S),"(0.0004530604053607204, 3473685)"
7,7,T20060000042,203047,N,2514150.699,2371925.888,FRANKSTON,FRANKSTON,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.15393,145.16144,3199,7,19,22170,Frankston (C),"(0.0003596845840522413, 1648621)"
8,8,T20060000044,35621,I,2519124.787,2388749.445,DANDENONG,DANDENONG,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.00228,145.21775,3175,8,25,22670,Greater Dandenong (C),"(0.0006312162862255983, 179735)"
9,9,T20060000046,205206,N,2511128.875,2377532.927,FRANKSTON,FRANKSTON,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.10346,145.12688,3198,9,19,22170,Frankston (C),"(0.00016835973390553395, 1576722)"


In [25]:
results_dataframe = node_data2["script_results"].apply(pandas.Series)

In [27]:
results_dataframe.head()

Unnamed: 0,0,1
0,0.000334,748115.0
1,0.001004,3316705.0
2,0.000254,69424.0
3,0.000774,1672264.0
4,0.000176,3507205.0


In [28]:
results_dataframe.columns = ["Distance", "Gindex"]

In [29]:
node_data_final = pandas.concat([node_data2, results_dataframe], axis = 1 )
node_data_final.head()

Unnamed: 0,NODE_ROW_INDEX,ACCIDENT_NO,NODE_ID,NODE_TYPE,AMG_X,AMG_Y,LGA_NAME,Lga Name All,Region Name,Deg Urban Name,Lat,Long,Postcode No,node_row_index,polygon_index,lga_code,lga_name,script_results,Distance,Gindex
0,0,T20060000010,43078,I,2519154.655,2390265.154,DANDENONG,DANDENONG,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.98862,145.21805,3175,0,25,22670,Greater Dandenong (C),"(0.00033405388787777364, 748115)",0.000334,748115.0
1,1,T20060000018,29720,N,2524272.743,2389996.817,CASEY,CASEY,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.99092,145.27632,3804,1,13,21610,Casey (C),"(0.0010043843168738437, 3316705)",0.001004,3316705.0
2,2,T20060000022,203074,N,2487321.693,2345019.925,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,RURAL_VICTORIA,-38.39632,144.85489,3939,2,52,25340,Mornington Peninsula (S),"(0.000253980319317985, 69424)",0.000254,69424.0
3,3,T20060000023,55462,I,2512734.12,2390214.959,DANDENONG,"KINGSTON,DANDENONG",METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-37.98918,145.14496,3173,3,34,23430,Kingston (C) (Vic.),"(0.0007741779387959148, 1672264)",0.000774,1672264.0
4,4,T20060000026,202988,N,2488777.662,2347612.069,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,-38.37299,144.87159,3940,4,52,25340,Mornington Peninsula (S),"(0.0001763413309555592, 3507205)",0.000176,3507205.0


In [30]:
final_data = node_data_final.merge(right = gnaf_data,
                                  how = "left",
                                  left_on = "Gindex",
                                  right_on = "ROW_INDEX")

In [31]:
final_data.head()

Unnamed: 0,NODE_ROW_INDEX,ACCIDENT_NO,NODE_ID,NODE_TYPE,AMG_X,AMG_Y,LGA_NAME,Lga Name All,Region Name,Deg Urban Name,...,ADDRESS_DETAIL_PID,POSTCODE,ADDRESS_SITE_PID,STREET_LOCALITY_PID,GNAF_PROPERTY_PID,ADDRESS_DEFAULT_GEOCODE_PID,LONGITUDE,LATITUDE,STREET_NAME,STREET_TYPE_CODE
0,0,T20060000010,43078,I,2519154.655,2390265.154,DANDENONG,DANDENONG,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,...,GAVIC421916814,3175,422053715,VIC1958844,578458.0,1278932,145.217716,-37.988626,FOSTER,STREET
1,1,T20060000018,29720,N,2524272.743,2389996.817,CASEY,CASEY,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,...,GAVIC423900268,3804,423990899,VIC2056738,208310432.0,4184146,145.277266,-37.991259,WEST VISTA,PLACE
2,2,T20060000022,203074,N,2487321.693,2345019.925,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,RURAL_VICTORIA,...,GAVIC423134249,3939,423237624,VIC1930473,1040683.0,4406982,144.854947,-38.396567,BROWNS,ROAD
3,3,T20060000023,55462,I,2512734.12,2390214.959,DANDENONG,"KINGSTON,DANDENONG",METROPOLITAN SOUTH EAST REGION,MELB_URBAN,...,GAVIC424971689,3173,425057037,VIC2070403,2544.0,2468987,145.145732,-37.989122,SPRINGVALE,ROAD
4,4,T20060000026,202988,N,2488777.662,2347612.069,MORNINGTON PENINSULA,MORNINGTON PENINSULA,METROPOLITAN SOUTH EAST REGION,MELB_URBAN,...,GAVIC719219660,3940,714604693,VIC3619977,220275836.0,3007415064,144.871415,-38.373015,ELIZABETH,AVENUE


In [32]:
final_data.shape

(163913, 31)

In [33]:
final_data.to_csv("/users/danielcorcoran/Desktop/NODES_DPC_2.csv",index = False)