### Import Modules

In [None]:
# Shapefile reading and manipulation

import fiona
from shapely.geometry import shape, Point
from shapely import speedups
# Switch on shapely's speedups before any geometry work is done below.
# NOTE(review): the speedups module is deprecated in newer Shapely releases — confirm against the installed version.
speedups.enable()

# Data manipulation
import pandas
import numpy

### Set paths

In [None]:
# Delimited text file holding the points to be located (read with a tab separator below)
points_path = "/users/danielcorcoran/desktop/DimAddress.csv"
# Shapefile of LGA polygons; only features whose STE_NAME16 is 'Victoria' are used later
lga_shapefile = "/users/danielcorcoran/desktop/github_repos/python_nb_data_spatial/data/LGA_ABS_16/LGA_2016_AUST.shp"
# Mesh block shapefile; not referenced elsewhere in the visible cells
# (presumably swapped in for lga_shapefile to join on mesh blocks — confirm)
meshblock_shapefile = "/users/danielcorcoran/desktop/github_repos/python_nb_data_spatial/data/1270055001_mb_2016_vic_shape/MB_2016_VIC.shp"

### Import data and preview

In [None]:
# The file carries a .csv extension but is parsed as tab-separated
data = pandas.read_csv(points_path, sep = "\t")

In [None]:
# Preview the first rows of the loaded table
data.head()

In [None]:
def make_test_data(row_count=20000, seed=0, long_name='Longitude', lat_name='Latitude'):
    """Build a synthetic table of random points for trying the notebook without the real file.

    Parameters
    ----------
    row_count : int
        Number of random points to generate.
    seed : int
        Seed for the private random generator, so repeated calls with the
        same seed give the same points.
    long_name, lat_name : str
        Column headers for the two coordinates; the defaults match the
        headers the rest of the notebook looks up.

    Returns
    -------
    pandas.DataFrame
        ``row_count`` rows with longitude drawn uniformly from [140, 150)
        and latitude drawn uniformly from [-40, -32).
    """
    # A private RandomState keeps the global numpy random state untouched.
    generator = numpy.random.RandomState(seed)
    return pandas.DataFrame(
        {
            long_name: generator.uniform(140, 150, size=row_count),
            # numpy expects (low, high), so the southern bound comes first.
            lat_name: generator.uniform(-40, -32, size=row_count),
        },
        columns=[long_name, lat_name],
    )


# To run the notebook on synthetic points instead of the file loaded above, uncomment:
# data = make_test_data()

In [None]:
# Report the dimensions of the loaded table
print('Checking data shape:\n{}'.format(data.shape))

In [None]:
# List the column headers so the coordinate column names can be checked
print('Checking data headers:\n{}'.format(list(data.columns)))

In [None]:
# Headers of the coordinate columns in `data`; used when the point objects are built
long_name = 'Longitude'
lat_name = 'Latitude'

### Create list of Victoria-only polygons from the LGA shapefile

In [None]:
# Read every feature of the LGA shapefile into memory. The context manager
# closes the underlying file once the features have been copied out.
with fiona.open(lga_shapefile) as lga_source:
    all_polygons = list(lga_source)

In [None]:
# Keep only the features labelled 'Victoria' that actually carry a geometry
vic_polygons = [
    feature
    for feature in all_polygons
    if feature['properties']['STE_NAME16'] == 'Victoria' and feature['geometry'] is not None
]

print("Found {} polygons in shapefile within victoria.".format(len(vic_polygons)))

### Build spatial tree

In [None]:
from rtree import index
# Empty R-tree; polygon bounding boxes are inserted into it in the next step
rtree_index = index.Index()

In [None]:
# Simplified copy of every Victorian polygon, stored at the same position
# the polygon has in vic_polygons
simplified_polygons = []
# Simplification tolerance, expressed in the coordinate units of the geometries
tolerance = 0.0005

for position, polygon in enumerate(vic_polygons):
    # Build the shapely geometry once and reuse it for both the bounding
    # box and the simplified copy.
    shapeobject = shape(polygon['geometry'])
    # Register the bounding box under the polygon's list position so an
    # index hit can be mapped straight back to vic_polygons.
    rtree_index.insert(position, shapeobject.bounds)
    simplified_polygons.append(shapeobject.simplify(tolerance = tolerance))

### Process (with apply)

In [None]:
# Look up the shapefile properties of the polygon that contains a point
def return_properties(point):
    """Return the properties of the first indexed polygon that contains `point`.

    The R-tree is queried with the point's coordinates to get candidate
    polygons, and each candidate is then confirmed with an exact
    point-in-polygon test against the full geometry.

    Returns None when no candidate polygon contains the point.
    """
    query_coordinates = point.coords[0]

    for candidate in rtree_index.intersection(query_coordinates):
        record = vic_polygons[candidate]
        # The test uses the full geometry; simplified_polygons[candidate]
        # is the alternative the original author left disabled here.
        if not point.within(shape(record['geometry'])):
            continue
        return record['properties']

    return None

In [None]:
# Pair up the longitude and latitude columns as (x, y) tuples, then turn
# each tuple into a shapely Point for the spatial lookup
data['point'] = [
    (longitude, latitude)
    for longitude, latitude in zip(data[long_name], data[lat_name])
]
data["shapely_point"] = data['point'].apply(Point)

In [None]:
from multiprocessing import Pool
# Pool of 4 worker processes used to run the point lookups in parallel
# NOTE(review): the pool is never closed or joined in the visible cells — confirm cleanup happens elsewhere
p = Pool(4)

In [None]:
%%time

# Look up every point in parallel; Pool.map already returns the results
# as a list in the same order as the input points
data['results'] = p.map(return_properties, data["shapely_point"])

# Expand each result into one column per property and attach those
# columns to the original rows
expanded_properties = data['results'].apply(pandas.Series)
data = pandas.concat([data, expanded_properties], axis = 1)

# Remove the helper columns that were only needed for the lookup
data = data.drop(['shapely_point', 'results', 'point'], axis = 1)

In [None]:
# Preview the joined table, now including the polygon property columns
print(data.head())

In [None]:
# Use 'MB_CODE16' for mesh blocks and 'LGA_CODE16' for local government areas
column_header = 'LGA_CODE16'

# Report how many points received a value in the chosen column
summary_template = 'Points found {}. Total Points {}. % Points found {:.2%}'
matched_count = data[column_header].notnull().sum()
total_count = data[column_header].shape[0]
print(summary_template.format(matched_count, total_count, matched_count / total_count))

### Export

In [None]:
# Write the joined table to disk without the DataFrame index column
data.to_csv('/users/danielcorcoran/desktop/spatial_join_results.csv', index = False)