# Spatial Joining with fiona and shapely

## Notes 

- Joins longitude and latitude points from a pandas dataframe to shapefile polygons (and their properties).
- Uses ESRI postcode and LGA shapefiles from the ABS website.
- Includes postcode and local government area options.
- Accuracy can be traded off for speed using the tolerance_setting parameter.


## Importing modules

In [1]:
import fiona
import pandas
from shapely.geometry import Point, asShape
import datetime as dt
from shapely import speedups
speedups.enable()

## Set option for lga or postcode
Note: The shapefiles contain polygon boundaries from 2016; take caution when joining data that predates 2016.

In [2]:
# Boundary type to join against. Set exactly one flag to True;
# the next cell falls back to LGA when both or neither are set.
postcode = False
lga = True

In [3]:
# Normalise the two flags so exactly one boundary type is active:
# neither selected -> LGA, both selected -> LGA.
if (lga, postcode) == (False, False):
    lga = True

if (lga, postcode) == (True, True):
    postcode = False

# Pick the shapefile and the name of the property holding its area code
# for whichever boundary type is active.
if postcode == True:
    shapefile_path, property_code_name = (
        "/Users/danielcorcoran/Desktop/shared crap/POA_16/POA_2016_AUST.shp",
        "POA_CODE16",
    )
elif lga == True:
    shapefile_path, property_code_name = (
        "/Users/danielcorcoran/Desktop/shared crap/LGA_ABS_16/LGA_2016_AUST.shp",
        "LGA_CODE16",
    )

## Set path for file containing coordinates

In [4]:
# Path of the CSV file holding the coordinates to join.
# The file is read into a pandas dataframe in the next cell and must contain
# one longitude column and one latitude column.

coord_path = "/users/danielcorcoran/Desktop/fixed_camera_locations.csv"

## Import data

In [5]:
# Read the coordinate data into a pandas dataframe.
# No index column is specified, so rows get the default 0..n-1 index that
# the join loop later uses as the row identifier.

coord_data = pandas.read_csv(coord_path)

## Set column names containing longitude and latitude values 
Check the headers here and set the longitude and latitude column names below

In [6]:
# Preview the first rows to find the longitude and latitude column headers
coord_data.head()

Unnamed: 0,row_index,location_long,location_short,longitude,latitude
0,0,Scoresby Road at the intersection of Scoresby ...,Bayswater,145.271051,-37.838904
1,1,Blackshaws Road at the intersection of Blacksh...,Altona North,144.848075,-37.831187
2,2,Warrigal Road at the intersection of Warrigal ...,Cheltenham,145.07425,-37.967517
3,3,"Western Ring Road, Northbound Keilor Park Driv...",Western Ring Road Keilor East,144.850992,-37.73826
4,4,"Western Ring Road, Northbound Western Highway ...",Western Ring Road Deer Park,144.793848,-37.774961


In [7]:
# Headers of the coord_data columns that hold each point's coordinates
longitude_column_name = "longitude"
latitude_column_name = "latitude"

## Iterate through fiona collection and store polygons, simplified polygons, properties and boundaries
Creating reference data in the form of lists

In [8]:
#Note: Code is BUILT TO FIT the ABS LGA/Postcode shapefiles released in 2016.
#Take caution when joining data that predates 2016.

# Tolerance passed to simplify() when building the reduced polygons;
# a larger value gives coarser (less accurate) polygons.
tolerance_setting = 0.0000005

# Parallel lists: position i in each list describes the same polygon.
shapes_list = []             # full-resolution geometries
simple_shapes_list = []      # geometries simplified with tolerance_setting
shapes_properties_list = []  # feature properties (area codes, names, ...)
shapes_bounds_list = []      # bounding boxes of the full-resolution geometries

# Read every feature up front and close the shapefile straight away
# instead of leaving the handle open for the rest of the session.
with fiona.open(shapefile_path) as fiona_collection:
    fiona_collection_list = list(fiona_collection)

for current_shape in fiona_collection_list:
    geometry = current_shape["geometry"]
    properties = current_shape["properties"]

    # Features without a geometry cannot contain any point
    if geometry is None:
        continue

    # Keep Victorian features only: LGAs carry the state name, postcodes
    # are recognised by a code starting with "3".
    if lga == True and postcode == False:
        in_victoria = properties["STE_NAME16"] == "Victoria"
    elif lga == False and postcode == True:
        in_victoria = properties["POA_CODE16"][:1] == "3"
    else:
        in_victoria = False

    if not in_victoria:
        continue

    # Build the geometry once and derive the simplified shape and bounds from it.
    # NOTE(review): asShape is deprecated in recent Shapely releases - confirm
    # the installed version before upgrading.
    full_shape = asShape(geometry)
    shapes_list.append(full_shape)
    simple_shapes_list.append(full_shape.simplify(tolerance = tolerance_setting))
    shapes_properties_list.append(properties)
    shapes_bounds_list.append(full_shape.bounds)

## Check the size of each list
Checking how many polygons we have in total: there should be 698 postcode polygons or 80 LGA polygons.

In [9]:
# Number of full-resolution polygons kept by the Victoria filter
len(shapes_list)

80

In [10]:
# Number of bounding boxes; should equal the number of polygons
len(shapes_bounds_list)

80

In [11]:
# Number of simplified polygons; should equal the number of polygons
len(simple_shapes_list)

80

In [12]:
# Number of property records; should equal the number of polygons
len(shapes_properties_list)

80





## Calculate the master bounds
master_bounds holds the minimum and maximum longitude and latitude across all Victorian polygons. Points outside this box are rejected without testing any individual polygon, which speeds up the join.

In [13]:
# Seed the overall bounding box with the first polygon's box, copied into a
# mutable list so the following cell can widen it in place.
# Order: [min longitude, min latitude, max longitude, max latitude]
first_bounds = shapes_bounds_list[0]
master_bounds = [first_bounds[slot] for slot in range(4)]

In [14]:
# One entry per edge of the master box, in the order the edges are checked:
# (slot in the bounds tuple, progress message, True when a smaller value wins)
bound_checks = (
    (0, "Master bounds longitude (low) has been updated to", True),
    (2, "Master bounds longitude (high) has been updated to", False),
    (1, "Master bounds latitude (low) has been updated to", True),
    (3, "Master bounds latitude (high) has been updated to", False),
)

# Widen master_bounds until it encloses every polygon's bounding box
for shape_bounds_tuple in shapes_bounds_list:

    # Snapshot the master box as it stood before this polygon was examined
    previous_bounds = list(master_bounds)

    for slot, message, smaller_wins in bound_checks:
        candidate = shape_bounds_tuple[slot]

        if smaller_wins:
            extends_box = candidate < previous_bounds[slot]
        else:
            extends_box = candidate > previous_bounds[slot]

        if extends_box:
            master_bounds[slot] = candidate
            print(message, master_bounds[slot])

Master bounds longitude (low) has been updated to 142.353480213
Master bounds latitude (low) has been updated to -37.836583308999934
Master bounds latitude (low) has been updated to -38.67879995399994
Master bounds latitude (high) has been updated to -36.25505162799993
Master bounds latitude (high) has been updated to -35.19282674999994
Master bounds latitude (low) has been updated to -38.85769295599994
Master bounds longitude (high) has been updated to 149.9762909950001
Master bounds longitude (low) has been updated to 140.965735138
Master bounds longitude (low) has been updated to 140.96168199600004
Master bounds latitude (high) has been updated to -33.98042558699996
Master bounds latitude (low) has been updated to -39.13674005499996
Master bounds latitude (low) has been updated to -39.159189527999956


In [15]:
# Check master bounds.
# Order: [min longitude, min latitude, max longitude, max latitude]

master_bounds

[140.96168199600004,
 -39.159189527999956,
 149.9762909950001,
 -33.98042558699996]

## Dictionary to store results

In [16]:
# Join results as two parallel columns: the coord_data row index of each
# matched point and the area code of the polygon that contains it.
results = dict(data_row_index=[], matched_code=[])

## Process, output results

In [17]:
# Join every coordinate in coord_data to the polygon that contains it.
# Matches are appended to results, summarised on screen and written to a
# timestamped CSV in the current working directory.

# Reset counter: number of coordinates matched to a shapefile polygon
contains = 0

# Start timer
start = dt.datetime.now()

# Unpack once; master_bounds is ordered
# [min longitude, min latitude, max longitude, max latitude]
min_longitude, min_latitude, max_longitude, max_latitude = master_bounds

# Pair each bounding box with its polygon and properties so the inner loop
# does no index arithmetic. The simplified polygons are used for the
# containment test, which is what makes tolerance_setting take effect.
candidates = list(zip(shapes_bounds_list, simple_shapes_list, shapes_properties_list))

coordinate_pairs = zip(coord_data[longitude_column_name],
                       coord_data[latitude_column_name])

# Iterate through the coordinate data set (outer loop). data_index is the
# row position, which equals the row label for the default 0..n-1 index.
for data_index, (longitude, latitude) in enumerate(coordinate_pairs):

    print("Starting check for geopoint #",data_index)

    # Cheap rejection: skip points outside the box enclosing all polygons
    if not (min_longitude <= longitude <= max_longitude
            and min_latitude <= latitude <= max_latitude):
        print("\t>Shape at data index",data_index,"is outside of Victoria's bounds.")
        continue

    point = Point(longitude, latitude)

    # Inside the overall box: check against each polygon in turn
    for (low_long, low_lat, high_long, high_lat), polygon, properties in candidates:

        # Bounding-box test first; it is far cheaper than a containment test
        if not (low_long <= longitude <= high_long and low_lat <= latitude <= high_lat):
            continue

        if polygon.contains(point):

            print("\t>Coordinates at data index",data_index,"contains point")

            results["data_row_index"].append(data_index)
            results["matched_code"].append(properties[property_code_name])
            contains = contains + 1

            # A point belongs to at most one polygon, so stop at the first match
            break

    else:
        # Inside the overall box but in no polygon (for example just across
        # the state border): report it rather than dropping it silently.
        print("\t>Coordinates at data index",data_index,"matched no polygon.")

# Show points found and time taken to run
end = dt.datetime.now()
difference_time = end - start

timestamp = end.strftime("%y%m%d_%H%M")

print("\n","POINTS FOUND:",contains)
print("POINTS TOTAL:",coord_data.shape[0])
print("FINISHED IN:",difference_time)

result_dataframe = pandas.DataFrame(results)
result_dataframe.to_csv("sjoin_output_"+timestamp+".csv", index = False)
print("Spine data was exported successfully. Use this dataset to join the original data to shape files.")

Starting check for geopoint # 0
	>Coordinates at data index 0 contains point
Starting check for geopoint # 1
	>Coordinates at data index 1 contains point
Starting check for geopoint # 2
	>Coordinates at data index 2 contains point
Starting check for geopoint # 3
	>Coordinates at data index 3 contains point
Starting check for geopoint # 4
	>Coordinates at data index 4 contains point
Starting check for geopoint # 5
	>Coordinates at data index 5 contains point
Starting check for geopoint # 6
	>Coordinates at data index 6 contains point
Starting check for geopoint # 7
	>Coordinates at data index 7 contains point
Starting check for geopoint # 8
	>Coordinates at data index 8 contains point
Starting check for geopoint # 9
	>Coordinates at data index 9 contains point
Starting check for geopoint # 10
	>Coordinates at data index 10 contains point
Starting check for geopoint # 11
	>Coordinates at data index 11 contains point
Starting check for geopoint # 12
	>Coordinates at data index 12 contains

	>Coordinates at data index 105 contains point
Starting check for geopoint # 106
	>Coordinates at data index 106 contains point
Starting check for geopoint # 107
	>Coordinates at data index 107 contains point
Starting check for geopoint # 108
	>Coordinates at data index 108 contains point
Starting check for geopoint # 109
	>Coordinates at data index 109 contains point
Starting check for geopoint # 110
	>Coordinates at data index 110 contains point
Starting check for geopoint # 111
	>Coordinates at data index 111 contains point
Starting check for geopoint # 112
	>Coordinates at data index 112 contains point
Starting check for geopoint # 113
	>Coordinates at data index 113 contains point
Starting check for geopoint # 114
	>Coordinates at data index 114 contains point
Starting check for geopoint # 115
	>Coordinates at data index 115 contains point
Starting check for geopoint # 116
	>Coordinates at data index 116 contains point
Starting check for geopoint # 117
	>Coordinates at data index 

	>Coordinates at data index 215 contains point
Starting check for geopoint # 216
	>Coordinates at data index 216 contains point
Starting check for geopoint # 217
	>Coordinates at data index 217 contains point
Starting check for geopoint # 218
	>Coordinates at data index 218 contains point
Starting check for geopoint # 219
	>Coordinates at data index 219 contains point
Starting check for geopoint # 220
	>Coordinates at data index 220 contains point
Starting check for geopoint # 221
	>Coordinates at data index 221 contains point
Starting check for geopoint # 222
	>Coordinates at data index 222 contains point
Starting check for geopoint # 223
	>Coordinates at data index 223 contains point
Starting check for geopoint # 224
	>Coordinates at data index 224 contains point
Starting check for geopoint # 225
	>Coordinates at data index 225 contains point
Starting check for geopoint # 226
	>Coordinates at data index 226 contains point
Starting check for geopoint # 227
	>Coordinates at data index 

## Export coordinate data with index label

The coordinate data should be exported with its row index so that it can be joined to the `data_row_index` column of the output file.

In [18]:
# Export the coordinate data together with its row index, written as a
# "data_row_index" column, so it can be joined to the column of the same
# name in the spatial join output.
coord_data.to_csv("/users/danielcorcoran/desktop/fixed_camera_locations_v2.csv",
                  index = True, index_label = "data_row_index")

print("Coordinate data was exported successfully.")

Coordinate data was exported successfully.


In [19]:
# Inspect the raw join results (matched row indices and their area codes)
results

{'data_row_index': [0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
