In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import fiona
import glob
import os
import contextily as ctx
from scipy.spatial import cKDTree
from shapely.geometry import Point
import json
from tqdm.auto import tqdm
pd.set_option('min_rows', 30)
import sys
sys.path.append('..')
from importlib import reload
# import src.utils as utils
# reload(utils)
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12, 12)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)

## Setup

In [None]:
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-CLIPPED-4326.gpkg')

In [80]:
parcels_sample = parcels.sample(10000)

In [81]:
# keep a centroid and polygon version - change between according to need
parcels_sample['geometry_centroid'] = parcels_sample.geometry.centroid
parcels_sample['geometry_polygon'] = parcels_sample.geometry


  parcels_sample['geometry_centroid'] = parcels_sample.geometry.centroid


## 3. Local Area (LA) Information (information on the LA that the consent is located in): 

#### a. Name (just the string is fine; I did not generate numeric codes for the local areas generated) **Local_Area_name**

## 4. 2018 Statistical Area 2 Information (information on the 2018SA2 that the consent is located in)

#### a. Name (string) **SA22018_name**
#### b. Numeric code **SA22018_code**

In [11]:
sa2 = gpd.read_file('NZ-SA/statistical-area-2-2020-generalised.gdb').to_crs(4326)
sa2.sample(3)

In [82]:
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, sa2[['SA22020_V1_00_NAME', 'SA22020_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'SA22020_V1_00_NAME': 'SA22018_name', 'SA22020_V1_00': 'SA22018_code'})

CPU times: user 8.29 s, sys: 3.88 ms, total: 8.29 s
Wall time: 8.29 s


## 5. Area Unit Information (information on the 2013AU  that the consent is located in):

#### a. Name (string) **AU2013_name**
#### b. Numeric code **AU2013_code**

In [101]:
au2013 = gpd.read_file('input/area-unit-2013.gdb.zip').to_crs(4326)
au2013.sample(3)

Unnamed: 0,AU2013_V1_00,AU2013_V1_00_NAME,AREA_SQ_KM,LAND_AREA_SQ_KM,Shape_Length,geometry
1481,573903,Newlands North,0.804087,0.804087,5958.640337,"MULTIPOLYGON (((174.82896 -41.21949, 174.83011..."
958,525202,Kawakawa-Orere,105.216001,105.216001,57437.556145,"MULTIPOLYGON (((175.14259 -36.93214, 175.14265..."
608,597102,Inland Water-Lake Ellesmere South,63.304671,0.0,75233.330853,"MULTIPOLYGON (((172.57398 -43.76779, 172.57401..."


In [103]:
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, au2013[['AU2013_V1_00_NAME', 'AU2013_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'AU2013_V1_00_NAME': 'AU2013_name', 'AU2013_V1_00': 'AU2013_code'})

CPU times: user 6.96 s, sys: 0 ns, total: 6.96 s
Wall time: 6.96 s


## 6. 2018 Meshblock Information (information on the 2018MB  that the consent is located in):

#### a. Code **MB2018_code**

In [100]:
mb2018 = gpd.read_file('input/meshblock-2018-clipped-generalised.gdb.zip').to_crs(4326)
mb2018.sample(3)

Unnamed: 0,MB2018_V1_00,LANDWATER,LANDWATER_NAME,LAND_AREA_SQ_KM,AREA_SQ_KM,SHAPE_Length,geometry
52865,4011926,12,Mainland,0.17371,0.17371,2129.422679,"MULTIPOLYGON (((174.91945 -36.94519, 174.92053..."
23832,1429800,12,Mainland,0.021918,0.021918,731.394363,"MULTIPOLYGON (((176.91682 -39.49291, 176.91702..."
15554,759520,12,Mainland,0.020695,0.020695,928.355735,"MULTIPOLYGON (((174.91840 -37.01411, 174.91871..."


In [97]:
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, mb2018[['MB2018_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MB2018_V1_00': 'MB2018_code'})

CPU times: user 11.3 s, sys: 8.2 ms, total: 11.3 s
Wall time: 11.3 s


## 7. 2013 Meshblock Information (information on the 2013MB  that the consent is located in):

#### a. Code **MB2013_code**

In [84]:
mb2013 = gpd.read_file('input/meshblock-2013.gdb.zip').to_crs(4326)
mb2013.sample(3)

In [87]:
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, mb2013[['MeshblockNumber', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MeshblockNumber': 'MB2013_code'})

CPU times: user 11.4 s, sys: 7.05 ms, total: 11.4 s
Wall time: 11.4 s


## 8. Additional distance information from consent location
- b. Minimum Haversinedistance to Open Space(set of AUP Zones) **Hdist_open**
- c. Minimum Haversine distance to motorway**Hdist_motorway**
- d. Minimum Haversine distanceto main arterial road **Hdist_main_road**
- e. Minimum Haversine distance to rail line **Hdist_rail**
- f. Haversine distance to downtown (use Skytower coordinates) **Hdist_skytowe**

- a. Minimum Haversine distance to coastline **Hdist_coast**  
There are a few different datasets that could be used for this:  
    - NZ Coastlines (Topo 1:50k) https://data.linz.govt.nz/layer/50258-nz-coastlines-topo-150k/
    - NZ Coastline - mean high water https://data.linz.govt.nz/layer/105085-nz-coastline-mean-high-water/
    - NZ Coastlines and Islands Polygons (Topo 1:50k) https://data.linz.govt.nz/layer/51153-nz-coastlines-and-islands-polygons-topo-150k/  

The first doesn't have islands (e.g. Waiheke).

- b. Minimum Haversinedistance to Open Space(set of AUP Zones) **Hdist_open**
- c. Minimum Haversine distance to motorway**Hdist_motorway**
- d. Minimum Haversine distanceto main arterial road **Hdist_main_road**
- e. Minimum Haversine distance to rail line **Hdist_rail**
- f. Haversine distance to downtown (use Skytower coordinates) **Hdist_skytowe**