In [1]:
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon
from tqdm import tqdm
# from tqdm import tqdm
from tqdm.auto import tqdm  # for notebooks
tqdm.pandas()
import os

In [2]:
# Routine for merging multiple matching entries into a single entry with multipolygon geometry

def groupby_multipoly(df, by, aggfunc="first"):
    """Collapse rows sharing a key into one row with a MultiPolygon geometry.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Frame whose active geometry column holds Polygon and/or MultiPolygon
        geometries.
    by : label or list of labels
        Grouping key(s); passed unchanged to ``groupby``.
    aggfunc : default "first"
        Aggregation applied to the non-geometry columns of each group;
        passed unchanged to ``agg``.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per group. The grouping key(s) are restored as ordinary
        columns, the geometry column holds a MultiPolygon assembled from
        every polygon in the group, and the CRS is copied from ``df``.
    """
    geometry_name = df.geometry.name

    # Aggregate the attribute (non-spatial) columns
    data = df.drop(labels=geometry_name, axis=1)
    aggregated_data = data.groupby(by=by).agg(aggfunc)

    # Process spatial component
    def merge_geometries(block):
        # The MultiPolygon constructor rejects sequences that contain
        # MultiPolygons, so geometries that are already multi-part are
        # unpacked into their member polygons first.
        polygons = []
        for geom in block:
            if isinstance(geom, MultiPolygon):
                polygons.extend(geom.geoms)
            else:
                polygons.append(geom)
        return MultiPolygon(polygons)

    g = df.groupby(by=by, group_keys=False)[geometry_name].agg(
        merge_geometries
    )

    # Aggregate
    aggregated_geometry = gpd.GeoDataFrame(g, geometry=geometry_name, crs=df.crs)
    # Recombine
    aggregated = aggregated_geometry.join(aggregated_data)
    aggregated = aggregated.reset_index()

    return aggregated

In [3]:
## If it doesn't already exist, create the filtered 2016 data file
# Note that this can take 1-2 hours, and requires 20GB+ of RAM

# Whitelist of columns (attributes plus geometry) kept in the filtered file
columns = ['id', 'appellation', 'affected_surveys', 'parcel_intent',
       'topology_type', 'statutory_actions', 'land_district', 'titles',
       'survey_area', 'calc_area', 'geometry']

input_file = 'input/NZ_Primary_Parcels_Nov_2016.zip!NZ_Primary_Parcels_Nov_2016.gpkg'
output_file = 'input/NZ_Primary_Parcels_Nov_2016_filtered.gpkg'

if not os.path.isfile(output_file):
    # Load data
    parcels = gpd.read_file(input_file)
    # Drop columns we don't need: keep only the whitelist defined above
    # (selecting `parcels.columns` here would keep every column)
    parcels = parcels[columns]
    # Drop data outside Auckland
    parcels = parcels[parcels.land_district.isin(['North Auckland'])]
    # Merge multiple entries matching a single non-contiguous area into multipolygons
    parcels = groupby_multipoly(parcels, by='id')
    # Save file so later runs can skip the expensive steps above
    parcels.to_file(output_file, driver='GPKG')
else:
    # Just load the existing file
    parcels = gpd.read_file(output_file)

In [None]:
## Alternative generation from shape file
input_file = 'input/Primary_Parcels_2016_AC_extraction_v2_hydro_removed.shp'
output_file = 'input/Primary_Parcels_2016_prepared.gpkg'

# Make sure the .dbf and .shx companions sit next to the .shp before reading
shapefile = os.path.splitext(input_file)[0]
assert all(os.path.isfile(shapefile + ext) for ext in ('.dbf', '.shx'))

parcels_in = gpd.read_file(input_file)

# Truncated shapefile field names -> full column names
rename_columns = {
    'appellatio': 'appellation',
    'affected_s': 'affected_surveys',
    'parcel_int': 'parcel_intent',
    'topology_t': 'topology_type',
    'statutory_': 'statutory_actions',
    'land_distr': 'land_district',
    'survey_are': 'survey_area',
}
# Columns whose names are already correct and are copied through unchanged
keep_columns = ['titles', 'geometry', 'calc_area', 'id']

# Rename in place so that `parcels_in` itself carries the full names
parcels_in.rename(columns=rename_columns, inplace=True)
copy_columns = [*rename_columns.values(), *keep_columns]

# Keep only the wanted columns, then merge rows sharing an id into multipolygons
parcels_out = groupby_multipoly(parcels_in[copy_columns], by='id')

parcels_out.to_file(output_file, driver='GPKG')

In [5]:
# Preview the first rows of `parcels_in`, the frame read from the shapefile above
parcels_in.head()

Unnamed: 0,id_0,gml_parent,gml_pare_1,gml_id,x__change__,x__change_1,id,id0x2Exsi_n,appellatio,appellat_1,...,land_district,land_dis_1,titles,titles0x2Ex,survey_area,survey_a_1,calc_area,calc_area0x,shape,geometry
0,1965544,,member,layer-50772-changeset.2672114,INSERT,,5164073,,Allot M45 PSH OF Aotea,,...,North Auckland,,NA573/64,,161874.0,,307464.0,,,"POLYGON ((175.53456 -36.32982, 175.53558 -36.3..."
1,1607634,,member,layer-50772-changeset.3877020,INSERT,,4766923,,Allot 52 PSH OF Aotea,,...,North Auckland,,,,222577.0,,179598.0,,,"POLYGON ((175.53200 -36.32592, 175.53221 -36.3..."
2,1936418,,member,layer-50772-changeset.2482146,INSERT,,5131796,,Allot 53 PSH OF Aotea,,...,North Auckland,,NA15D/778,,275186.0,,272391.0,,,"POLYGON ((175.53746 -36.31818, 175.53761 -36.3..."
3,2032553,,member,layer-50772-changeset.1146449,INSERT,,5236475,,,,...,North Auckland,,,,,,13770.0,,,"POLYGON ((175.53097 -36.31847, 175.53096 -36.3..."
4,1610804,,member,layer-50772-changeset.2526936,INSERT,,4770388,,Allot 61 PSH OF Aotea,,...,North Auckland,,NA24C/661,,764856.0,,744654.0,,,"POLYGON ((175.53768 -36.31801, 175.53759 -36.3..."
