In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2
%matplotlib inline

# Prepare table of 2001 area units and rental area units

In [3]:
# Load the 2001 census area-unit table, keeping the SAU codes as strings,
# then switch to the notebook's column names and discard the 'Water' column.
path = hp.DATA_DIR/'collected'/'Geographical Table.csv'
census_columns = {
    'SAU': 'au2001',
    'SAU.Desc': 'au_name',
    'TA': 'territory',
    'Region': 'region',
}
f = (
    pd.read_csv(path, dtype={'SAU': str})
    .rename(columns=census_columns)
    .drop('Water', axis=1)
)
f.head()

# Load the market-rent area table, keeping the SAU codes as strings,
# and switch to the notebook's column names.
path = hp.DATA_DIR/'collected'/'Market Rent Areas.csv'
rent_columns = {
    'SAU': 'au2001',
    'AU NAME': 'au_name',
    'TA': 'territory',
    'MARKET RENT DESCRIPTION': 'rental_area',
}
g = pd.read_csv(path, dtype={'SAU': str}).rename(columns=rent_columns)

# Clean rental areas
def clean(x):
    """
    Return the short rental-area name taken from the description string ``x``.

    The description is split on ' - '. The second part is returned, unless
    it contains 'District', in which case the first part is returned.
    A description with no ' - ' separator is returned unchanged instead of
    raising an IndexError.
    """
    parts = x.split(' - ')
    if len(parts) < 2:
        # No separator, so there is nothing to choose between
        return x
    return parts[0] if 'District' in parts[1] else parts[1]

# Shorten the rental-area descriptions
g['rental_area'] = g['rental_area'].map(clean)

# Attach each area unit's rental area by joining on the shared au2001 code
rental_lookup = g[['au2001', 'rental_area']]
f = f.merge(rental_lookup, on='au2001', how='inner')

# Save the combined table without the index column
path = hp.DATA_DIR/'processed'/'au2001.csv'
f.to_csv(path, index=False)
f.head()

Unnamed: 0,au2001,au_name,territory,region,rental_area
0,500100,Awanui,Far North District,Northland,Rural Far North
1,500202,Mangonui East,Far North District,Northland,Mangonui/Kaeo
2,500203,Taipa Bay-Mangonui,Far North District,Northland,Mangonui/Kaeo
3,500204,Mangonui West,Far North District,Northland,Rural Far North
4,500205,Ahipara,Far North District,Northland,Ahipara/Kaitaia


# Process area units and rental areas into GeoJSON

In [None]:
# Load the simplified 2001 area-unit Shapefile

path = hp.DATA_DIR/'collected'/'NZ_AU01_region_simplified'/'NZ_AU01_region.shp'
au = gpd.read_file(str(path))

# Assign hp.CRS_NZGD49 as the data's CRS, reproject to hp.CRS_WGS84,
# and switch to the notebook's column names
au.crs = hp.CRS_NZGD49
au = (
    au
    .to_crs(hp.CRS_WGS84)
    .rename(columns={'AU01': 'au2001', 'AU_DESC': 'au_name'})
)

# Quick look at the result: dimensions, first rows, and a plot of those rows
print(au.shape, au.head(), sep='\n')
au.head().plot()


In [None]:
# Drop the area units whose names contain a water-body keyword

# Keywords are matched anywhere in the name, ignoring case
pattern = r'ocean|strait|inlet|harbour'
cond = au['au_name'].str.contains(pattern, case=False)

# Keep only the rows that did not match
au = au.loc[~cond].copy()
print(au.shape)
au.head().plot()


In [None]:
# Merge geodata and metadata, drop null regions, and write to file

# Read the area-unit table from the 'processed' directory, which is where
# the table-preparation cell of this notebook saves it
path = hp.DATA_DIR/'processed'/'au2001.csv'
f = pd.read_csv(path, dtype={'au2001': str})

# Join on the column(s) the two frames share, then keep rows with a region
g = au.merge(f[['au2001', 'territory', 'region', 'rental_area']])
g = g[g['region'].notnull()].copy()

path = hp.DATA_DIR/'processed'/'au2001.geojson'
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

# Create geodata for rental areas 

In [None]:
# Dissolve area units by rental area

# Read the area-unit GeoJSON from the 'processed' directory, which is where
# the merge cell of this notebook saves it
path = hp.DATA_DIR/'processed'/'au2001.geojson'
au = gpd.read_file(str(path))

# One row per rental area; reset_index turns 'rental_area' back into a column
ra = au[['rental_area', 'region', 'territory', 'geometry']].dissolve(by='rental_area').reset_index()

path = hp.DATA_DIR/'processed'/'rental_areas.geojson'
with path.open('w') as tgt:
    tgt.write(ra.to_json())

ra.head()

# Choose representative points for rental areas using approximate centroids of property titles

In [None]:
# Load the dissolved rental areas
path = hp.DATA_DIR/'processed'/'rental_areas.geojson'
ra = gpd.read_file(str(path))

# Load the property titles and preview them
path = hp.DATA_DIR/'processed'/'property_titles.geojson'
t = gpd.read_file(str(path))

t.head()

In [5]:
# Spatial-join titles to rental areas

%time f = gpd.sjoin(t[['geometry', 'fid']], ra, op='intersects')
f.head()

NameError: name 't' is not defined

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_mean,rent_geo_mean,rent_synthetic_lower_quartile,rent_synthetic_upper_quartile,au_name,territory,region,rental_area
550378,563701,Flat or Apartment,1,1993-03-01,,,,,,Waikanae Beach,Kapiti Coast District,Wellington,Waikanae/Otaki
550379,563701,Flat or Apartment,2,1993-03-01,,,,,,Waikanae Beach,Kapiti Coast District,Wellington,Waikanae/Otaki
550380,563701,House,1,1993-03-01,,,,,,Waikanae Beach,Kapiti Coast District,Wellington,Waikanae/Otaki
550381,563701,House,2,1993-03-01,6.0,128.0,127.0,115.0,139.0,Waikanae Beach,Kapiti Coast District,Wellington,Waikanae/Otaki
550382,563701,House,3,1993-03-01,8.0,159.0,158.0,142.0,175.0,Waikanae Beach,Kapiti Coast District,Wellington,Waikanae/Otaki


In [None]:
# Choose representative points for rental areas

def pt(group):
    """
    Summarize one group of rows as a Series with three entries:
    'geometry', a representative point of the union of the group's geometries,
    and 'territory' and 'region', each taken from the group's first row.
    """
    merged = so.unary_union(group['geometry'])
    return pd.Series({
        'geometry': merged.representative_point(),
        'territory': group['territory'].iat[0],
        'region': group['region'].iat[0],
    })

# One representative point per rental area, with 'rental_area' as a column
points = f.groupby('rental_area').apply(pt).reset_index()
g = gpd.GeoDataFrame(points)

# Save as GeoJSON
path = hp.DATA_DIR/'processed'/'rental_points.geojson'
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

# Prepare regional slices of data

In [3]:
# Load the national rental areas and their representative points
path = hp.DATA_DIR/'processed'/'rental_areas.geojson'
ra = gpd.read_file(str(path))

path = hp.DATA_DIR/'processed'/'rental_points.geojson'
rap = gpd.read_file(str(path))

for region in hp.REGIONS:
    # Create the region's directory along with any missing parents;
    # this is a no-op when the directory already exists
    root = hp.get_path(region)
    root.mkdir(parents=True, exist_ok=True)

    # NOTE(review): assumes the 'region' column stores names in the form
    # str.capitalize() produces -- confirm for multi-word region names
    region_c = region.capitalize()

    # Write the region's slice of each layer to its own file
    for layer, key in [(ra, 'rental_areas'), (rap, 'rental_points')]:
        f = layer[layer['region'] == region_c].copy()
        path = hp.get_path(region, key)
        with path.open('w') as tgt:
            tgt.write(f.to_json())
        