In [11]:
from pathlib import Path
import json
from functools import reduce

import pandas as pd
import geopandas as gpd
import shapely.ops as so

# Root directory for every input and output file used below, relative to
# the directory the notebook is run from
DATA_DIR = Path('../data')

# Coordinate reference systems as PROJ "init"-style dicts keyed by EPSG code.
# CRS_NZTM is not referenced in the cells visible here.
# NOTE(review): the 'init' dict form is deprecated in recent pyproj/geopandas
# releases in favour of strings such as 'EPSG:4326' -- confirm against the
# installed versions before changing it.
CRS_NZGD49 = {'init': 'epsg:27200', 'no_defs': True}
CRS_NZTM = {'init': 'epsg:2193', 'no_defs': True}
CRS_WGS84 = {'init': 'epsg:4326'}

%matplotlib inline

In [None]:
# Prepare area unit table

# Area unit metadata. SAU codes are read as strings rather than numbers,
# the columns are given the names used throughout the notebook, and the
# 'Water' column is discarded.
path = DATA_DIR/'raw'/'Geographical Table.csv'
f = (
    pd.read_csv(path, dtype={'SAU': str})
    .rename(columns={
        'SAU': 'au2001',
        'SAU.Desc': 'au_name',
        'TA': 'territory',
        'Region': 'region',
    })
    .drop('Water', axis=1)
)

# Lookup from area unit to its market rent description, renamed the same way
path = DATA_DIR/'raw'/'Market Rent Areas.csv'
g = (
    pd.read_csv(path, dtype={'SAU': str})
    .rename(columns={
        'SAU': 'au2001',
        'MARKET RENT DESCRIPTION': 'rental_area',
        'TA': 'territory',
        'AU NAME': 'au_name',
    })
)

# Clean rental areas
def clean(x):
    """
    Reduce a raw market rent description to a short rental area name.

    The description is split on ' - '. The second part is returned, unless
    it contains the word 'District', in which case the first part is
    returned instead.

    A description with no ' - ' separator is returned unchanged, and
    non-string values (for example NaN for a missing description) are
    passed through untouched, so mapping this over a column never raises
    on irregular rows.
    """
    if not isinstance(x, str):
        return x

    parts = x.split(' - ')
    if len(parts) < 2:
        # Nothing to choose between; keep the description as it is
        return x

    return parts[0] if 'District' in parts[1] else parts[1]

# Replace each raw description with its cleaned rental area name
g['rental_area'] = g['rental_area'].map(clean)


# Attach the rental area to each area unit. No merge keys are given, so
# pandas joins on the columns the two frames share, using its default
# inner join.
f = f.merge(g[['au2001', 'rental_area']])

# Save the combined table; later cells read this file back in
path = DATA_DIR/'au2001.csv'
f.to_csv(str(path), index=False)
f.head()

# Prepare geodata as GeoJSON

In [None]:
# Read Shapefile

path = DATA_DIR/'raw'/'NZ_AU01_region_simplified'/'NZ_AU01_region.shp'
au = gpd.read_file(str(path))
# Overwrite whatever CRS was read from the file, then reproject to WGS84.
# NOTE(review): assumes the Shapefile coordinates really are in the
# CRS_NZGD49 system -- confirm against the data source.
au.crs = CRS_NZGD49
au = au.to_crs(CRS_WGS84)
# Use the same column names as the area unit table
au = au.rename(columns={'AU01': 'au2001', 'AU_DESC': 'au_name'})
print(au.shape)
print(au.head())
# Quick visual check of the first few geometries
au.head().plot()


In [None]:
# Remove water area units

# Area units whose name mentions any of these words (in any letter case)
# are treated as water and dropped.
pattern = r'ocean|strait|inlet|harbour'
# na=False makes a missing name count as "not water", so the mask is always
# boolean and can be negated with ~ even when au_name contains nulls.
cond = au['au_name'].str.contains(pattern, case=False, na=False)
au = au[~cond].copy()
print(au.shape)
au.head().plot()


In [None]:
# Merge geodata and metadata, drop null regions, and write to file

path = DATA_DIR/'au2001.csv'
f = pd.read_csv(path, dtype={'au2001': str})

# No merge keys are given, so the join uses the columns shared by both
# frames with pandas' default inner join; area units without a match in f
# are therefore dropped.
g = au.merge(f[['au2001', 'territory', 'region', 'rental_area']])
g = g[g['region'].notnull()].copy()

# Serialise the merged geodata to GeoJSON text and write it out
path = DATA_DIR/'au2001.geojson'
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

# Create geodata for rental areas 

In [None]:
# Dissolve area units by rental area

path = DATA_DIR/'au2001.geojson'
au = gpd.read_file(str(path))

# One row per rental area, with the member geometries merged together.
# No aggfunc is passed, so region and territory use dissolve's default
# aggregation.
# NOTE(review): presumably the first value per group -- confirm for the
# installed geopandas version.
ra = au[['rental_area', 'region', 'territory', 'geometry']].dissolve(by='rental_area').reset_index()

# Write the dissolved rental areas out as GeoJSON text
path = DATA_DIR/'rental_areas.geojson'
with path.open('w') as tgt:
    tgt.write(ra.to_json())

ra.head()

# Prepare rent data

In [None]:
# Reshape and merge all rent data sets

def clean(f, name):
    """
    Reshape one raw rent table from wide to long format.

    Note that this redefines the ``clean`` helper used earlier in the
    notebook for rental area descriptions.

    Parameters
    ----------
    f : DataFrame
        Raw table with the columns 'SAU', 'Property_Type' and 'Bedrooms'
        plus one column per period. Period columns are recognised by a
        '-' in their name.
    name : str
        Name given to the value column of the reshaped table.

    Returns
    -------
    DataFrame
        A new frame with the columns 'au2001', 'property_type',
        '#bedrooms', 'quarter' and ``name``. Rows whose area unit, property
        type or bedroom entry contains 'total' (in any letter case) are
        removed. The input frame is not modified.
    """
    id_vars = ['au2001', 'property_type', '#bedrooms']
    f = f.rename(columns={
        'SAU': 'au2001',
        'Property_Type': 'property_type',
        'Bedrooms': '#bedrooms'
    })

    # Drop subtotals. Values are compared as text and missing entries count
    # as "not a subtotal", so the mask stays boolean even when a column
    # holds nulls or was parsed as numeric.
    is_subtotal = pd.Series(False, index=f.index)
    for col in id_vars:
        is_subtotal |= f[col].astype(str).str.contains(
            'total', case=False, na=False)

    f = f[~is_subtotal]

    # Reshape: every period column becomes a (quarter, value) pair
    value_vars = [c for c in f.columns if '-' in c]
    f = pd.melt(f, id_vars=id_vars, value_vars=value_vars,
      var_name='quarter', value_name=name)

    return f

# Raw rent files, listed in the same order as the value column names below
paths = [DATA_DIR/'raw'/filename for filename in (
    'Detailed Bonds Lodged.csv',
    'Detailed Mean Rents.csv',
    'Detailed Geomean Rents.csv',
    'Detailed Synthetic Lower Quartile Rents.csv',
    'Detailed Synthetic Upper Quartile Rents.csv',
)]
names = [
    'rent_count',
    'rent_mean',
    'rent_geo_mean',
    'rent_synthetic_lower_quartile',
    'rent_synthetic_upper_quartile',
]

# Read each file with SAU codes as strings and reshape it to long format
frames = []
for path, name in zip(paths, names):
    f = clean(pd.read_csv(path, dtype={'SAU': str}), name)
    frames.append(f)

# Fold the long tables into one frame. pd.merge is called without keys, so
# each step joins on the columns the two frames have in common.
f = reduce(pd.merge, frames)

# Merge in region data
path = DATA_DIR/'au2001.csv'
g = pd.read_csv(path, dtype={'au2001': str})
f = f.merge(g)

# Write to file
path = DATA_DIR/'rents.csv'
f.to_csv(str(path), index=False)
f[f['rent_count'].notnull()].head()

# Explore rents

In [None]:
# Reload the merged rent table from disk, keeping area unit codes as strings
path = DATA_DIR/'rents.csv'
f = pd.read_csv(path, dtype={'au2001': str})
f.head()


In [None]:
# Slice in time and aggregate 

def aggregate_rents(f, date, groupby_cols=('rental_area', '#bedrooms')):
    """
    Aggregate rent statistics over all quarters from ``date`` onwards.

    Parameters
    ----------
    f : DataFrame
        Rent table with the columns 'quarter', 'territory', 'region',
        'rent_count', 'rent_mean' and 'rent_geo_mean', plus every column
        named in ``groupby_cols``.
    date : same type as the values in ``f['quarter']``
        Rows with ``quarter >= date`` are kept. The comparison is done on
        the values as stored.
        NOTE(review): for string quarters this is a lexicographic
        comparison, which matches date order only for ISO-style labels
        such as '2016-12-01' -- confirm the label format.
    groupby_cols : str or sequence of str
        Column name(s) to group by.

    Returns
    -------
    DataFrame
        One row per group with the grouping columns and:

        - territory, region: taken from the first row of the group
        - rent_count: sum of the counts
        - rent_mean: count-weighted arithmetic mean of rent_mean
        - rent_geo_mean: count-weighted geometric mean of rent_geo_mean

        The sums and products skip null entries (pandas' default).
    """
    # groupby treats a tuple as one single key in current pandas, so the
    # grouping columns are always handed over as a list.
    if isinstance(groupby_cols, str):
        keys = [groupby_cols]
    else:
        keys = list(groupby_cols)

    recent = f[f['quarter'] >= date]

    def my_agg(group):
        d = {}
        d['territory'] = group['territory'].iat[0]
        d['region'] = group['region'].iat[0]
        d['rent_count'] = group['rent_count'].sum()
        # Each row is weighted by its share of the group's total count
        d['rent_mean'] = (group['rent_mean']*group['rent_count']).sum()/d['rent_count']
        d['rent_geo_mean'] = (group['rent_geo_mean']**(group['rent_count']/d['rent_count'])).prod()
        return pd.Series(d)

    g = recent.groupby(keys).apply(my_agg).reset_index()
    return g

# Aggregate every quarter labelled '2016-12-01' or later, using the default
# grouping columns of aggregate_rents
agg_rents = aggregate_rents(f, '2016-12-01')
agg_rents

In [None]:
# Keep only the aggregated rows for the Canterbury region
cond = agg_rents['region'] == 'Canterbury'
a = agg_rents[cond].copy()

def hits(group):
    """
    Return the share of rows in ``group`` whose 'rent_count' is not null.

    The result is a one-element Series labelled 'hit_frac'.
    """
    counts = group['rent_count']
    n_present = counts.dropna().shape[0]
    n_total = counts.shape[0]
    return pd.Series({'hit_frac': n_present/n_total})

# For each bedroom category, the fraction of Canterbury rows with a
# non-null rent count
a.groupby('#bedrooms').apply(hits).reset_index()

# Choose representative points for rental areas using property titles

In [4]:
# Rental area geometries written by the dissolve step earlier in the notebook
path = DATA_DIR/'rental_areas.geojson'
ra = gpd.read_file(str(path))

# Property title geodata.
# NOTE(review): no cell visible in this notebook creates this file --
# confirm where it comes from.
path = DATA_DIR/'property_titles.geojson'
t = gpd.read_file(str(path))
t.head()

Unnamed: 0,LGD_ID,OWNERS,PAR_ID,TTL_TITLE,fid,geometry
0,21149591,Yeung J,6683994,122991,1,POINT (174.9064763832665 -36.95116400076868)
1,3410291,Vajsakovic D,5154438,NA11A/102,2,POINT (174.6539389831913 -36.82909730112425)
2,3414488,Grbic I L:Lowe J D,4826167,NA32C/33,3,POINT (173.3733926499484 -34.87991641687911)
3,3421826,Kim K,5013176,NA459/84,4,POINT (174.7440597997697 -36.79408678372536)
4,3438504,Skeen R,5065364,NA22A/1323,5,POINT (174.6284382829138 -36.85095825040765)


In [10]:
# Spatially join each property title geometry to the rental area it
# intersects. This is slow: the recorded run took a little over five minutes.
# NOTE(review): newer geopandas releases name the `op` argument `predicate`
# -- confirm against the installed version before re-running.
%time f = gpd.sjoin(t[['geometry', 'fid']], ra, op='intersects')
f.head()

CPU times: user 5min 15s, sys: 560 ms, total: 5min 16s
Wall time: 5min 16s


Unnamed: 0,geometry,fid,index_right,id,region,rental_area,territory
0,POINT (174.9064763832665 -36.95116400076868),1,39,39,Auckland,Dannemora,Manukau City
227,POINT (174.920611433581 -36.92607408396498),228,39,39,Auckland,Dannemora,Manukau City
314,POINT (174.9123792005215 -36.95111480042863),315,39,39,Auckland,Dannemora,Manukau City
323,POINT (174.9176956665624 -36.95310901712503),324,39,39,Auckland,Dannemora,Manukau City
329,POINT (174.9017998884318 -36.94410129932859),330,39,39,Auckland,Dannemora,Manukau City


In [20]:
def pt(group):
    """
    Summarise one group of joined rows as a single representative point.

    All geometries in the group are unioned and the union's representative
    point is taken. Territory and region are copied from the group's first
    row. Returns a Series with the entries 'geometry', 'territory' and
    'region'.
    """
    merged = so.unary_union(group['geometry'])
    d = {
        'geometry': merged.representative_point(),
        'territory': group['territory'].iat[0],
        'region': group['region'].iat[0],
    }
    return pd.Series(d)

# One representative point per rental area, wrapped back into a GeoDataFrame
g = gpd.GeoDataFrame(f.groupby('rental_area').apply(pt).reset_index())

# Write the points out as GeoJSON text
path = DATA_DIR/'rental_area_points.geojson'
with path.open('w') as tgt:
    tgt.write(g.to_json())

g.head()

Unnamed: 0,rental_area,geometry,region,territory
0,Addington,POINT (172.6175536803248 -43.54383789250249),Canterbury,Christchurch City
1,Ahipara/Kaitaia,POINT (173.2510482164061 -35.12922498378152),Northland,Far North District
2,Albany,POINT (174.7136142163085 -36.71740768433499),Auckland,North Shore City
3,Aranui/Bromley/Bexley,POINT (172.6987469999612 -43.52131553561674),Canterbury,Christchurch City
4,Ashburton,POINT (171.720879949281 -43.85885983546948),Canterbury,Ashburton District


In [21]:
# Show the representative points that fall in the Auckland region
g[g['region'] == 'Auckland']

Unnamed: 0,rental_area,geometry,region,territory
2,Albany,POINT (174.7136142163085 -36.71740768433499),Auckland,North Shore City
6,Avondale,POINT (174.689014050193 -36.89612845095292),Auckland,Auckland City
10,Balmoral,POINT (174.7513445789724 -36.89353312125105),Auckland,Auckland City
13,Beachhaven/Birkdale,POINT (174.6988893987124 -36.79717233861077),Auckland,North Shore City
18,Blockhouse Bay/New Windsor,POINT (174.7059674167557 -36.91481011710743),Auckland,Auckland City
19,Botony Downs,POINT (174.9213767017827 -36.91701523623668),Auckland,Manukau City
21,Browns Bay,POINT (174.7347763337456 -36.71729061740317),Auckland,North Shore City
22,Bucklands Beach,POINT (174.9105111501434 -36.87507566798835),Auckland,Manukau City
30,Central East,POINT (174.7686467667584 -36.84997406733503),Auckland,Auckland City
34,Central West,POINT (174.7603149671217 -36.85150535125068),Auckland,Auckland City
