In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2


# Download rent data

In [6]:
# Base location of the detailed rental bond CSV files and the files to fetch
base_url = 'https://www.mbie.govt.nz/info-services/housing-and-property/sector-information-and-statistics/rental-bond-data/detailed/'
file_names = [
    'detailed-lodged-bonds.csv',
    'detailed-mean-rents.csv',
    'detailed-geo-mean-rents.csv',
]

# Download each file into the 'collected' data directory
for file_name in file_names:
    url = base_url + file_name
    print("Getting and saving", url)
    # A timeout keeps the cell from hanging forever on a stalled connection
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of saving an error page as CSV
    r.raise_for_status()
    path = hp.DATA_DIR/'collected'/file_name
    # Write UTF-8 explicitly so the saved file does not depend on the
    # platform's default text encoding
    with path.open('w', encoding='utf-8') as tgt:
        tgt.write(r.text)


Getting and saving https://www.mbie.govt.nz/info-services/housing-and-property/sector-information-and-statistics/rental-bond-data/detailed/detailed-lodged-bonds.csv
Getting and saving https://www.mbie.govt.nz/info-services/housing-and-property/sector-information-and-statistics/rental-bond-data/detailed/detailed-mean-rents.csv
Getting and saving https://www.mbie.govt.nz/info-services/housing-and-property/sector-information-and-statistics/rental-bond-data/detailed/detailed-geo-mean-rents.csv


# Prepare rent data

In [7]:
# Reshape and merge all rent data sets

def clean(f, name):
    """
    Tidy one raw rent table into long format.

    Parameters
    ----------
    f : DataFrame
        Table with the columns 'SAU', 'Property_Type', 'Bedrooms' and one
        column per quarter; quarter columns are recognised by a '-' in
        their name.
    name : str
        Name to give the melted value column.

    Returns
    -------
    DataFrame
        A new frame with the columns 'au2001', 'property_type',
        '#bedrooms', 'quarter' and ``name``. Rows whose area unit, property
        type, or bedroom entry contains 'total' (case-insensitive) are
        dropped. The input frame is not modified.

    Raises
    ------
    KeyError
        If one of the three identifier columns is missing.
    """
    id_vars = ['au2001', 'property_type', '#bedrooms']

    # rename() returns a new frame, so the caller's frame is left untouched
    f = f.rename(columns={
        'SAU': 'au2001',
        'Property_Type': 'property_type',
        'Bedrooms': '#bedrooms'
    })

    # Drop subtotals.
    # Cast to str so a column parsed as numeric still works with .str, and
    # use na=False so missing entries count as "not a subtotal" instead of
    # putting NaN into the boolean mask.
    is_subtotal = pd.Series(False, index=f.index)
    for col in id_vars:
        is_subtotal |= f[col].astype(str).str.contains(
            'total', case=False, na=False)

    f = f[~is_subtotal].copy()

    # Reshape: one row per (area unit, property type, bedrooms, quarter)
    value_vars = [c for c in f.columns if '-' in c]
    f = pd.melt(f, id_vars=id_vars, value_vars=value_vars,
      var_name='quarter', value_name=name)

    return f

# Locate the three collected CSV files; names[i] is the value column
# that paths[i] contributes
paths = [
    hp.DATA_DIR/'collected'/stem
    for stem in (
        'detailed-lodged-bonds.csv',
        'detailed-mean-rents.csv',
        'detailed-geo-mean-rents.csv',
    )
]
names = ['rent_count', 'rent_mean', 'rent_geo_mean']

# Read each file with the SAU column as strings, then tidy it
frames = []
for path, name in zip(paths, names):
    f = pd.read_csv(path, dtype={'SAU': str})
    frames.append(clean(f, name))

# Fold the tidy frames together; pd.merge joins on their common columns
f = reduce(pd.merge, frames)

# Merge in region data
path = hp.get_path('au2001_csv')
g = pd.read_csv(path, dtype={'au2001': str})
f = f.merge(g)

# Write to file, then preview the rows that have a rent count
path = hp.get_path('rents')
f.to_csv(path, index=False)
f[f['rent_count'].notnull()].head()


Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_mean,rent_geo_mean,au_name,territory,region,rental_area
246,500202,Flat or Apartment,1,1995-03-01,8.0,82.0,82.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
266,500202,Flat or Apartment,1,1996-03-01,5.0,79.0,79.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
269,500202,House,3,1996-03-01,5.0,123.0,118.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
294,500202,House,3,1997-06-01,5.0,154.0,152.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
319,500202,House,3,1998-09-01,7.0,139.0,137.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo


In [8]:
# Show the latest 2 quarters as reported by the helpers module.
# The value is displayed as the cell's result rather than printed.
hp.get_latest_quarters(2)

['2018-06-01', '2018-09-01']

# Explore rents

In [9]:
# Load the 'rents' data set through the helpers module
# (presumably the file written earlier via hp.get_path('rents') -- confirm)
rents = hp.get_data('rents')
# List the distinct values found in the quarter column
print(rents['quarter'].unique())
# Preview the first rows
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_mean,rent_geo_mean,au_name,territory,region,rental_area
0,500100,House,2,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
1,500100,House,3,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
2,500100,House,2,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
3,500100,House,3,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
4,500100,House,2,1993-09-01,,,,Awanui,Far North District,Northland,Rural Far North


In [10]:
# Slice in time and aggregate, using the helper's default grouping.
# NOTE(review): the date is hard-coded; presumably it marks the earliest
# quarter to include -- confirm against hp.aggregate_rents
agg_rents = hp.aggregate_rents(rents, '2018-06-01')
agg_rents.head()

  d['rent_count']


Unnamed: 0,rental_area,#bedrooms,region,rent_count,rent_geo_mean,rent_mean,territory
0,Addington,1,Canterbury,123.0,212.0,213.845528,Christchurch City
1,Addington,2,Canterbury,63.0,319.187081,325.031746,Christchurch City
2,Addington,3,Canterbury,47.0,422.228096,427.361702,Christchurch City
3,Addington,4,Canterbury,6.0,466.0,472.0,Christchurch City
4,Addington,5+,Canterbury,0.0,,,Christchurch City


In [11]:
# Aggregate again, this time grouping by 'au2001' and number of bedrooms
f = hp.aggregate_rents(rents, '2018-06-01', groupby_cols=('au2001', '#bedrooms'))
# Keep only the rows whose region is Auckland and display them
cond = f['region'] == 'Auckland'
f[cond]

  d['rent_count']


Unnamed: 0,au2001,#bedrooms,region,rent_count,rent_geo_mean,rent_mean,territory
287,505300,1,Auckland,0.0,,,Rodney District
288,505300,2,Auckland,5.0,429.000000,430.000000,Rodney District
289,505300,3,Auckland,16.0,450.929943,453.312500,Rodney District
290,505300,4,Auckland,12.0,456.459393,457.250000,Rodney District
291,505300,5+,Auckland,0.0,,,Rodney District
292,505400,1,Auckland,0.0,,,Rodney District
293,505400,2,Auckland,0.0,,,Rodney District
294,505400,3,Auckland,0.0,,,Rodney District
295,505400,4,Auckland,0.0,,,Rodney District
296,505500,1,Auckland,7.0,289.000000,293.000000,Rodney District


In [12]:
# What fraction of rental data do we have by #bedrooms?

def hits(group):
    """
    Return a one-entry Series, 'hit_frac', holding the share of rows in
    ``group`` whose 'rent_mean' value is not null.
    """
    rent_mean = group['rent_mean']
    # count() ignores nulls, so this is non-null rows over all rows
    return pd.Series({'hit_frac': rent_mean.count()/len(rent_mean)})

date = '2018-06-01'

# Each entry pairs a printed heading with the columns to aggregate by
groupings = [
    ('census area units', ('au2001', '#bedrooms')),
    ('rental area units', ('rental_area', '#bedrooms')),
]

# For each grouping, report the hit fraction per bedroom count in Auckland
for heading, groupby_cols in groupings:
    f = hp.aggregate_rents(rents, date, groupby_cols=groupby_cols)
    cond = f['region'] == 'Auckland'
    print(heading)
    print(f[cond].copy().groupby('#bedrooms').apply(hits).reset_index())


  d['rent_count']


census area units
  #bedrooms  hit_frac
0         1  0.306502
1         2  0.595376
2         3  0.843305
3         4  0.489914
4        5+  0.095847
rental area units
  #bedrooms  hit_frac
0         1  0.666667
1         2  0.929293
2         3  1.000000
3         4  0.808081
4        5+  0.242424


# Select latest two quarters and slice into regional chunks

In [13]:
# Get latest two quarters of rents.
# The filter compares quarter values with the start date directly; this
# assumes quarters are ISO 'YYYY-MM-DD' strings, as in the printed output,
# so that string order matches date order.
start_date = hp.get_latest_quarters(2)[0]
rents = hp.get_data('rents')
rents = rents[rents['quarter'] >= start_date].copy()

# Create regional slices
for region in hp.REGIONS:
    # Build rents
    # NOTE(review): capitalize() upper-cases only the first letter, so a
    # multi-word region name would not match a title-cased value in the
    # 'region' column -- confirm hp.REGIONS holds single-word names only
    region_c = region.capitalize()
    region_rents = rents[rents['region'] == region_c].copy()
    print(region, region_rents['quarter'].unique(), '#rows =', region_rents.shape[0])
    path = hp.get_path('rents', region)
    print('  Saving to', path)

    # Create the region directory if it does not exist. parents=True also
    # creates missing intermediate directories, and exist_ok=True avoids
    # the check-then-create race of testing exists() first.
    path.parent.mkdir(parents=True, exist_ok=True)

    region_rents.to_csv(path, index=False)
    

auckland ['2018-06-01' '2018-09-01'] #rows = 5576
  Saving to /home/araichev/affordability_nz/data/processed/auckland/rents.csv
canterbury ['2018-06-01' '2018-09-01'] #rows = 2544
  Saving to /home/araichev/affordability_nz/data/processed/canterbury/rents.csv
wellington ['2018-06-01' '2018-09-01'] #rows = 2416
  Saving to /home/araichev/affordability_nz/data/processed/wellington/rents.csv


# JSONize regional rents for the web, grouping by rental area and number of bedrooms

In [14]:
# Build and save one JSON file of rents per region
for region in hp.REGIONS:
    # Load the region's rents through the helpers module
    region_rents = hp.get_data('rents', region)
    # Convert to a JSON-serializable object; judging by the printed output
    # it is a dict keyed by rental area, then by number of bedrooms
    d = hp.build_json_rents(region_rents)
    path = hp.get_path('rents_json', region)
    # Echo the full structure for a visual check (this output is large)
    print('**', region, '\n', d)
    with path.open('w') as tgt:
        json.dump(d, tgt)

    

  d['rent_count']


** auckland 
 {'Western Beaches/Rural': {'2': 466.0, '1': None, '3': 505.0, '4': 581.0}, 'Meadowbank': {'2': 516.0, '1': 390.0, '3': 706.0, '4': 879.0}, 'Grey Lynn/Arch Hill': {'2': 651.0, '1': 380.0, '3': 827.0, '4': 1023.0}, 'Pakuranga': {'2': 442.0, '1': None, '3': 548.0, '4': 649.0}, 'Central East': {'2': 540.0, '1': 395.0, '3': 649.0, '4': None}, 'Rothesay/Murrays/Mairangi Bays': {'2': 532.0, '1': 241.0, '3': 660.0, '4': 770.0}, 'Remuera': {'2': 595.0, '1': 467.0, '3': 812.0, '4': 1006.0}, 'Mangere Bridge/Airport': {'2': 436.0, '1': 342.0, '3': 576.0, '4': 687.0}, 'Kelston': {'2': 414.0, '1': 323.0, '3': 510.0, '4': 605.0}, 'Chatswood/Birkenhead/Northcote Point': {'2': 503.0, '1': 330.0, '3': 672.0, '4': 719.0}, 'Waiheke Island': {'2': 466.0, '1': 379.0, '3': 623.0, '4': 705.0}, 'Epsom': {'2': 531.0, '1': 274.0, '3': 804.0, '4': 1022.0}, 'Royal Oak/One Tree Hill': {'2': 518.0, '1': 373.0, '3': 667.0, '4': 802.0}, 'Hillsborough': {'2': 468.0, '1': None, '3': 581.0, '4': 701.0}, 'We

** wellington 
 {'Paramata/Mana/Pukerua Bay': {'2': None, '1': 293.0, '3': 516.0, '4': None}, 'Oriental Bay/Seatoun': {'2': 556.0, '1': 513.0, '3': 706.0, '4': None}, 'Island Bay/Melrose': {'2': 480.0, '1': 213.0, '3': 623.0, '4': None}, 'Te Aro': {'2': 558.0, '1': 371.0, '3': 689.0, '4': 847.0}, 'Titahi Bay/Onepoto/Elsdon': {'2': 434.0, '1': 237.0, '3': 434.0, '4': None}, 'Masterton': {'2': 248.0, '1': None, '3': 334.0, '4': 375.0}, 'Papakowhai/Whitby/Pauatahanui': {'2': None, '1': None, '3': 533.0, '4': 644.0}, 'Eastern Bays': {'2': 392.0, '1': None, '3': 490.0, '4': 675.0}, 'Trentham North/Wallaceville': {'2': 314.0, '1': None, '3': 448.0, '4': None}, 'Kelburn/Aro Valley': {'2': 492.0, '1': 328.0, '3': 668.0, '4': 850.0}, 'Khandallah': {'2': None, '1': 332.0, '3': 633.0, '4': 799.0}, 'Mt Cook': {'2': 512.0, '1': 258.0, '3': 618.0, '4': 709.0}, 'Epuni/Avalon': {'2': 415.0, '1': 171.0, '3': 516.0, '4': None}, 'Tawa/Grenada North': {'2': 406.0, '1': 211.0, '3': 500.0, '4': None}, 'Here