In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
from requests_html import HTMLSession
    
import helpers as hp

%load_ext autoreload
%autoreload 2


In [8]:
def get_rent_data_urls():
    """
    Scrape MBIE's website to get the location of three files of rental bond data.
    Return a list of three dictionaries, each with the following keys and values.

    - ``'kind'``: the kind of data; one of 'rent_count', 'rent_mean', 'rent_geo_mean'
    - ``'filename'``: the name of the CSV data file
    - ``'url'``: the URL of the CSV data file
    - ``'target_path'``: the local path (under ``hp.DATA_DIR/'collected'``)
      where the CSV data file should be saved

    The dictionaries are returned in the order rent_count, rent_mean,
    rent_geo_mean.

    Raise an ``IndexError`` if the page contains no link for one of the files.
    """
    src_url = "https://www.mbie.govt.nz/building-and-energy/tenancy-and-housing/rental-bond-data/"

    # (kind, filename) pairs of interest, in output order
    wanted = [
        ("rent_count", "detailed-lodged-bonds.csv"),
        ("rent_mean", "detailed-mean-rents.csv"),
        ("rent_geo_mean", "detailed-geo-mean-rents.csv"),
    ]

    # Scrape the HTML page above to get the data urls of interest.
    # The session is closed as soon as the links have been extracted.
    with HTMLSession() as session:
        r = session.get(src_url)
        # ``absolute_links`` is a set; sort so that the URL picked below is
        # deterministic should several links match the same filename
        urls = sorted(
            link for link in r.html.absolute_links
            if "Quarterly" in link
            and link.split("/")[-1].startswith("detailed-")
        )
        status_code = r.status_code

    # Filter urls to finer set and include some metadata
    result = []
    for kind, filename in wanted:
        matches = [u for u in urls if u.endswith(filename)]
        if not matches:
            # IndexError is kept (it is what ``[...][0]`` used to raise),
            # but now it says what is missing and why that might be
            raise IndexError(
                "No link ending in {!r} found at {} (HTTP status {})".format(
                    filename, src_url, status_code
                )
            )
        result.append({
            "kind": kind,
            "filename": filename,
            "url": matches[0],
            "target_path": hp.DATA_DIR/'collected'/filename,
        })

    return result


# Download rent data

In [10]:
data = get_rent_data_urls()
print(data)

for d in data:
    r = requests.get(d['url'])
    if not r.ok:
        # Best effort: report the failure and carry on with the other files
        print("Failed to get", d['filename'])
        continue

    print("Getting and saving", d['filename'])
    path = d["target_path"]
    # Make sure the target directory exists (e.g. on a fresh checkout)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Write explicitly as UTF-8, which is what ``pd.read_csv`` assumes by
    # default when the file is read back, instead of the platform's locale
    # encoding; ``newline=''`` stops line endings in the payload from being
    # translated a second time on platforms where os.linesep != '\n'.
    with path.open('w', encoding='utf-8', newline='') as tgt:
        tgt.write(r.text)


Getting and saving detailed-lodged-bonds.csv
Getting and saving detailed-mean-rents.csv
Getting and saving detailed-geo-mean-rents.csv


# Prepare rent data

In [12]:
# Reshape and merge all rent data sets

def clean(f, kind):
    """
    Tidy one raw rental bond table into long format.

    Given a DataFrame ``f`` with the columns 'SAU', 'Property_Type', 'Bedrooms'
    and one column per quarter (recognised by a '-' in the column name),
    do the following and return the result as a new DataFrame.

    - Rename the three identifier columns to 'au2001', 'property_type',
      '#bedrooms'.
    - Drop subtotal rows, that is, rows where any identifier column contains
      the text 'total' (case-insensitive).
    - Melt the quarter columns into a 'quarter' column and a value column
      named ``kind``.

    The input DataFrame is not modified.
    """
    id_vars = ['au2001', 'property_type', '#bedrooms']
    f = f.rename(columns={
        'SAU': 'au2001',
        'Property_Type': 'property_type',
        'Bedrooms': '#bedrooms'
    })

    # Drop subtotals.
    # Compare on a string view of each column so that a column parsed as
    # numeric (e.g. bedrooms without a '5+' entry) does not break the ``.str``
    # accessor; missing values never count as subtotals.
    is_total = pd.Series(False, index=f.index)
    for col in id_vars:
        is_total |= f[col].astype(str).str.contains('total', case=False, na=False)

    f = f[~is_total]

    # Reshape
    value_vars = [c for c in f.columns if '-' in c]
    return pd.melt(f, id_vars=id_vars, value_vars=value_vars,
      var_name='quarter', value_name=kind)

# Load each collected CSV and tidy it into long format
frames = []
for item in data:
    path = item["target_path"]
    print(path)
    raw = pd.read_csv(path, dtype={'SAU': str})
    frames.append(clean(raw, item["kind"]))

# Join the tidy tables pairwise on the columns they share
f = reduce(pd.merge, frames)

# Attach the region data for each area unit
g = pd.read_csv(hp.get_path('au2001_csv'), dtype={'au2001': str})
f = pd.merge(f, g)

# Save the merged table, then preview rows that have a rent count
path = hp.get_path('rents')
f.to_csv(path, index=False)
f.dropna(subset=['rent_count']).head()


/home/araichev/affordability_nz/data/collected/detailed-lodged-bonds.csv
/home/araichev/affordability_nz/data/collected/detailed-mean-rents.csv
/home/araichev/affordability_nz/data/collected/detailed-geo-mean-rents.csv


Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_mean,rent_geo_mean,au_name,territory,region,rental_area
256,500202,Flat or Apartment,1,1995-03-01,8.0,82.0,82.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
280,500202,Flat or Apartment,1,1996-03-01,5.0,79.0,79.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
284,500202,House,3,1996-03-01,5.0,123.0,118.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
314,500202,House,3,1997-06-01,5.0,154.0,152.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
344,500202,House,3,1998-09-01,7.0,139.0,137.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo


In [13]:
# Print latest 2 quarters
# NOTE(review): `hp.get_latest_quarters` lives in helpers.py; judging by the
# output recorded below it returns quarter dates as ISO strings, oldest
# first -- confirm against the helper before relying on the order.
hp.get_latest_quarters(2)

['2018-09-01', '2018-12-01']

# Explore rents

In [14]:
# Load the rents table through the helper module, list the distinct quarters
# it contains, and preview the first rows.
# NOTE(review): presumably `hp.get_data('rents')` reads the CSV written to
# `hp.get_path('rents')` in the "Prepare rent data" section -- confirm in helpers.py.
rents = hp.get_data('rents')
print(rents['quarter'].unique())
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_mean,rent_geo_mean,au_name,territory,region,rental_area
0,500100,House,2,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
1,500100,House,3,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
2,500100,House,2,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
3,500100,House,3,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
4,500100,House,2,1993-09-01,,,,Awanui,Far North District,Northland,Rural Far North


In [15]:
# Slice in time and aggregate
# NOTE(review): the date '2018-06-01' is hard-coded here and in the cells below,
# while the output recorded for `hp.get_latest_quarters(2)` shows later quarters;
# how `hp.aggregate_rents` uses the date (start of slice?) is not visible in this
# notebook -- confirm in helpers.py and consider deriving the date instead.
agg_rents = hp.aggregate_rents(rents, '2018-06-01')
agg_rents.head()

  g = f.groupby(groupby_cols).apply(my_agg).reset_index()
  d['rent_count']


Unnamed: 0,rental_area,#bedrooms,territory,region,rent_count,rent_mean,rent_geo_mean
0,Addington,1,Christchurch City,Canterbury,176.0,215.215909,210.847596
1,Addington,2,Christchurch City,Canterbury,88.0,323.079545,314.383257
2,Addington,3,Christchurch City,Canterbury,64.0,421.828125,415.562506
3,Addington,4,Christchurch City,Canterbury,6.0,472.0,466.0
4,Addington,5+,Christchurch City,Canterbury,0.0,,


In [16]:
# Aggregate by census area unit ('au2001') and number of bedrooms instead of
# the helper's default grouping, then show the rows for the Auckland region.
# NOTE(review): the last line displays the whole filtered frame, not a preview;
# use `.head()` if the full table is not needed in the saved notebook.
f = hp.aggregate_rents(rents, '2018-06-01', groupby_cols=('au2001', '#bedrooms'))
cond = f['region'] == 'Auckland'
f[cond]

Unnamed: 0,au2001,#bedrooms,territory,region,rent_count,rent_mean,rent_geo_mean
286,505300,1,Rodney District,Auckland,0.0,,
287,505300,2,Rodney District,Auckland,5.0,430.000000,429.000000
288,505300,3,Rodney District,Auckland,22.0,439.863636,436.726565
289,505300,4,Rodney District,Auckland,12.0,457.250000,456.459393
290,505300,5+,Rodney District,Auckland,0.0,,
291,505400,1,Rodney District,Auckland,0.0,,
292,505400,2,Rodney District,Auckland,0.0,,
293,505400,3,Rodney District,Auckland,0.0,,
294,505400,4,Rodney District,Auckland,0.0,,
295,505500,1,Rodney District,Auckland,7.0,293.000000,289.000000


In [17]:
# What fraction of rental data do we have by #bedrooms?

def hits(group):
    """
    Return a one-item Series with key 'hit_frac': the fraction of rows in
    ``group`` whose 'rent_mean' value is not missing.
    """
    rent_mean = group['rent_mean']
    return pd.Series({'hit_frac': rent_mean.count() / len(rent_mean)})

date = '2018-06-01'

# Same report for each kind of spatial unit: aggregate, keep Auckland,
# then compute the hit fraction per number of bedrooms
for label, unit_col in [
    ('census area units', 'au2001'),
    ('rental area units', 'rental_area'),
]:
    f = hp.aggregate_rents(rents, date, groupby_cols=(unit_col, '#bedrooms'))
    cond = f['region'] == 'Auckland'
    print(label)
    print(f[cond].copy().groupby('#bedrooms').apply(hits).reset_index())


census area units
  #bedrooms  hit_frac
0         1  0.340557
1         2  0.650289
2         3  0.863248
3         4  0.536023
4        5+  0.127796
rental area units
  #bedrooms  hit_frac
0         1  0.696970
1         2  0.959596
2         3  1.000000
3         4  0.838384
4        5+  0.282828


# Select latest two quarters and slice into regional chunks

In [18]:
# Get latest two quarters of rents.
# Quarters are compared as strings, which matches date order for the
# 'YYYY-MM-DD' values seen in the 'quarter' column.
start_date = hp.get_latest_quarters(2)[0]
rents = hp.get_data('rents')
rents = rents[rents['quarter'] >= start_date].copy()

# Create regional slices
for region in hp.REGIONS:
    # Build rents
    # NOTE(review): `capitalize()` only upper-cases the first letter, which
    # matches single-word region names such as 'Auckland'; a multi-word
    # region in hp.REGIONS would silently match no rows -- confirm.
    region_c = region.capitalize()
    region_rents = rents[rents['region'] == region_c].copy()
    print(region, region_rents['quarter'].unique(), '#rows =', region_rents.shape[0])
    path = hp.get_path('rents', region)
    print('  Saving to', path)

    # Create region directory if it does not exist, including any missing
    # ancestors, without failing if it appears between a check and the call
    path.parent.mkdir(parents=True, exist_ok=True)

    region_rents.to_csv(path, index=False)
    

auckland ['2018-09-01' '2018-12-01'] #rows = 5600
  Saving to /home/araichev/affordability_nz/data/processed/auckland/rents.csv
canterbury ['2018-09-01' '2018-12-01'] #rows = 2542
  Saving to /home/araichev/affordability_nz/data/processed/canterbury/rents.csv
wellington ['2018-09-01' '2018-12-01'] #rows = 2420
  Saving to /home/araichev/affordability_nz/data/processed/wellington/rents.csv


# JSONize regional rents for the web, grouping by rental area and number of bedrooms

In [19]:
# For every region: load its rents slice, convert it to a JSON-ready
# structure with the helper, echo it, and write it to the region's JSON file
for region in hp.REGIONS:
    rents_json = hp.build_json_rents(hp.get_data('rents', region))
    path = hp.get_path('rents_json', region)
    print('**', region, '\n', rents_json)
    with path.open('w') as out_file:
        json.dump(rents_json, out_file)

    

** auckland 
 {'Albany': {'1': 304.0, '2': 505.0, '3': 587.0, '4': 710.0}, 'Avondale': {'1': 247.0, '2': 436.0, '3': 545.0, '4': 552.0}, 'Balmoral': {'1': 338.0, '2': 498.0, '3': 712.0, '4': 771.0}, 'Beachhaven/Birkdale': {'1': None, '2': 464.0, '3': 564.0, '4': 593.0}, 'Blockhouse Bay/New Windsor': {'1': 238.0, '2': 501.0, '3': 555.0, '4': 672.0}, 'Botony Downs': {'1': None, '2': 465.0, '3': 597.0, '4': 689.0}, 'Browns Bay': {'1': 408.0, '2': 493.0, '3': 607.0, '4': 693.0}, 'Bucklands Beach': {'1': None, '2': 564.0, '3': 630.0, '4': 777.0}, 'Central East': {'1': 396.0, '2': 544.0, '3': 785.0, '4': None}, 'Central West': {'1': 397.0, '2': 514.0, '3': 687.0, '4': None}, 'Chatswood/Birkenhead/Northcote Point': {'1': 359.0, '2': 490.0, '3': 645.0, '4': 685.0}, 'Dannemora': {'1': None, '2': 509.0, '3': 627.0, '4': 696.0}, 'Devonport': {'1': 449.0, '2': 513.0, '3': 747.0, '4': 916.0}, 'East Coast Bays': {'1': None, '2': 532.0, '3': 685.0, '4': 785.0}, 'Eden Terrace': {'1': 441.0, '2': 585.0

** wellington 
 {'Brooklyn': {'1': 275.0, '2': 467.0, '3': 626.0, '4': 795.0}, 'Carterton/South Wairarapa': {'1': None, '2': 318.0, '3': 374.0, '4': None}, 'Eastern Bays': {'1': None, '2': None, '3': 587.0, '4': 698.0}, 'Epuni/Avalon': {'1': 196.0, '2': 444.0, '3': 530.0, '4': None}, 'Hataitai': {'1': 346.0, '2': 455.0, '3': 672.0, '4': 728.0}, 'Heretaunga/Silverstream': {'1': None, '2': 328.0, '3': 480.0, '4': None}, 'Hutt Central/Waterloo': {'1': 294.0, '2': 433.0, '3': 568.0, '4': 724.0}, 'Island Bay/Melrose': {'1': 202.0, '2': 476.0, '3': 660.0, '4': None}, 'Johnsonville/Newlands': {'1': 207.0, '2': 431.0, '3': 530.0, '4': 678.0}, 'Karori': {'1': None, '2': 521.0, '3': 585.0, '4': 750.0}, 'Karori South/Makara': {'1': None, '2': 476.0, '3': 534.0, '4': None}, 'Kelburn/Aro Valley': {'1': 334.0, '2': 493.0, '3': 682.0, '4': 929.0}, 'Khandallah': {'1': 341.0, '2': None, '3': 726.0, '4': 805.0}, 'Kilbirnie/Lyall Bay': {'1': 301.0, '2': 466.0, '3': 621.0, '4': None}, 'Kingston/Happy Vall