In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2


# Prepare rent data

In [5]:
# Reshape and merge all rent data sets

def clean(f, name):
    """
    Tidy one raw rent table into long format.

    Parameters
    ----------
    f : DataFrame
        Raw table with the columns ``'SAU'``, ``'Property_Type'`` and
        ``'Bedrooms'`` plus one value column per quarter. Quarter columns
        are recognised by a ``'-'`` in their name.
    name : str
        Name given to the melted value column.

    Returns
    -------
    DataFrame
        Columns ``'au2001'``, ``'property_type'``, ``'#bedrooms'``,
        ``'quarter'`` and ``name``, with subtotal rows removed.
        The input frame is not modified.
    """
    id_vars = ['au2001', 'property_type', '#bedrooms']

    # rename() returns a new frame, so the caller's frame stays untouched
    f = f.rename(columns={
        'SAU': 'au2001',
        'Property_Type': 'property_type',
        'Bedrooms': '#bedrooms'
    })

    # Drop subtotals: any row whose identifier columns mention 'total'.
    # Values are cast to str so that numeric identifier columns (e.g. bedroom
    # counts parsed as integers) do not break the .str accessor, and
    # na=False makes missing values count explicitly as "not a subtotal".
    is_subtotal = pd.Series(False, index=f.index)
    for col in id_vars:
        is_subtotal |= f[col].astype(str).str.contains(
            'total', case=False, na=False)

    f = f[~is_subtotal].copy()

    # Reshape: one row per identifier combination and quarter
    value_vars = [c for c in f.columns if isinstance(c, str) and '-' in c]
    f = pd.melt(f, id_vars=id_vars, value_vars=value_vars,
      var_name='quarter', value_name=name)

    return f

# Raw source tables, listed in the same order as the value-column names below
raw_dir = hp.DATA_DIR/'raw'
paths = [
    raw_dir/'Detailed Bonds Lodged.csv',
    raw_dir/'Detailed Geomean Rents.csv',
    raw_dir/'Detailed Mean Rents.csv',
]
names = ['rent_count', 'rent_geo_mean', 'rent_mean']

# Read each table (area unit codes kept as strings) and tidy it
frames = [
    clean(pd.read_csv(path, dtype={'SAU': str}), name)
    for path, name in zip(paths, names)
]

# Chain-merge the tidy frames; pandas joins on the columns they share
f = reduce(pd.merge, frames)

# Attach the area unit lookup table
path = hp.DATA_DIR/'au2001.csv'
g = pd.read_csv(path, dtype={'au2001': str})
f = pd.merge(f, g)

# Save the merged table, then preview the rows that have a rent count
path = hp.DATA_DIR/'rents.csv'
f.to_csv(str(path), index=False)
has_count = f['rent_count'].notna()
f[has_count].head()

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_geo_mean,rent_mean,au_name,territory,region,rental_area
236,500202,Flat or Apartment,1,1995-03-01,8.0,82.0,82.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
256,500202,Flat or Apartment,1,1996-03-01,5.0,79.0,79.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
259,500202,House,3,1996-03-01,5.0,118.0,123.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
284,500202,House,3,1997-06-01,5.0,152.0,154.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo
309,500202,House,3,1998-09-01,7.0,137.0,139.0,Mangonui East,Far North District,Northland,Mangonui/Kaeo


# Explore rents

In [7]:
# Load the merged rents table, keeping area unit codes as strings
path = hp.DATA_DIR / 'rents.csv'
rents = pd.read_csv(path, dtype={'au2001': str})

# List the distinct quarters present, then preview the first rows
quarters = rents['quarter'].unique()
print(quarters)
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_geo_mean,rent_mean,au_name,territory,region,rental_area
0,500100,House,2,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
1,500100,House,3,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
2,500100,House,2,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
3,500100,House,3,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
4,500100,House,2,1993-09-01,,,,Awanui,Far North District,Northland,Rural Far North


In [8]:
# Slice in time and aggregate 

def hits(group):
    """
    Return the fraction of rows in ``group`` with a non-null ``'rent_count'``.

    Parameters
    ----------
    group : DataFrame
        Must contain a ``'rent_count'`` column.

    Returns
    -------
    Series
        A single entry ``'hit_frac'``: the number of non-null rent counts
        divided by the number of rows, or NaN when ``group`` has no rows.
    """
    counts = group['rent_count']
    n_rows = len(counts)
    # An empty group has no defined fraction; report NaN instead of
    # raising ZeroDivisionError
    hit_frac = counts.notna().sum() / n_rows if n_rows else float('nan')
    return pd.Series({'hit_frac': hit_frac})

# Aggregate the rents with the helper's default grouping and preview the result
# NOTE(review): how aggregate_rents uses the date argument is defined in
# helpers.py and not visible here -- confirm before changing the date.
quarter = '2017-03-01'
agg_rents = hp.aggregate_rents(rents, quarter)
agg_rents.head()

Unnamed: 0,rental_area,#bedrooms,region,rent_count,rent_geo_mean,rent_mean,territory
0,Addington,1,Canterbury,123.0,210.047781,214.154472,Christchurch City
1,Addington,2,Canterbury,79.0,319.482861,325.822785,Christchurch City
2,Addington,3,Canterbury,73.0,403.493954,407.630137,Christchurch City
3,Addington,4,Canterbury,5.0,488.0,494.0,Christchurch City
4,Addington,5+,Canterbury,,,,Christchurch City


In [11]:
# What fraction of rental data do we have by #bedrooms?

date = '2017-03-01'

# (printed label, grouping columns) for each spatial resolution, in print order
resolutions = [
    ('census area units', ('au2001', '#bedrooms')),
    ('rental area units', ('rental_area', '#bedrooms')),
]

for label, groupby_cols in resolutions:
    f = hp.aggregate_rents(rents, date, groupby_cols=groupby_cols)
    # Restrict to Auckland, then compute the hit fraction per bedroom count
    cond = f['region'] == 'Auckland'
    print(label)
    print(f[cond].copy().groupby('#bedrooms').apply(hits).reset_index())


census area units
  #bedrooms  hit_frac
0         1  0.341693
1         2  0.613372
2         3  0.817664
3         4  0.544669
4        5+  0.099042
rental area units
  #bedrooms  hit_frac
0         1  0.707071
1         2  0.939394
2         3  1.000000
3         4  0.858586
4        5+  0.232323


# Prepare regional slices of data

In [12]:
# Use full history rental data

# Reload the complete rents table; area unit codes are kept as strings
path = hp.DATA_DIR / 'rents.csv'
rents = pd.read_csv(path, dtype={'au2001': str})

# Write one rents.csv per region into a directory named after the region
for region in hp.REGIONS:
    root = hp.DATA_DIR / region
    if not root.exists():
        root.mkdir()

    # Rows are matched on the capitalised region name.
    # NOTE(review): str.capitalize() lower-cases everything after the first
    # character, so a multi-word entry in hp.REGIONS would not match a
    # title-cased 'region' value -- confirm against hp.REGIONS.
    region_c = region.capitalize()

    # Rents slice
    in_region = rents['region'] == region_c
    f = rents.loc[in_region].copy()
    path = root / 'rents.csv'
    f.to_csv(str(path), index=False)
    