In [10]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so
import plotly as py
import plotly.offline as po
import plotly.graph_objs as go

import helpers as hp

# Put plotly's offline module into notebook mode before any figures are drawn.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook -- confirm against the plotly docs.
po.init_notebook_mode(connected=True)
# Autoreload mode 2, so that edits to imported modules (e.g. helpers) are
# picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Plot rent change over time

In [2]:
# Load the rents table; 'au2001' is read as a string column.
# NOTE(review): the displayed frame has an 'Unnamed: 0' column, which looks like
# a saved index -- consider index_col=0 once downstream helpers are confirmed
# not to rely on it.
path = hp.DATA_DIR / 'rents.csv'
rents = pd.read_csv(
    path,
    dtype=dict(au2001=str),
)

# Show which quarters are present, then preview the first rows.
quarters = rents['quarter'].unique()
print(quarters)
rents.head()


['1993-03-01' '1993-06-01' '1993-09-01' '1993-12-01' '1994-03-01'
 '1994-06-01' '1994-09-01' '1994-12-01' '1995-03-01' '1995-06-01'
 '1995-09-01' '1995-12-01' '1996-03-01' '1996-06-01' '1996-09-01'
 '1996-12-01' '1997-03-01' '1997-06-01' '1997-09-01' '1997-12-01'
 '1998-03-01' '1998-06-01' '1998-09-01' '1998-12-01' '1999-03-01'
 '1999-06-01' '1999-09-01' '1999-12-01' '2000-03-01' '2000-06-01'
 '2000-09-01' '2000-12-01' '2001-03-01' '2001-06-01' '2001-09-01'
 '2001-12-01' '2002-03-01' '2002-06-01' '2002-09-01' '2002-12-01'
 '2003-03-01' '2003-06-01' '2003-09-01' '2003-12-01' '2004-03-01'
 '2004-06-01' '2004-09-01' '2004-12-01' '2005-03-01' '2005-06-01'
 '2005-09-01' '2005-12-01' '2006-03-01' '2006-06-01' '2006-09-01'
 '2006-12-01' '2007-03-01' '2007-06-01' '2007-09-01' '2007-12-01'
 '2008-03-01' '2008-06-01' '2008-09-01' '2008-12-01' '2009-03-01'
 '2009-06-01' '2009-09-01' '2009-12-01' '2010-03-01' '2010-06-01'
 '2010-09-01' '2010-12-01' '2011-03-01' '2011-06-01' '2011-09-01'
 '2011-12-

Unnamed: 0,au2001,property_type,#bedrooms,quarter,rent_count,rent_geo_mean,rent_mean,au_name,territory,region,rental_area
0,500100,House,2,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
1,500100,House,3,1993-03-01,,,,Awanui,Far North District,Northland,Rural Far North
2,500100,House,2,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
3,500100,House,3,1993-06-01,,,,Awanui,Far North District,Northland,Rural Far North
4,500100,House,2,1993-09-01,,,,Awanui,Far North District,Northland,Rural Far North


In [3]:
# What fraction of rental data do we have by #bedrooms?

def hits(group):
    """
    Return the fraction of rows in ``group`` whose 'rent_count' is not missing.

    Parameters
    ----------
    group : DataFrame
        Must contain a 'rent_count' column.

    Returns
    -------
    Series
        A one-entry Series with key 'hit_frac': the number of non-missing
        'rent_count' values divided by the number of rows.
        NaN when ``group`` has no rows (instead of raising ZeroDivisionError).
    """
    counts = group['rent_count']
    n_rows = len(counts)
    # Guard the empty group explicitly so the division can never fail.
    hit_frac = counts.notna().sum() / n_rows if n_rows else float('nan')
    return pd.Series({'hit_frac': hit_frac})

# Report the Auckland hit fraction per '#bedrooms' value at two spatial
# granularities. Each pass aggregates the rents, keeps the Auckland rows and
# prints one table under its label.
for label, groupby_cols in [
    ('census area units', ('au2001', '#bedrooms')),
    ('rental area units', ('rental_area', '#bedrooms')),
]:
    f = hp.aggregate_rents(rents, groupby_cols=groupby_cols)
    cond = f['region'] == 'Auckland'
    print(label)
    print(f[cond].copy().groupby('#bedrooms').apply(hits).reset_index())


census area units
  #bedrooms  hit_frac
0         1  0.717868
1         2  0.906977
2         3  0.968661
3         4  0.884726
4        5+  0.361022
rental area units
  #bedrooms  hit_frac
0         1  0.939394
1         2  1.000000
2         3  1.000000
3         4  0.979798
4        5+  0.666667


In [77]:
# Probe by territory: aggregate the rents per (quarter, territory) pair.
# The result is stored in `f`, which the plotting cell groups by 'region'
# and 'territory'.
# Disabled single-region filter, kept for reference:
# region = 'Wellington'
# cond = rents['region'] == region

f = hp.aggregate_rents(rents, groupby_cols=('quarter', 'territory'))
# Disabled conversion of 'quarter' to datetimes; the column is left as loaded:
#f['quarter'] = pd.to_datetime(f['quarter'])



In [80]:
# Plot one figure per region, with one line per territory showing the rounded
# geometric mean rent for each quarter.
for region, ff in f.groupby('region'):
    data = []
    for territory, g in ff.groupby('territory'):
        trace = go.Scatter(
            x=g['quarter'],
            y=g['rent_geo_mean'].round(),
            name=territory,
        )
        data.append(trace)

    layout = go.Layout(
        height=600,
        title='{!s} geometric mean rents'.format(region),
        # Label both axes so each figure can be read on its own.
        xaxis=dict(title='Quarter'),
        yaxis=dict(title='Geometric mean rent'),
    )
    fig = go.Figure(data=data, layout=layout)
    po.iplot(fig)
