# !MANUAL!: Stats NZ Consumer price index (CPI)
No automatic way to download the file.

1. http://archive.stats.govt.nz/infoshare/SearchPage.aspx
2. Search for CPIQ.SE9A. Search options: Look for time series identifier
3. Open CPI All Groups for New Zealand (Qrtly-Mar/Jun/Sep/Dec)
4. Select all groups and all Time
5. Right hand corner choose .csv
6. Put into the ./drop/ folder.

# !MANUAL!: Stats NZ Population stats

1. http://archive.stats.govt.nz/infoshare/SearchPage.aspx
2. Search for DPE054AA. Search options: Include at least one of the keywords
3. Open Estimated Resident Population by Age and Sex (1991+) (Qrtly-Mar/Jun/Sep/Dec)
4. Select:
  1. Estimate Type: As at
  2. Population group: Male and Female
  3. Observations: 0 Years, 1 Years, ..., 89 Years, 90 Years and Over
  4. Time: Select All
5. Right hand corner choose .csv
6. Put into the ./drop/ folder.

# !MANUAL!: Stats NZ Dwellings

1. http://archive.stats.govt.nz/infoshare/SearchPage.aspx
2. Search for DDE002AA. Search options: Include at least one of the keywords
3. Estimated Households in Private Occupied Dwellings, As At Quarter Ended (Qrtly-Mar/Jun/Sep/Dec)
4. Select:
  1. Tenure: Select all
  2. Time: Select all
5. Right hand corner choose .csv
6. Put into the ./drop/ folder.
7. Repeat the same for DDE005AA.

In [5]:
import ipywidgets as widgets
import pandas as pd
import numpy as np
import requests
import os
import glob   

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks
# Put cufflinks and plotly into offline notebook mode; both calls are made
# with connected=True.
# NOTE(review): presumably connected=True makes the notebook load plotly.js
# from the network instead of embedding it - confirm against the plotly docs.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
    
def prepend_history(history, now):
    """
    Extend ``now`` backwards with rescaled rows taken from ``history``.

    The two frames are combined column by column. For every column the
    mean of now/history over the rows where both frames have a value is
    used as a scale factor. Rows that have a value in ``now`` are kept
    unchanged; rows that are missing in ``now`` receive the ``history``
    value multiplied by that factor.

    A column with no overlapping rows gets a NaN factor, so its missing
    rows stay NaN.
    """
    def _ratio(current, past):
        return current / past

    def _splice_column(current_col, past_col):
        # Correction coefficient: average ratio on the overlapping rows
        # (mean() skips the NaN ratios of the non-overlapping rows).
        scale = current_col.combine(past_col, _ratio).mean()

        def _pick(current, past):
            if np.isnan(current):
                return past * scale
            return current

        return current_col.combine(past_col, _pick)

    return now.combine(history, _splice_column)


# Working directories used by the notebook. Parents are listed before their
# children. makedirs(exist_ok=True) creates a missing directory and leaves an
# existing one alone, which replaces the separate exists()/mkdir() pair per
# directory.
for _dir in ("drop", "out", "out/download", "out/drop", "out/tmp", "out/gen"):
    os.makedirs(_dir, exist_ok=True)

In [72]:
from shutil import copyfile

def drop(what, src, dst):
    """
    Install a manually downloaded file from ./drop/ as a named data file.

    what -- human-readable name of the data set (not used by the function).
    src  -- glob pattern, relative to ./drop/, matching the downloaded file.
    dst  -- file name to store it under in ./out/drop/ and ./src/drop/.

    If exactly one file matches, it is copied to ./src/drop/<dst> and moved
    to ./out/drop/<dst>, replacing any previous versions. Otherwise (no
    match, or more than one match) nothing in ./drop/ is touched, and if
    ./out/drop/<dst> does not exist yet it is copied from ./src/drop/<dst>.
    """
    matches = glob.glob("drop/" + src)
    src_drop_file = "src/drop/" + dst
    drop_file = "out/drop/" + dst
    if len(matches) == 1:
        # copyfile() and os.replace() both overwrite an existing target, so
        # no os.remove() is needed first; an unconditional remove would raise
        # FileNotFoundError when no previous copy exists (first drop).
        copyfile(matches[0], src_drop_file)
        os.replace(matches[0], drop_file)
        print("Dropped ./" + drop_file)
    elif not os.path.exists(drop_file):
        copyfile(src_drop_file, drop_file)
        print("Copied old file from ./src/drop ./" + drop_file)
        
# One entry per manually downloaded Stats NZ export:
# (description, glob pattern inside ./drop/, target file name).
for _what, _pattern, _target in (
    ("Consumer price index (CPI)", "CPI*.csv", "cpi_stats-nz_1914-now.csv"),
    ("Population", "DPE*.csv", "population_stats-nz_1991-now.csv"),
    ("Occupied Dwellings", "DDE268801*.csv", "dwellings-occupied_stats-nz_1991-now.csv"),
    ("All Dwellings", "DDE269101*.csv", "dwellings-all_stats-nz_1991-now.csv"),
):
    drop(_what, _pattern, _target)

In [7]:
# Load the dropped CPI export: the second line of the file is the header, the
# last 33 lines are skipped, the first column becomes a parsed date index, and
# the literal '0.000000' is read as a missing value.
cpi = pd.read_csv('out/drop/cpi_stats-nz_1914-now.csv', index_col=0, header=1, skipfooter=33, engine='python', parse_dates=True, na_values='0.000000')
# Resample to month starts, interpolate linearly between observations, then
# forward-fill whatever is still missing. .ffill() is the supported spelling;
# fillna(method='ffill') is deprecated in current pandas.
cpi = cpi.resample('MS').interpolate(method='linear').ffill()
cpi.index.name = 'Date'
# Normalise by the last row so that the deflator equals 1 at the latest date.
deflator = cpi / cpi.iloc[-1]
deflator.columns = ['Deflator']
deflator.to_csv('out/gen/deflator_1914-now.csv')
print("Generated ./out/gen/deflator_1914-now.csv")

Generated ./out/gen/deflator_1914-now.csv


# REINZ Indexes

https://www.interest.co.nz/charts/real-estate/median-price-reinz

The house price CSV files for every region and for the national total are downloaded into the ./out/download/house-prices folder and then merged into a single file, ./out/gen/house-prices_reinz_1992-now.csv

In [8]:
# Folder that receives one downloaded CSV per region code.
housePricesDirName = 'out/download/house-prices'
# Region table; its first column is used as the index (the region codes).
regions = pd.read_csv(
    'src/const/nz-regions.csv',
    index_col=0,
)

def downloadRegion(code):
    """
    Download the median house price CSV of one region from interest.co.nz.

    code -- region code; it is substituted into the URL and used as the
            file name (<housePricesDirName>/<code>.csv).

    Raises Exception when the server does not answer with HTTP 200.
    """
    base_url = 'https://www.interest.co.nz/charts-csv/chart_data/real%20estate/medianhouseprice-$code.csv'
    url = base_url.replace('$code', code)
    r = requests.get(url)
    if r.status_code != 200:
        # status_code is an int: convert it explicitly, otherwise building
        # the message fails with a TypeError and hides the real problem.
        raise Exception('Cannot download region ' + code + ': ' + str(r.status_code))

    # Create the target folder on first use; a no-op when it already exists.
    os.makedirs(housePricesDirName, exist_ok=True)
    with open(housePricesDirName + '/' + code + '.csv', 'wb') as f:
        f.write(r.content)

def downloadHousePrices():
    """Download the national total first, then every region listed in ``regions``."""
    codes = ['total']
    codes.extend(regions.index.values)
    for region_code in codes:
        downloadRegion(region_code)

def loadDf(code, name):
    """
    Read the downloaded CSV of one region into a single-column DataFrame.

    code -- region code, i.e. the file name without ``.csv``.
    name -- column name to give to the price series.

    The file has no header row; its first column is parsed as the date
    index and every date is then shifted by ``pd.DateOffset(1)``.
    """
    csv_path = housePricesDirName + '/' + code + '.csv'
    frame = pd.read_csv(
        csv_path,
        names=['Date', name],
        header=None,
        parse_dates=True,
        index_col='Date',
    )
    shift = pd.DateOffset(1)
    frame.index = frame.index + shift
    return frame
    
def combineCsv():
    """
    Merge all downloaded region files into one CSV.

    One column per region (named after the region's 'Name' value) plus a
    'New Zealand' column for the national total, written to
    out/gen/house-prices_reinz_1992-now.csv.
    """
    frames = []
    for region_code, row in regions.iterrows():
        frames.append(loadDf(region_code, row['Name']))
    frames.append(loadDf('total', 'New Zealand'))

    merged = pd.concat(frames, axis='columns')
    merged.to_csv('out/gen/house-prices_reinz_1992-now.csv')
    print("Created ./out/gen/house-prices_reinz_1992-now.csv")
        

# Fetch the per-region files first; combineCsv() reads them from disk.
downloadHousePrices()
combineCsv()

Created ./out/gen/house-prices_reinz_1992-now.csv


# Stats NZ: Long term house prices

http://archive.stats.govt.nz/browse_for_stats/economic_indicators/NationalAccounts/long-term-data-series/prices.aspx

G6.1 Property prices and indexes.xls
http://archive.stats.govt.nz/~/media/Statistics/browse-categories/economic-indicators/national-accounts/Long-term%20data%20series/G%20Prices/table-g6-1.xls

Sheet AREMOS
Located at ./src/history/house-prices_stats-nz_table-g6-1_1962-2004.xls

We join it with the REINZ index and save the result to ./out/gen/house-prices_1962-now.csv


In [9]:
# National REINZ series from the combined file, renamed to the common column
# name used for all house price series in this cell.
houses1992 = pd.read_csv('out/gen/house-prices_reinz_1992-now.csv', index_col='Date', parse_dates=True)
houses1992 = houses1992[['New Zealand']]
houses1992.columns = ['Nominal House Prices']

# Historical Stats NZ table G6.1, sheet AREMOS; the last row is skipped and
# the first column becomes the date index.
houses1962 = pd.read_excel(
    'src/history/house-prices_stats-nz_table-g6-1_1962-2004.xls', sheet_name='AREMOS', 
    parse_dates=True, index_col=0, skipfooter=1,  
)
# PQHPI is treated as the "history" series and PQHDR as the "current" one:
# prepend_history keeps PQHDR where it has a value and fills the remaining
# rows with rescaled PQHPI values.
# NOTE(review): what PQHPI and PQHDR stand for is not shown here - confirm
# against the spreadsheet.
df = houses1962['PQHPI'].to_frame()
df.columns = ['Nominal House Prices']
houses1962['Nominal House Prices'] = houses1962['PQHDR']
houses1962 = houses1962[['Nominal House Prices']]
houses1962 = prepend_history(df, houses1962).dropna()
# Bare expression: a notebook only displays it when it is the last statement
# of a cell, so here it has no effect.
houses1962

# Put the rescaled Stats NZ history in front of the REINZ series.
houses = prepend_history(houses1962, houses1992).dropna()
# Month-start grid, linearly interpolated between observations.
houses = houses.resample('MS').interpolate(method='linear')
# Attach the deflator row at, or most recently before, each date.
houses = pd.merge_asof(houses, deflator, left_index=True, right_index=True)
houses['Deflated House Prices'] = houses['Nominal House Prices']/houses.Deflator
houses.to_csv('out/gen/house-prices_1962-now.csv')

print("Created ./out/gen/house-prices_1962-now.csv")

Created ./out/gen/house-prices_1962-now.csv


# Population

Stats NZ since 1936

In [10]:
# Historical population export: the fourth line of the file is the header and
# the last 33 lines are skipped. mangle_dupe_cols is not passed: de-duplicating
# repeated column names is read_csv's default, and the keyword was removed in
# pandas 2.0, where passing it raises a TypeError.
pop1936 = pd.read_csv('src/history/population_stats-nz_1936-1995.csv', index_col=0,
            header=3, skipfooter=33, engine='python', parse_dates=True)
# Total population = sum of all columns of a row.
pop1936['Population'] = pop1936.sum(axis = 1)
pop1936 = pop1936[['Population']]
# Current population export (manually dropped file): same layout, but the
# last 121 lines are skipped.
pop1991 = pd.read_csv('out/drop/population_stats-nz_1991-now.csv', index_col=0,
            header=3, skipfooter=121, engine='python', parse_dates=True)
pop1991['Population'] = pop1991.sum(axis = 1)
pop1991 = pop1991[['Population']]
# Values from the current export win; the historical export only fills dates
# that the current one does not have.
pop = pop1991.combine_first(pop1936)
pop.index.name = 'Date'
# Month-start grid, linearly interpolated between observations.
pop = pop.resample('MS').interpolate(method='linear')
pop.to_csv('out/gen/population_stats-nz_1936-now.csv')
print("Created ./out/gen/population_stats-nz_1936-now.csv")

Created ./out/gen/population_stats-nz_1936-now.csv


# Dwellings 

Stats NZ since 1961

The count of unoccupied dwellings looks like it is estimated in censuses and then interpolated, so it does not have much value.

In [68]:
# Historical dwellings export: the first and third lines of the file and the
# last 27 lines are skipped; the first column becomes the date index.
dw1961 = pd.read_csv('src/history/dwellings_stats-nz_1961-1997.csv', skiprows=[0, 2], 
                     parse_dates=True, index_col=0, skipfooter=27, engine='python')
# Move every date three months forward.
# NOTE(review): presumably this aligns the historical dates with the quarter
# dates of the current exports - confirm.
dw1961.index = dw1961.index + pd.DateOffset(months=3)
# Total dwellings = sum of all columns of a row; the unoccupied column is
# dropped afterwards.
dw1961['Dwellings'] = dw1961.sum(axis=1)
del(dw1961['Unoccupied Dwellings'])
# Current exports (manually dropped files): only the first two columns are
# read and they are named explicitly; the first two lines and the trailing
# lines (41 and 37 respectively) are skipped.
dw1991_occ = pd.read_csv('out/drop/dwellings-occupied_stats-nz_1991-now.csv', skiprows=2, 
                         parse_dates=True, index_col=0, skipfooter=41, engine='python',
                         usecols=[0,1], names=['Date', 'Occupied Dwellings']
                        )
dw1991_all = pd.read_csv('out/drop/dwellings-all_stats-nz_1991-now.csv', skiprows=2, 
                         parse_dates=True, index_col=0, skipfooter=37, engine='python',
                         usecols=[0,1], names=['Date', 'Dwellings']
                        )

# Keep only the dates for which both series have a value.
dw1991 = dw1991_all.join(dw1991_occ).dropna()
# Put the rescaled historical series in front of the current ones.
dwellings = prepend_history(dw1961, dw1991)
# Month-start grid, linearly interpolated between observations.
dwellings = dwellings.resample('MS').interpolate(method='linear')
# Attach the population row at, or most recently before, each date.
dwellings = pd.merge_asof(dwellings, pop, left_index=True, right_index=True)
dwellings['People per dwelling'] = dwellings['Population']/dwellings['Occupied Dwellings']
dwellings['Occupancy rate'] = dwellings['Occupied Dwellings']/dwellings['Dwellings']
dwellings.to_csv('out/gen/dwellings_stats-nz_1961-now.csv')
print("Created ./out/gen/dwellings_stats-nz_1961-now.csv")

Created ./out/gen/dwellings_stats-nz_1961-now.csv


Unnamed: 0,Dwellings,Occupied Dwellings,Population,People per dwelling,Occupancy rate
1961-04-01,6.997449e+05,6.721848e+05,2.474855e+06,3.681807,0.960614
1961-05-01,7.014100e+05,6.737317e+05,2.479413e+06,3.680120,0.960539
1961-06-01,7.030751e+05,6.752786e+05,2.483972e+06,3.678440,0.960464
1961-07-01,7.047402e+05,6.768255e+05,2.488530e+06,3.676767,0.960390
1961-08-01,7.064053e+05,6.783724e+05,2.493088e+06,3.675103,0.960316
...,...,...,...,...,...
2019-03-01,1.893167e+06,1.766433e+06,4.918860e+06,2.784628,0.933057
2019-04-01,1.895700e+06,1.768500e+06,4.922660e+06,2.783523,0.932901
2019-05-01,1.898267e+06,1.770567e+06,4.929280e+06,2.784013,0.932728
2019-06-01,1.900833e+06,1.772633e+06,4.935900e+06,2.784501,0.932556


# MBIE: Rental Bond Data
https://www.mbie.govt.nz/building-and-energy/tenancy-and-housing/rental-bond-data/

Mean rent by region from 1993

To get older historical rents we use the Dwelling Rentals series from src/history/cpi-groups_stats-nz_1975-1999.csv.

The result is saved to out/gen/rent_1975-now.csv

In [36]:
# Download the MBIE mean rent by region CSV into out/download.
url = 'https://www.mbie.govt.nz/assets/Data-Files/Building-and-construction/Tenancy-and-housing/Rental-bond-data/Region/region-mean-rents.csv'
r = requests.get(url)
if r.status_code == 200:
    with open('out/download/rent-region_mbie_1993-now.csv', 'wb') as f:
        f.write(r.content)
    print("Downloaded out/download/rent-region_mbie_1993-now.csv")
else:
    # status_code is an int: convert it explicitly, otherwise building the
    # message fails with a TypeError and hides the real problem.
    raise Exception('Cannot download region-mean-rents.csv: ' + str(r.status_code))

Downloaded out/download/rent-region_mbie_1993-now.csv


In [36]:
# Historical CPI groups export: the third line of the file is the header and
# the last 28 lines are skipped. Only the 'Dwelling Rentals**' column is kept.
rent1975 = pd.read_csv('src/history/cpi-groups_stats-nz_1975-1999.csv', header=2, skipfooter=28, engine='python', parse_dates=True, index_col=0)
rent1975 = rent1975[['Dwelling Rentals**']]
rent1975.columns = ['Nominal Rent']
# Month-start grid, linearly interpolated between observations.
rent1975 = rent1975.resample('MS').interpolate(method='linear')

# Downloaded MBIE file: only the 'National Total' column is kept.
rent1993 = pd.read_csv('out/download/rent-region_mbie_1993-now.csv', parse_dates=True, index_col=0)
rent1993 = rent1993[['National Total']]
rent1993.columns = ['Nominal Rent']

# Put the rescaled historical series in front of the MBIE series.
rent = prepend_history(rent1975, rent1993)
# Attach the deflator row at, or most recently before, each date.
rent = pd.merge_asof(rent, deflator, left_index=True, right_index=True)
rent['Deflated Rent'] = rent['Nominal Rent']/rent.Deflator
# Fix the column order and drop rows where any of the three values is missing.
rent = rent[['Nominal Rent', 'Deflated Rent', 'Deflator']].dropna()
rent.to_csv('out/gen/rent_1975-now.csv')
print("Created ./out/gen/rent_1975-now.csv")

Created ./out/gen/rent_1975-now.csv


# GDP and private consumption

RBNZ GDP since 1987
Stats NZ Long term data: http://archive.stats.govt.nz/browse_for_stats/economic_indicators/NationalAccounts/long-term-data-series/prices.aspx

Table E1 for GDP

Table E4 for private consumption

In [40]:
# Download the RBNZ GDP workbook into out/download.
url = 'https://www.rbnz.govt.nz/-/media/ReserveBank/Files/Statistics/tables/m5/hm5.xlsx'
r = requests.get(url)
if r.status_code == 200:
    with open('out/download/gdp_rbnz_1987-now.xlsx', 'wb') as f:
        f.write(r.content)
    print("Downloaded ./out/download/gdp_rbnz_1987-now.xlsx")
else:
    # status_code is an int: convert it explicitly, otherwise building the
    # message fails with a TypeError and hides the real problem.
    raise Exception('Cannot download gdp_rbnz_1987-now.xlsx: ' + str(r.status_code))

Downloaded ./out/download/gdp_rbnz_1987-now.xlsx


In [38]:
# RBNZ workbook: sheet columns A, F and N are read without a header row (the
# first five rows are skipped) and named Date, GDP and Consumption.
gdp1987 = pd.read_excel('out/download/gdp_rbnz_1987-now.xlsx', 
                        header=None, skiprows=5, 
                        parse_dates=True, index_col=0,
                        usecols="A,F,N", names=["Date", "GDP", "Consumption"]
                       )
#gdp1987.index = gdp1987.index + pd.DateOffset(months=-3)
# Sum every window of four consecutive rows and keep every fourth window,
# starting with the first complete one (rows 3, 7, 11, ...).
# NOTE(review): presumably this turns quarterly figures into annual totals -
# confirm that the sheet is quarterly.
gdp1987 = gdp1987.rolling(4).sum().iloc[3::4]
# Move every date one day forward.
gdp1987.index = gdp1987.index + pd.DateOffset(days=1)

# Stats NZ table E4: sheet columns A and N, first nine rows skipped, rows
# with missing values dropped.
cons1949 = pd.read_excel(
        'src/history/private-consumption_stats-nz_table-e4_1949-2004.xls', 
         header=None, skiprows=9,
         parse_dates=True, index_col=0, 
         usecols="A,N", names=["Date", "Consumption"]
    ).dropna()
# Move every date three months forward.
cons1949.index = cons1949.index + pd.DateOffset(months=3)
# Bare expression: no effect unless it is the last statement of the cell.
cons1949


# Stats NZ table E1: sheet columns A and Z, first nine rows skipped, rows
# with missing values dropped.
gdp1860 = pd.read_excel(
        'src/history/gdp_stats-nz_table-e1_1860-2004.xls', 
         header=None, skiprows=9,
         parse_dates=True, index_col=0, 
         usecols="A,Z", names=["Date", "GDP"]
    ).dropna()
# Move every date three months forward.
gdp1860.index = gdp1860.index + pd.DateOffset(months=3)
# Bare expression: no effect unless it is the last statement of the cell.
gdp1860


# First extend the Consumption column with the rescaled table E4 history,
# then the GDP column with the rescaled table E1 history.
gdp = prepend_history(cons1949, gdp1987)
gdp = prepend_history(gdp1860, gdp)
# Attach the population and deflator rows at, or most recently before, each date.
gdp = pd.merge_asof(gdp, pop, left_index=True, right_index=True)
gdp = pd.merge_asof(gdp, deflator, left_index=True, right_index=True)
gdp["Deflated GDP"] = gdp["GDP"]/gdp.Deflator
gdp["Deflated Consumption"] = gdp["Consumption"]/gdp.Deflator
# NOTE(review): the factor below presumably converts figures published in
# millions into single units before dividing by population - confirm the
# units of the source tables.
mln = 1000000
gdp["GDP per capita"] = gdp["GDP"]*mln/gdp.Population
gdp["Deflated GDP per capita"] = gdp["Deflated GDP"]*mln/gdp.Population
gdp["Consumption per capita"] = gdp["Consumption"]*mln/gdp.Population
gdp["Deflated Consumption per capita"] = gdp["Deflated Consumption"]*mln/gdp.Population

gdp.to_csv("out/gen/gdp_1860-now.csv")
print("Created ./out/gen/gdp_1860-now.csv")

Created ./out/gen/gdp_1860-now.csv
