# !MANUAL!: Stats NZ Infoshare
There is no automatic way to download these files; they have to be fetched by hand.

### Steps:
1. http://archive.stats.govt.nz/infoshare/SearchPage.aspx
2. Search for \<Table Code>
3. Open the table and select all values, or select as described under Tables below
4. In the right-hand corner, choose .csv
5. Put the downloaded file into the ./drop/ folder.

### Tables: 
1. CPI009AA: CPI All Groups for New Zealand (Qrtly-Mar/Jun/Sep/Dec)
2. CPI017AA: CPI Non-standard All Groups Less/Plus Selected Groupings for New Zealand (Qrtly-Mar/Jun/Sep/Dec)
3. DPE054AA: Estimated Resident Population by Age and Sex (1991+) (Qrtly-Mar/Jun/Sep/Dec)
  1. Estimate Type: As at
  2. Population group: Male and Female
  3. Observations: 0 Years, 1 Years, ..., 89 Years, 90 Years and Over
  4. Time: Select All
4. DDE002AA: Estimated Households in Private Occupied Dwellings, As At Quarter Ended (Qrtly-Mar/Jun/Sep/Dec)
5. DDE005AA: Estimated Private Dwellings, As At Quarter Ended (Qrtly-Mar/Jun/Sep/Dec).
6. SNE205AA: Households, Income & Outlay account, Current Prices (Annual-Mar)
7. SNE095AA: Group: National Accounts - SNA 2008 - SNE

In [21]:
import ipywidgets as widgets
import pandas as pd
import numpy as np
import requests
import os
import glob   

import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
    
def prepend_history(history, now, kfunc=np.mean):
    """
    Extend ``now`` with rescaled values taken from ``history``.

    The two frames are combined column by column. Wherever ``now`` has a
    value it is kept unchanged (the current data is treated as the more
    precise source). Wherever it is NaN, the ``history`` value is used
    instead, multiplied by a correction coefficient derived from the
    element-wise ``now / history`` ratios of that column.

    Parameters:
        history: DataFrame holding the older series.
        now: DataFrame holding the current series.
        kfunc: callable that reduces the Series of ratios to a single
            coefficient (``np.mean`` by default), or the string
            ``'oldest'`` to take the first non-NaN ratio in index order.

    Returns:
        The combined DataFrame. When the coefficient is NaN the history
        values are used without rescaling.
    """
    def _splice_column(current, past):
        ratios = current.combine(past, lambda cur, old: cur / old)
        if kfunc == 'oldest':
            factor = ratios.dropna().head(1).mean()
        else:
            factor = kfunc(ratios)

        if np.isnan(factor):
            # No usable coefficient: fall back to the raw history value.
            def _fill(cur, old):
                return old if np.isnan(cur) else cur
        else:
            def _fill(cur, old):
                return old * factor if np.isnan(cur) else cur

        return current.combine(past, _fill)

    return now.combine(history, _splice_column)


# Make sure the working folders exist before anything is read or written.
# makedirs(exist_ok=True) does nothing for a folder that is already there,
# so no separate existence check (which could race with another process)
# is needed. Parents are listed before their children.
for folder in ("drop", "out", "out/download", "out/drop", "out/tmp", "out/gen"):
    os.makedirs(folder, exist_ok=True)

In [28]:
from shutil import copyfile

def drop(what, src, dst):
    """
    Move a manually downloaded file from ./drop into ./out/drop.

    Parameters:
        what: human-readable description of the data set (not used by the
            function itself).
        src: glob pattern, relative to ./drop, of the downloaded file.
        dst: file name to use under ./out/drop and ./src/drop.

    When exactly one file matches ``src`` it replaces both
    ``src/drop/<dst>`` (a copy kept as a fallback) and ``out/drop/<dst>``
    (the file is moved there). Otherwise, if ``out/drop/<dst>`` does not
    exist yet, it is restored from the ``src/drop/<dst>`` copy.
    """
    matches = glob.glob(f"drop/{src}")
    archived = f"src/drop/{dst}"
    target = f"out/drop/{dst}"

    if len(matches) == 1:
        fresh = matches[0]
        # Clear out the previous versions before writing the new ones.
        for stale in (target, archived):
            if os.path.exists(stale):
                os.remove(stale)
        copyfile(fresh, archived)
        os.rename(fresh, target)
        print("Dropped ./" + target)
        return

    if not os.path.exists(target):
        copyfile(archived, target)
        print("Copied old file from ./src/drop ./" + target)
        
# Pick up any freshly downloaded Infoshare exports. Each entry is
# (description, glob pattern inside ./drop, destination file name).
for _what, _pattern, _target in [
    ("Consumer price index (CPI)", "CPI316601*.csv", "cpi_stats-nz_1914-now.csv"),
    ("Non-standard Consumer price index (CPI)", "CPI318001*.csv", "cpi-nstd_stats-nz_1999-now.csv"),
    ("Population", "DPE*.csv", "population_stats-nz_1991-now.csv"),
    ("Occupied Dwellings", "DDE268801*.csv", "dwellings-occupied_stats-nz_1991-now.csv"),
    ("All Dwellings", "DDE269101*.csv", "dwellings-all_stats-nz_1991-now.csv"),
    ("Disposable Income", "SNE532901*.csv", "income_stats-nz_1987-now.csv"),
    ("Disposable Income 1972", "SNE449101*.csv", "income_stats-nz_1972-now.csv"),
]:
    drop(_what, _pattern, _target)

Dropped ./out/drop/income_stats-nz_1972-now.csv


# CPI Consumers price index (Deflator)

!TODO: Purchases of new houses and rent are included in the CPI, so the CPI needs to be adjusted for them.

In [20]:
# Headline CPI (all groups). Values of 0.000000 are read as missing.
cpi = pd.read_csv(
    'out/drop/cpi_stats-nz_1914-now.csv',
    index_col=0, header=1, skipfooter=33, engine='python',
    parse_dates=True, na_values='0.000000',
)
cpi.index.name = 'Date'
cpi.columns = ['CPI']
# Start the "less housing" series as a copy of the headline index; it serves
# as the history for the non-standard series loaded below.
cpi['CPI - Housing'] = cpi['CPI']

# Non-standard CPI: only the "less housing and household utilities" column.
cpi_nstd = pd.read_csv(
    'out/drop/cpi-nstd_stats-nz_1999-now.csv',
    index_col=0, header=1, skipfooter=37, engine='python',
    parse_dates=True, na_values='..',
)
cpi_nstd = cpi_nstd[['All groups less housing and household utilities group']].dropna()
cpi_nstd.columns = ['CPI - Housing']

# Splice the two, scaling the history by the oldest available ratio.
cpi = prepend_history(history = cpi, now = cpi_nstd, kfunc='oldest')
# Normalise every column by its value in the last row.
cpi = cpi / cpi.iloc[-1]
cpi['Deflator'] = cpi['CPI - Housing']
# Month-start frequency with linear interpolation between observations.
# .ffill() replaces fillna(method='ffill'), whose `method` argument is
# deprecated in recent pandas releases; the result is the same.
cpi = cpi.resample('MS').interpolate(method='linear').ffill()


deflator = cpi[['Deflator']]

cpi.to_csv('out/gen/cpi_1914-now.csv')
print("Generated ./out/gen/cpi_1914-now.csv")

nan
Series([], Name: CPI, dtype: float64)
Series([], Name: CPI, dtype: float64)
nan
1.0393026330380808
1999-04-01    1.072091
1999-07-01    1.070335
1999-10-01    1.071973
2000-01-01    1.070799
2000-04-01    1.073354
                ...   
2018-07-01    0.994141
2018-10-01    0.993171
2019-01-01    0.991228
2019-04-01    0.990310
2019-07-01    0.989413
Name: CPI - Housing, Length: 82, dtype: float64
1999-04-01    1.072091
Name: CPI - Housing, dtype: float64
1.0720913019960057
Generated ./out/gen/cpi_1914-now.csv


# Reinz Indexes

https://www.interest.co.nz/charts/real-estate/median-price-reinz

The house price CSV files for all regions and for the national total are downloaded into the ./out/download/house-prices folder and then merged into a single file, ./out/gen/house-prices_reinz_1992-now.csv

In [4]:
# Region table: its index supplies the region codes used in the download URLs
# and its 'Name' column supplies the column titles of the combined file.
regions = pd.read_csv('src/const/nz-regions.csv', index_col=0)
# Folder where the per-region CSV files are stored.
housePricesDirName = 'out/download/house-prices'

def downloadRegion(code):
    """
    Download the median house price CSV for one region from interest.co.nz.

    The response body is written unchanged to
    ``<housePricesDirName>/<code>.csv``; the folder is created if missing.

    Parameters:
        code: region code substituted into the URL; also used as the
            file name.

    Raises:
        Exception: if the server answers with a status other than 200.
    """
    base_url = 'https://www.interest.co.nz/charts-csv/chart_data/real%20estate/medianhouseprice-$code.csv'
    url = base_url.replace('$code', code)
    r = requests.get(url)
    if r.status_code != 200:
        # status_code is an int: convert it before joining it to the
        # message, otherwise a TypeError hides the real failure.
        raise Exception('Cannot download region ' + code + ': ' + str(r.status_code))

    os.makedirs(housePricesDirName, exist_ok=True)
    with open(housePricesDirName + '/' + code + '.csv', 'wb') as f:
        f.write(r.content)

def downloadHousePrices():
    """Download the nationwide ('total') file, then one file per region code."""
    national_code = 'total'
    downloadRegion(national_code)

    region_codes = regions.index.values
    for region_code in region_codes:
        downloadRegion(region_code)

def loadDf(code, name):
    """
    Read one downloaded house price file into a DataFrame.

    The file is read without a header row; its columns are named 'Date'
    and ``name``, with 'Date' parsed as the index. Every index value is
    then shifted forward by ``pd.DateOffset(1)``.

    Parameters:
        code: region code, i.e. the file name without '.csv'.
        name: title for the price column.

    Returns:
        The loaded DataFrame.
    """
    csv_path = housePricesDirName + '/' + code + '.csv'
    prices = pd.read_csv(
        csv_path,
        header=None,
        names=['Date', name],
        index_col='Date',
        parse_dates=True,
    )
    prices.index = prices.index + pd.DateOffset(1)
    return prices
    
def combineCsv():
    """
    Merge the per-region files and the national total into a single CSV.

    One column is produced per row of ``regions`` (titled with its 'Name'),
    followed by a 'New Zealand' column built from the 'total' file. The
    result is written to out/gen/house-prices_reinz_1992-now.csv.
    """
    frames = []
    for region_code, row in regions.iterrows():
        frames.append(loadDf(region_code, row['Name']))
    frames.append(loadDf('total', 'New Zealand'))

    combined = pd.concat(frames, axis='columns')
    combined.to_csv('out/gen/house-prices_reinz_1992-now.csv')
    print("Created ./out/gen/house-prices_reinz_1992-now.csv")
        

# Refresh the per-region downloads, then rebuild the combined CSV from them.
downloadHousePrices()
combineCsv()

Created ./out/gen/house-prices_reinz_1992-now.csv


# Stats NZ: Long term house prices

http://archive.stats.govt.nz/browse_for_stats/economic_indicators/NationalAccounts/long-term-data-series/prices.aspx

G6.1 Property prices and indexes.xls
http://archive.stats.govt.nz/~/media/Statistics/browse-categories/economic-indicators/national-accounts/Long-term%20data%20series/G%20Prices/table-g6-1.xls

Sheet AREMOS
Stored locally as ./src/history/house-prices_stats-nz_table-g6-1_1962-2004.xls

We join it with the REINZ index and save the result to ./out/gen/house-prices_1962-now.csv


In [5]:
# REINZ series: keep only the nationwide column.
houses1992 = pd.read_csv(
    'out/gen/house-prices_reinz_1992-now.csv',
    index_col='Date',
    parse_dates=True,
)
houses1992 = houses1992[['New Zealand']].rename(
    columns={'New Zealand': 'Nominal House Prices'}
)

# Stats NZ long-term table, sheet AREMOS.
houses1962 = pd.read_excel(
    'src/history/house-prices_stats-nz_table-g6-1_1962-2004.xls',
    sheet_name='AREMOS',
    parse_dates=True,
    index_col=0,
    skipfooter=1,
)
# PQHPI acts as the history that extends PQHDR.
df = houses1962['PQHPI'].to_frame(name='Nominal House Prices')
houses1962 = houses1962[['PQHDR']].rename(
    columns={'PQHDR': 'Nominal House Prices'}
)
houses1962 = prepend_history(df, houses1962).dropna()

# Extend the REINZ series with the long-term one, resample to month starts
# with linear interpolation, then attach the deflator.
houses = (
    prepend_history(houses1962, houses1992)
    .dropna()
    .resample('MS')
    .interpolate(method='linear')
)
houses = pd.merge_asof(houses, deflator, left_index=True, right_index=True)
houses['Deflated House Prices'] = houses['Nominal House Prices'] / houses['Deflator']
houses.to_csv('out/gen/house-prices_1962-now.csv')

print("Created ./out/gen/house-prices_1962-now.csv")

Created ./out/gen/house-prices_1962-now.csv


# Population

Stats NZ since 1936

In [6]:
# NOTE: mangle_dupe_cols=True is no longer passed to read_csv. It was the
# default, and the argument has been removed from pandas 2.0, where passing
# it raises a TypeError.

# Historical series: the total population is the sum of all columns per row.
pop1936 = pd.read_csv('src/history/population_stats-nz_1936-1995.csv', index_col=0,
            header=3, skipfooter=33, engine='python', parse_dates=True)
pop1936['Population'] = pop1936.sum(axis = 1)
pop1936 = pop1936[['Population']]

# Current series from the dropped Infoshare export, totalled the same way.
pop1991 = pd.read_csv('out/drop/population_stats-nz_1991-now.csv', index_col=0,
            header=3, skipfooter=121, engine='python', parse_dates=True)
pop1991['Population'] = pop1991.sum(axis = 1)
pop1991 = pop1991[['Population']]

# Current values win; the historical series only fills dates they lack.
pop = pop1991.combine_first(pop1936)
pop.index.name = 'Date'
# Month-start frequency with linear interpolation between observations.
pop = pop.resample('MS').interpolate(method='linear')
pop.to_csv('out/gen/population_stats-nz_1936-now.csv')
print("Created ./out/gen/population_stats-nz_1936-now.csv")

Created ./out/gen/population_stats-nz_1936-now.csv


# Dwellings 

Stats NZ since 1961

The counts of unoccupied dwellings look as if they were estimated in censuses and then interpolated, so they do not have much value.

In [7]:
# Historical dwelling counts; the index is moved three months forward.
dw1961 = pd.read_csv(
    'src/history/dwellings_stats-nz_1961-1997.csv',
    skiprows=[0, 2],
    parse_dates=True,
    index_col=0,
    skipfooter=27,
    engine='python',
)
dw1961.index = dw1961.index + pd.DateOffset(months=3)
# The total is the sum of every column, taken before the unoccupied count
# is discarded.
dw1961['Dwellings'] = dw1961.sum(axis=1)
dw1961 = dw1961.drop(columns=['Unoccupied Dwellings'])

# Current exports: occupied/rented counts and the all-dwellings count.
dw1991_occ = pd.read_csv(
    'out/drop/dwellings-occupied_stats-nz_1991-now.csv',
    skiprows=2,
    parse_dates=True,
    index_col=0,
    skipfooter=41,
    engine='python',
    usecols=[0, 1, 3],
    names=['Date', 'Occupied Dwellings', 'Rented Dwellings'],
)
dw1991_all = pd.read_csv(
    'out/drop/dwellings-all_stats-nz_1991-now.csv',
    skiprows=2,
    parse_dates=True,
    index_col=0,
    skipfooter=37,
    engine='python',
    usecols=[0, 1],
    names=['Date', 'Dwellings'],
)

dw1991 = dw1991_all.join(dw1991_occ).dropna()
dwellings = (
    prepend_history(dw1961, dw1991)
    .resample('MS')
    .interpolate(method='linear')
)
dwellings = pd.merge_asof(dwellings, pop, left_index=True, right_index=True)

# Derived ratios, added in this column order.
for _ratio, _numerator, _denominator in (
    ('People per Dwelling', 'Population', 'Occupied Dwellings'),
    ('Occupancy Rate', 'Occupied Dwellings', 'Dwellings'),
    ('Rented Dwellings Rate', 'Rented Dwellings', 'Occupied Dwellings'),
):
    dwellings[_ratio] = dwellings[_numerator] / dwellings[_denominator]

dwellings.to_csv('out/gen/dwellings_stats-nz_1961-now.csv')
print("Created ./out/gen/dwellings_stats-nz_1961-now.csv")

Created ./out/gen/dwellings_stats-nz_1961-now.csv


# MBIE: Rental Bond Data
https://www.mbie.govt.nz/building-and-energy/tenancy-and-housing/rental-bond-data/

Mean rent by region from 1993

To get older rents, we take the Dwelling Rentals series from src/history/cpi-groups_stats-nz_1975-1999.csv.

The result is saved to out/gen/rent_1975-now.csv

In [8]:
# Download the MBIE mean-rent-by-region CSV and store it unchanged.
url = 'https://www.mbie.govt.nz/assets/Data-Files/Building-and-construction/Tenancy-and-housing/Rental-bond-data/Region/region-mean-rents.csv'
r = requests.get(url)
if r.status_code != 200:
    # status_code is an int: convert it before joining it to the message,
    # otherwise a TypeError hides the real failure.
    raise Exception('Cannot download region-mean-rents.csv: ' + str(r.status_code))

with open('out/download/rent-region_mbie_1993-now.csv', 'wb') as f:
    f.write(r.content)
print("Downloaded out/download/rent-region_mbie_1993-now.csv")

Downloaded out/download/rent-region_mbie_1993-now.csv


In [9]:
# Historical rents: the 'Dwelling Rentals**' column of the CPI groups file,
# resampled to month starts with linear interpolation.
rent1975 = pd.read_csv(
    'src/history/cpi-groups_stats-nz_1975-1999.csv',
    header=2,
    skipfooter=28,
    engine='python',
    parse_dates=True,
    index_col=0,
)
rent1975 = (
    rent1975[['Dwelling Rentals**']]
    .rename(columns={'Dwelling Rentals**': 'Nominal Rent'})
    .resample('MS')
    .interpolate(method='linear')
)

# Current rents: the 'National Total' column of the MBIE download.
rent1993 = pd.read_csv(
    'out/download/rent-region_mbie_1993-now.csv',
    parse_dates=True,
    index_col=0,
)
rent1993 = rent1993[['National Total']].rename(
    columns={'National Total': 'Nominal Rent'}
)

# Extend the MBIE series with the rescaled history, then deflate it.
rent = prepend_history(rent1975, rent1993)
rent = pd.merge_asof(rent, deflator, left_index=True, right_index=True)
rent['Deflated Rent'] = rent['Nominal Rent'] / rent['Deflator']
rent = rent[['Nominal Rent', 'Deflated Rent', 'Deflator']].dropna()
rent.to_csv('out/gen/rent_1975-now.csv')
print("Created ./out/gen/rent_1975-now.csv")

Created ./out/gen/rent_1975-now.csv


# GDP and private consumption

RBNZ GDP since 1987
Stats NZ Long term data: http://archive.stats.govt.nz/browse_for_stats/economic_indicators/NationalAccounts/long-term-data-series/prices.aspx

Table E1 for GDP

Table E4 for private consumption

Consumption and GDP includes not only rent but also imputed rent by owner occupied houses:
http://datainfoplus.stats.govt.nz/Item/example.org/87a4fec5-24a8-48fd-a261-3fad8495136e

!TODO: It looks like part of mortgage payments (the principal is mentioned here) is included in consumption:
http://archive.stats.govt.nz/browse_for_stats/people_and_communities/Households/HouseholdEconomicSurvey_HOTPYeJun07/Commentary.aspx

A detailed breakdown can be found in the national accounts: https://www.stats.govt.nz/information-releases/national-accounts-income-and-expenditure-year-ended-march-2019

It's available on infoshare.

In [10]:
# Download the RBNZ workbook and store it unchanged.
url = 'https://www.rbnz.govt.nz/-/media/ReserveBank/Files/Statistics/tables/m5/hm5.xlsx'
r = requests.get(url)
if r.status_code != 200:
    # status_code is an int: convert it before joining it to the message,
    # otherwise a TypeError hides the real failure.
    raise Exception('Cannot download gdp_rbnz_1987-now.xlsx: ' + str(r.status_code))

with open('out/download/gdp_rbnz_1987-now.xlsx', 'wb') as f:
    f.write(r.content)
print("Downloaded ./out/download/gdp_rbnz_1987-now.xlsx")

Downloaded ./out/download/gdp_rbnz_1987-now.xlsx


In [11]:
# RBNZ workbook: spreadsheet columns A, F and N are read as Date, GDP and
# Consumption; the first five rows are skipped.
gdp1987 = pd.read_excel('out/download/gdp_rbnz_1987-now.xlsx', 
                        header=None, skiprows=5, 
                        parse_dates=True, index_col=0,
                        usecols="A,F,N", names=["Date", "GDP", "Consumption"]
                       )
#gdp1987.index = gdp1987.index + pd.DateOffset(months=-3)
# Sum over a rolling window of four rows and keep every fourth row, starting
# with the fourth - presumably turning quarterly figures into annual totals
# (TODO confirm against the workbook layout).
gdp1987 = gdp1987.rolling(4).sum().iloc[3::4]
# Move each date one day forward.
gdp1987.index = gdp1987.index + pd.DateOffset(days=1)

# Stats NZ table E4: spreadsheet columns A and N are read as Date and
# Consumption; dates are moved three months forward.
cons1949 = pd.read_excel(
        'src/history/private-consumption_stats-nz_table-e4_1949-2004.xls', 
         header=None, skiprows=9,
         parse_dates=True, index_col=0, 
         usecols="A,N", names=["Date", "Consumption"]
    ).dropna()
cons1949.index = cons1949.index + pd.DateOffset(months=3)
# Bare expression left over from interactive inspection; it has no effect here.
cons1949


# Stats NZ table E1: spreadsheet columns A and Z are read as Date and GDP;
# dates are moved three months forward.
gdp1860 = pd.read_excel(
        'src/history/gdp_stats-nz_table-e1_1860-2004.xls', 
         header=None, skiprows=9,
         parse_dates=True, index_col=0, 
         usecols="A,Z", names=["Date", "GDP"]
    ).dropna()
gdp1860.index = gdp1860.index + pd.DateOffset(months=3)
# Bare expression left over from interactive inspection; it has no effect here.
gdp1860


# Extend the RBNZ frame first with the consumption history, then with the
# GDP history.
gdp = prepend_history(cons1949, gdp1987)
gdp = prepend_history(gdp1860, gdp)

# Scale every value by one million - presumably the sources are quoted in
# millions (TODO confirm).
mln = 1000000
gdp = gdp*mln
# Attach the most recent population, deflator, rent and occupied dwellings
# value at or before each date.
gdp = pd.merge_asof(gdp, pop, left_index=True, right_index=True)
gdp = pd.merge_asof(gdp, deflator, left_index=True, right_index=True)
gdp = pd.merge_asof(gdp, rent[["Nominal Rent"]], left_index=True, right_index=True)
gdp = pd.merge_asof(gdp, dwellings[["Occupied Dwellings"]], left_index=True, right_index=True)


# Rent total: rent * 365/7 * occupied dwellings - presumably a weekly rent
# scaled to a year (TODO confirm the rent period).
gdp["Rent"] = gdp["Nominal Rent"]*365/7*gdp["Occupied Dwellings"]
del(gdp["Nominal Rent"])
#gdp["Consumption - Rent"] = gdp["Consumption"] - gdp["Rent"]
#gdp["GDP - Rent"] = gdp["GDP"] - gdp["Rent"]

gdp["Deflated GDP"] = gdp["GDP"]/gdp.Deflator
gdp["Deflated Consumption"] = gdp["Consumption"]/gdp.Deflator
#gdp["Deflated GDP - Rent"] = gdp["GDP - Rent"]/gdp.Deflator
#gdp["Deflated Consumption - Rent"] = gdp["Consumption - Rent"]/gdp.Deflator

#gdp["GDP per capita"] = gdp["GDP"]*mln/gdp.Population
#gdp["Deflated GDP per capita"] = gdp["Deflated GDP"]*mln/gdp.Population
#gdp["Deflated GDP - Rent per capita"] = gdp["Deflated GDP - Rent"]*mln/gdp.Population
#gdp["Consumption per capita"] = gdp["Consumption"]*mln/gdp.Population
#gdp["Deflated Consumption per capita"] = gdp["Deflated Consumption"]*mln/gdp.Population
#gdp["Deflated Consumption - Rent per capita"] = gdp["Deflated Consumption - Rent"]*mln/gdp.Population

gdp.to_csv("out/gen/gdp_1860-now.csv")
print("Created ./out/gen/gdp_1860-now.csv")

# Last expression of the cell: the notebook displays the resulting frame.
gdp

Created ./out/gen/gdp_1860-now.csv


Unnamed: 0_level_0,Consumption,GDP,Population,Deflator,Occupied Dwellings,Rent,Deflated GDP,Deflated Consumption
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1860-04-01,,9.521243e+06,,,,,,
1861-04-01,,1.202683e+07,,,,,,
1862-04-01,,1.523399e+07,,,,,,
1863-04-01,,2.204919e+07,,,,,,
1864-04-01,,2.565724e+07,,,,,,
...,...,...,...,...,...,...,...,...
2015-04-01,1.394520e+11,2.420910e+11,4585510.0,0.965261,1680700.0,3.461642e+10,2.508038e+11,1.444708e+11
2016-04-01,1.459560e+11,2.543530e+11,4678100.0,0.960259,1699900.0,3.643007e+10,2.648795e+11,1.519964e+11
2017-04-01,1.557400e+11,2.702320e+11,4765420.0,0.972763,1721900.0,3.860746e+10,2.777985e+11,1.601007e+11
2018-04-01,1.641610e+11,2.857550e+11,4840670.0,0.982490,1744600.0,4.084482e+10,2.908477e+11,1.670866e+11


# Disposable Income

There is no household income data before 1987, so for earlier years we approximate it from the compensation of employees series.

In [44]:
# Household net disposable income from the 1987+ export.
income1987 = pd.read_csv(
    'out/drop/income_stats-nz_1987-now.csv',
    skiprows=[0, 1, 3, 4],
    skipfooter=29,
    engine='python',
    parse_dates=True,
    index_col=0,
)
income1987 = income1987[['Net disposable income']].rename(
    columns={'Net disposable income': 'Income'}
)

# Compensation of employees from the 1972+ export, used as the history.
income1972 = pd.read_csv(
    'out/drop/income_stats-nz_1972-now.csv',
    skiprows=1,
    skipfooter=29,
    engine='python',
    parse_dates=True,
    index_col=0,
)
income1972 = income1972[['Compensation Of Employees - received']].rename(
    columns={'Compensation Of Employees - received': 'Income'}
)

income = prepend_history(income1972, income1987)
income.index = income.index + pd.DateOffset(months=3)
income = income * 1000000

# Attach, in this column order, the most recent value at or before each date.
for _extra in (
    pop,
    deflator,
    rent[["Nominal Rent", "Deflated Rent"]],
    dwellings[["Occupied Dwellings"]],
):
    income = pd.merge_asof(income, _extra, left_index=True, right_index=True)

income['Deflated Income'] = income['Income'] / income['Deflator']
income.to_csv("out/gen/income_stats-nz_1972-now.csv")
print("Created ./out/gen/income_stats-nz_1972-now.csv")

Created ./out/gen/income_stats-nz_1972-now.csv
