## Residential Property Price Register Ireland dataset

This notebook shows how to use HoloViews and Panel to build an interactive view of the Irish Residential Property Price Register (PPR) dataset.

links:

* http://holoviews.org/
* https://www.propertypriceregister.ie/


In [1]:
import os, sys, io, random
import string
import numpy as np
import pandas as pd
from importlib import reload

from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, CustomJS, Slider, HoverTool, NumeralTickFormatter, Arrow, NormalHead
from bokeh.models import LinearAxis, Range1d, DataRange1d, BoxZoomTool
from bokeh.models.glyphs import Text, Rect
from bokeh.layouts import gridplot, column, row
from bokeh.embed import components
import bokeh.models.widgets as bhw
from bokeh.models.callbacks import CustomJS
#output_notebook()

import param
import panel as pn
import panel.widgets as pnw
import holoviews as hv
from holoviews import opts
hv.extension('bokeh','matplotlib')

In [2]:
# Load the full register; the `date` column holds the sale date as text.
df = pd.read_csv('PPR-ALL.csv.gz')
# The year occupies characters 6-10 of the date string.
df['year'] = df.date.str.slice(6,10)
# NOTE(review): assumes the register's dd/mm/yyyy layout - confirm against the CSV.
# Without dayfirst=True pandas reads ambiguous dates such as 01/02/2010 month-first.
df['datetime'] = pd.to_datetime(df.date, dayfirst=True)

In [3]:
# Options for the county selector: the 'All' sentinel first, then every county
# found in the data in alphabetical order.
counties = ['All'] + sorted(df['county'].unique())
# Options for the year selector, in order of first appearance in the data.
years = list(df['year'].unique().astype(str))

In [5]:
plot_types = [
    'price distribution',
    'price change',
    'price by county',
]

# Input widgets; their values are bound to the plotting callback via pn.depends.
year_sel = pnw.MultiSelect(
    name='year',
    options=years,
    value=['2019', '2018', '2017'],
    width=250,
)
# Free-text box holding the upper price bound.
max_price = pnw.TextInput(
    name='max price',
    value='1000000',
    width=250,
)
# Free-text box holding the address search string.
address = pnw.TextInput(
    name='address contains',
    value='',
    width=250,
)
county_sel = pnw.MultiSelect(
    name='county',
    options=counties,
    value=['Dublin'],
    width=250,
)
# Markdown pane used to report the number of matching rows.
debug = pn.pane.Markdown()

def _filter_sales(data, year, county, price, address):
    """Return the rows of `data` matching the selections, sorted by `datetime`.

    Parameters
    ----------
    data : DataFrame with `year`, `county`, `price`, `address` and `datetime` columns.
    year : iterable of years to keep; entries are compared as strings.
    county : iterable of county names; no county filter is applied if it contains 'All'.
    price : exclusive upper price bound, as a number or numeric text.
    address : text that must occur in the address (case-insensitive, literal);
        None or '' disables the filter.
    """
    try:
        price_cap = float(price)
    except (TypeError, ValueError):
        # The price comes from a free-text box; apply no cap until it parses.
        price_cap = float('inf')

    # `year` is built by slicing the date string, so compare as strings.
    wanted_years = [str(y) for y in year]
    mask = data.year.isin(wanted_years) & (data.price < price_cap)
    if 'All' not in county:
        mask &= data.county.isin(county)
    if address:
        # case=False: the query is matched regardless of capitalisation.
        # regex=False: characters such as '(' in the query are taken literally.
        # na=False: rows without an address are treated as non-matching.
        mask &= data.address.str.contains(address, case=False, regex=False, na=False)
    return data[mask].sort_values(by='datetime')


@pn.depends(year=year_sel.param.value, county=county_sel.param.value,
            price=max_price.param.value, address=address.param.value)
def plots(year=[2010],county=['Dublin'],price=1e6,address=None):
    """Build the viewer layout for the PPR dataset from the widget selections.

    Filters the module-level `df` by year, county, maximum price and address
    text, then returns a one-column HoloViews layout holding a price histogram,
    median-price bars per county and year, a rolling mean price curve and a
    table of the matching rows. The `debug` pane is updated with the number of
    matching rows as a side effect.

    Parameters
    ----------
    year : list of years to include.
    county : list of county names, or a list containing 'All' for every county.
    price : exclusive upper price bound (number or numeric text); text that
        cannot be parsed disables the bound.
    address : text the address must contain; None or '' disables the filter.

    Returns
    -------
    holoviews.Layout
    """
    max_table_rows = 50000

    x = _filter_sales(df, year, county, price, address)

    # Median and standard deviation of the price per county and year.
    pricebycounty = x.groupby(['county','year']).agg({'price':['median','std']}).reset_index()
    pricebycounty.columns = ['county','year','price','std']

    # np.histogram returns (counts, bin_edges).
    counts, edges = np.histogram(x.price, bins=30)
    hist = hv.Histogram((edges, counts))

    bars = hv.Bars(pricebycounty, ['county', 'year'], ['price'])

    # Rolling window of 1/20th of the matching rows; at least 1 so that a
    # selection with fewer than 20 rows does not yield a zero-width window.
    w = max(1, len(x) // 20)
    r = x.set_index('datetime').price.rolling(w, win_type='triang').mean()
    avg_curve = hv.Curve(r)

    cols = ['datetime','price','address']
    table = hv.Table(x[cols][:max_table_rows])

    layout = hv.Layout(hist + bars + avg_curve + table).cols(1)
    layout.opts(
        # yformatter shows the price axis as whole numbers.
        opts.Bars(width=900, title='median price', xrotation=90,
                  color=hv.Cycle('Category20'), yformatter='%.0f'),
        opts.Histogram(height=200, width=900, title='price distribution'),
        opts.Curve(height=200, width=900, title='price over time', color='red'),
        opts.Table(width=900)
    )
    debug.object = '## %s entries' %len(x)
    return layout

# Sidebar: title followed by the input widgets and the row-count pane.
widgets = pn.WidgetBox(
    '## PPR dataset viewer',
    year_sel,
    county_sel,
    max_price,
    address,
    debug,
)
# Sidebar on the left, reactive plot layout on the right.
app = pn.Row(
    pn.Column(widgets, width=300),
    plots,
)
app

In [5]:
#renderer = hv.renderer('bokeh')
#renderer = renderer.instance(mode='server')