## Residential Property Price Register Ireland dataset

https://www.propertypriceregister.ie/


In [158]:
import os, sys, io, random
import string
import numpy as np
import pandas as pd
from importlib import reload

from bokeh.plotting import figure, output_file, show, output_notebook
from bokeh.models import ColumnDataSource, Plot, LinearAxis, Grid, CustomJS, Slider, HoverTool, NumeralTickFormatter, Arrow, NormalHead
from bokeh.models import LinearAxis, Range1d, DataRange1d, BoxZoomTool
from bokeh.models.glyphs import Text, Rect
from bokeh.layouts import gridplot, column, row
from bokeh.embed import components
import bokeh.models.widgets as bhw
from bokeh.models.callbacks import CustomJS
#output_notebook()

import param
import panel as pn
import panel.widgets as pnw
import holoviews as hv
from holoviews import opts
import datashader as ds
hv.extension('bokeh','matplotlib')

In [206]:
# Load the full Property Price Register export (gzip-compressed CSV).
df = pd.read_csv('PPR-ALL.csv.gz')
# Characters 6-9 of the raw date string hold the four-digit year, kept as text
# so it can be matched against the string options of the year selector.
df['year'] = df.date.str.slice(6,10)
# The year offset above implies a dd/mm/yyyy layout (assumed from the register's
# published format -- confirm against the CSV). Without dayfirst=True pandas
# reads ambiguous dates such as 01/02/2019 month-first, which scrambles the
# chronological ordering the price-over-time curve depends on.
df['datetime'] = pd.to_datetime(df.date, dayfirst=True)

In [207]:
# Selector options: every county present in the data, preceded by an 'All'
# entry, and every distinct year as a string.
counties = ['All', *df.county.unique()]
years = [*df.year.unique().astype(str)]

In [238]:
# Plot kinds; not referenced by the widgets defined in this cell.
plot_types = ['distribution', '']

# Sidebar controls. Their current values are fed to the plotting callback.
year_sel = pnw.MultiSelect(
    name='year',
    value=['2019'],
    options=years,
)
max_price = pnw.TextInput(
    name='max price',
    value='1000000',
)
address = pnw.TextInput(
    name='address',
    value='',
)
county_sel = pnw.MultiSelect(
    name='county',
    value=['All'],
    options=counties,
)
# Markdown pane used to report how many rows the current filters matched.
debug = pn.pane.Markdown()

def _filter_sales(frame, year, county, price, address):
    """Return the rows of ``frame`` matching the selections, sorted by sale date.

    Args:
        frame: DataFrame with ``year``, ``price``, ``county``, ``address`` and
            ``datetime`` columns.
        year: Iterable of years to keep; a single str/int is also accepted.
        county: Iterable of county names, a single name, or None. None or a
            selection containing 'All' disables the county filter.
        price: Exclusive upper bound on price. Anything ``float()`` cannot
            parse (e.g. an empty text box) disables the price cap.
        address: Case-insensitive literal substring to look for in the address;
            empty or None disables the address filter.
    """
    # The widgets pass lists; tolerate a bare scalar such as the signature default.
    if isinstance(year, (str, int)):
        year = [year]
    # The year column holds strings, so compare like with like.
    selected = frame[frame.year.isin([str(y) for y in year])]

    try:
        limit = float(price)
    except (TypeError, ValueError):
        # A half-typed or cleared price box should not break the whole view.
        limit = float('inf')
    selected = selected[selected.price < limit]

    if isinstance(county, str):
        county = [county]
    if county is not None and 'All' not in county:
        selected = selected[selected.county.isin(county)]

    if address:
        # Lower-case both sides so the match is really case-insensitive, match
        # the text literally so characters like '(' or '.' cannot raise or act
        # as regex syntax, and treat missing addresses as non-matching instead
        # of producing a NaN mask that boolean indexing rejects.
        hits = selected.address.str.lower().str.contains(
            address.lower(), regex=False, na=False)
        selected = selected[hits]

    return selected.sort_values(by='datetime')


@pn.depends(year=year_sel.param.value, county=county_sel.param.value,
            price=max_price.param.value, address=address.param.value)
def plots(year=2010,county=None,price=1e6,address=None):
    """Build the dashboard layout for the current widget selections.

    Filters the module-level ``df`` by year, maximum price, county and address,
    then returns a single-column HoloViews layout holding a price histogram, a
    per-county box plot, a rolling-mean price curve and a table of the matching
    rows. Also writes the number of matching rows to the ``debug`` pane.

    Args:
        year: Selected years (list from the widget; a scalar is accepted).
        county: Selected counties; None or a selection containing 'All' keeps
            every county.
        price: Exclusive maximum price, as a number or numeric string.
        address: Substring to search for in the address; empty/None disables it.

    Returns:
        The ``hv.Layout`` with the four views stacked in one column.
    """
    x = _filter_sales(df, year, county, price, address)

    # np.histogram returns (counts, bin_edges); the tuple is passed on in that order.
    counts, edges = np.histogram(x.price, bins=30)
    hist = hv.Histogram((counts, edges))

    tidy2 = x.melt(id_vars=['county'], value_vars=['price'])
    bars = hv.BoxWhisker(tidy2, ['county', 'variable'], ['value'])
    # NOTE(review): the relabelled object is discarded -- confirm whether these
    # options are meant to reach the displayed box plot.
    bars.relabel('Tick formatters').opts(xformatter='%d',xlabel='price')
    cols = ['datetime','price','address']

    # Window of roughly 5% of the rows. Clamp to 1 because fewer than 20 rows
    # would otherwise give a zero-width rolling window.
    w = max(1, len(x) // 20)
    r = x.set_index('datetime').price.rolling(w, win_type ='triang').mean()
    avg_curve = hv.Curve(r)
    # NOTE(review): same discarded-result pattern as the box plot above.
    avg_curve.relabel('Tick formatters').opts(yformatter='%d')
    # Cap the table at 50000 rows.
    table = hv.Table(x[cols][:50000])

    layout =  hv.Layout(hist + bars + avg_curve + table).cols(1)
    layout.opts(
        opts.Bars(xformatter='%5.d'),
        opts.Histogram(height=200, width=800, title='price distribution'),
        opts.BoxWhisker(height=300, width=800, title='mean price'),
        opts.Curve(height=200, width=800, title='price over time', color='red'),
        opts.Table(width=800)
    )
    debug.object = '## %s entries' % len(x)
    return layout

# Sidebar: a title followed by the filter controls and the row-count pane.
widgets = pn.WidgetBox(
    '## PPR dataset',
    year_sel,
    county_sel,
    max_price,
    address,
    debug,
)
# Controls on the left, the reactive plot layout on the right.
sidebar = pn.Column(widgets)
app = pn.Row(sidebar, plots)
# Bare expression so the notebook renders the app as the cell output.
app

In [213]:
from bokeh.resources import INLINE

# Write the app to out.html, passing Bokeh's INLINE resources and embed=True.
app.save(
    'out.html',
    embed=True,
    resources=INLINE,
)