In [90]:
# Import libraries
import pandas as pd
import numpy as np
import math

import geopandas as gpd
import json

from bokeh.io import output_notebook, show, output_file, curdoc, hplot
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import brewer

from bokeh.io.doc import curdoc
from bokeh.models import Slider, HoverTool, Select
from bokeh.layouts import widgetbox, WidgetBox, row, column
from bokeh.plotting import figure

from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs, TableColumn, DataTable

from bokeh.palettes import Category20_16, inferno

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models import Label

In [91]:
def style(p):
    # Title 
    p.title.align = 'center'
    p.title.text_font_size = '20pt'
    p.title.text_font = 'serif'

    # Axis titles
    p.xaxis.axis_label_text_font_size = '14pt'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_style = 'bold'

    # Tick labels
    p.xaxis.major_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_size = '12pt'

    return p

In [92]:
# Import libraries
import pandas as pd
import numpy as np
import math

import geopandas as gpd
import json

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter, Text
from bokeh.palettes import brewer, inferno

from bokeh.io.doc import curdoc
from bokeh.layouts import widgetbox, row, column
from bokeh.models import (CategoricalColorMapper, HoverTool, 
                          ColumnDataSource, Panel, 
                          FuncTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
                                  Tabs, CheckboxButtonGroup, 
                                  TableColumn, DataTable, Select)


In [93]:
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

In [94]:
def make_tab():
    def make_dataset(neighbourhood_list, range_start = 0, range_end = 10000000, bin_width = 50):
        by_neighbourhood = pd.DataFrame(columns = ['proportion', 'left', 'right'
                                                  'f_proportion', 'f_interval',
                                                  'name', 'color'])
        range_extent = range_end - range_start
        
        # Iterate through all neighbourhoods
        for i, neighbourhood_name in enumerate(neighbourhood_list):
            # subset relevant to the neighbourhood
            subset = df[df.ASSESSMENT_NBHD == neighbourhood_name]
            
            # create histogram with specified bins and range
            price_hist, edges = np.histogram(subset['PRICE'],
                                          bins = bin_width,
                                          range = [range_start, range_end])
            
            # divide the counts by the total to get a proportion and create df
            price_df = pd.DataFrame({'proportion': price_hist / np.sum(price_hist),
                                    'left': edges[:-1], 'right': edges[1:]})
            
            # format the proportion for display
            price_df['f_proportion'] = ['%.2f' % proportion for proportion in price_df['proportion']]
            
            # format the interval for display
            price_df['f_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(price_df['left'], price_df['right'])]
            
            # assign the neighbourhood for labels
            price_df['name'] = neighbourhood_name
            
            # color each neighbourhood differentely
            price_df['color'] = inferno(len(neighbourhood_list))[i]
            
            # add to overall dataframe
            by_neighbourhood = by_neighbourhood.append(price_df)
            
        # overall dataframe
        by_neighbourhood = by_neighbourhood.sort_values(['name', 'left'])
        
        # convert df to columndatasource
        return ColumnDataSource(by_neighbourhood)
    # The function should takes in a columndatasource and returns a bokeh plot object
    def make_plot(src):
            # Blank plot with correct labels
            p = figure(plot_width = 700, plot_height = 700, 
                      title = 'Histogram of Real Estate Prices by Neighbourhood',
                      x_axis_label = 'Price ($)', y_axis_label = 'Proportion')

            # Quad glyphs to create a histogram
            p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
                   color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
                   hover_fill_alpha = 1.0, line_color = 'black')

            # Hover tool with vline mode
            hover = HoverTool(tooltips=[('Neighbourhood', '@name'), 
                                        ('Price', '@f_interval'),
                                        ('Proportion', '@f_proportion')],
                              mode='vline')

            p.xaxis[0].formatter = NumeralTickFormatter(format="$0")
            p.add_tools(hover)

            # Styling
            p = style(p)

            return p 

        # Update function takes three default parameters
    def update(attr, old, new):
        # Get the list of selected neighbourhoods from the graph
        neighbourhoods_to_plot = [neighbourhood_selection.labels[i] for i in 
                            neighbourhood_selection.active]
        # Make a new dataset based on the selected neighbourhoods and the 
        # make_dataset function defined earlier
        new_src = make_dataset(neighbourhoods_to_plot)

        # Update the source used in the quad glpyhs
        src.data.update(new_src.data)
        
    def static_plot():
        float_vars = ['LATITUDE','LONGITUDE','PRICE','GBA','LIVING_GBA','LANDAREA','AYB','YR_RMDL','EYB', 'ASSESSMENT_NBHD']
        arr_hist, edges = np.histogram(df['PRICE'],
                                       bins = 'auto',
                                       range=[0,df.PRICE.quantile(0.75)])

        # # Put the information in a dataframe
        prices = pd.DataFrame({'property_count': arr_hist, 
                               'left': edges[:-1], 
                               'right': edges[1:]})
        # Add a column showing the extent of each interval
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]        
        # Convert dataframe to column data source
        src = ColumnDataSource(prices)
        # Create the blank plot
        p_temp = figure(plot_height = 600, plot_width = 700, 
                   title = 'Histogram of Real Estate prices in Washinton D.C.',
                   x_axis_label = 'Price ($)', 
                   y_axis_label = 'Number of properties')

        # Add a quad glyph with source this time
        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')


        # Hover tool referring to our own data field using @ 
        h = HoverTool(tooltips = [('Number Of Properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        # style the plot
        p_temp = style(p)

        # add the hovertool
        p_temp.add_tools(h)
        return p_temp
        


    available_neighbourhoods = list(df.ASSESSMENT_NBHD.dropna().unique())
    # Create the checkbox selection element, available neighbourhoods is a  
    # list of all neighbourhoods in the data
    neighbourhood_selection = CheckboxGroup(labels=available_neighbourhoods, active = [0, 1])
    # Link a change in selected buttons to the update function
    neighbourhood_selection.on_change('active', update)    
    
    initial_neighbourhoods = [neighbourhood_selection.labels[i] for i in neighbourhood_selection.active]
    
    src = make_dataset(initial_neighbourhoods)
    
    p = make_plot(src)
    
    # Put controls in a single element
    controls = WidgetBox(neighbourhood_selection)
    
    # Create a row layout
    layout = row(controls, p)
    
    # Make a tab with the layout 
    tab = Panel(child=layout, title = 'Neighbourhood Histogram')
    return tab
#     tabs = Tabs(tabs=[tab])
    
#     doc.add_root(tabs)

In [99]:
def map_tab(doc):

    df = pd.read_csv('../data/DC_Properties.csv')   
    df.loc[:,'SALEDATE'] = pd.to_datetime(df.loc[:,'SALEDATE'],format='%Y-%m-%d %H:%M:%S')
    df.loc[:,'SALEYEAR'] = [x.year for x in df.SALEDATE]

    # Fill house square foot zero values with the average house square footage by bedroom
    average_data = df.groupby('BEDRM').LANDAREA.mean()
    # Use average landarea by bedroom for each 0 value in each bedroom group, up to 14 bedrooms 
    for i in range(0, 14): 
        df.loc[(df['LANDAREA'] == 0) & (df['BEDRM'] == i), 'LANDAREA'] = average_data.loc[i]

    df['price_sf'] = df['PRICE'] / df['LANDAREA']
    df = df[df['SALEYEAR'].notna()]
    df = df[df['ASSESSMENT_NBHD'].notna()]
    df = df[df['PRICE'].notna()]
    df = df[df['LANDAREA'].notna()]
    df = df[df['price_sf'].notna()]
    df = df[df['SALEDATE'].notna()]
    neighborhood_data = df.groupby(
        ['SALEYEAR', 'ASSESSMENT_NBHD', 'CENSUS_TRACT']
    ).agg(
        {
        'PRICE': ['count', 'mean', 'median'],
        'LANDAREA': ['mean'],
        'price_sf': ['mean']
        }
    )

    #Reset the index to 1 level to fill in year
    neighborhood_data = neighborhood_data.set_axis(neighborhood_data.columns.map('_'.join), axis=1, inplace=False)
    neighborhood_data = neighborhood_data.reset_index(level=[0,1])

    # Change data types to integer for price_sf and year
    neighborhood_data = neighborhood_data.astype({'PRICE_mean': 'int'})
    neighborhood_data = neighborhood_data.astype({'PRICE_median': 'int'})
    neighborhood_data = neighborhood_data.astype({'LANDAREA_mean': 'int'})
    # neighborhood_data = neighborhood_data.astype({'price_sf_mean': 'int'})
    neighborhood_data = neighborhood_data.astype({'SALEYEAR': 'int'})

    # Read in shapefile and examine data
    dc = gpd.read_file('../data/Census_Tracts_in_2010.shp')
    # Set the Coordinate Referance System (crs) for projections
    # ESPG code 4326 is also referred to as WGS84 lat-long projection
    dc.crs = {'init': 'epsg:4326'}

    # Rename columns in geojson map file
    dc = dc.rename(columns={'geometry': 'geometry'}).set_geometry('geometry')


    dc.sort_values(by=['TRACT'])

    neighborhood_data.index = neighborhood_data.index.astype(int)
    dc.TRACT = dc.TRACT.astype(int)

    # This dictionary contains the formatting for the data in the plots
    format_data = [('PRICE_count', 0, 100,'0,0', 'Number of properties remodeled'),
                ('PRICE_mean', 0, 1_500_000,'$0,0', 'Average Sales Price'),
                ('PRICE_median', 0, 1_500_000, '$0,0', 'Median Sales Price'),
                ('LANDAREA_mean', 500, 5000,'0,0', 'Average Square Footage'),
                ('price_sf_mean', 0, 2000,'$0,0', 'Average Price Per Square Foot')]
    
    #Create a DataFrame object from the dictionary 
    format_df = pd.DataFrame(format_data, columns = ['field' , 'min_range', 'max_range' , 'format', 'verbage'])

    def json_data(selectedYear):
        yr = selectedYear

        # Pull selected year from neighborhood summary data
        df_yr = neighborhood_data[neighborhood_data['SALEYEAR'] == yr]

        # Merge the GeoDataframe object (dc) with the neighborhood summary data (neighborhood)
        merged = pd.merge(dc, df_yr, left_on='TRACT', right_index=True, how='left')

        # Fill the null values
        values = {'year': yr, 'PRICE_count': 0, 'PRICE_mean': 0, 'PRICE_median': 0,
                    'sf_mean': 0, 'price_sf_mean': 0, 'ASSESSMENT_NBHD': "", 'LANDAREA_mean': 0}
        merged = merged.fillna(value=values)

        # Bokeh uses geojson formatting, representing geographical features, with json
        # Convert to json
        merged_json = json.loads(merged.to_json())

        # Convert to json preferred string-like object 
        json_data = json.dumps(merged_json)
        return json_data
    
    def style(p):
        # Title 
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'

        return p
    
    # Create a plotting function
    def make_plot(field_name):    
        # Set the format of the colorbar
        min_range = format_df.loc[format_df['field'] == field_name, 'min_range'].iloc[0]
        max_range = format_df.loc[format_df['field'] == field_name, 'max_range'].iloc[0]
        field_format = format_df.loc[format_df['field'] == field_name, 'format'].iloc[0]

        # Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
        color_mapper = LinearColorMapper(palette = palette, low = min_range, high = max_range)

        # Create color bar.
        format_tick = NumeralTickFormatter(format=field_format)
        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18, formatter=format_tick,
        border_line_color=None, location = (0, 0))

        # Create figure object.
        verbage = format_df.loc[format_df['field'] == field_name, 'verbage'].iloc[0]

        p = figure(title = verbage + ' by Neighborhood for Homes in DC by Year remodeled', 
                    plot_height = 700, plot_width = 1000,
                    toolbar_location = None)
        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = None
        p.axis.visible = False

        # Add patch renderer to figure. 
        p.patches('xs','ys', source = geosource, fill_color = {'field' : field_name, 'transform' : color_mapper},
                line_color = 'black', line_width = 0.25, fill_alpha = 1)

        # Specify color bar layout.
        p.add_layout(color_bar, 'right')

        # Add the hover tool to the graph
        p.add_tools(hover)
        return p
    
    # Define the callback function: update_plot
    def update_plot(attr, old, new):
        # The input yr is the year selected from the slider
        yr = slider.value
        new_data = json_data(yr)

        # The input cr is the criteria selected from the select box
        cr = select.value
        input_field = format_df.loc[format_df['verbage'] == cr, 'field'].iloc[0]

        # Update the plot based on the changed inputs
        p = make_plot(input_field)

        # Update the layout, clear the old document and display the new document
        #layout = column(p, widgetbox(select), widgetbox(slider))
        #curdoc().clear()
        #curdoc().add_root(layout)

        # Update the data
        geosource.geojson = new_data
        
    def static_plot():
        float_vars = ['LATITUDE','LONGITUDE','PRICE','GBA','LIVING_GBA','LANDAREA','AYB','YR_RMDL','EYB', 'ASSESSMENT_NBHD']
        arr_hist, edges = np.histogram(df['PRICE'],
                                       bins = 'auto',
                                       range=[0,df.PRICE.quantile(0.75)])

        # # Put the information in a dataframe
        prices = pd.DataFrame({'property_count': arr_hist, 
                               'left': edges[:-1], 
                               'right': edges[1:]})
        # Add a column showing the extent of each interval
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]        
        # Convert dataframe to column data source
        src = ColumnDataSource(prices)
        # Create the blank plot
        p_temp = figure(plot_height = 600, plot_width = 700, 
                   title = 'Histogram of Real Estate prices in Washinton D.C.',
                   x_axis_label = 'Price ($)', 
                   y_axis_label = 'Number of properties')

        # Add a quad glyph with source this time
        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')


        # Hover tool referring to our own data field using @ 
        h = HoverTool(tooltips = [('Number Of Properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        # style the plot
        p_temp = style(p)

        # add the hovertool
        p_temp.add_tools(h)
        print(p_temp)
        return p_temp
        
        
    # Input geojson source that contains features for plotting 
    geosource = GeoJSONDataSource(geojson = json_data(2002))
    input_field = 'PRICE_median'

    # Define a sequential multi-hue color palette.
    palette = inferno(100)

    # Reverse color order so that dark blue is highest obesity.
    #palette = palette[::-1]

        # Add hover tool
    hover = HoverTool(tooltips = [ ('Neighbourhood','@ASSESSMENT_NBHD'),
                                    ('Number of properties remodeled', '@PRICE_count'),
                                    ('Average Price', '$@PRICE_mean{,}'),
                                    ('Median Price', '$@PRICE_median{,}'),
                                    ('Average landarea', '@LANDAREA_mean{,}'),
                                    ('Price/SF ', '$@price_sf_mean{,}')])

    # Call the plotting function
    p = make_plot(input_field)
    p = style(p)
    # Make a slider object: slider 
    slider = Slider(title = 'Year',start = 1990, end = 2018, step = 1, value = 2000)
    #slider.callback_policy = "mouseup"
    slider.on_change('value_throttled', update_plot)

    # Make a selection object: select
    select = Select(title='Select Criteria:', value='Median Sales Price', options=['Median Sales Price', 'Average Sales Price',
                                                                                    'Average Price Per Square Foot',
                                                                                    'Average Square Footage', 'Number of Sales'])
    select.on_change('value', update_plot)

    p_static = static_plot()
    
        
    # set up layout
    widgets = column(widgetbox(select), widgetbox(slider))
    main_row = row(p, p_static, widgets)
    
    # Make a column layout of widgetbox(slider) and plot, and add it to the current document
    # Display the current document
    layout = column(main_row)

    # Make a tab with the layout 
    tab = Panel(child=layout, title = 'Map')
    
#     tab1 = make_tab()
    tabs = Tabs(tabs=[tab])
    doc.add_root(tabs)

    
# Set up an application
# handler = FunctionHandler(map_tab)
# app = Application(handler)
output_notebook()
show(map_tab)

Figure(id='2392', ...)


In [None]:
    def static_plot():
        float_vars = ['LATITUDE','LONGITUDE','PRICE','GBA','LIVING_GBA','LANDAREA','AYB','YR_RMDL','EYB', 'ASSESSMENT_NBHD']
        arr_hist, edges = np.histogram(df['PRICE'],
                                       bins = 'auto',
                                       range=[0,df.PRICE.quantile(0.75)])

        # # Put the information in a dataframe
        prices = pd.DataFrame({'property_count': arr_hist, 
                               'left': edges[:-1], 
                               'right': edges[1:]})
        # Add a column showing the extent of each interval
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]        
        # Convert dataframe to column data source
        src = ColumnDataSource(prices)
        # Create the blank plot
        p_temp = figure(plot_height = 600, plot_width = 700, 
                   title = 'Histogram of Real Estate prices in Washinton D.C.',
                   x_axis_label = 'Price ($)', 
                   y_axis_label = 'Number of properties')

        # Add a quad glyph with source this time
        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')


        # Hover tool referring to our own data field using @ 
        h = HoverTool(tooltips = [('Number Of Properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        # style the plot
        p_temp = style(p)

        # add the hovertool
        p_temp.add_tools(h)
        print(p_temp)
        return p_temp