In [8]:
# Import libraries
import pandas as pd
import numpy as np
import math

import geopandas as gpd
import json

from bokeh.io import output_notebook, show, output_file, curdoc
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import brewer, inferno

from bokeh.io.doc import curdoc
from bokeh.models import Slider, HoverTool, Select
from bokeh.layouts import widgetbox, WidgetBox, row, column
from bokeh.plotting import figure


from bokeh.palettes import Category20_16, inferno

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models import Label
from bokeh.events import DoubleTap
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

from bokeh.models import (CategoricalColorMapper, HoverTool, 
                          ColumnDataSource, Panel, 
                          FuncTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, 
                                  Tabs, CheckboxButtonGroup, 
                                  TableColumn, DataTable, Select)

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application

In [9]:
def style(p):
    # Title 
    p.title.align = 'center'
    p.title.text_font_size = '20pt'
    p.title.text_font = 'serif'

    # Axis titles
    p.xaxis.axis_label_text_font_size = '14pt'
    p.xaxis.axis_label_text_font_style = 'bold'
    p.yaxis.axis_label_text_font_size = '14pt'
    p.yaxis.axis_label_text_font_style = 'bold'

    # Tick labels
    p.xaxis.major_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_size = '12pt'

    return p

In [23]:
def map_tab(doc):

    df = pd.read_csv('../data/DC_Properties.csv')

    # Fill house square foot zero values with the average house square footage by bedroom
    average_data = df.groupby('BEDRM').LANDAREA.mean()
    # Use average landarea by bedroom for each 0 value in each bedroom group, up to 14 bedrooms 
    for i in range(0, 14): 
        df.loc[(df['LANDAREA'] == 0) & (df['BEDRM'] == i), 'LANDAREA'] = average_data.loc[i]

    df['price_sf'] = df['PRICE'] / df['LANDAREA']
    df = df[df['ASSESSMENT_NBHD'].notna()]
    df = df[df['PRICE'].notna()]
    df = df[df['LANDAREA'].notna()]
    df = df[df['price_sf'].notna()]

    neighborhood_data = df.groupby(
        ['ASSESSMENT_NBHD', 'CENSUS_TRACT']
    ).agg(
        {
        'PRICE': ['count', 'mean', 'median'],
        'LANDAREA': ['mean'],
        'price_sf': ['mean']
        }
    )

    #Reset the index to 1 level to fill in year
    neighborhood_data = neighborhood_data.set_axis(neighborhood_data.columns.map('_'.join), axis=1, inplace=False)
    neighborhood_data = neighborhood_data.reset_index(level=[0,1])

    # Change data types to integer for price_sf and year
    neighborhood_data = neighborhood_data.astype({'PRICE_mean': 'int'})
    neighborhood_data = neighborhood_data.astype({'PRICE_median': 'int'})
    neighborhood_data = neighborhood_data.astype({'LANDAREA_mean': 'int'})
    neighborhood_data = neighborhood_data.astype({'price_sf_mean': 'int'})

    # Read in shapefile and examine data
    dc = gpd.read_file('../data/Census_Tracts_in_2010.shp')
    # Set the Coordinate Referance System (crs) for projections
    # ESPG code 4326 is also referred to as WGS84 lat-long projection
    dc.crs = {'init': 'epsg:4326'}

    # Rename columns in geojson map file
    dc = dc.rename(columns={'geometry': 'geometry'}).set_geometry('geometry')


    dc.sort_values(by=['TRACT'])

    neighborhood_data.index = neighborhood_data.index.astype(int)
    dc.TRACT = dc.TRACT.astype(int)

    # This dictionary contains the formatting for the data in the plots
    format_data = [('PRICE_count', 0, 100,'0,0', 'Number of properties'),
                ('PRICE_mean', 0, 1_500_000,'$0,0', 'Average Sales Price'),
                ('PRICE_median', 0, 1_500_000, '$0,0', 'Median Sales Price'),
                ('LANDAREA_mean', 500, 5000,'0,0', 'Average Square Footage'),
                ('price_sf_mean', 0, 2000,'$0,0', 'Average Price Per Square Foot')]
    
    #Create a DataFrame object from the dictionary 
    format_df = pd.DataFrame(format_data, columns = ['field' , 'min_range', 'max_range' , 'format', 'verbage'])

    def json_data():

        # Merge the GeoDataframe object (dc) with the neighborhood summary data (neighborhood)
        merged = pd.merge(dc, neighborhood_data, left_on='TRACT', right_on='CENSUS_TRACT', how='left')

        # Fill the null values
        values = {'PRICE_count': 0, 'PRICE_mean': 0, 'PRICE_median': 0,
                    'sf_mean': 0, 'price_sf_mean': 0, 'ASSESSMENT_NBHD': "", 'LANDAREA_mean': 0}
        merged = merged.fillna(value=values)
        # Bokeh uses geojson formatting, representing geographical features, with json
        # Convert to json
        merged_json = json.loads(merged.to_json())

        # Convert to json preferred string-like object 
        json_data = json.dumps(merged_json)
        return json_data
    
    def style(p):
        # Title 
        p.title.align = 'center'
        p.title.text_font_size = '20pt'
        p.title.text_font = 'serif'

        # Axis titles
        p.xaxis.axis_label_text_font_size = '14pt'
        p.xaxis.axis_label_text_font_style = 'bold'
        p.yaxis.axis_label_text_font_size = '14pt'
        p.yaxis.axis_label_text_font_style = 'bold'

        # Tick labels
        p.xaxis.major_label_text_font_size = '12pt'
        p.yaxis.major_label_text_font_size = '12pt'

        return p
    
    # Create a plotting function
    def make_plot(field_name):    
        # Set the format of the colorbar
        min_range = format_df.loc[format_df['field'] == field_name, 'min_range'].iloc[0]
        max_range = format_df.loc[format_df['field'] == field_name, 'max_range'].iloc[0]
        field_format = format_df.loc[format_df['field'] == field_name, 'format'].iloc[0]

        # Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
        color_mapper = LinearColorMapper(palette = palette, low = min_range, high = max_range)

        # Create color bar.
        format_tick = NumeralTickFormatter(format=field_format)
        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18, formatter=format_tick,
        border_line_color=None, location = (0, 0))

        # Create figure object.
        verbage = format_df.loc[format_df['field'] == field_name, 'verbage'].iloc[0]

        p = figure(title = verbage + ' by Neighborhood for Homes in DC', 
                    plot_height = 700, plot_width = 600,
                    toolbar_location = None)
        p.xgrid.grid_line_color = None
        p.ygrid.grid_line_color = None
        p.axis.visible = False

        # Add patch renderer to figure. 
        p.patches('xs','ys', source = geosource, fill_color = {'field' : field_name, 'transform' : color_mapper},
                line_color = 'black', line_width = 0.25, fill_alpha = 1)

        # Specify color bar layout.
        p.add_layout(color_bar, 'right')

        # Add the hover tool to the graph
        p.add_tools(hover)
        return p
    
    # Define the callback function: update_plot for updating the colormap
    def update_plot(attr, old, new):
        new_data = json_data()

        # The input cr is the criteria selected from the select box
        cr = select.value
        input_field = format_df.loc[format_df['verbage'] == cr, 'field'].iloc[0]

        # Update the plot based on the changed inputs
        p = make_plot(input_field)
        
        p.on_event(DoubleTap, callback)
        
        # set up layout
        widgets = column(widgetbox(select))
        main_row = row(p, p_histogram)

        # Make a column layout of widgetbox(slider) and plot, and add it to the current document
        # Display the current document
        layout = column(main_row, widgets)

        
        # Make a tab with the layout 
        tab = Panel(child=layout, title = 'Map')

        tabs = Tabs(tabs=[tab])
        doc.clear()
        doc.add_root(tabs)
    
        # Update the data
        geosource.geojson = new_data
        
    def histogram_plot():
        # Create the blank plot
        p_temp = figure(plot_height = 600, plot_width = 400, 
                   title = 'Real Estate prices in Washinton D.C.',
                   x_axis_label = 'Price ($)', 
                   y_axis_label = 'Number of properties')

        # Add a quad glyph with source this time
        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')


        # Hover tool referring to our own data field using @ 
        h1 = HoverTool(tooltips = [('Number Of Properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        # style the plot
        p_temp = style(p_temp)

        # add the hovertool
        p_temp.add_tools(h1)
        return p_temp
    
    def get_polygon_by_click(x, y):
        for i, polygon in enumerate(dc.geometry):
            if polygon.contains(Point(x, y)):
                clicked_tract = dc.TRACT.iloc[i]
                update_histogram(clicked_tract)

    def histogram_data(tract):
        df_temp = df.loc[df.CENSUS_TRACT == tract]
        ### for histogram
        arr_hist, edges = np.histogram(df_temp['PRICE'],
                                       bins = np.arange(0,1_500_000,50_000),
                                       range=[0,df_temp.PRICE.quantile(0.75)])

        # # Put the information in a dataframe
        prices = pd.DataFrame({'property_count': arr_hist, 
                               'left': edges[:-1], 
                               'right': edges[1:]})
        # Add a column showing the extent of each interval
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]        
        # Convert dataframe to column data source
        ### end for histogram
        return ColumnDataSource(prices)
        
                    
    def update_histogram(tract):
        new_data = histogram_data(tract)
        src.data.update(new_data.data)
        
    # add a dot where the click happened on map
    def callback(event):
        Coords=(event.x,event.y)
        get_polygon_by_click(event.x, event.y)
        coordList.append(Coords) 
        
    ### initial histogram setup 
    arr_hist, edges = np.histogram(df['PRICE'],
                                   bins = np.arange(0,1_500_000,50_000),
                                   range=[0,df.PRICE.quantile(0.75)])

    # # Put the information in a dataframe
    prices = pd.DataFrame({'property_count': arr_hist, 
                           'left': edges[:-1], 
                           'right': edges[1:]})
    # Add a column showing the extent of each interval
    prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]        
    # Convert dataframe to column data source
    src = ColumnDataSource(prices)
    p_histogram = histogram_plot()
    p_histogram = style(p_histogram)
    
    ### end for histogram
    
    
    ### shapefile census tract setup
    # Input geojson source that contains features for plotting 
    geosource = GeoJSONDataSource(geojson = json_data())
    input_field = 'PRICE_median'

    # Define a sequential multi-hue color palette.
    palette = inferno(100)

    # Reverse color order so that dark blue is highest obesity.
    #palette = palette[::-1]

        # Add hover tool
    hover = HoverTool(tooltips = [ ('Neighbourhood','@ASSESSMENT_NBHD'),
                                    ('Number of properties', '@PRICE_count'),
                                    ('Average Price', '$@PRICE_mean{,}'),
                                    ('Median Price', '$@PRICE_median{,}'),
                                    ('Average landarea', '@LANDAREA_mean{,}'),
                                    ('Price/SF ', '$@price_sf_mean{,}')])

    # Call the plotting function
    p = make_plot(input_field)
    p = style(p)
    coordList=[]
    p.on_event(DoubleTap, callback)
        # Make a selection object: select
    select = Select(title='Select Criteria:', value='Median Sales Price', options=['Median Sales Price', 'Average Sales Price',
                                                                                    'Average Price Per Square Foot',
                                                                                    'Average Square Footage', 'Number of Sales'])
    select.on_change('value', update_plot)
    ### shapefile census tract setup
    
        
    # set up layout
    widgets = column(widgetbox(select))
    main_row = row(p, p_histogram)
    
    # Make a column layout of widgetbox(slider) and plot, and add it to the current document
    # Display the current document
    layout = column(main_row, widgets)

    # Make a tab with the layout 
    tab = Panel(child=layout, title = 'Map')
    
#     tab1 = make_tab()
    tabs = Tabs(tabs=[tab])
    doc.add_root(tabs)

    
# Set up an application
# handler = FunctionHandler(map_tab)
# app = Application(handler)
output_notebook()
show(map_tab)

ERROR:bokeh.server.protocol_handler:error handling message Message 'PATCH-DOC' (revision 1) content: {'events': [{'kind': 'ModelChanged', 'model': {'type': 'Select', 'id': '3590'}, 'attr': 'value', 'new': 'Number of Sales'}], 'references': []}: IndexError('single positional indexer is out-of-bounds')


In [18]:
def aggregate_census(dataframe, constraint_variable, constraint, aggregate):
    
    df_sub = dataframe.loc[:,[constraint_variable,'PRICE','CENSUS_TRACT']].copy()
    df_sub = df_sub.loc[constraint(df_sub.loc[:,constraint_variable])]    
    agg_df = df_sub.groupby('CENSUS_TRACT',as_index=False).agg(aggregate)
    return pd.merge(agg_df,df_geo,on='CENSUS_TRACT')

In [19]:
import shapefile as shp
def shp_to_df(sf):
    # Extract labels and data
    fields = [x[0] for x in sf.fields[1:]] # Column values
    records = sf.records() # data for dataframe
    shape_data = [x.points for x in sf.shapes()]

    # Create dataframe with values
    df_geo = pd.DataFrame(columns=fields, data=records)
    df_geo['coords'] = shape_data
    
    return df_geo
# First plot: All districts with number inside

Shp_path = '../Data/Census_Tracts_in_2010.shp'
sf = shp.Reader(Shp_path)
df_geo = shp_to_df(sf)
df_geo = df_geo.loc[:,['TRACT','coords']]
df_geo.TRACT = df_geo.TRACT.astype(float)
df_geo.columns = ['CENSUS_TRACT','COORDS']