# Bang for your buck - Appendix, visualization code

This is an appendix to the explanatory notebook for the final project in the course Social Data Analysis and Visualization at DTU, Spring semester 2020. It contains the code that was used to construct the interactive visualizations shown on the [Bang for your buck website](https://bang-for-your-buck-dtu.herokuapp.com/).

## Table of Contents
1. [Section 2](#section2)
2. [Section 3](#section3)
3. [Section 4](#section4)
4. [Section 5](#section5)
5. [Section 6](#section6)

In [1]:
# Pandas for data management
import pandas as pd
import geopandas as gpd

# os methods for manipulating paths
from os.path import dirname, join

# Bokeh basics 
from bokeh.io import curdoc
from bokeh.models.widgets import Tabs, Div
from bokeh.layouts import column, layout
# Import libraries
import pandas as pd
import numpy as np
import math
import json

from bokeh.io import output_notebook, show, output_file, curdoc
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import brewer, inferno
from bokeh.models.annotations import Title
from bokeh.models import Slider, HoverTool, Select
from bokeh.layouts import widgetbox, WidgetBox, row, column
from bokeh.plotting import figure
from bokeh.palettes import inferno
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models import Label
from bokeh.events import Tap
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

from bokeh.models import (CategoricalColorMapper, HoverTool, 
                          ColumnDataSource, Panel, 
                          FuncTickFormatter, SingleIntervalTicker, LinearAxis)
from bokeh.models.widgets import (CheckboxGroup, Slider, RangeSlider, CheckboxButtonGroup, 
                                  TableColumn, DataTable, Select)

In [2]:
# Configure Bokeh to render the output of later show() calls inline in this notebook.
output_notebook()

In [3]:
# Load the cleaned property records and prepare the columns used by every slide.
properties = pd.read_csv('../data/Properties_clean.csv')

# Keep only records with a latitude above 30. The explicit copy makes sure the
# column assignments below write into this frame and not into a view of the
# unfiltered data.
properties = properties[properties.LATITUDE > 30].copy()

# NOTE(review): PPSF is computed *before* zero areas are replaced below, so a
# record with AREA == 0 gets a non-finite value here. `price_sf` further down is
# the counterpart computed after the replacement - confirm which one is intended
# wherever PPSF is consumed.
properties['PPSF'] = properties.PRICE / properties.AREA

# Replace a living area of 0 with the median area of properties that have the
# same number of bedrooms (bedroom counts 0 through 13).
average_data = properties.groupby('BEDRM').AREA.median()
for bedrooms in range(0, 14):
    # A bedroom count that does not occur in the data has no median to use;
    # looking it up would raise a KeyError.
    if bedrooms not in average_data.index:
        continue
    needs_area = (properties['AREA'] == 0) & (properties['BEDRM'] == bedrooms)
    properties.loc[needs_area, 'AREA'] = average_data.loc[bedrooms]

# Price per square foot based on the corrected areas.
properties['price_sf'] = properties['PRICE'] / properties['AREA']

# Drop every record that lacks one of the fields the visualizations rely on.
properties = properties.dropna(
    subset=[
        'ASSESSMENT_NBHD',
        'PRICE',
        'AREA',
        'price_sf',
        'CENSUS_TRACT',
        'TSEYB',
        'BEDRM',
        'USECODE',
    ]
)

# Floor-divide TSEYB by 10 (a year such as 1987 becomes 198, i.e. its decade).
properties['TSEYB'] = properties['TSEYB'] // 10

# Census tract polygons used as the base map of every slide.
dc_org = gpd.read_file('../data/Census_Tracts_in_2010.shp')

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
def style(p):
    """Apply the shared look (title and axis fonts) to a plot and return it.

    The plot is modified in place; the same object is returned so the call can
    be used in an assignment.
    """
    # Centered, large serif title
    heading = p.title
    heading.align = 'center'
    heading.text_font_size = '20pt'
    heading.text_font = 'serif'

    # Both axes share the same label and tick-label settings
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '14pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '12pt'

    return p

### 1. Section 2 <a name="section2"></a>

In [5]:
def slide_2(doc):
    """Build the "median sale price" slide and add it to a Bokeh document.

    The slide shows a choropleth of the median sale price per census tract next
    to a histogram of sale prices. The histogram starts out with all properties;
    tapping a tract on the map redraws it for that tract only.

    Reads the module-level ``properties`` and ``dc_org`` frames and uses the
    module-level ``style`` helper.

    Args:
        doc: Bokeh document that receives the finished layout via ``add_root``.
    """
    df = properties.copy()

    # ---- Per-tract summary used to colour the map -------------------------
    neighborhood_data = df.groupby(
        ['CENSUS_TRACT']
    ).agg(
        {
            'PRICE': ['count', 'median'],
            'AREA': ['mean'],
            # Most frequent neighbourhood name among the tract's properties
            'ASSESSMENT_NBHD': (lambda x: x.value_counts().index[0] if len(np.unique(x)) > 0 else "")
        }
    )

    # ``agg`` produces two-level column labels such as ('PRICE', 'median');
    # join them into 'PRICE_median'. Assigning ``columns`` directly avoids
    # ``set_axis(..., inplace=False)``, whose ``inplace`` keyword newer pandas
    # releases no longer accept.
    neighborhood_data.columns = neighborhood_data.columns.map('_'.join)
    neighborhood_data = neighborhood_data.rename({'ASSESSMENT_NBHD_<lambda>': 'ASSESSMENT_NBHD'}, axis='columns')
    neighborhood_data = neighborhood_data.reset_index(level=[0])
    neighborhood_data = neighborhood_data.astype(
        {'PRICE_median': 'int', 'AREA_mean': 'int', 'CENSUS_TRACT': 'int'}
    )

    # ---- Tract polygons ---------------------------------------------------
    dc = dc_org.copy()
    # Integer tract ids so they can be joined on CENSUS_TRACT
    dc.TRACT = dc.TRACT.astype(int)

    def json_data():
        """Return the tract polygons joined with the summary as a GeoJSON string."""
        merged = pd.merge(dc, neighborhood_data, left_on='TRACT', right_on='CENSUS_TRACT', how='left')
        # Rows with any missing value (for instance tracts without a matching
        # summary row) are dropped, so those tracts are not drawn on the map.
        merged = merged.dropna()
        return merged.to_json()

    # ---- Histogram --------------------------------------------------------
    def histogram_data(frame):
        """Bin the PRICE column of ``frame`` into $50,000 wide intervals.

        Returns a ColumnDataSource with the columns ``property_count``,
        ``left``, ``right`` and ``p_interval``.
        """
        # The bins run up to the 95th percentile, but at least to $1,000,000.
        # nanmax ignores the NaN percentile of an empty frame.
        upper = np.nanmax([1_000_000, frame.PRICE.quantile(0.95)])
        arr_hist, edges = np.histogram(frame['PRICE'], bins=np.arange(0, upper, 50_000))

        prices = pd.DataFrame({'property_count': arr_hist,
                               'left': edges[:-1],
                               'right': edges[1:]})
        # Human readable description of each interval for the hover tool
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]
        return ColumnDataSource(prices)

    def update_histogram(tract):
        """Redraw the histogram for the properties of one census tract."""
        df_temp = df.loc[df.CENSUS_TRACT == tract]
        if df_temp.empty:
            # Nothing to show for this tract; keep the current histogram.
            return
        src.data.update(histogram_data(df_temp).data)

    def get_polygon_by_click(x, y):
        """Update the histogram for the tract that contains the point (x, y)."""
        clicked = Point(x, y)
        for i, polygon in enumerate(dc.geometry):
            if polygon.contains(clicked):
                update_histogram(dc.TRACT.iloc[i])
                break

    def callback(event):
        """Tap handler: look up the tract under the tapped map position."""
        get_polygon_by_click(event.x, event.y)

    # The histogram initially covers all properties
    src = histogram_data(df)

    p_histogram = figure(plot_height=700, plot_width=400,
                         title='Real Estate prices',
                         x_axis_label='Price ($)',
                         y_axis_label='Number of properties')
    p_histogram.quad(bottom=0, top='property_count', left='left', right='right', source=src,
                     fill_color='red', line_color='black', fill_alpha=0.75,
                     hover_fill_alpha=1.0, hover_fill_color='navy')
    p_histogram.xaxis[0].formatter = NumeralTickFormatter(format="$0")
    p_histogram = style(p_histogram)
    # Hover tool referring to our own data fields using @
    p_histogram.add_tools(HoverTool(tooltips=[('Number of properties', '@property_count'),
                                              ('Price Interval', '@p_interval')]))
    p_histogram.xaxis.major_label_orientation = math.pi / 4

    # ---- Choropleth map ---------------------------------------------------
    geosource = GeoJSONDataSource(geojson=json_data())
    input_field = 'PRICE_median'

    # Sequential palette, mapped linearly onto $0 - $1,500,000
    palette = inferno(100)
    color_mapper = LinearColorMapper(palette=palette, low=0, high=1_500_000)
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18,
                         formatter=NumeralTickFormatter(format="$0"),
                         border_line_color=None, location=(0, 0))

    hover = HoverTool(tooltips=[('Neighbourhood', '@ASSESSMENT_NBHD'),
                                ('Number of properties', '@PRICE_count'),
                                ('Median Price', '$@PRICE_median{,}'),
                                ('Average area', '@AREA_mean{,}')])

    p = figure(plot_height=700, plot_width=600, toolbar_location=None)
    p.add_layout(color_bar, 'right')

    t = Title()
    t.text = 'Median sale price for Homes in DC'
    p.title = t

    # A map needs neither grid lines nor axes
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.axis.visible = False

    # One patch per tract, coloured by the tract's median price
    p.patches('xs', 'ys', source=geosource,
              fill_color={'field': input_field, 'transform': color_mapper},
              line_color='black', line_width=0.25, fill_alpha=1)
    p.add_tools(hover)
    p = style(p)
    p.on_event(Tap, callback)

    # ---- Layout -----------------------------------------------------------
    doc.add_root(column(row(p, p_histogram)))

show(slide_2)

<img src="https://raw.githubusercontent.com/goh12/socialdataanalysis2020-screenshots/master/slide2.jpg"></img>

### 2. Section 3 <a name="section3"></a>

In [6]:
# Slide 3: median price per square foot by census tract
def slide_3(doc):
    """Build the "price per square foot" slide and add it to a Bokeh document.

    The slide shows a choropleth of the median price per square foot per census
    tract next to a histogram of the price per square foot. The histogram starts
    out with all properties; tapping a tract on the map redraws it for that
    tract only.

    Reads the module-level ``properties`` and ``dc_org`` frames and uses the
    module-level ``style`` helper.

    Args:
        doc: Bokeh document that receives the finished layout via ``add_root``.
    """
    df = properties.copy()

    # ---- Per-tract summary used to colour the map -------------------------
    neighborhood_data = df.groupby(
        ['CENSUS_TRACT']
    ).agg(
        {
            'PRICE': ['count', 'median'],
            'AREA': ['mean'],
            'price_sf': ['median'],
            # Most frequent neighbourhood name among the tract's properties
            'ASSESSMENT_NBHD': (lambda x: x.value_counts().index[0] if len(np.unique(x)) > 0 else "")
        }
    )

    # ``agg`` produces two-level column labels such as ('PRICE', 'median');
    # join them into 'PRICE_median'. Assigning ``columns`` directly avoids
    # ``set_axis(..., inplace=False)``, whose ``inplace`` keyword newer pandas
    # releases no longer accept.
    neighborhood_data.columns = neighborhood_data.columns.map('_'.join)
    neighborhood_data = neighborhood_data.rename({'ASSESSMENT_NBHD_<lambda>': 'ASSESSMENT_NBHD'}, axis='columns')
    neighborhood_data = neighborhood_data.reset_index(level=[0])
    neighborhood_data = neighborhood_data.astype(
        {'PRICE_median': 'int', 'AREA_mean': 'int', 'price_sf_median': 'int', 'CENSUS_TRACT': 'int'}
    )

    # ---- Tract polygons ---------------------------------------------------
    dc = dc_org.copy()
    # Set the Coordinate Reference System (crs) for projections
    # EPSG code 4326 is also referred to as WGS84 lat-long projection
    dc.crs = {'init': 'epsg:4326'}
    # Integer tract ids so they can be joined on CENSUS_TRACT
    dc.TRACT = dc.TRACT.astype(int)

    # Presentation settings of the mapped field
    input_field = 'price_sf_median'
    min_range, max_range = 0, 350
    field_format = '$0,0'
    verbage = 'Median Price Per Square Foot'

    def json_data():
        """Return the tract polygons joined with the summary as a GeoJSON string."""
        merged = pd.merge(dc, neighborhood_data, left_on='TRACT', right_on='CENSUS_TRACT', how='left')
        # Rows with any missing value (for instance tracts without a matching
        # summary row) are dropped, so those tracts are not drawn on the map.
        merged = merged.dropna()
        return merged.to_json()

    # ---- Histogram --------------------------------------------------------
    def histogram_data(frame):
        """Bin the price_sf column of ``frame`` into $50 wide intervals.

        Returns a ColumnDataSource with the columns ``property_count``,
        ``left``, ``right`` and ``p_interval``.
        """
        # The bins run up to the 98th percentile, but at least to $300.
        # nanmax ignores the NaN percentile of an empty frame.
        upper = np.nanmax([300, frame.price_sf.quantile(0.98)])
        arr_hist, edges = np.histogram(frame['price_sf'], bins=np.arange(0, upper, 50))

        prices = pd.DataFrame({'property_count': arr_hist,
                               'left': edges[:-1],
                               'right': edges[1:]})
        # Human readable description of each interval for the hover tool
        prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]
        return ColumnDataSource(prices)

    def update_histogram(tract):
        """Redraw the histogram for the properties of one census tract."""
        df_temp = df.loc[df.CENSUS_TRACT == tract]
        if df_temp.empty:
            # Nothing to show for this tract; keep the current histogram.
            return
        src.data.update(histogram_data(df_temp).data)

    def get_polygon_by_click(x, y):
        """Update the histogram for the tract that contains the point (x, y)."""
        clicked = Point(x, y)
        for i, polygon in enumerate(dc.geometry):
            if polygon.contains(clicked):
                update_histogram(dc.TRACT.iloc[i])
                break

    def callback(event):
        """Tap handler: look up the tract under the tapped map position."""
        get_polygon_by_click(event.x, event.y)

    # The histogram initially covers all properties
    src = histogram_data(df)

    p_histogram = figure(plot_height=700, plot_width=400,
                         title='Price per square foot',
                         x_axis_label='Cost per sq.ft. of GBA ($)',
                         y_axis_label='Number of properties')
    p_histogram.quad(bottom=0, top='property_count', left='left', right='right', source=src,
                     fill_color='red', line_color='black', fill_alpha=0.75,
                     hover_fill_alpha=1.0, hover_fill_color='navy')
    p_histogram.xaxis[0].formatter = NumeralTickFormatter(format="$0")
    p_histogram = style(p_histogram)
    # Hover tool referring to our own data fields using @
    p_histogram.add_tools(HoverTool(tooltips=[('Number of properties', '@property_count'),
                                              ('Price Interval', '@p_interval')]))
    p_histogram.xaxis.major_label_orientation = math.pi / 4

    # ---- Choropleth map ---------------------------------------------------
    geosource = GeoJSONDataSource(geojson=json_data())

    # Sequential palette, mapped linearly onto the configured value range
    palette = inferno(100)
    color_mapper = LinearColorMapper(palette=palette, low=min_range, high=max_range)
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18,
                         formatter=NumeralTickFormatter(format=field_format),
                         border_line_color=None, location=(0, 0))

    hover = HoverTool(tooltips=[('Neighbourhood', '@ASSESSMENT_NBHD'),
                                ('Number of properties', '@PRICE_count'),
                                ('Price/SF ', '$@price_sf_median{,}')])

    p = figure(plot_height=700, plot_width=600, toolbar_location=None)
    p.add_layout(color_bar, 'right')

    t = Title()
    t.text = verbage
    p.title = t

    # A map needs neither grid lines nor axes
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.axis.visible = False

    # One patch per tract, coloured by the tract's median price per square foot
    p.patches('xs', 'ys', source=geosource,
              fill_color={'field': input_field, 'transform': color_mapper},
              line_color='black', line_width=0.25, fill_alpha=1)
    p.add_tools(hover)
    p = style(p)
    p.on_event(Tap, callback)

    # ---- Layout -----------------------------------------------------------
    doc.add_root(column(row(p, p_histogram)))

show(slide_3)

<img src="https://raw.githubusercontent.com/goh12/socialdataanalysis2020-screenshots/master/slide3.jpg"></img>

### 3. Section 4 <a name="section4"></a>

In [11]:
def slide_4(doc):
    """Build the "price by number of bedrooms" slide and add it to a Bokeh document.

    The slide shows a choropleth of the median sale price per census tract for
    homes with a selectable number of bedrooms, next to a bar chart of the
    median price per square foot for each bedroom count. The bar chart starts
    out with all properties; tapping a tract on the map redraws it for that
    tract only.

    Reads the module-level ``properties`` and ``dc_org`` frames and uses the
    module-level ``style`` helper.

    Args:
        doc: Bokeh document that receives the finished layout via ``add_root``.
    """
    df = properties.copy()

    # ---- Summary per (tract, bedroom count) used to colour the map --------
    neighborhood_data = df.groupby(
        ['CENSUS_TRACT', 'BEDRM']
    ).agg(
        {
            'PRICE': ['count', 'median'],
            'AREA': ['mean'],
            'price_sf': ['median'],
            # Most frequent neighbourhood name within the group
            'ASSESSMENT_NBHD': (lambda x: x.value_counts().index[0] if len(np.unique(x)) > 0 else "")
        }
    )

    # ``agg`` produces two-level column labels such as ('PRICE', 'median');
    # join them into 'PRICE_median'. Assigning ``columns`` directly avoids
    # ``set_axis(..., inplace=False)``, whose ``inplace`` keyword newer pandas
    # releases no longer accept.
    neighborhood_data.columns = neighborhood_data.columns.map('_'.join)
    neighborhood_data = neighborhood_data.rename({'ASSESSMENT_NBHD_<lambda>': 'ASSESSMENT_NBHD'}, axis='columns')
    neighborhood_data = neighborhood_data.reset_index(level=[0, 1])
    neighborhood_data = neighborhood_data.astype(
        {
            'BEDRM': 'int',
            'PRICE_median': 'int',
            'AREA_mean': 'int',
            'price_sf_median': 'int',
            'CENSUS_TRACT': 'int',
        }
    )

    # ---- Tract polygons ---------------------------------------------------
    dc = dc_org.copy()
    # Integer tract ids so they can be joined on CENSUS_TRACT
    dc.TRACT = dc.TRACT.astype(int)

    # Formatting for the data in the plots: field, colour range, tick format, title
    format_data = [('PRICE_count', 0, 700, '0,0', 'Number of properties'),
                   ('PRICE_median', 0, 1_500_000, '$0,0', 'Median Sale Price'),
                   ('AREA_mean', 500, 5000, '0,0', 'Average Square Footage'),
                   ('price_sf_median', 0, 2000, '$0,0', 'Median Price Per Square Foot')]
    format_df = pd.DataFrame(format_data, columns=['field', 'min_range', 'max_range', 'format', 'verbage'])

    # Only the median sale price is mapped on this slide
    input_field = 'PRICE_median'
    field_settings = format_df.loc[format_df['field'] == input_field].iloc[0]
    min_range = field_settings['min_range']
    max_range = field_settings['max_range']
    verbage = field_settings['verbage']

    def json_data(bedroom):
        """Return the tract polygons joined with the summary rows of one bedroom count.

        Args:
            bedroom: Number of bedrooms; anything ``int()`` accepts (the select
                widget passes a string).

        Returns:
            A GeoJSON string.
        """
        bedroom = int(bedroom)
        neighborhood_data_temp = neighborhood_data.loc[neighborhood_data.BEDRM == bedroom, :]
        merged = pd.merge(dc, neighborhood_data_temp, left_on='TRACT', right_on='CENSUS_TRACT', how='left')
        # Rows with any missing value (for instance tracts without a matching
        # summary row) are dropped, so those tracts are not drawn on the map.
        merged = merged.dropna()
        return merged.to_json()

    def update_plot(attr, old, new):
        """Select handler: show the tracts for the newly selected bedroom count.

        Replacing the GeoJSON of the existing source is enough; the patches
        already on the map are bound to it. No new renderer or hover tool is
        added, so repeated selections do not pile up glyphs on the figure.
        """
        geosource.geojson = json_data(new)

    # ---- Bar chart --------------------------------------------------------
    def bedroom_medians(frame, as_int=False):
        """Return bar chart columns: median price_sf for the first 10 bedroom counts.

        Args:
            frame: Properties to summarise.
            as_int: Cast the medians to int (used for the initial chart).
        """
        median_df = frame.groupby('BEDRM', as_index=False).agg({'price_sf': 'median'})
        median_df = median_df.sort_values(by='BEDRM')

        rooms = list(median_df.BEDRM.astype('int').astype('str')[:10])
        values = median_df.price_sf.astype(int) if as_int else median_df.price_sf
        medians = list(values)[:10]
        medians_format = ['${:,.0f}'.format(median) for median in medians]
        return {'x': rooms, 'y': medians, 'y_format': medians_format}

    def update_histogram(tract):
        """Redraw the bar chart for the properties of one census tract."""
        df_temp = df.loc[df.CENSUS_TRACT == tract]
        source_table_hist.data.update(bedroom_medians(df_temp))

    def get_polygon_by_click(x, y):
        """Update the bar chart for the tract that contains the point (x, y)."""
        clicked = Point(x, y)
        for i, polygon in enumerate(dc.geometry):
            if polygon.contains(clicked):
                update_histogram(dc.TRACT.iloc[i])
                break

    def callback(event):
        """Tap handler: look up the tract under the tapped map position."""
        get_polygon_by_click(event.x, event.y)

    # The bar chart initially covers all properties
    data_dict = bedroom_medians(df, as_int=True)
    source_table_hist = ColumnDataSource(data=data_dict)

    # The categorical x range is fixed to the bedroom counts of the initial data
    p_histogram = figure(x_range=data_dict['x'], plot_width=400, plot_height=700,
                         title="PPSF by number of bedrooms",
                         toolbar_location=None, tools="",
                         x_axis_label='Number of bedrooms', y_axis_label='Price per square foot')
    p_histogram.vbar(x='x', top='y', source=source_table_hist, width=0.9)
    # Hover tool referring to our own data fields using @
    p_histogram.add_tools(HoverTool(tooltips=[('Number of bedrooms', '@x'),
                                              ('Median price per square foot', '@y_format')]))
    p_histogram.xgrid.grid_line_color = None
    p_histogram.y_range.start = 0
    p_histogram.yaxis[0].formatter = NumeralTickFormatter(format="$0")
    p_histogram = style(p_histogram)

    # ---- Choropleth map ---------------------------------------------------
    # The map starts with two-bedroom homes, matching the select default below
    geosource = GeoJSONDataSource(geojson=json_data(2))

    # Sequential palette, mapped linearly onto the configured value range
    palette = inferno(100)
    color_mapper = LinearColorMapper(palette=palette, low=min_range, high=max_range)
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18,
                         formatter=NumeralTickFormatter(format="$0.0"),
                         border_line_color=None, location=(0, 0))

    hover = HoverTool(tooltips=[('Neighbourhood', '@ASSESSMENT_NBHD'),
                                ('Number of properties', '@PRICE_count'),
                                ('Median Price', '$@PRICE_median{,}'),
                                ('Average area', '@AREA_mean{,}'),
                                ('Price/SF ', '$@price_sf_median{,}')])

    p = figure(plot_height=700, plot_width=600, toolbar_location=None)
    p.add_layout(color_bar, 'right')

    t = Title()
    t.text = verbage + ' by Bedrooms'
    p.title = t

    # A map needs neither grid lines nor axes
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.axis.visible = False

    # One patch per tract, coloured by the tract's median price
    p.patches('xs', 'ys', source=geosource,
              fill_color={'field': input_field, 'transform': color_mapper},
              line_color='black', line_width=0.25, fill_alpha=1)
    p.add_tools(hover)
    p = style(p)
    p.on_event(Tap, callback)

    # ---- Bedroom selector -------------------------------------------------
    select = Select(title='Select Number of Rooms:', value='2',
                    options=[str(count) for count in range(1, 10)])
    select.on_change('value', update_plot)

    # ---- Layout: map and bar chart side by side, selector underneath ------
    widgets = column(widgetbox(select))
    main_row = row(p, p_histogram)
    doc.add_root(column(main_row, widgets))
show(slide_4)

<img src="https://raw.githubusercontent.com/goh12/socialdataanalysis2020-screenshots/master/slide4.jpg"></img>

### 4. Section 5 <a name="section5"></a>

In [8]:
def slide_5(doc):
    """Build the "median price per square foot by building age" slide on *doc*.

    The slide consists of
      * a map of the census tracts in ``dc_org``, coloured by the median price
        per square foot of the properties in the selected age bucket,
      * a drop-down for choosing the age bucket, and
      * a histogram of price per square foot, which starts with all properties
        and switches to a single tract when that tract is tapped on the map.

    Relies on the notebook-level names ``properties`` (one row per property,
    with the columns CENSUS_TRACT, TSEYB, PRICE, AREA, price_sf and
    ASSESSMENT_NBHD), ``dc_org`` (tract polygons with a TRACT column) and
    ``style`` (plot styling helper).

    Args:
        doc: Bokeh document that receives the finished layout as a root.
    """
    bin_width = 50  # histogram bin width, in $ per square foot

    # TSEYB bucket -> label shown in the drop-down.
    years_since_built_dict = {0: '0-9 years old',
                    1: '10-19  years old',
                    2: '20-29  years old',
                    3: '30-39  years old',
                    4: '40-49  years old',
                    5: '50-59  years old',
                    6: '60+ years old'}
    bucket_by_label = {label: bucket for bucket, label in years_since_built_dict.items()}
    oldest_bucket = max(years_since_built_dict)
    default_label = years_since_built_dict[0]

    df = properties.copy()
    # The last bucket is open-ended ("60+"). The previous code tried to handle
    # that with a `>=` filter guarded by `tseyb < 10`, which is always true for
    # the keys 0-6, so older properties were never included. Folding them into
    # the last bucket *before* aggregating gives one correctly aggregated row
    # per tract. NOTE(review): assumes TSEYB is the numeric decade index used
    # as key above - confirm against the preprocessing step.
    df['TSEYB'] = df['TSEYB'].clip(upper=oldest_bucket)

    ### per tract / age bucket summary
    neighborhood_data = df.groupby(['CENSUS_TRACT', 'TSEYB']).agg(
        {
            'PRICE': ['count', 'median'],
            'AREA': ['mean'],
            'price_sf': ['median'],
            # Most frequent neighbourhood name; "" when the group has no name at all.
            'ASSESSMENT_NBHD': (lambda x: x.value_counts().index[0] if x.notna().any() else ""),
        }
    )

    # Flatten the two-level column index ('PRICE', 'median') -> 'PRICE_median'.
    # Plain assignment is used because set_axis(..., inplace=...) no longer
    # exists in pandas 2.x.
    neighborhood_data.columns = neighborhood_data.columns.map('_'.join)
    neighborhood_data = (
        neighborhood_data
        .rename({'ASSESSMENT_NBHD_<lambda>': 'ASSESSMENT_NBHD'}, axis='columns')
        .reset_index()
        .astype({'PRICE_median': 'int',
                 'AREA_mean': 'int',
                 'price_sf_median': 'int',
                 'CENSUS_TRACT': 'int',
                 'TSEYB': 'int'})
    )

    ### tract polygons
    dc = dc_org.copy().set_geometry('geometry')
    # Same dtype as CENSUS_TRACT so the merge keys match.
    dc['TRACT'] = dc['TRACT'].astype(int)

    # Colour range and label per plottable field.
    format_data = [('PRICE_count', 0, 700,'0,0', 'Number of properties'),
                ('PRICE_median', 0, 1_500_000, '$0,0', 'Median Sale Price'),
                ('AREA_mean', 500, 5000,'0,0', 'Average Square Footage'),
                ('price_sf_median', 0, 500,'$0,0', 'Median Price Per SF')]
    format_df = pd.DataFrame(format_data, columns = ['field' , 'min_range', 'max_range' , 'format', 'verbage'])

    def field_spec(field_name, column):
        """Return the *column* entry of ``format_df`` for *field_name*."""
        return format_df.loc[format_df['field'] == field_name, column].iloc[0]

    def json_data(residential_type):
        """Return the GeoJSON string of the tracts for one age-bucket label.

        *residential_type* is a label from ``years_since_built_dict``; an
        unknown label falls back to bucket 0. Tracts without data for the
        bucket are left out of the result (rows with any missing value are
        dropped after the left merge).
        """
        tseyb = bucket_by_label.get(residential_type, 0)
        bucket_rows = neighborhood_data.loc[neighborhood_data.TSEYB == tseyb]

        merged = pd.merge(dc, bucket_rows, left_on='TRACT', right_on='CENSUS_TRACT', how='left')
        merged = merged.dropna()

        # GeoDataFrame.to_json() already returns the GeoJSON string Bokeh expects.
        return merged.to_json()

    def make_plot(field_name):
        """Create the map figure coloured by *field_name*."""
        verbage = field_spec(field_name, 'verbage')
        color_mapper = LinearColorMapper(palette = palette,
                                         low = field_spec(field_name, 'min_range'),
                                         high = field_spec(field_name, 'max_range'))
        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18,
                             formatter=NumeralTickFormatter(format="$0.0"),
                             border_line_color=None, location = (0, 0))

        p_map = figure(title = verbage + ' by age',
                       plot_height = 700, plot_width = 600,
                       toolbar_location = None)
        p_map.add_layout(color_bar, 'right')
        p_map.xgrid.grid_line_color = None
        p_map.ygrid.grid_line_color = None
        p_map.axis.visible = False

        # The patches read from geosource, so replacing geosource.geojson later
        # is enough to redraw the map.
        p_map.patches('xs','ys', source = geosource,
                      fill_color = {'field' : field_name, 'transform' : color_mapper},
                      line_color = 'black', line_width = 0.25, fill_alpha = 1)
        p_map.add_tools(hover)
        return p_map

    def update_plot(attr, old, new):
        """Select callback: show the tracts of the newly chosen age bucket.

        Only the data source is replaced. Re-adding the patches and the hover
        tool on every change (as was done before) stacked one more renderer on
        the figure per selection.
        """
        geosource.geojson = json_data(new)

    def histogram_frame(price_sf, floor):
        """Bin *price_sf* into ``bin_width``-wide bins starting at 0.

        The bins reach at least *floor* and otherwise the 98th percentile of
        the data. Returns a DataFrame with the columns property_count, left,
        right and p_interval.
        """
        price_sf = price_sf.dropna()
        q98 = price_sf.quantile(0.98)
        # An empty selection has no quantile (NaN); fall back to the floor so
        # np.arange still gets a finite stop value.
        upper = floor if pd.isna(q98) else max(floor, q98)
        # `+ bin_width` keeps the bin that contains `upper`; arange excludes its
        # stop value, which would otherwise cut the histogram short.
        edges = np.arange(0, upper + bin_width, bin_width)
        counts, edges = np.histogram(price_sf, bins = edges)

        frame = pd.DataFrame({'property_count': counts,
                              'left': edges[:-1],
                              'right': edges[1:]})
        # Extent of each interval, shown in the hover tooltip.
        frame['p_interval'] = ['$%d to $%d' % (left, right)
                               for left, right in zip(frame['left'], frame['right'])]
        return frame

    def histogram_plot():
        """Create the histogram figure backed by ``src``."""
        p_temp = figure(plot_height = 700, plot_width = 400,
                   title = 'Real Estate prices',
                   x_axis_label = 'Price per square foot ($)',
                   y_axis_label = 'Number of properties')

        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')

        h1 = HoverTool(tooltips = [('Number of properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        p_temp = style(p_temp)
        p_temp.add_tools(h1)
        return p_temp

    def update_histogram(tract):
        """Show the price-per-square-foot histogram of a single *tract*."""
        frame = histogram_frame(df.loc[df.CENSUS_TRACT == tract, 'price_sf'], 300)
        src.data.update(ColumnDataSource(frame).data)

    def callback(event):
        """Tap callback: update the histogram for the tract that was clicked."""
        clicked = Point(event.x, event.y)
        for tract, polygon in zip(dc.TRACT, dc.geometry):
            if polygon.contains(clicked):
                update_histogram(tract)
                break

    ### initial histogram setup (all properties)
    src = ColumnDataSource(histogram_frame(df['price_sf'], 600))
    p_histogram = histogram_plot()
    # NOTE(review): style() was already applied inside histogram_plot(); the
    # second call is kept from the original code - presumably idempotent.
    p_histogram = style(p_histogram)
    p_histogram.xaxis.major_label_orientation = math.pi/4
    ### end for histogram

    ### shapefile census tract setup
    # Input geojson source that contains features for plotting
    geosource = GeoJSONDataSource(geojson = json_data(default_label))
    input_field = 'price_sf_median'

    # Define a sequential multi-hue color palette.
    palette = inferno(100)

    # Add hover tool
    hover = HoverTool(tooltips = [ ('Neighbourhood','@ASSESSMENT_NBHD'),
                                    ('Number of properties', '@PRICE_count'),
                                    ('Median Price', '$@PRICE_median{,}'),
                                    ('Average area', '@AREA_mean{,}'),
                                    ('Price/SF ', '$@price_sf_median{,}')])

    # Call the plotting function
    p = make_plot(input_field)
    p = style(p)
    p.on_event(Tap, callback)

    # The initial value must be one of the options and match the data loaded
    # into geosource above.
    select = Select(title='Select years since built:', value=default_label,
                    options=list(years_since_built_dict.values()))
    select.on_change('value', update_plot)
    ### shapefile census tract setup end

    # set up layout: map and histogram side by side, drop-down underneath
    widgets = column(widgetbox(select))
    main_row = row(p, p_histogram)
    doc.add_root(column(main_row, widgets))
# Display the slide: show() receives the app-building function itself, so Bokeh
# calls slide_5(doc) to populate the document it renders in the notebook output.
show(slide_5)

<img src="https://raw.githubusercontent.com/goh12/socialdataanalysis2020-screenshots/master/slide5.jpg" alt="Screenshot of slide 5">

### 5. Section 6 <a name="section6"></a>

In [9]:
def slide_6(doc):
    """Build the "median price per square foot by residential type" slide on *doc*.

    The slide consists of
      * a map of the census tracts in ``dc_org``, coloured by the median price
        per square foot of the properties of the selected residential type,
      * a drop-down for choosing the residential type, and
      * a histogram of price per square foot, which starts with all properties
        and switches to a single tract when that tract is tapped on the map.

    Relies on the notebook-level names ``properties`` (one row per property,
    with the columns CENSUS_TRACT, USECODE, PRICE, AREA, price_sf and
    ASSESSMENT_NBHD), ``dc_org`` (tract polygons with a TRACT column) and
    ``style`` (plot styling helper).

    Args:
        doc: Bokeh document that receives the finished layout as a root.
    """
    bin_width = 50  # histogram bin width, in $ per square foot

    # USECODE -> label shown in the drop-down.
    use_code_dict = {11: 'Residential Row Single Family',
                 12: 'Residential Detached Single Family',
                 17: 'Residential Condo Vertical',
                 16: 'Residential Condo Horizontal',
                 13: 'Residential Semi Detached Single'}
    usecode_by_label = {label: code for code, label in use_code_dict.items()}
    default_label = use_code_dict[11]

    df = properties.copy()

    ### per tract / residential type summary
    neighborhood_data = df.groupby(['CENSUS_TRACT', 'USECODE']).agg(
        {
            'PRICE': ['count', 'median'],
            'AREA': ['mean'],
            'price_sf': ['median'],
            # Most frequent neighbourhood name; "" when the group has no name at all.
            'ASSESSMENT_NBHD': (lambda x: x.value_counts().index[0] if x.notna().any() else ""),
        }
    )

    # Flatten the two-level column index ('PRICE', 'median') -> 'PRICE_median'.
    # Plain assignment is used because set_axis(..., inplace=...) no longer
    # exists in pandas 2.x.
    neighborhood_data.columns = neighborhood_data.columns.map('_'.join)
    neighborhood_data = (
        neighborhood_data
        .rename({'ASSESSMENT_NBHD_<lambda>': 'ASSESSMENT_NBHD'}, axis='columns')
        .reset_index()
        .astype({'PRICE_median': 'int',
                 'AREA_mean': 'int',
                 'price_sf_median': 'int',
                 'CENSUS_TRACT': 'int',
                 'USECODE': 'int'})
    )

    ### tract polygons
    dc = dc_org.copy()
    # Set the Coordinate Reference System (crs) for projections
    # EPSG code 4326 is also referred to as WGS84 lat-long projection
    dc.crs = {'init': 'epsg:4326'}
    dc = dc.set_geometry('geometry')
    # Same dtype as CENSUS_TRACT so the merge keys match.
    dc['TRACT'] = dc['TRACT'].astype(int)

    # Colour range and label per plottable field. price_sf_median is a median,
    # so it is labelled as such (it used to read "Mean Price Per SF").
    format_data = [('PRICE_count', 0, 700,'0,0', 'Number of properties'),
                ('PRICE_median', 0, 1_500_000, '$0,0', 'Median Sale Price'),
                ('AREA_mean', 500, 5000,'0,0', 'Average Square Footage'),
                ('price_sf_median', 0, 500,'$0,0', 'Median Price Per SF')]
    format_df = pd.DataFrame(format_data, columns = ['field' , 'min_range', 'max_range' , 'format', 'verbage'])

    def field_spec(field_name, column):
        """Return the *column* entry of ``format_df`` for *field_name*."""
        return format_df.loc[format_df['field'] == field_name, column].iloc[0]

    def json_data(residential_type):
        """Return the GeoJSON string of the tracts for one residential-type label.

        *residential_type* is a label from ``use_code_dict``; an unknown label
        selects no rows. Tracts without data for the type are left out of the
        result (rows with any missing value are dropped after the left merge).
        """
        usecode = usecode_by_label.get(residential_type)
        type_rows = neighborhood_data.loc[neighborhood_data.USECODE == usecode]

        merged = pd.merge(dc, type_rows, left_on='TRACT', right_on='CENSUS_TRACT', how='left')
        merged = merged.dropna()

        # GeoDataFrame.to_json() already returns the GeoJSON string Bokeh expects.
        return merged.to_json()

    def make_plot(field_name):
        """Create the map figure coloured by *field_name*."""
        verbage = field_spec(field_name, 'verbage')
        color_mapper = LinearColorMapper(palette = palette,
                                         low = field_spec(field_name, 'min_range'),
                                         high = field_spec(field_name, 'max_range'))
        color_bar = ColorBar(color_mapper=color_mapper, label_standoff=18,
                             formatter=NumeralTickFormatter(format="$0.0"),
                             border_line_color=None, location = (0, 0))

        p_map = figure(title = verbage + ' by Residential Type',
                       plot_height = 700, plot_width = 600,
                       toolbar_location = None)
        p_map.add_layout(color_bar, 'right')
        p_map.xgrid.grid_line_color = None
        p_map.ygrid.grid_line_color = None
        p_map.axis.visible = False

        # The patches read from geosource, so replacing geosource.geojson later
        # is enough to redraw the map.
        p_map.patches('xs','ys', source = geosource,
                      fill_color = {'field' : field_name, 'transform' : color_mapper},
                      line_color = 'black', line_width = 0.25, fill_alpha = 1)
        p_map.add_tools(hover)
        return p_map

    def update_plot(attr, old, new):
        """Select callback: show the tracts of the newly chosen residential type.

        Only the data source is replaced. Re-adding the patches and the hover
        tool on every change (as was done before) stacked one more renderer on
        the figure per selection.
        """
        geosource.geojson = json_data(new)

    def histogram_frame(price_sf, floor):
        """Bin *price_sf* into ``bin_width``-wide bins starting at 0.

        The bins reach at least *floor* and otherwise the 98th percentile of
        the data. Returns a DataFrame with the columns property_count, left,
        right and p_interval.
        """
        price_sf = price_sf.dropna()
        q98 = price_sf.quantile(0.98)
        # An empty selection has no quantile (NaN); fall back to the floor so
        # np.arange still gets a finite stop value.
        upper = floor if pd.isna(q98) else max(floor, q98)
        # `+ bin_width` keeps the bin that contains `upper`; arange excludes its
        # stop value, which would otherwise cut the histogram short.
        edges = np.arange(0, upper + bin_width, bin_width)
        counts, edges = np.histogram(price_sf, bins = edges)

        frame = pd.DataFrame({'property_count': counts,
                              'left': edges[:-1],
                              'right': edges[1:]})
        # Extent of each interval, shown in the hover tooltip.
        frame['p_interval'] = ['$%d to $%d' % (left, right)
                               for left, right in zip(frame['left'], frame['right'])]
        return frame

    def histogram_plot():
        """Create the histogram figure backed by ``src``."""
        p_temp = figure(plot_height = 700, plot_width = 400,
                   title = 'Real Estate prices',
                   x_axis_label = 'Price per square foot ($)',
                   y_axis_label = 'Number of properties')

        p_temp.quad(bottom=0, top='property_count', left='left', right='right', source=src,
               fill_color='red', line_color='black', fill_alpha = 0.75,
               hover_fill_alpha = 1.0, hover_fill_color = 'navy')

        h1 = HoverTool(tooltips = [('Number of properties', '@property_count'),
                                  ('Price Interval', '@p_interval')])

        p_temp.xaxis[0].formatter = NumeralTickFormatter(format="$0")

        p_temp = style(p_temp)
        p_temp.add_tools(h1)
        return p_temp

    def update_histogram(tract):
        """Show the price-per-square-foot histogram of a single *tract*."""
        frame = histogram_frame(df.loc[df.CENSUS_TRACT == tract, 'price_sf'], 300)
        src.data.update(ColumnDataSource(frame).data)

    def callback(event):
        """Tap callback: update the histogram for the tract that was clicked."""
        clicked = Point(event.x, event.y)
        for tract, polygon in zip(dc.TRACT, dc.geometry):
            if polygon.contains(clicked):
                update_histogram(tract)
                break

    ### initial histogram setup (all properties)
    src = ColumnDataSource(histogram_frame(df['price_sf'], 300))
    p_histogram = histogram_plot()
    # NOTE(review): style() was already applied inside histogram_plot(); the
    # second call is kept from the original code - presumably idempotent.
    p_histogram = style(p_histogram)
    p_histogram.xaxis.major_label_orientation = math.pi/4
    ### end for histogram

    ### shapefile census tract setup
    # Input geojson source that contains features for plotting
    geosource = GeoJSONDataSource(geojson = json_data(default_label))
    input_field = 'price_sf_median'

    # Define a sequential multi-hue color palette.
    palette = inferno(100)

    # Add hover tool
    hover = HoverTool(tooltips = [ ('Neighbourhood','@ASSESSMENT_NBHD'),
                                    ('Number of properties', '@PRICE_count'),
                                    ('Median Price', '$@PRICE_median{,}'),
                                    ('Average area', '@AREA_mean{,}'),
                                    ('Price/SF ', '$@price_sf_median{,}')])

    # Call the plotting function
    p = make_plot(input_field)
    p = style(p)
    p.on_event(Tap, callback)

    # The initial value must be one of the options and match the data loaded
    # into geosource above.
    select = Select(title='Select Residential type:', value=default_label,
                    options=list(use_code_dict.values()))
    select.on_change('value', update_plot)
    ### shapefile census tract setup end

    # set up layout: map and histogram side by side, drop-down underneath
    widgets = column(widgetbox(select))
    main_row = row(p, p_histogram)
    doc.add_root(column(main_row, widgets))
# Display the slide: show() receives the app-building function itself, so Bokeh
# calls slide_6(doc) to populate the document it renders in the notebook output.
show(slide_6)

<img src="https://raw.githubusercontent.com/goh12/socialdataanalysis2020-screenshots/master/slide6.jpg" alt="Screenshot of slide 6">