In [1]:
import pandas as pd
import numpy as np

In [2]:
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure

from bokeh.models import CategoricalColorMapper, HoverTool, ColumnDataSource, Panel
from bokeh.models.widgets import CheckboxGroup, Slider, RangeSlider, Tabs, TableColumn, DataTable

from bokeh.layouts import column, row, WidgetBox
from bokeh.palettes import Category20_16, inferno

from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
from bokeh.models import NumeralTickFormatter
from bokeh.models import Label

# Configure Bokeh to render its output inline in the notebook
output_notebook()

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
import seaborn as sns
# Load the DC property records; the path is relative to the notebook's working directory
df = pd.read_csv('../Data/DC_Properties.csv')
# Sanity check: number of rows and columns that were loaded
print(df.shape)

(158957, 49)


  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

In [5]:
def outlier_range(tmp_df,coeff = 1.5,verbose=False):
    """Return the (min, max) of the values that are not outliers under the IQR rule.

    A value is an outlier when it lies outside the fences
    ``Q1 - coeff*IQR`` and ``Q3 + coeff*IQR``. Values exactly on a fence are
    kept, so constant data (IQR == 0) yields its single value rather than an
    empty selection.

    Parameters
    ----------
    tmp_df : pandas Series (or DataFrame)
        Values to analyse; missing values are ignored. The input is not modified.
    coeff : float, default 1.5
        Multiplier applied to the IQR to position the fences.
    verbose : bool, default False
        When True, print the percentage of non-missing values that were dropped.

    Returns
    -------
    tuple
        ``(smallest kept value, largest kept value)``.
    """
    # dropna() returns a new object, so the caller's data is never touched
    values = tmp_df.dropna()
    Q1 = values.quantile(0.25)
    Q3 = values.quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - IQR*coeff
    upper = Q3 + IQR*coeff
    # Inclusive comparison: only values strictly outside the fences are outliers
    kept = values[(values >= lower) & (values <= upper)]
    if verbose:
        # Guard the ratio so an all-missing/empty input does not divide by zero
        removed = 1 - (len(kept)/len(values)) if len(values) else 0.0
        print('persentage of data removed:',100*np.round(removed,3))
    return (np.min(kept),np.max(kept))

### Helper function for styling bokeh plots

In [6]:
def style(p):
    """Apply the notebook's shared typography to a Bokeh figure and return it.

    The figure is modified in place; the same object is returned so the call
    can be used as ``p = style(p)``.
    """
    # Centered, large serif title
    heading = p.title
    heading.align = 'center'
    heading.text_font_size = '20pt'
    heading.text_font = 'serif'

    # Both axes get the same treatment: bold 14pt axis titles, 12pt tick labels
    for axis in (p.xaxis, p.yaxis):
        axis.axis_label_text_font_size = '14pt'
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_size = '12pt'

    return p

In [7]:
# Columns of interest for this analysis (despite the name, ASSESSMENT_NBHD holds text labels)
float_vars = ['LATITUDE','LONGITUDE','PRICE','GBA','LIVING_GBA','LANDAREA','AYB','YR_RMDL','EYB', 'ASSESSMENT_NBHD']
# NOTE(review): this assigns the selected columns back to themselves without any conversion,
# so it looks like a no-op (a cast may have been intended) — confirm before relying on dtypes
df.loc[:,float_vars] = df.loc[:,float_vars]
# Preview the first three rows of the selected columns
df.loc[:,float_vars].iloc[:3]

Unnamed: 0,LATITUDE,LONGITUDE,PRICE,GBA,LIVING_GBA,LANDAREA,AYB,YR_RMDL,EYB,ASSESSMENT_NBHD
0,38.91468,-77.040832,1095000.0,2522.0,,1680,1910.0,1988.0,1972,Old City 2
1,38.914683,-77.040764,,2567.0,,1680,1898.0,2007.0,1972,Old City 2
2,38.914684,-77.040678,2100000.0,2522.0,,1680,1910.0,2009.0,1984,Old City 2


In [8]:
# Summary statistics of the sale price (describe() only counts non-missing values)
df['PRICE'].describe()

count    9.821600e+04
mean     9.313516e+05
std      7.061325e+06
min      1.000000e+00
25%      2.400000e+05
50%      3.999990e+05
75%      6.520000e+05
max      1.374275e+08
Name: PRICE, dtype: float64

In [9]:
# PRICE is missing for many records; drop the NaNs first so np.histogram does not
# emit "invalid value" RuntimeWarnings when comparing the data against the bin edges
known_prices = df['PRICE'].dropna()

# Bin the prices from $0 up to the 75th percentile (the long upper tail is left out)
arr_hist, edges = np.histogram(known_prices,
                               bins = 'auto',
                               range = [0, known_prices.quantile(0.75)])

# Put the information in a dataframe: one row per bin
prices = pd.DataFrame({'property_count': arr_hist,
                       'left': edges[:-1],
                       'right': edges[1:]})
# Add a column showing the extent of each interval (used as hover text)
prices['p_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(prices['left'], prices['right'])]

prices.head()

  keep = (a >= first_edge)
  keep &= (a <= last_edge)
  keep = (tmp_a >= first_edge)
  keep &= (tmp_a <= last_edge)


Unnamed: 0,property_count,left,right,p_interval
0,104,0.0,11854.545455,$0 to $11854
1,156,11854.545455,23709.090909,$11854 to $23709
2,342,23709.090909,35563.636364,$23709 to $35563
3,489,35563.636364,47418.181818,$35563 to $47418
4,653,47418.181818,59272.727273,$47418 to $59272


In [10]:
# Import the ColumnDataSource class
from bokeh.models import ColumnDataSource
# Wrap the binned price table in a ColumnDataSource so glyphs and hover
# tooltips can refer to its columns by name
src = ColumnDataSource(prices)
# List the column names the source exposes
src.data.keys()

dict_keys(['index', 'property_count', 'left', 'right', 'p_interval'])

In [11]:
# Create the blank plot
p = figure(plot_height = 600, plot_width = 700, 
           title = 'Histogram of Real Estate prices in Washington D.C.',
           x_axis_label = 'Price ($)', 
           y_axis_label = 'Number of properties')

# One rectangle per bin, read from the column data source; bars turn navy while hovered
p.quad(bottom=0, top='property_count', left='left', right='right', source=src,
       fill_color='red', line_color='black', fill_alpha = 0.75,
       hover_fill_alpha = 1.0, hover_fill_color = 'navy')


# Hover tool referring to our own data fields using @
h = HoverTool(tooltips = [('Number Of Properties', '@property_count'),
                          ('Price Interval', '@p_interval')])

# Show the x tick labels as whole-dollar amounts
p.xaxis[0].formatter = NumeralTickFormatter(format="$0")

# Apply the shared styling
p = style(p)

# Add the hover tool
p.add_tools(h)

# Show the plot
show(p)

### Three things to implement when adding active interaction to a Bokeh plot
* `make_dataset()`: format the specific data to be displayed
* `make_plot()`: draw the plot with the specified data
* `update()`: update the plot based on the user's selection

In [12]:
def make_dataset(neighbourhood_list, range_start = 0, range_end = 10000000, bin_width = '50'):
    """Build the histogram data for the selected neighbourhoods.

    For every neighbourhood the known sale prices are binned between
    ``range_start`` and ``range_end`` and each bin count is converted to a
    proportion of that neighbourhood's binned sales. Reads the notebook-level
    ``df``.

    Parameters
    ----------
    neighbourhood_list : sequence of str
        ASSESSMENT_NBHD labels to include.
    range_start, range_end : number
        Price range covered by the histogram.
    bin_width :
        Accepted for interface compatibility only; the bin edges are chosen by
        numpy's ``'auto'`` rule and this value is not used.

    Returns
    -------
    ColumnDataSource
        One row per (neighbourhood, bin) with the columns ``proportion``,
        ``left``, ``right``, ``f_proportion``, ``f_interval``, ``name`` and
        ``color``, sorted by name and left edge.
    """
    columns = ['proportion', 'left', 'right',
               'f_proportion', 'f_interval',
               'name', 'color']

    # One palette per call, with one colour per requested neighbourhood
    n_selected = len(neighbourhood_list)
    palette = inferno(n_selected) if n_selected else ()

    frames = []
    for color, neighbourhood_name in zip(palette, neighbourhood_list):
        # Known prices of this neighbourhood; NaNs would only trigger numpy warnings
        nbhd_prices = df.loc[df.ASSESSMENT_NBHD == neighbourhood_name, 'PRICE'].dropna()

        # Histogram with automatically chosen bins over the requested range
        price_hist, edges = np.histogram(nbhd_prices,
                                         bins = 'auto',
                                         range = [range_start, range_end])

        # Counts -> proportions; an empty histogram gives zeros instead of 0/0 = NaN
        total = price_hist.sum()
        proportion = price_hist / total if total > 0 else np.zeros(len(price_hist))

        price_df = pd.DataFrame({'proportion': proportion,
                                 'left': edges[:-1], 'right': edges[1:]})

        # Display strings used by the hover tool
        price_df['f_proportion'] = ['%0.5f' % value for value in price_df['proportion']]
        price_df['f_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(price_df['left'], price_df['right'])]

        # Neighbourhood label (legend) and its colour
        price_df['name'] = neighbourhood_name
        price_df['color'] = color

        frames.append(price_df)

    # Concatenate once instead of growing a DataFrame inside the loop
    if frames:
        by_neighbourhood = pd.concat(frames)[columns]
    else:
        by_neighbourhood = pd.DataFrame(columns = columns)

    by_neighbourhood = by_neighbourhood.sort_values(['name', 'left'])

    # Convert the dataframe to a column data source
    return ColumnDataSource(by_neighbourhood)

# Every neighbourhood label present in the data (missing labels are skipped)
available_neighbourhoods = list(df.ASSESSMENT_NBHD.dropna().unique())
# Build a source that covers all neighbourhoods
test = make_dataset(available_neighbourhoods)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [13]:
def make_plot(src):
    """Draw the per-neighbourhood price histogram for a column data source.

    ``src`` must provide the columns ``left``, ``right``, ``proportion``,
    ``color``, ``name``, ``f_interval`` and ``f_proportion``. Returns the
    styled Bokeh figure.
    """
    # Empty figure carrying the title and axis labels
    hist_fig = figure(title = 'Histogram of Real Estate Prices by Neighbourhood',
                      x_axis_label = 'Price ($)', y_axis_label = 'Proportion',
                      plot_width = 700, plot_height = 700)

    # One rectangle per histogram bin, coloured and labelled per neighbourhood
    hist_fig.quad(left = 'left', right = 'right', bottom = 0, top = 'proportion',
                  source = src, legend = 'name',
                  color = 'color', line_color = 'black', fill_alpha = 0.7,
                  hover_fill_color = 'color', hover_fill_alpha = 1.0)

    # Tooltip rows shown by the hover tool, which works in vline mode
    tooltip_rows = [('Neighbourhood', '@name'),
                    ('Price', '@f_interval'),
                    ('Proportion', '@f_proportion')]
    hist_fig.add_tools(HoverTool(tooltips = tooltip_rows, mode = 'vline'))

    # Shared styling; style() hands back the same figure
    return style(hist_fig)

In [14]:
# Draw the histogram from the all-neighbourhood source built above
p = make_plot(test)

In [15]:
# Bokeh property-change callbacks always receive (attr, old, new)
def update(attr, old, new):
    """Rebuild the histogram data when the neighbourhood checkboxes change.

    The arguments are supplied by Bokeh and are not used; the current
    selection is read from ``neighbourhood_selection`` instead.
    """
    # Labels of the boxes that are currently ticked
    neighbourhoods_to_plot = [neighbourhood_selection.labels[i] for i in
                              neighbourhood_selection.active]

    # Recompute the data for the selected neighbourhoods
    new_src = make_dataset(neighbourhoods_to_plot)

    # `test` is the source the notebook-level plot `p` was drawn from. The global
    # `src` holds the overall price histogram, so updating it would leave the
    # neighbourhood plot unchanged.
    test.data.update(new_src.data)



# Create the checkbox selection element; available_neighbourhoods is the
# list of all neighbourhoods in the data, and the first two start ticked
neighbourhood_selection = CheckboxGroup(labels=available_neighbourhoods, active = [0, 1])
# Call update() whenever the set of ticked boxes changes
neighbourhood_selection.on_change('active', update)

In [16]:
# Put controls in a single element
controls = WidgetBox(neighbourhood_selection)

# Controls on the left, histogram on the right
layout = row(controls, p)

# Make a tab with the layout; the title names the plot it actually contains
tab = Panel(child=layout, title = 'Neighbourhood Histogram')
tabs = Tabs(tabs=[tab])

In [17]:
def make_tab(doc):
    """Build the interactive neighbourhood price histogram inside a Bokeh document.

    Creates the checkbox control, the initial data source and the plot, wires
    the checkbox to a callback that recomputes the data, and adds the resulting
    tab layout to ``doc``. Reads the notebook-level ``df``,
    ``available_neighbourhoods`` and ``style``.

    Parameters
    ----------
    doc : Bokeh document
        Receives the finished ``Tabs`` layout through ``add_root``.
    """

    def make_dataset(neighbourhood_list, range_start = 0, range_end = 10000000, bin_width = 50):
        """Return a ColumnDataSource with one row per (neighbourhood, price bin).

        ``bin_width`` is handed to ``np.histogram`` as ``bins``, i.e. with an
        integer it is the number of equal-width bins between ``range_start``
        and ``range_end``. Bin counts are converted to proportions of the
        neighbourhood's binned sales.
        """
        columns = ['proportion', 'left', 'right',
                   'f_proportion', 'f_interval',
                   'name', 'color']

        # One palette per call, with one colour per requested neighbourhood
        n_selected = len(neighbourhood_list)
        palette = inferno(n_selected) if n_selected else ()

        frames = []
        for color, neighbourhood_name in zip(palette, neighbourhood_list):
            # Known prices of this neighbourhood; NaNs would only trigger numpy warnings
            nbhd_prices = df.loc[df.ASSESSMENT_NBHD == neighbourhood_name, 'PRICE'].dropna()

            # Histogram with the specified bins and range
            price_hist, edges = np.histogram(nbhd_prices,
                                             bins = bin_width,
                                             range = [range_start, range_end])

            # Counts -> proportions; an empty histogram gives zeros instead of 0/0 = NaN
            total = price_hist.sum()
            proportion = price_hist / total if total > 0 else np.zeros(len(price_hist))

            price_df = pd.DataFrame({'proportion': proportion,
                                     'left': edges[:-1], 'right': edges[1:]})

            # Display strings used by the hover tool
            price_df['f_proportion'] = ['%.2f' % value for value in price_df['proportion']]
            price_df['f_interval'] = ['$%d to $%d' % (left, right) for left, right in zip(price_df['left'], price_df['right'])]

            # Neighbourhood label (legend) and its colour
            price_df['name'] = neighbourhood_name
            price_df['color'] = color

            frames.append(price_df)

        # Concatenate once instead of growing a DataFrame inside the loop
        if frames:
            by_neighbourhood = pd.concat(frames)[columns]
        else:
            by_neighbourhood = pd.DataFrame(columns = columns)

        by_neighbourhood = by_neighbourhood.sort_values(['name', 'left'])

        # Convert the dataframe to a column data source
        return ColumnDataSource(by_neighbourhood)

    def make_plot(src):
        """Draw the histogram for a column data source and return the styled figure."""
        # Blank plot with correct labels
        p = figure(plot_width = 700, plot_height = 700,
                   title = 'Histogram of Real Estate Prices by Neighbourhood',
                   x_axis_label = 'Price ($)', y_axis_label = 'Proportion')

        # Quad glyphs to create a histogram
        p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
               color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
               hover_fill_alpha = 1.0, line_color = 'black')

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Neighbourhood', '@name'),
                                    ('Price', '@f_interval'),
                                    ('Proportion', '@f_proportion')],
                          mode='vline')

        # Show the x tick labels as whole-dollar amounts
        p.xaxis[0].formatter = NumeralTickFormatter(format="$0")
        p.add_tools(hover)

        # Styling
        p = style(p)

        return p

    def update(attr, old, new):
        """Checkbox callback: recompute the data for the ticked neighbourhoods.

        The (attr, old, new) arguments are supplied by Bokeh and are not used.
        """
        neighbourhoods_to_plot = [neighbourhood_selection.labels[i] for i in
                                  neighbourhood_selection.active]

        new_src = make_dataset(neighbourhoods_to_plot)

        # Update the source used by the quad glyphs; `src` is bound further down,
        # before any callback can run
        src.data.update(new_src.data)

    # Checkbox group listing every neighbourhood, with the first two ticked
    neighbourhood_selection = CheckboxGroup(labels=available_neighbourhoods, active = [0, 1])
    # Link a change in the ticked boxes to the update function
    neighbourhood_selection.on_change('active', update)

    initial_neighbourhoods = [neighbourhood_selection.labels[i] for i in neighbourhood_selection.active]

    src = make_dataset(initial_neighbourhoods)

    p = make_plot(src)

    # Put controls in a single element
    controls = WidgetBox(neighbourhood_selection)

    # Create a row layout
    layout = row(controls, p)

    # Make a tab with the layout
    tab = Panel(child=layout, title = 'Neighbourhood Histogram')
    tabs = Tabs(tabs=[tab])

    doc.add_root(tabs)
    
# Set up an application: make_tab is called with a document to populate
handler = FunctionHandler(make_tab)
app = Application(handler)

In [18]:
# Display the interactive neighbourhood histogram application
show(app)

In [19]:
# Load the flights data, keeping only the arrival delay, carrier code and airline name columns
flights = pd.read_csv('data/flights.csv', index_col=0)[['arr_delay', 'carrier', 'name']]
# Preview the first rows
flights.head()

Unnamed: 0,arr_delay,carrier,name
0,11.0,UA,United Air Lines Inc.
1,20.0,UA,United Air Lines Inc.
2,33.0,AA,American Airlines Inc.
3,-18.0,B6,JetBlue Airways
4,-25.0,DL,Delta Air Lines Inc.


In [20]:
# Airline names present in the data, in alphabetical order
available_carriers = sorted(flights['name'].unique())

In [21]:
def modify_doc(doc):
    """Build the interactive arrival-delay histogram inside a Bokeh document.

    Creates the carrier checkboxes, the bin-width slider and the delay-range
    slider, wires all three to a callback that recomputes the histogram data,
    and adds the resulting tab layout to ``doc``. Reads the notebook-level
    ``flights``, ``available_carriers`` and ``style``.

    Parameters
    ----------
    doc : Bokeh document
        Receives the finished ``Tabs`` layout through ``add_root``.
    """

    def make_dataset(carrier_list, range_start = -60, range_end = 120, bin_width = 5):
        """Return a ColumnDataSource with one row per (carrier, delay bin).

        Delays between ``range_start`` and ``range_end`` are split into bins of
        ``bin_width`` and each bin count is converted to a proportion of the
        carrier's binned flights.
        """
        columns = ['proportion', 'left', 'right',
                   'f_proportion', 'f_interval',
                   'name', 'color']
        range_extent = range_end - range_start
        # A range narrower than one bin would give 0 bins, which np.histogram rejects
        n_bins = max(1, int(range_extent / bin_width))

        frames = []
        for i, carrier_name in enumerate(carrier_list):
            # Known arrival delays of this carrier; NaNs would only trigger numpy warnings
            delays = flights.loc[flights['name'] == carrier_name, 'arr_delay'].dropna()

            arr_hist, edges = np.histogram(delays,
                                           bins = n_bins,
                                           range = [range_start, range_end])

            # Counts -> proportions; an empty histogram gives zeros instead of 0/0 = NaN
            total = arr_hist.sum()
            proportion = arr_hist / total if total > 0 else np.zeros(len(arr_hist))

            arr_df = pd.DataFrame({'proportion': proportion, 'left': edges[:-1], 'right': edges[1:] })

            # Display strings used by the hover tool
            arr_df['f_proportion'] = ['%0.5f' % value for value in arr_df['proportion']]
            arr_df['f_interval'] = ['%d to %d minutes' % (left, right) for left, right in zip(arr_df['left'], arr_df['right'])]

            # Carrier label (legend) and its colour; wrap around if the palette is exhausted
            arr_df['name'] = carrier_name
            arr_df['color'] = Category20_16[i % len(Category20_16)]

            frames.append(arr_df)

        # Concatenate once instead of growing a DataFrame inside the loop
        if frames:
            by_carrier = pd.concat(frames)[columns]
        else:
            by_carrier = pd.DataFrame(columns = columns)

        by_carrier = by_carrier.sort_values(['name', 'left'])

        return ColumnDataSource(by_carrier)

    def make_plot(src):
        """Draw the histogram for a column data source and return the styled figure."""
        # Blank plot with correct labels
        p = figure(plot_width = 700, plot_height = 700,
                   title = 'Histogram of Arrival Delays by Carrier',
                   x_axis_label = 'Delay (min)', y_axis_label = 'Proportion')

        # Quad glyphs to create a histogram
        p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
               color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
               hover_fill_alpha = 1.0, line_color = 'black')

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Carrier', '@name'),
                                    ('Delay', '@f_interval'),
                                    ('Proportion', '@f_proportion')],
                          mode='vline')

        p.add_tools(hover)

        # Styling with the notebook-level helper (same settings as the former local copy)
        p = style(p)

        return p

    def update(attr, old, new):
        """Widget callback: recompute the data from the current widget values.

        The (attr, old, new) arguments are supplied by Bokeh and are not used.
        """
        carriers_to_plot = [carrier_selection.labels[i] for i in carrier_selection.active]

        new_src = make_dataset(carriers_to_plot,
                               range_start = range_select.value[0],
                               range_end = range_select.value[1],
                               bin_width = binwidth_select.value)

        # `src` is bound further down, before any callback can run
        src.data.update(new_src.data)

    # Controls: carrier checkboxes (first two ticked), bin width and delay range
    carrier_selection = CheckboxGroup(labels=available_carriers, active = [0, 1])
    carrier_selection.on_change('active', update)

    binwidth_select = Slider(start = 1, end = 30,
                             step = 1, value = 5,
                             title = 'Delay Width (min)')
    binwidth_select.on_change('value', update)

    range_select = RangeSlider(start = -60, end = 180, value = (-60, 120),
                               step = 5, title = 'Delay Range (min)')
    range_select.on_change('value', update)

    # Initial data and plot reflect the widgets' starting values
    initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active]

    src = make_dataset(initial_carriers,
                       range_start = range_select.value[0],
                       range_end = range_select.value[1],
                       bin_width = binwidth_select.value)

    p = make_plot(src)

    # Put controls in a single element
    controls = WidgetBox(carrier_selection, binwidth_select, range_select)

    # Create a row layout
    layout = row(controls, p)

    # Make a tab with the layout
    tab = Panel(child=layout, title = 'Delay Histogram')
    tabs = Tabs(tabs=[tab])

    doc.add_root(tabs)
    
# Set up an application: modify_doc is called with a document to populate
handler = FunctionHandler(modify_doc)
app = Application(handler)

In [22]:
# Pass the notebook address by keyword: the second positional parameter of
# show() is `browser`, not `notebook_url`. The value has to match the
# host:port this Jupyter server is reachable on.
show(app, notebook_url='localhost:8889')

## Find Stats for each Airline

In [23]:
# Per-airline summary statistics of the arrival delay, with friendlier column names
carrier_stats = (
    flights.groupby('name')['arr_delay']
    .describe()
    .reset_index()
    .rename(columns={'name': 'airline', 'count': 'flights', '50%':'median'})
)
carrier_stats

Unnamed: 0,airline,flights,mean,std,min,25%,median,75%,max
0,AirTran Airways Corporation,3175.0,20.115906,54.087671,-44.0,-7.0,5.0,24.0,572.0
1,Alaska Airlines Inc.,709.0,-9.930889,36.482633,-74.0,-32.0,-17.0,2.0,198.0
2,American Airlines Inc.,31947.0,0.364291,42.516182,-75.0,-21.0,-9.0,8.0,1007.0
3,Delta Air Lines Inc.,47658.0,1.644341,44.402289,-71.0,-20.0,-8.0,8.0,931.0
4,Endeavor Air Inc.,17294.0,7.379669,50.086778,-68.0,-21.0,-7.0,15.0,744.0
5,Envoy Air,25037.0,10.774733,43.174306,-53.0,-13.0,-1.0,18.0,1127.0
6,ExpressJet Airlines Inc.,51108.0,15.796431,49.861469,-62.0,-14.0,-1.0,26.0,577.0
7,Frontier Airlines Inc.,681.0,21.920705,61.645997,-47.0,-9.0,6.0,31.0,834.0
8,Hawaiian Airlines Inc.,342.0,-6.915205,75.12942,-70.0,-27.75,-13.0,2.75,1272.0
9,JetBlue Airways,54049.0,9.457973,42.842297,-71.0,-14.0,-3.0,17.0,497.0


## Table for Statistics

In [24]:
# Data source backing the statistics table
table_src = ColumnDataSource(carrier_stats)

# (column in carrier_stats, header shown in the table), in display order
table_columns = [TableColumn(field=field, title=title) for field, title in [
    ('airline', 'Airline'),
    ('flights', 'Number of Flights'),
    ('min', 'Min Delay'),
    ('mean', 'Mean Delay'),
    ('median', 'Median Delay'),
    ('max', 'Max Delay'),
]]

carrier_table = DataTable(source=table_src, columns=table_columns, width=1000)

show(carrier_table)

# Plot with Table and Tools

In [25]:
def modify_doc(doc):
    """Build the arrival-delay histogram plus a per-airline statistics table.

    Creates the carrier checkboxes, the bin-width slider and the delay-range
    slider, wires all three to a callback that recomputes the histogram data,
    places a table of per-airline delay statistics underneath, and adds the
    resulting tab layout to ``doc``. Reads the notebook-level ``flights``,
    ``available_carriers`` and ``style``.

    Parameters
    ----------
    doc : Bokeh document
        Receives the finished ``Tabs`` layout through ``add_root``.
    """

    def make_dataset(carrier_list, range_start = -60, range_end = 120, bin_width = 5):
        """Return a ColumnDataSource with one row per (carrier, delay bin).

        Delays between ``range_start`` and ``range_end`` are split into bins of
        ``bin_width`` and each bin count is converted to a proportion of the
        carrier's binned flights.
        """
        columns = ['proportion', 'left', 'right',
                   'f_proportion', 'f_interval',
                   'name', 'color']
        range_extent = range_end - range_start
        # A range narrower than one bin would give 0 bins, which np.histogram rejects
        n_bins = max(1, int(range_extent / bin_width))

        frames = []
        for i, carrier_name in enumerate(carrier_list):
            # Known arrival delays of this carrier; NaNs would only trigger numpy warnings
            delays = flights.loc[flights['name'] == carrier_name, 'arr_delay'].dropna()

            arr_hist, edges = np.histogram(delays,
                                           bins = n_bins,
                                           range = [range_start, range_end])

            # Counts -> proportions; an empty histogram gives zeros instead of 0/0 = NaN
            total = arr_hist.sum()
            proportion = arr_hist / total if total > 0 else np.zeros(len(arr_hist))

            arr_df = pd.DataFrame({'proportion': proportion, 'left': edges[:-1], 'right': edges[1:] })

            # Display strings used by the hover tool
            arr_df['f_proportion'] = ['%0.5f' % value for value in arr_df['proportion']]
            arr_df['f_interval'] = ['%d to %d minutes' % (left, right) for left, right in zip(arr_df['left'], arr_df['right'])]

            # Carrier label (legend) and its colour; wrap around if the palette is exhausted
            arr_df['name'] = carrier_name
            arr_df['color'] = Category20_16[i % len(Category20_16)]

            frames.append(arr_df)

        # Concatenate once instead of growing a DataFrame inside the loop
        if frames:
            by_carrier = pd.concat(frames)[columns]
        else:
            by_carrier = pd.DataFrame(columns = columns)

        by_carrier = by_carrier.sort_values(['name', 'left'])

        return ColumnDataSource(by_carrier)

    def make_plot(src):
        """Draw the histogram for a column data source and return the styled figure."""
        # Blank plot with correct labels
        p = figure(plot_width = 700, plot_height = 700,
                   title = 'Histogram of Arrival Delays by Carrier',
                   x_axis_label = 'Delay (min)', y_axis_label = 'Proportion')

        # Quad glyphs to create a histogram
        p.quad(source = src, bottom = 0, top = 'proportion', left = 'left', right = 'right',
               color = 'color', fill_alpha = 0.7, hover_fill_color = 'color', legend = 'name',
               hover_fill_alpha = 1.0, line_color = 'black')

        # Hover tool with vline mode
        hover = HoverTool(tooltips=[('Carrier', '@name'),
                                    ('Delay', '@f_interval'),
                                    ('Proportion', '@f_proportion')],
                          mode='vline')

        p.add_tools(hover)

        # Styling with the notebook-level helper (same settings as the former local copy)
        p = style(p)

        return p

    def update(attr, old, new):
        """Widget callback: recompute the data from the current widget values.

        The (attr, old, new) arguments are supplied by Bokeh and are not used.
        """
        carriers_to_plot = [carrier_selection.labels[i] for i in carrier_selection.active]

        new_src = make_dataset(carriers_to_plot,
                               range_start = range_select.value[0],
                               range_end = range_select.value[1],
                               bin_width = binwidth_select.value)

        # `src` is bound further down, before any callback can run
        src.data.update(new_src.data)

    # Controls: carrier checkboxes (first two ticked), bin width and delay range
    carrier_selection = CheckboxGroup(labels=available_carriers, active = [0, 1])
    carrier_selection.on_change('active', update)

    binwidth_select = Slider(start = 1, end = 30,
                             step = 1, value = 5,
                             title = 'Delay Width (min)')
    binwidth_select.on_change('value', update)

    range_select = RangeSlider(start = -60, end = 180, value = (-60, 120),
                               step = 5, title = 'Delay Range (min)')
    range_select.on_change('value', update)

    # Initial data and plot reflect the widgets' starting values
    initial_carriers = [carrier_selection.labels[i] for i in carrier_selection.active]

    src = make_dataset(initial_carriers,
                       range_start = range_select.value[0],
                       range_end = range_select.value[1],
                       bin_width = binwidth_select.value)

    p = make_plot(src)

    # Per-airline summary statistics of the arrival delay for the table
    carrier_stats = flights.groupby('name')['arr_delay'].describe()
    carrier_stats = carrier_stats.reset_index().rename(columns={'name': 'airline',
                                                                'count': 'flights',
                                                                '50%':'median'})
    # Two decimals are enough for display
    carrier_stats['mean'] = carrier_stats['mean'].round(2)

    carrier_src = ColumnDataSource(carrier_stats)

    table_columns = [TableColumn(field='airline', title='Airline'),
                     TableColumn(field='flights', title='Number of Flights'),
                     TableColumn(field='min', title='Min Delay'),
                     TableColumn(field='mean', title='Mean Delay'),
                     TableColumn(field='median', title='Median Delay'),
                     TableColumn(field='max', title='Max Delay')]

    carrier_table = DataTable(source=carrier_src, columns=table_columns, width=1000)

    # Put controls in a single element
    controls = WidgetBox(carrier_selection, binwidth_select, range_select)

    # Controls and plot side by side, with the statistics table underneath
    layout = column(row(controls, p), carrier_table)

    # Make a tab with the layout
    tab = Panel(child=layout, title = 'Delay Histogram')
    tabs = Tabs(tabs=[tab])

    doc.add_root(tabs)
    
# Set up an application: modify_doc (the version with the statistics table) is
# called with a document to populate
handler = FunctionHandler(modify_doc)
app = Application(handler)

In [26]:
# Display the interactive delay histogram together with the statistics table
show(app)