###### Imports

In [1]:
import pandas as pd
import numpy as np

import yaml

from bokeh.io import (
    curdoc,
    output_notebook,
    show
)
from bokeh.plotting import figure
from bokeh.themes import Theme
from bokeh.palettes import OrRd9
from bokeh.tile_providers import CARTODBPOSITRON
from bokeh.models import (
    ColumnDataSource,
    LogColorMapper, 
    HoverTool,
    ColorBar,
    Slider,
    Select,
    MultiSelect,
    Column
)

# Configure Bokeh so that subsequent show() calls render inline in this notebook.
output_notebook()

##### Load clean data
---

See `UK_Traffic_Exploratory_Visuals.ipynb` to review the cleaning and processing steps that produced the data loaded here.

In [2]:
# Load the pre-wrangled accident records (one row per accident; each row also
# carries its district's polygon coordinates, as the preview below shows).
df = pd.read_pickle("./uk_wrangled.pkl")
# Preview the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,Local_Authority_District,Police_Force,Accident_Severity,Number_of_Vehicles,Number_of_Casualties,Carriageway_Hazards,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,Day_of_Week,Road_Type,...,Accident_Index,Year,Speed_limit,LSOA_of_Accident_Location,lad16nm,poly_x,poly_y,st_areashape,xs,ys
0,12,1,Very_Severe,2,1,,Urban,Yes,Friday,Single carriageway,...,201201BS70001_1,2012,30,E06000012,North East Lincolnshire,"[-0.20442280064621207, -0.20429355653431758, -...","[53.637900043713266, 53.637845083392214, 53.63...",191863800.0,"[-22756.242074471204, -22741.85468574708, -227...","[7101874.987282942, 7101864.6680183485, 710186..."
1,12,1,Very_Severe,2,1,,Urban,Yes,Wednesday,One way street,...,201201BS70003_1,2012,30,E06000012,North East Lincolnshire,"[-0.20442280064621207, -0.20429355653431758, -...","[53.637900043713266, 53.637845083392214, 53.63...",191863800.0,"[-22756.242074471204, -22741.85468574708, -227...","[7101874.987282942, 7101864.6680183485, 710186..."
2,12,1,Very_Severe,1,1,,Urban,Yes,Thursday,Single carriageway,...,201201BS70004_1,2012,30,E06000012,North East Lincolnshire,"[-0.20442280064621207, -0.20429355653431758, -...","[53.637900043713266, 53.637845083392214, 53.63...",191863800.0,"[-22756.242074471204, -22741.85468574708, -227...","[7101874.987282942, 7101864.6680183485, 710186..."
3,12,1,Very_Severe,1,1,,Urban,Yes,Wednesday,Single carriageway,...,201201BS70005_1,2012,30,E06000012,North East Lincolnshire,"[-0.20442280064621207, -0.20429355653431758, -...","[53.637900043713266, 53.637845083392214, 53.63...",191863800.0,"[-22756.242074471204, -22741.85468574708, -227...","[7101874.987282942, 7101864.6680183485, 710186..."
4,12,1,Very_Severe,2,1,,Urban,Yes,Friday,Single carriageway,...,201201BS70006_1,2012,30,E06000012,North East Lincolnshire,"[-0.20442280064621207, -0.20429355653431758, -...","[53.637900043713266, 53.637845083392214, 53.63...",191863800.0,"[-22756.242074471204, -22741.85468574708, -227...","[7101874.987282942, 7101864.6680183485, 710186..."


###### Accident Count Choropleth Map

---

In [3]:
# Minimum number of recorded accidents a district needs to be kept below.
val = 2000

# Accident count per district, joined to that district's polygon coordinates.
#
# * rename_axis(...) + reset_index(name=...) produces the columns
#   ["lad16nm", "Accident_Count"] regardless of how the installed pandas
#   version names the result of value_counts().
# * The geometry is de-duplicated to one row per district *before* the merge,
#   so the (large) polygon lists are not copied once per accident first.
test = (df["lad16nm"]
 .value_counts()
 .rename_axis("lad16nm")
 .reset_index(name="Accident_Count")
 .merge(df[["lad16nm", "xs", "ys"]].drop_duplicates(subset=["lad16nm"]),
        on="lad16nm"
       )
 .reset_index(drop=True)
 # "@val" reads the Python variable directly instead of string formatting
 .query("Accident_Count > @val")
)

# Smallest count that survives the threshold (sanity check on the filter).
test["Accident_Count"].min()

2039

In [4]:
# Spot-check the records belonging to a handful of named districts.
dist = [
    "Torbay",
    "Plymouth",
    "Nottingham",
    "Leeds",
    "Camden",
]
df.loc[df["lad16nm"].isin(dist)]

Unnamed: 0,Local_Authority_District,Police_Force,Accident_Severity,Number_of_Vehicles,Number_of_Casualties,Carriageway_Hazards,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,Day_of_Week,Road_Type,...,Accident_Index,Year,Speed_limit,LSOA_of_Accident_Location,lad16nm,poly_x,poly_y,st_areashape,xs,ys
100532,27,1,Very_Severe,1,1,,Urban,Yes,Sunday,Dual carriageway,...,201201FH10079_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100533,27,1,Very_Severe,1,1,,Urban,Yes,Tuesday,Single carriageway,...,201201FH10157_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100534,27,1,Very_Severe,2,1,,Urban,Yes,Monday,Dual carriageway,...,201201FH10307_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100535,27,1,Very_Severe,2,1,,Urban,Yes,Tuesday,Single carriageway,...,201201QA10282_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100536,27,1,Very_Severe,2,1,,Urban,No,Tuesday,Single carriageway,...,201201QK50019_1,2012,20,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100537,27,1,Very_Severe,2,1,,Urban,Yes,Saturday,Dual carriageway,...,201201QK50073_1,2012,50,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100538,27,1,Very_Severe,2,2,,Urban,Yes,Wednesday,Single carriageway,...,201201QK50610_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100539,27,1,Very_Severe,2,1,,Urban,Yes,Friday,Single carriageway,...,201201QK50786_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100540,27,1,Moderate_Severity,2,1,,Urban,Yes,Monday,Roundabout,...,201201TA00424_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."
100541,27,1,Moderate_Severity,2,1,,Urban,Yes,Tuesday,Single carriageway,...,201201TA01042_1,2012,30,E06000027,Torbay,"[-3.515107720015103, -3.51503844625481, -3.515...","[50.5174265259677, 50.5173770767293, 50.517355...",6.289084e+07,"[-391300.00147558615, -391292.28995586495, -39...","[6536371.931663885, 6536363.274406011, 6536359..."


In [7]:
def modify_map_doc(doc):
    """Build the interactive accident choropleth and attach it to ``doc``.

    The app consists of a variable selector, a district selector and a map of
    district polygons coloured by accident count. Changing either selector
    rebuilds the map from freshly aggregated data.

    Parameters
    ----------
    doc : bokeh.document.Document
        Document that receives the layout (via ``doc.add_root``) and the theme.
    """
    # The app loads its own copy of the data so it does not depend on
    # notebook-level state. NOTE: unpickling can execute arbitrary code, so
    # this file must come from a trusted source.
    df = pd.read_pickle("./uk_wrangled.pkl")

    # Position of the map inside the layout's children (after the two selectors).
    MAP_SLOT = 2

    def make_data(df=df, var="Accident_Count", id_var="lad16nm", districts="All"):
        """Aggregate accident counts per district and attach polygon geometry.

        Parameters
        ----------
        df : pandas.DataFrame
            Accident records; must contain ``id_var``, ``xs`` and ``ys``.
        var : str
            ``"Accident_Count"`` for one row per district, otherwise the name
            of a column to break the counts down by (one row per
            district/category pair).
        id_var : str
            Column identifying the district.
        districts : str or iterable of str
            District names to keep. ``"All"``, a selection containing
            ``"All"``, an empty selection or ``None`` keeps every district.

        Returns
        -------
        pandas.DataFrame
            Columns ``id_var``, (``var``,) ``Accident_Count``, ``xs``, ``ys``,
            sorted by ``Accident_Count`` in descending order.
        """
        group_keys = [id_var] if var == "Accident_Count" else [id_var, var]

        # groupby drops rows whose key is missing; observed=True keeps
        # categorical keys from producing zero-count combinations, which a
        # log colour scale cannot represent.
        counts = (df
                  .groupby(group_keys, observed=True)
                  .size()
                  .reset_index(name="Accident_Count")
                  .sort_values("Accident_Count", ascending=False)
                 )

        # One geometry row per district. The map glyph reads "xs"/"ys"
        # (presumably the Web Mercator version of poly_x/poly_y -- the figure
        # ranges below are in those units), so those are the columns to carry.
        geometry = df[[id_var, "xs", "ys"]].drop_duplicates(subset=[id_var])

        accident_data = (counts
                         .merge(geometry, on=id_var)
                         .reset_index(drop=True)
                        )

        # Normalise the selection: a bare string is a single name, not an
        # iterable of characters.
        if districts is None:
            selected = []
        elif isinstance(districts, str):
            selected = [districts]
        else:
            selected = list(districts)

        if selected and "All" not in selected:
            accident_data = accident_data[accident_data[id_var].isin(selected)]

        return accident_data

    def make_map(accident_data=None, var=None):
        """Create the choropleth figure for ``accident_data``.

        Parameters
        ----------
        accident_data : pandas.DataFrame, optional
            Output of ``make_data``. When omitted, the default aggregation is
            computed at call time.
        var : str, optional
            Breakdown column to show in the hover tooltip, if present in
            ``accident_data``.

        Returns
        -------
        bokeh.plotting.Figure
        """
        if accident_data is None:
            accident_data = make_data()

        # create the mapper to color the jurisdictions on the map
        # (palette reversed so that higher counts get the darker shades)
        mapper = LogColorMapper(palette=OrRd9[::-1])

        # instantiate the map
        p = figure(x_range=(-630000, 162000),
                   y_range=(6500000, 8600000),

                   # mercator axes label the projected coordinates as lats/longs
                   x_axis_type="mercator",
                   y_axis_type="mercator",
                   plot_height=1200,
                   plot_width=1000,
                   output_backend="webgl"
                  )

        # add tile
        p.grid.grid_line_color = None
        p.axis.visible = False
        p.add_tile(CARTODBPOSITRON)

        # fill plot
        # NOTE(review): with a breakdown variable there is one patch per
        # district/category pair, so patches of the same district overlap.
        p.patches(xs="xs", ys="ys",
                  fill_color={"field": "Accident_Count",
                              "transform": mapper
                             },
                  fill_alpha=0.9,
                  line_color=None,
                  source=ColumnDataSource(data=accident_data)
                 )

        # add in hover capabilities for user
        tooltips = [
            ("Local Authority District ID", "@lad16nm"),
            ("Accident_Count", "@Accident_Count")
        ]
        if var is not None and var != "Accident_Count" and var in accident_data.columns:
            # "@{...}" lets the tooltip reference any column name verbatim
            tooltips.insert(1, (var, "@{%s}" % var))
        p.add_tools(HoverTool(tooltips=tooltips))

        # add in color bar to describe magnitude
        p.add_layout(ColorBar(color_mapper=mapper,
                              title="Number of Accidents"
                             ),
                     "right")

        return p

    # Create Select
    # columns that identify a record or hold geometry; not offered as variables
    id_vars = ["Local_Authority_District", "Accident_Index",
               "Date", "LSOA_of_Accident_Location", "lad16nm",
               "poly_x", "poly_y", "xs", "ys", "st_areashape"
              ]

    # select the remaining vars that the user will be able to use
    group_vars = [col for col in df.columns if col not in id_vars]
    group_vars.append("Accident_Count")

    # define variable select; it starts on the plain count so the widget
    # matches the map that is rendered first
    var_select = Select(value="Accident_Count",
                        options=group_vars,
                        title="Select a Variable to view: "
                       )

    # define district select ("All" first; missing names cannot be options)
    dist_opts = ["All"] + sorted(str(name) for name in df["lad16nm"].dropna().unique())
    dstrct_select = MultiSelect(title="Select Which Districts to View: ",
                                value=["All"],
                                options=dist_opts
                               )

    def current_map():
        """Build the map for the current state of both selectors."""
        return make_map(
            accident_data=make_data(var=var_select.value,
                                    districts=dstrct_select.value),
            var=var_select.value
        )

    def update(attr, old, new):
        """Bokeh ``on_change`` callback: swap in a freshly built map."""
        layout.children[MAP_SLOT] = current_map()

    var_select.on_change("value", update)
    dstrct_select.on_change("value", update)

    layout = Column(children=[var_select, dstrct_select, current_map()])

    doc.add_root(layout)

    # safe_load parses plain YAML only; yaml.load without an explicit Loader
    # is unsafe and is rejected by current PyYAML releases.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
            Figure:
                toolbar_location: above
                height: 500
                width: 800
    """))

In [8]:
# Passing the function (rather than a figure) makes Bokeh run it as an
# embedded server app, which the widget callbacks above need in order to fire.
show(modify_map_doc)