# # Aim

Now that we've pre-processed our data and created some standalone Bokeh graphs, we can take these interactive graphs to the next level, i.e. add drop-downs and look at how to hook up the notebook to a remote server.

But for now, let's try to get these drop-downs working so that we can switch between colouring our data points by region, urban-rural classification, or Shannon index. We also want drop-downs for our religion dataset, so that we can switch between religions and explore the relationship between the percentage of each religious group in an LA and that group's contribution to the SO non-response (NR) rate.

## Import libraries

In [39]:
# used to manipulate dataframes
import pandas as pd

# used to create visualisations
import seaborn as sns
import matplotlib.pylab as plt

# used to create interactive visualisations
from bokeh.io import show, curdoc, output_notebook
from bokeh.layouts import column
from bokeh.models import (
    ColumnDataSource,
    ColorBar,
    BasicTicker,
    PrintfTickFormatter,
    LinearColorMapper,
)

from bokeh.palettes import Category10
from bokeh.plotting import figure


## Read-in data

We have some pre-processed data from our previous notebooks (started in 'Main_Lang_NR_SO.ipynb', and finished in 'Religion_1_SO.ipynb') that we will read in now.

In [31]:

# Load the pre-processed language / non-response table from disk
lang_csv_path = '../Data/final_lang_so.csv'
df = pd.read_csv(lang_csv_path)

# Preview the first rows as a quick sanity check of the columns
df.head()

Unnamed: 0.1,Unnamed: 0,LA_name,Observation,Non_Eng_Percentages,NR_rate,region,Urb_Rur,Shannon_idx
0,0,Adur,1971,3.14,6.47,South East,Predominantly Urban,0.176281
1,1,Allerdale,1073,1.15,6.18,North West,Predominantly Rural,0.050053
2,2,Amber Valley,1850,1.51,6.77,East Midlands,Predominantly Urban,0.104063
3,3,Arun,9469,5.89,7.09,South East,Predominantly Urban,0.146576
4,4,Ashfield,3944,3.22,6.77,East Midlands,Predominantly Urban,0.176282


# Non-English + Non-response: Scatterplots

Shows the relationship between the % of Non-English speakers and % of SO non-response for our 331 local authorities in England and Wales.

In [40]:
import pandas as pd
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Select
from bokeh.plotting import figure, curdoc
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap
from bokeh.models import ColorBar, BasicTicker, PrintfTickFormatter, LinearColorMapper



# Wrap the dataframe in a Bokeh data source so glyphs and tooltips can refer
# to its columns by name
source = ColumnDataSource(df)

# Hover tooltips: a list of (label, value) pairs shown when the cursor is over
# a data point. "@<name>" refers to a column of the data source (here the
# columns of `df`); "$index" is Bokeh's special variable for the row index.
tool = [
    ("index", "$index"),
    ("(x,y)", "(@Non_Eng_Percentages, @NR_rate)"),
    ("name", "@LA_name"),
]

# Plot 0 (no colouring): figure with title, axis labels and the hover tooltips
p0 = figure(
    title="Relationship between Non-response Rate and Non-English Speakers",
    x_axis_label="Percentage of Non-English Speakers",
    y_axis_label="Non-response rate",
    tooltips=tool,
)

# One scatter glyph over every row: x and y are looked up by column name
p0.scatter(
    x="Non_Eng_Percentages",
    y="NR_rate",
    source=source,
    size=10,
    fill_alpha=0.5,
)


# Plot 1 (By Region)
p1 = figure(title="Relationship between Non-response Rate and Non-English Speakers",
            x_axis_label="Percentage of Non-English Speakers",
            y_axis_label="Non-response Rate",
            tooltips=tool)


# Draw one glyph renderer per region so that every region gets its own colour
# and its own legend entry. zip() stops at the shorter input, so at most the
# ten colours of Category10[10] (i.e. ten regions) are drawn.
for region, color in zip(df.region.unique(), Category10[10]):
    # Rows belonging to this region only
    region_rows = df[df.region == region]
    # scatter() (default marker: circle) is used instead of circle(size=...),
    # which is deprecated in recent Bokeh releases; it also matches the
    # scatter() calls used for the other plots in this notebook.
    p1.scatter(x='Non_Eng_Percentages', y='NR_rate', size=10, alpha=0.5, color=color,
               legend_label=region, muted_color=color, muted_alpha=0.1,
               source=ColumnDataSource(region_rows))


# Set location of legend
p1.legend.location = "bottom_right"
# Set click policy to hide:
# when a legend entry is clicked, its data points are hidden from the graph
p1.legend.click_policy = "hide"
# Set legend title
p1.legend.title = "Regions"

# Plot 2 (Urban vs Rural)
p_2 = figure(title="Relationship between Non-response Rate and Non-English Speakers",
            x_axis_label="Percentage of Non-English Speakers",
            y_axis_label="Non-response Rate",
            tooltips=tool)

# One glyph renderer per urban-rural class, each with its own colour and
# legend entry (zip() pairs classes with palette colours in order).
for urb_rur, color in zip(df.Urb_Rur.unique(), Category10[10]):
    # Rows belonging to this urban-rural class only
    class_rows = df[df.Urb_Rur == urb_rur]
    # scatter() (default marker: circle) replaces circle(size=...), which is
    # deprecated in recent Bokeh releases and inconsistent with the scatter()
    # calls used for the other plots in this notebook.
    p_2.scatter(x='Non_Eng_Percentages', y='NR_rate', size=10, alpha=0.5, color=color,
                legend_label=urb_rur, muted_color=color, muted_alpha=0.1,
                source=ColumnDataSource(class_rows))

# Legend: bottom-right corner, clicking an entry hides its points
p_2.legend.location = "bottom_right"
p_2.legend.click_policy = "hide"
p_2.legend.title = "Urban-Rural"



# Plot 3 (Shannon Index)

# Colour mapper spanning the observed range of the Shannon index.
# Viridis256 chosen because it's good at representing continuous variables
color_map = LinearColorMapper(palette="Viridis256", low=df.Shannon_idx.min(), high=df.Shannon_idx.max())

# Axis labels follow the data actually plotted below: x is Non_Eng_Percentages
# and y is NR_rate (the labels were previously swapped).
p3 = figure(title="Relationship between Non-response Rate and Non-English Speakers",
            x_axis_label="Percentage of Non-English Speakers",
            y_axis_label="Non-response Rate",
            tooltips=tool)

# Each point's colour is taken from its Shannon_idx value via the colour mapper
p3.scatter("Non_Eng_Percentages", "NR_rate", source=source, fill_alpha=0.5, size=10,
           color={'field': 'Shannon_idx', 'transform': color_map})

# Create colour bar driven by the same colour mapper as the points
color_bar = ColorBar(color_mapper=color_map,
                     title='Shannon Index',
                     ticker=BasicTicker(desired_num_ticks=5),
                     formatter=PrintfTickFormatter(format='%.2f'))

# Add the colour bar to the right of the p3 graph
p3.add_layout(color_bar, 'right')

# Create dropdown selection menu.
# The initial value has to be one of `options`; "Default" corresponds to the
# un-coloured plot (p0), which is the one made visible on first render.
dropdown = Select(title="Color By:", value="Default", options=["Default", "Region", "Urban", "Shannon Index"])

# Define the update function
def update_scatterplots(attr, old, new):
    """Show the scatterplot matching the dropdown's value and hide the rest.

    Has the ``(attr, old, new)`` signature Bokeh uses for ``on_change``
    callbacks; the arguments themselves are not read, the current choice is
    taken from ``dropdown.value``. If that value is not one of the known
    choices, no plot's visibility is touched.
    """
    plot_for_choice = {
        "Default": p0,
        "Region": p1,
        "Urban": p_2,
        "Shannon Index": p3,
    }

    chosen = plot_for_choice.get(dropdown.value)
    if chosen is None:
        # Unknown choice: leave every plot as it is
        return

    # Exactly one plot (the chosen one) ends up visible
    for plot in plot_for_choice.values():
        plot.visible = plot is chosen

# Initial state: only the un-coloured plot (p0) is shown
for plot, shown in ((p0, True), (p1, False), (p_2, False), (p3, False)):
    plot.visible = shown

# Run update_scatterplots every time the dropdown's value changes
dropdown.on_change('value', update_scatterplots)

# Stack the dropdown above the four scatterplots in a single column
layout = column(dropdown, p0, p1, p_2, p3)

# Register the layout as a root of the current document
curdoc().add_root(layout)

# Display output.
# NOTE(review): as the warning printed below this cell states, show() produces
# standalone HTML/JS output in which Python callbacks registered with
# on_change cannot run; the dropdown only takes effect when this code is
# served as a Bokeh server application (or the callback is rewritten in JS).
show(layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



# Religion + Non-response: Scatterplots

In [41]:
# Load the pre-processed religion table from disk
religion_csv_path = '../Data/religion_so_cleaned.csv'
rel = pd.read_csv(religion_csv_path)

In [42]:
rel

Unnamed: 0,LA_name,Total_Observation,No religion_Percentage,No religion_Observation,Christian_Percentage,Christian_Observation,Buddhist_Percentage,Buddhist_Observation,Hindu_Percentage,Hindu_Observation,...,Other religion_Percentage,Other religion_Observation,Buddhist_NR,Christian_NR,Hindu_NR,Jewish_NR,Muslim_NR,No religion_NR,Other religion_NR,Sikh_NR
0,Adur,49935,47.51,23725,49.18,24556,0.51,256,0.34,169,...,0.73,365,0.04,2.05,0.02,0.01,0.05,2.12,0.04,0.00
1,Allerdale,75914,33.10,25129,65.88,50010,0.27,205,0.08,57,...,0.38,285,0.02,2.54,0.00,0.01,0.03,1.33,0.04,0.00
2,Amber Valley,99179,46.62,46233,51.74,51318,0.26,261,0.18,179,...,0.69,685,0.02,2.42,0.01,0.00,0.01,1.90,0.05,0.02
3,Arun,131265,39.56,51925,58.34,76574,0.36,468,0.23,303,...,0.63,822,0.03,2.69,0.03,0.01,0.04,1.82,0.07,0.00
4,Ashfield,96859,49.49,47936,48.56,47038,0.24,235,0.32,313,...,0.58,559,0.02,2.29,0.04,0.00,0.05,2.11,0.05,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,Wrexham,103237,41.62,42968,56.16,57981,0.35,361,0.26,264,...,0.46,470,0.02,3.05,0.02,0.01,0.11,2.03,0.05,0.00
327,Wychavon,104730,34.63,36263,63.55,66551,0.30,311,0.15,160,...,0.49,517,0.02,2.57,0.01,0.00,0.03,1.43,0.05,0.00
328,Wyre,89649,32.76,29367,65.81,58994,0.31,280,0.14,128,...,0.50,445,0.02,2.51,0.00,0.00,0.04,1.43,0.05,0.00
329,Wyre Forest,79812,37.88,30229,60.03,47914,0.27,213,0.14,113,...,0.55,436,0.03,2.80,0.01,0.00,0.05,1.75,0.06,0.01


In [None]:
# We'll also read in the 

Calculate the non-response rate contributions for each religious group for each LA.

In [18]:
import pandas as pd
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Select, HTMLTemplateFormatter
from bokeh.models.widgets import DataTable, TableColumn, Div
from bokeh.plotting import figure, show, curdoc
from bokeh.io import output_notebook

# Prepare data
# Two helper columns hold whatever group is currently selected; they start out
# as the Christian figures, matching the select widget's initial value below.
rel['selected_religion'] = rel['Christian_Percentage']
rel['selected_percentages'] = rel['Christian_NR']

source = ColumnDataSource(rel)

# Define tooltips
tool = [
    ("index", "$index"),
    ("(x,y)", "(@selected_religion{0.2f}, @selected_percentages{0.2f})"),
    ("name", "@LA_name"),
]

# Create figure
# The figure gets its own name instead of re-using `p_2`: `p_2` is the
# Urban-Rural scatterplot that update_scatterplots toggles, and rebinding it
# here would make that callback show/hide this religion plot instead.
p_rel = figure(title="Relationship between % of religious group in given LA, and their non-response rate",
               y_axis_label="Non-response Rate", x_axis_label="Percentage of religious group in given LA", tooltips=tool)


# Scatter plot of the two helper columns
p_rel.scatter("selected_religion", "selected_percentages", source=source, fill_alpha=0.5, size=10)

# Define callback for updating data source
def update_plot(attr, old, new):
    """Point the helper columns at the selected group and refresh the source.

    Has the ``(attr, old, new)`` signature Bokeh uses for ``on_change``
    callbacks; the arguments are not read, the current choice is taken from
    ``select_religion.value``. The choice is interpolated into the
    ``<group>_Percentage`` and ``<group>_NR`` column names of ``rel``, so a
    value without matching columns raises ``KeyError``.
    """
    selected_religion = select_religion.value
    rel['selected_religion'] = rel[f'{selected_religion}_Percentage']
    rel['selected_percentages'] = rel[f'{selected_religion}_NR']
    source.data = source.from_df(rel)

# Create select widget
# Every option must be a column-name prefix present in `rel`, because
# update_plot builds '<option>_Percentage' and '<option>_NR' from it. The
# catch-all group's columns are named 'Other religion_*', not 'Other_*'.
options = ['Christian', 'No religion', 'Muslim', 'Jewish', 'Buddhist', 'Hindu', 'Sikh', 'Other religion']
select_religion = Select(title="Religious Group:", value='Christian', options=options)
select_religion.on_change('value', update_plot)

# Layout
layout = column(select_religion, p_rel)

# Create DataTable for layout1
# NOTE(review): `totals` and `create_formatter` are not defined in this part
# of the notebook; they are assumed to come from an earlier cell.
source1 = ColumnDataSource(totals)

# (field in the data source, header shown in the table)
fields_and_titles1 = [
    ("Religion_categories", "Religion"),
    ("Observation", "Observation"),
    ("Percent_of_survey_respondents", "% of respondents"),
]

# Every column gets its own formatter created for the default group
columns1 = [
    TableColumn(field=field, title=title, formatter=create_formatter('Christian'))
    for field, title in fields_and_titles1
]

heading1 = Div(text="<h1>Totals</h1>", width=300)

# Read-only table without an index column
data_table1 = DataTable(
    source=source1,
    columns=columns1,
    editable=False,
    width=500,
    index_position=None,
)

# Heading stacked above the table
layout1 = column(heading1, data_table1)

# Create DataTable for layout2
# NOTE(review): `nr_totals` and `create_formatter` are not defined in this
# part of the notebook; they are assumed to come from an earlier cell.
source2 = ColumnDataSource(nr_totals)

# (field in the data source, header shown in the table)
fields_and_titles2 = [
    ("Religion_categories", "Religion"),
    ("Observation", "Observation"),
    ("Non_response_rate", "Non response rate"),
    ("Contribution_to_overall_non_response_rate", "% of total Non-response rate"),
]

# Every column gets its own formatter created for the default group
columns2 = [
    TableColumn(field=field, title=title, formatter=create_formatter('Christian'))
    for field, title in fields_and_titles2
]

heading2 = Div(text="<h1>Non-response rates</h1>", width=300)

# Read-only table without an index column
data_table2 = DataTable(
    source=source2,
    columns=columns2,
    editable=False,
    width=700,
    index_position=None,
)

# Heading stacked above the table
layout2 = column(heading2, data_table2)

# Scatter plot
# Route Bokeh output to the notebook
output_notebook()

# Prepare data
# The helper columns start as the Christian figures (the select widget's
# default). `rel` names its columns '<group>_Percentage' and '<group>_NR';
# the previously used 'Christian_%' / 'Group_Percentages_Christian' are not
# among the columns of the table loaded above.
rel['selected_religion'] = rel['Christian_Percentage']  # Default religion
rel['selected_percentages'] = rel['Christian_NR']

source = ColumnDataSource(rel)

# Define tooltips
tool = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("name", "@LA_name"),
]

# Create figure
p4 = figure(title="Relationship between % of religious group in given LA, and their non-response rate",
            y_axis_label="Non-response Rate", x_axis_label="Percentage of religious group in given LA", tooltips=tool)

# Scatter plot of the two helper columns
p4.scatter("selected_religion", "selected_percentages", source=source, fill_alpha=0.5, size=10)

def update_highlighted_rows(selected_religion):
    """Give every column of both data tables a formatter for the given group.

    A single formatter is built with ``create_formatter(selected_religion)``
    and shared by all columns in ``columns1`` and ``columns2``; the column
    lists are then re-assigned to their tables.
    """
    shared_formatter = create_formatter(selected_religion)

    for table_column in [*columns1, *columns2]:
        table_column.formatter = shared_formatter

    # Re-assign the column lists to the tables after changing the formatters
    data_table1.columns = columns1
    data_table2.columns = columns2

# Define callback for updating data source

def update_plot(attr, old, new):
    """Switch the scatterplot and the table highlighting to the selected group.

    Has the ``(attr, old, new)`` signature Bokeh uses for ``on_change``
    callbacks; the arguments are not read, the current choice is taken from
    ``select_religion.value``.

    The choice is interpolated into the ``<group>_Percentage`` and
    ``<group>_NR`` column names of ``rel`` (the naming used by the table loaded
    above; the earlier ``<group>_%`` / ``Group_Percentages_<group>`` patterns
    do not match any of its columns). A value without matching columns raises
    ``KeyError``.
    """
    selected_religion = select_religion.value
    rel['selected_religion'] = rel[f'{selected_religion}_Percentage']
    rel['selected_percentages'] = rel[f'{selected_religion}_NR']
    # Replace the source's data wholesale so the plot picks up the new columns
    source.data = source.from_df(rel)
    update_highlighted_rows(selected_religion)

# Create select widget
# Each option is interpolated into column names of `rel` by update_plot, so it
# has to match the group names used in those columns; the catch-all group is
# called 'Other religion' there, not 'Other'.
# NOTE(review): the same string is passed to create_formatter for the table
# highlighting - confirm the tables label this group 'Other religion' as well.
options = ['Christian', 'No religion', 'Muslim', 'Jewish', 'Buddhist', 'Hindu', 'Sikh', 'Other religion']
select_religion = Select(title="Religious Group:", value='Christian', options=options)
select_religion.on_change('value', update_plot)

# Initial update of the highlighted rows
update_highlighted_rows(select_religion.value)

# Layout: select widget above the scatterplot, the two tables side by side
layout = column(select_religion, p4)
l = row(layout1, layout2)

# Register plot and tables as a root of the current document
curdoc().add_root(column(layout, l))