# Load packages, read-in pre-processed data

In [1]:
#Import relevant packages

import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt

from bokeh.io import show, curdoc, output_notebook
from bokeh.layouts import column
from bokeh.models import (
    ColumnDataSource,
    Label,
    LabelSet,
    CheckboxGroup,
    CustomJS,
    Button,
)
from bokeh.models.annotations import LabelSet
from bokeh.palettes import Category10
from bokeh.plotting import figure

import numpy as np

In [2]:
#  Read-in the pre-processed data

# df without urban-rural classification
df = pd.read_csv('/Users/loucap/Documents/GitWork/InteractiveGender/Data/cleaned_lang_GI.csv')

# Let's take a quick glance

df.head()

Unnamed: 0,LA_code,LA_name,GI_code,GI_categories,Observation_x,Percentages,Non-response_rate,region_x,Observation_y,Observation_total,Percentage,region_y,Shannon_idx
0,E06000001,Hartlepool,6,Not answered,3777,5.057173,5.057173,North East,1875,92337,2.030605,North East,0.973174
1,E06000002,Middlesbrough,6,Not answered,6991,6.135793,6.135793,North East,10510,143923,7.302516,North East,1.191342
2,E06000003,Redcar and Cleveland,6,Not answered,5533,4.925139,4.925139,North East,1460,136533,1.069339,North East,0.817963
3,E06000004,Stockton-on-Tees,6,Not answered,7381,4.671223,4.671223,North East,5674,196603,2.886019,North East,1.029102
4,E06000005,Darlington,6,Not answered,3951,4.484371,4.484371,North East,4403,107800,4.084416,North East,1.000758


In [3]:
# df with urban-rural classification
df2 = pd.read_csv('/Users/loucap/Documents/GitWork/InteractiveGender/Data/urban_rural_GI.csv')

# Let's take a quick glance
# IMPORTANT: we only have urb_rural classification for ENGLISH LA's
df2.head()

Unnamed: 0,LA_code,LA_name,GI_code,GI_categories,Observation_x,Percentages,Non-response_rate,region_x,Observation_y,Observation_total,Percentage,region_y,Urb_Rur
0,E06000001,Hartlepool,6,Not answered,3777,5.057173,5.057173,North East,1875,92337,2.030605,North East,Predominantly Urban
1,E06000002,Middlesbrough,6,Not answered,6991,6.135793,6.135793,North East,10510,143923,7.302516,North East,Predominantly Urban
2,E06000003,Redcar and Cleveland,6,Not answered,5533,4.925139,4.925139,North East,1460,136533,1.069339,North East,Urban with Significant Rural
3,E06000004,Stockton-on-Tees,6,Not answered,7381,4.671223,4.671223,North East,5674,196603,2.886019,North East,Predominantly Urban
4,E06000005,Darlington,6,Not answered,3951,4.484371,4.484371,North East,4403,107800,4.084416,North East,Predominantly Urban


# Interactive scatterplots

## Shows the relationship between the % of Non-English speakers and % of Non-response for our 331 Local Authorities in England and Wales.

### COLOURED BY REGION

In [9]:
df['Urb_Rur'] = df2['Urb_Rur']

In [4]:
LABELS = ["Labels"]
checkbox_group = CheckboxGroup(labels=LABELS, active=[0, 1])
# urban = df.Urb_Rur.unique()


source=ColumnDataSource(df)

tool = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("name","@LA_name"),
]

p1 = figure(title="Relationship between Non-response Rate and Non-English Speakers", x_axis_label="Percentage of Non-English Speakers", y_axis_label= "Non-response Rate", tooltips = tool)
output_notebook()

for region, color in zip(df.region_x.unique(), Category10[10]):
    b = df[df.region_x == region]
    p1.circle(x = 'Percentage', y = 'Non-response_rate', size = 10, alpha = 0.5, color = color, legend_label = region, muted_color = color, muted_alpha = 0.1, source = b)

labels = LabelSet(x='Percentage', y='Non-response_rate', text='LA_name',x_offset=5, y_offset=5, text_font_size = "5pt",source=ColumnDataSource(df))
tool = [
    ("Name","$LA_name"),
]

# p1.add_layout(labels)
p1.legend.location = "bottom_right"
p1.legend.click_policy="hide"
p1.legend.title = "Regions"

def callback():
    p1.add_layout(labels)

    
# add a button widget and configure with the call back
button = Button(label="Labels")
button.on_event('button_click', callback)
show(p1)

curdoc().add_root(column(button, p1))

### COLOURED BY URBAN-RURAL CLASSIFICATION

In [12]:
from bokeh.transform import factor_cmap, factor_mark
from bokeh.plotting import figure, show
from bokeh.palettes import Category10

tool = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("name","@LA_name"),
]
# p1.legend.location = "bottom_right"
# p1.legend.click_policy="hide"
# p1.legend.title = "Regions"

urban = df2.Urb_Rur.unique()


p2 = figure(title="Relationship between Non-response Rate and Non-English Speakers", x_axis_label="Non-response Rate", y_axis_label="Percentage of Non-English Speakers", tooltips = tool)
output_notebook()

urban_rural_sources = {}  # Create a dictionary to store the ColumnDataSource objects
for urb_rur in df.Urb_Rur.unique():
    urban_rural_sources[urb_rur] = ColumnDataSource(df[df.Urb_Rur == urb_rur])

for urb_rur, color in zip(df.Urb_Rur.unique(), Category10[10]):
    p2.circle(x='Percentage', y='Non-response_rate', size=10, alpha=0.5, color=color,
              legend_label=urb_rur, muted_color=color, muted_alpha=0.1, source=urban_rural_sources[urb_rur])
# p2.scatter("Percentage", "Non-response_rate", source = df2, fill_alpha = 0.5, size = 10, color = factor_cmap('Urb_Rur', Category10[10], urban), legend_field = 'Urb_Rur')

p2.legend.location = "bottom_right"
p2.legend.click_policy = "hide"
p2.legend.title = "Urban-Rural"

show(p2)

### COLOURED BY SHANNON INDEX

Here, I have calculated the religious diversity index for each LA using the Shannon index.

In [6]:
from bokeh.models import ColorBar, BasicTicker, PrintfTickFormatter
from bokeh.models import LogColorMapper

color_map = LogColorMapper(palette="Viridis256", low=df.Shannon_idx.min(), high=df.Shannon_idx.max())

tool = [
    ("index", "$index"),
    ("(x,y)", "($x, $y)"),
    ("name","@LA_name"),
    ("Shannon_idx", "@Shannon_idx")
]


p2 = figure(title="Relationship between Non-response Rate and Non-English Speakers", x_axis_label="Non-response Rate", y_axis_label="Percentage of Non-English Speakers", tooltips = tool)
output_notebook()

p2.scatter("Percentage", "Non-response_rate", source = df, fill_alpha = 0.5, size = 10,  color={'field': 'Shannon_idx', 'transform': color_map})

color_bar = ColorBar(color_mapper=color_map,
                     title='Shannon Index',
                     ticker=BasicTicker(desired_num_ticks=5),
                     formatter=PrintfTickFormatter(format='%.2f'))

# Add the color bar to the plot
p2.add_layout(color_bar, 'right')


# p2.add_layout(color_bar, "right")
show(p2)

In [13]:
# Read-in pre-processed data for religion

rel = pd.read_csv('/Users/loucap/Documents/GitWork/InteractiveGender/Data/cleaned_religion_GI.csv')

In [14]:
rel.columns

Index(['Unnamed: 0', 'LA_code', 'LA_name', 'GI_code', 'GI_categories',
       'Religion_code', 'Religion_categories', 'Observation',
       'Percentages_Atheist', 'Total_counts', 'Total_Atheist',
       'Religion_code_Christian', 'Religion_categories_Christian',
       'Observation_Christian', 'Percentages_Christian',
       'Total_counts_Christian', 'Total_Christian', 'Religion_code_Muslim',
       'Religion_categories_Muslim', 'Observation_Muslim',
       'Percentages_Muslim', 'Total_counts_Muslim', 'Total_Muslim',
       'Religion_code_Other', 'Religion_categories_Other', 'Observation_Other',
       'Percentages_Other', 'Total_counts_Other', 'Total_Other',
       'Religion_code_Buddhist', 'Religion_categories_Buddhist',
       'Observation_Buddhist', 'Percentages_Buddhist', 'Total_counts_Buddhist',
       'Total_Buddhist', 'Religion_code_Jewish', 'Religion_categories_Jewish',
       'Observation_Jewish', 'Percentages_Jewish', 'Total_counts_Jewish',
       'Total_Jewish', 'Religion_c

In [16]:
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Select, HoverTool
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

# Prepare data
rel['selected_religion'] = rel['Total_Christian']  # Default religion
rel['selected_percentages'] = rel['Percentages_Christian']

source = ColumnDataSource(rel)

# Define tooltips
tool = [
    ("index", "$index"),
    ("(x,y)", "(@selected_religion{0.2f}, @selected_percentages{0.2f})"),
    ("name", "@LA_name"),
]

# Create figure
p_2 = figure(title="Relationship between % of religious group in given LA, and their non-response rate",
            y_axis_label="Non-response Rate", x_axis_label="Percentage of religious group in given LA", tooltips=tool)
output_notebook()

# Scatter plot
p_2.scatter("selected_religion", "selected_percentages", source=source, fill_alpha=0.5, size=10)

hover_tool = HoverTool(tooltips=tool, mode='mouse')  # Change mode to 'mouse'
p_2.add_tools(hover_tool)

# Define callback for updating data source
def update_plot(attr, old, new):
    selected_religion = select_religion.value
    rel['selected_religion'] = rel[f'Total_{selected_religion}']
    rel['selected_percentages'] = rel[f'Percentages_{selected_religion}']
    source.data = source.from_df(rel)

# Create select widget
options = ['Christian', 'Muslim', 'Jewish', 'Buddhist', 'Hindu', 'Sikh', 'Other']  # Update with all available religious groups
select_religion = Select(title="Religious Group:", value='Christian', options=options)
select_religion.on_change('value', update_plot)

# Layout
layout = column(select_religion, p_2)

# Show plot
show(layout)

curdoc().add_root(layout)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html

