In [31]:
# Standard library
import os
import warnings

# TODO: Fix the call that triggers this pandas warning, then remove the filter:
#     /usr/local/lib64/python3.8/site-packages/pandas/core/frame.py:1549:
#     FutureWarning: Using short name for 'orient' is deprecated.
#     Only the options: ('dict', list, 'series', 'split', 'records', 'index')
#     will be used in a future version.
#     Use one of the above to silence this warning.
# Only that specific message is silenced, so unrelated FutureWarnings
# (which would point at other upcoming breakages) stay visible.
warnings.filterwarnings(
    action='ignore',
    message="Using short name for 'orient'",
    category=FutureWarning,
)

# Data handling
import pandas as pd

# Widgets and rich display
import ipywidgets as widgets
from IPython.display import HTML

# Drawing
import cartopy
import cartopy.crs as ccrs
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
from cartopy.io import shapereader
from matplotlib.cm import get_cmap
from mpl_toolkits.axes_grid1 import make_axes_locatable

# Turn off pyplot interactive mode so figures are only drawn on an explicit
# plt.show().
plt.ioff()

In [32]:
# Spreadsheet of coronavirus enforcement figures published on the Police
# Scotland website (per the file name: information to 9 June 2021).
DATA_URL = 'https://www.scotland.police.uk/spa-media/s20pfb1o/coronavirus-enforcement-information-to-9-june-2021.xlsx'

# Sidebar HTML rendered next to the plot.
# NOTE(review): the wording is inherited from the voila-gallery
# "country-indicators" example (it mentions a slider and years, which this
# app does not have) - TODO rewrite it for the enforcement dataset.
# Every <p> is now properly closed; the original opened a new paragraph
# instead of closing the first one and left the last one unterminated.
EXPLANATION = """\
<div class="app-sidebar">
<p><em>Compare different development indicators.</em></p>

<p>Select what indicators to plot in the dropdowns, and use the slider
to sub-select a fraction of years to include in the plot.</p>

<p>Data and idea copied from the <a href="https://dash.plot.ly/getting-started-part-2">
Plotly Dash documentation</a>.</p>

<p>This example demonstrates combining matplotlib with Jupyter widgets. For more interactive plots,
consider using <a href="https://github.com/bloomberg/bqplot">bqplot</a>.</p>
</div>
"""

In [33]:
# Stylesheet for the dashboard: the subtitle link under the heading and the
# explanatory sidebar. Kept in a named constant so the markup can be read
# separately from the display call.
_APP_CSS = """\
<style>
.app-subtitle {
    font-size: 1.5em;
}

.app-subtitle a {
    color: #106ba3;
}

.app-subtitle a:hover {
    text-decoration: underline;
}

.app-sidebar p {
    margin-bottom: 1em;
    line-height: 1.7;
}

.app-sidebar a {
    color: #106ba3;
}

.app-sidebar a:hover {
    text-decoration: underline;
}
</style>
"""

# As the cell's last expression, the HTML object is rendered, which injects
# the <style> block into the page.
HTML(_APP_CSS)

In [34]:
class App:
    """Dashboard mapping Police Scotland enforcement figures by division.

    Wraps a per-division DataFrame in an ipywidgets layout: a dropdown picks
    one column of the frame and a cartopy choropleth of the police divisions
    is redrawn for it. ``container`` is the top-level widget to display.
    """

    # Enforcement outcome columns that are summed per division and also
    # expressed per 100k population.
    _INDICATORS = (
        "Asked / Informed",
        "Warned / Instructed",
        "Removed from Place or Premises",
        "FPN",
        "Arrested",
    )

    # Council areas that make up each police division, keyed by division letter.
    _DIVISION_COUNCILS = {
        'A': ["Moray", "Aberdeenshire", "Aberdeen City"],
        'C': ["Stirling", "Clackmannanshire", "Falkirk"],
        'D': ["Angus", "Dundee City", "Perth and Kinross"],
        'E': ["City of Edinburgh"],
        'G': ["East Renfrewshire", "Glasgow City", "East Dunbartonshire"],
        'J': ["Scottish Borders", "East Lothian", "Midlothian", "West Lothian"],
        'K': ["Inverclyde", "Renfrewshire"],
        'L': ["Argyll and Bute", "West Dunbartonshire"],
        'N': ["Na h-Eileanan Siar", "Orkney Islands", "Highland", "Shetland Islands"],
        'P': ["Fife"],
        'Q': ["South Lanarkshire", "North Lanarkshire"],
        'U': ["South Ayrshire", "East Ayrshire", "North Ayrshire"],
        'V': ["Dumfries and Galloway"],
    }

    def __init__(self, df):
        """Build the widget tree for ``df`` and draw the initial plot.

        Args:
            df: DataFrame with a 'Division Letter' column plus one column per
                plottable value. The first column is left out of the dropdown
                (in frames built by ``from_url`` it is 'Division Letter').
        """
        self._df = df
        # Most recently drawn figure; closed when it is replaced so figures do
        # not accumulate on backends that keep them open after plt.show().
        self._fig = None

        # Get dropdown options, cut out the first one - as this is just Divisions
        available_indicators = list(self._df.columns)[1:]

        self._dropdown = self._create_indicator_dropdown(available_indicators, 0)
        self._plot_container = widgets.Output()
        _app_container = widgets.VBox([
            self._dropdown,
            self._plot_container
        ], layout=widgets.Layout(align_items='center', flex='3 0 auto'))
        # NOTE(review): heading and "Link to code" still point at the
        # voila-gallery country-indicators example - TODO confirm they should
        # be updated for this dashboard.
        self.container = widgets.VBox([
            widgets.HTML(
                (
                    '<h1>Development indicators</h1>'
                    '<h2 class="app-subtitle"><a href="https://github.com/pbugnion/voila-gallery/blob/master/country-indicators/index.ipynb">Link to code</a></h2>'
                ),
                layout=widgets.Layout(margin='0 0 5em 0')
            ),
            widgets.HBox([
                _app_container,
                widgets.HTML(EXPLANATION, layout=widgets.Layout(margin='0 0 0 2em'))
            ])
        ], layout=widgets.Layout(flex='1 1 auto', margin='0 auto 0 auto', max_width='1024px'))
        self._update_app()

    @classmethod
    def from_url(cls, url, population_csv=None):
        """Download and pre-process the data, then build the app.

        Args:
            url: Location of the enforcement workbook; its second sheet
                (``sheet_name=1``) is read.
            population_csv: Optional path to the council-area population CSV.
                Defaults to 'datasets/Population/mid-year-pop-est-19-data_Table 2.csv'
                under the current working directory.

        Returns:
            An ``App`` whose frame has one row per division with the summed
            enforcement counts, the division population and a
            '<indicator> per 100k' column for each enforcement count.

        Raises:
            ValueError: If the enforcement sheet still contains NaNs after
                the "NA" repair described below.
        """
        raw_data = pd.read_excel(url, sheet_name=1)
        raw_data = raw_data.drop(columns=[f'Unnamed: {i}' for i in range(9, 18)])

        # Taking account of NaNs
        # The xlsx to DataFrame conversion reads the SD letter "NA" (division
        # "N", area command "Inverness") as a missing value, so put the text
        # "NA" back for the Inverness rows.
        inverness = raw_data["Area Commands"] == "Inverness"
        raw_data.loc[inverness, "SD Letter"] = raw_data.loc[inverness, "SD Letter"].fillna("NA")

        if raw_data.isnull().values.any():
            raise ValueError("We have NaNs in our dataframe")

        division_grouped = raw_data.groupby('Division Letter', as_index=False).agg(
            {indicator: "sum" for indicator in cls._INDICATORS}
        )

        # Process Population Data
        # Data from: https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/mid-year-population-estimates/mid-2019
        if population_csv is None:
            population_csv = os.path.join(
                os.getcwd(), 'datasets', 'Population', 'mid-year-pop-est-19-data_Table 2.csv'
            )
        council_pop = cls._clean_population(pd.read_csv(population_csv))
        div_pop_data = cls._population_by_division(council_pop)

        # Merge Data (inner join: only divisions present in both frames remain)
        df = pd.merge(division_grouped, div_pop_data, on='Division Letter')

        return cls(cls._add_per_100k(df))

    @staticmethod
    def _clean_population(raw_pop_data):
        """Reduce the raw population CSV frame to council name and integer population."""
        # Keep only the two useful columns under readable names.
        pop = raw_pop_data[['Unnamed: 1', 'Unnamed: 2']].rename(
            columns={'Unnamed: 1': 'Council areas', 'Unnamed: 2': 'Population'}
        )
        # Drop the first five rows (not data) and renumber from zero.
        pop = pop.iloc[5:].reset_index(drop=True)
        # Keep rows up to, but excluding, the row just above 'NHS Board areas'.
        cutoff = pop[pop['Council areas'] == 'NHS Board areas'].index[0] - 1
        pop = pop.iloc[:cutoff].copy()
        # Strip thousands separators, then convert to int. Assigning the
        # result back (rather than an in-place replace on the column)
        # guarantees the cleaned values actually land in the frame.
        pop['Population'] = (
            pop['Population'].astype(str).str.replace(',', '', regex=False).astype(int)
        )
        return pop

    @classmethod
    def _population_by_division(cls, council_pop):
        """Sum council-area populations into their police divisions.

        Rows whose council area is not listed in ``_DIVISION_COUNCILS`` are
        ignored. Returns a frame with 'Division Letter' and 'Population'.
        """
        council_to_division = {
            council: division
            for division, councils in cls._DIVISION_COUNCILS.items()
            for council in councils
        }
        divisions = council_pop['Council areas'].map(council_to_division)
        division_totals = council_pop['Population'].groupby(divisions).sum()
        return division_totals.rename_axis('Division Letter').reset_index()

    @classmethod
    def _add_per_100k(cls, df):
        """Return ``df`` with a '<indicator> per 100k' column per enforcement count.

        Rows whose population is not positive get a rate of 0.
        """
        per_100k = df['Population'] / 100000
        populated = df['Population'] > 0
        rates = {
            f'{indicator} per 100k': (df[indicator] / per_100k).where(populated, 0)
            for indicator in cls._INDICATORS
        }
        return df.assign(**rates)

    def _create_indicator_dropdown(self, indicators, initial_index):
        """Create the indicator dropdown; a change of value redraws the plot."""
        dropdown = widgets.Dropdown(options=indicators, value=indicators[initial_index])
        dropdown.observe(self._on_change, names=['value'])
        return dropdown

    def _create_plot(self, indicator):
        """Draw the division choropleth for ``indicator`` and return the figure.

        Divisions in the shapefile with no matching 'Division Letter' in the
        data are drawn in silver. The colour scale runs from 0 to the largest
        value of the indicator.
        """
        fig = plt.figure(figsize=(6, 8), dpi=100)
        # NOTE(review): these parameters look like the British National Grid
        # transverse Mercator used by the shapefile - TODO confirm.
        projection = ccrs.TransverseMercator(central_longitude=-2.0, central_latitude=49.0, false_easting=400000.0, false_northing=-100000.0, scale_factor=0.9996012717, approx=False)
        ax = fig.add_subplot(1, 1, 1, projection=projection)
        ax.set_extent([-8, 0, 54.5, 61])  # Ideal coordinate map range for plotting Scotland

        # {division letter: value}; avoids the deprecated short orient 'r'.
        values_by_division = self._df.set_index('Division Letter')[indicator].to_dict()

        # Downloaded from: https://spatialdata.gov.scot/geonetwork/srv/eng/catalog.search;jsessionid=61F713CF39B3EE2F440F48E9C31BA806#/metadata/4364af71-167a-4236-b5a0-bd4109913231
        area_file = os.path.join(os.getcwd(), 'datasets', 'ScottishPoliceDivisions', 'SG_ScottishPoliceDivisions_2019.shp')
        police_divisions = shapereader.Reader(area_file)

        # One Normalize drives both the polygon colours and the colourbar, so
        # they cannot disagree, and an all-zero indicator no longer divides by
        # zero.
        norm = colors.Normalize(vmin=0., vmax=max(values_by_division.values()))
        cmap = plt.get_cmap('PuBu')

        for record in police_divisions.records():
            value = values_by_division.get(record.attributes['AdminCode'])
            if value is None:
                face_colour = "Silver"
            else:
                face_colour = cmap(float(norm(value)))
            ax.add_geometries(
                [record.geometry],
                facecolor=face_colour,
                linewidth=0,
                crs=projection,
            )

        # following https://matplotlib.org/2.0.2/mpl_toolkits/axes_grid/users/overview.html#colorbar-whose-height-or-width-in-sync-with-the-master-axes
        # we need to set axes_class=plt.Axes, else it attempts to create
        # a GeoAxes as colorbar
        divider = make_axes_locatable(ax)
        ax_cb = divider.new_horizontal(size="5%", pad=0.1, axes_class=plt.Axes)
        fig.add_axes(ax_cb)

        sm = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
        cb = fig.colorbar(sm, cax=ax_cb)
        cb.set_label(indicator)

        return fig

    def _on_change(self, _):
        """Dropdown observer: redraw for the newly selected indicator."""
        self._update_app()

    def _update_app(self):
        """Redraw the plot for the dropdown's current value inside the output widget."""
        indicator = self._dropdown.value
        self._plot_container.clear_output(wait=True)
        with self._plot_container:
            fig = self._create_plot(indicator)
            plt.show()
        # Release the figure that was just replaced; closing an already
        # closed figure is harmless.
        if self._fig is not None:
            plt.close(self._fig)
        self._fig = fig

In [35]:
# Fetch and pre-process the enforcement data, then build the dashboard.
app = App.from_url(url=DATA_URL)

# As the cell's last expression, the top-level widget is what gets rendered.
app.container

VBox(children=(HTML(value='<h1>Development indicators</h1><h2 class="app-subtitle"><a href="https://github.com…