# Step by step guide to choropleth maps

This is a guide to making Choropleth maps of increasing complexity to visualise geographical data.

In [None]:
#from IPython.core.interactiveshell import InteractiveShell
#InteractiveShell.ast_node_interactivity = "all"
#import numpy as np
import json
import os

import geopandas as gpd
import pandas as pd
import plotly.express as px
from matplotlib import pyplot as plt

The aim is to visualise children's obesity rates in London MSOAs (statistical areas). To view the full data analysis, please refer to the main notebook.

Since MSOAs are the lowest level of granularity available in our data, we will use MSOA geographical files to create a Choropleth map. The shape files for London are available from https://data.london.gov.uk/dataset/statistical-gis-boundary-files-london.

## 1. Static geovisualisation using geopandas

In [None]:
# Load the cleaned Reception-year obesity data into a DataFrame.
# Later cells use its 'MSOA' column as the join key and '15_16to17_18' as the plotted value.
reception = pd.read_csv("reception_cleaned.csv")

In [None]:
# Read the London MSOA boundary shapefile into a GeoDataFrame
london_msoa_geodata = gpd.read_file("MSOA_2011_London_gen_MHW") # folder containing the set of shapefile components

# The geometry is kept in the coordinate system of the source files for the static plot;
# the Plotly section further down reprojects its own copy to EPSG:4326.
london_msoa_geodata.head()

In [None]:
# Draw the bare MSOA boundaries from the shapefile, before any obesity data is attached
london_msoa_geodata.plot()

In [None]:
# Attach the obesity rates to the boundaries by matching on the MSOA name.
# `join` keeps every row of the left-hand (geo) frame.
boundaries_by_name = london_msoa_geodata.set_index('MSOA11NM')
rates_by_name = reception.set_index('MSOA')
merged = boundaries_by_name.join(rates_by_name)
merged.head()

In [None]:
# List the available columns so we can pick the one to map
reception.columns

In [None]:
# Column to visualise on the map
variable = "15_16to17_18"
# Colour range of the rows that are actually drawn. The same limits are given to both the
# colourbar and the map below, so the legend always matches the shading.
vmin, vmax = merged[variable].min(), merged[variable].max()
# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(10, 6))
# remove axis
ax.axis("off")
# add a title
ax.set_title("Obesity rate in Reception age children", fontdict={'fontsize': '18', 'fontweight' : '3'})
# Annotation for the data source. The text is passed positionally because its keyword was
# renamed from `s` to `text` in Matplotlib 3.3.
ax.annotate("Source: GSST", xy=(0.1, .08), xycoords='figure fraction', horizontalalignment='left',
            verticalalignment='top', fontsize=12, color='#555555')
# Stand-alone mappable used only to draw the colourbar legend
sm = plt.cm.ScalarMappable(cmap='Blues', norm=plt.Normalize(vmin=vmin, vmax=vmax))
# the mappable carries no data of its own; set an empty array through the public API
sm.set_array([])
# add the colorbar to the figure, taking its space from the map axes explicitly
cbar = fig.colorbar(sm, ax=ax)
# create map, normalised to the same range as the colourbar
merged.plot(column=variable, cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8', vmin=vmin, vmax=vmax)


## 2. Interactive visualisation with Plotly

In a Mapbox choropleth map, each row of data_frame is represented by a colored region on a Mapbox map

In [None]:
# Plotly requires a base map and we will use the Mapbox API for that.
# The access token is read from the MAPBOX_ACCESS_TOKEN environment variable so that it is
# never stored in the notebook itself.
mapbox_api_key = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_api_key:
    # Fail early with a clear message instead of rendering maps without a base layer
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable to your Mapbox access token "
        "before running this notebook."
    )
px.set_mapbox_access_token(mapbox_api_key)

In [None]:
# Read the London MSOA shapefile into a separate GeoDataFrame for the Plotly section
london_msoa_df = gpd.read_file("MSOA_2011_London_gen_MHW") # folder containing the set of shapefile components

In [None]:
# Reproject to EPSG:4326 (latitude/longitude) to match the coordinate system used by Plotly
london_msoa_df = london_msoa_df.to_crs(epsg=4326)

In [None]:
# Check what kind of object we are holding after the reprojection
type(london_msoa_df)

In [None]:
# Serialise the GeoDataFrame to a GeoJSON string and parse it back into a Python dict,
# which is the form passed as `geojson=` to the choropleth calls below
london_msoa_json = json.loads(london_msoa_df.to_json())

In [None]:
# Compare the area names in the data with the name stored on the first GeoJSON feature,
# to confirm that the two sources can be matched on this field
msoa_names_in_data = reception['MSOA']
first_feature_properties = london_msoa_json["features"][0]["properties"]
print(msoa_names_in_data)
print(first_feature_properties['MSOA11NM'])

One issue here is that our dataset contains data for all MSOAs in the UK, but our map is specific to London MSOAs. While this was not an issue when using geopandas & matplotlib, it will cause errors with Plotly, so we need to trim our dataset before proceeding further.

In [None]:
# Number of features (areas) present in the London GeoJSON
print(len(london_msoa_json['features']))

In [None]:
# Names of every area that has a shape in the London GeoJSON
msoas_included = [
    feature["properties"]["MSOA11NM"] for feature in london_msoa_json['features']
]

# Keep only the rows of the dataset whose MSOA can be drawn on the London map
is_on_map = reception['MSOA'].isin(msoas_included)
reception_london = reception[is_on_map]

In [None]:
# Rows and columns left after restricting the data to the MSOAs on the map
reception_london.shape

In [None]:
# Peek at the last rows of the filtered data
reception_london.tail()

In [None]:
# First interactive map: default colour scale, with the MSOA name and its rate shown on hover
london_centre = {"lat": 51.509865, "lon": -0.118092}

fig = px.choropleth_mapbox(
    reception_london,
    geojson=london_msoa_json,
    # link each data row to its shape through the MSOA name
    featureidkey="properties.MSOA11NM",
    locations='MSOA',
    color='15_16to17_18',
    hover_data=["MSOA", "15_16to17_18"],
    mapbox_style="basic",
    center=london_centre,
    zoom=9,
)

# Let the map fill the whole figure
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()

#### Making the color scale more meaningful & removing the white roads

There are a few simple improvements we can make to the map:
- A colorscale going from green to red would be more meaningful and would immediately highlight problem areas.
- The white roads should be removed as they are distracting and do not match the MSOAs. This can be done by changing to a different style for the mapbox basemap.
- A meaningful title for the color bar

In [None]:
# Tipping point between green and red: the mean rate over the whole (not London-only) dataset
reception_natnl_avg = reception['15_16to17_18'].mean()

london_centre = {"lat": 51.509865, "lon": -0.118092}

fig = px.choropleth_mapbox(
    reception_london,
    geojson=london_msoa_json,
    featureidkey="properties.MSOA11NM",
    locations='MSOA',
    color='15_16to17_18',
    hover_data=["MSOA", "15_16to17_18"],
    # diverging colour scale centred on the average computed above
    color_continuous_scale=px.colors.diverging.RdYlGn_r,
    color_continuous_midpoint=reception_natnl_avg,
    title="MSOA distribution of obesity rates in London",
    # similar base map to before, but without visible roads
    mapbox_style="outdoors",
    center=london_centre,
    zoom=9,
)

# Rename the colour bar, add a % suffix to its ticks and let the map fill the figure
fig.update_layout(
    coloraxis_colorbar=dict(title="obesity rate", yanchor="top", y=1, ticksuffix="%"),
    margin=dict(r=0, t=0, l=0, b=0),
)
fig.show()

#### Showing two datasets on the same map with dropdown selection

Since we have two datasets representing the geographical data for two different age groups, it would be valuable to display both sets of data on the same map, using a dropdown for user selection.

In order to do this, we need to go further than Plotly Express and use Plotly's graph_objects for more customisation.

In [None]:
import plotly.graph_objects as go

In [None]:
# Load the Year 6 data and keep only the MSOAs that exist on the London map
year6 = pd.read_csv("year6_cleaned.csv")

year6_is_on_map = year6['MSOA'].isin(msoas_included)
y6_london = year6[year6_is_on_map]
y6_london.shape

In [None]:
colorbar = dict(title={"text": "obesity rate"}, yanchor="top", y=1, ticks="outside", ticksuffix="%")


def midpoint_colorscale(colors, zmin, zmid, zmax):
    """Build a Plotly colorscale whose central colour sits exactly at ``zmid``.

    Plotly ignores ``zmid`` as soon as both ``zmin`` and ``zmax`` are given, so the
    tipping point is encoded in the colorscale itself: the first half of ``colors`` is
    spread over [zmin, zmid] and the second half over [zmid, zmax].

    Parameters
    ----------
    colors : sequence of str
        Colours ordered from low to high values (at least two).
    zmin, zmid, zmax : float
        Lower bound, tipping point and upper bound of the colour range.

    Returns
    -------
    list of [float, str]
        ``[position, colour]`` pairs with positions running from 0 to 1.

    Raises
    ------
    ValueError
        If fewer than two colours are given or ``zmin < zmid < zmax`` does not hold.
    """
    if len(colors) < 2:
        raise ValueError("at least two colours are required")
    if not zmin < zmid < zmax:
        raise ValueError(f"expected zmin < zmid < zmax, got {zmin}, {zmid}, {zmax}")
    mid_pos = (zmid - zmin) / (zmax - zmin)
    last = len(colors) - 1
    scale = []
    for idx, color in enumerate(colors):
        frac = idx / last
        if frac <= 0.5:
            pos = 2 * frac * mid_pos
        else:
            pos = mid_pos + (2 * frac - 1) * (1 - mid_pos)
        scale.append([float(pos), color])
    # Plotly requires the end points to be exactly 0 and 1; guard against rounding
    scale[0][0], scale[-1][0] = 0.0, 1.0
    return scale


def obesity_trace(name, rates, column, midpoint, visible):
    """Return the dict describing one Choroplethmapbox layer.

    ``rates`` is the London-only frame to draw, ``column`` the rate column to colour by,
    ``midpoint`` the value at which the colour scale tips from green to red and
    ``visible`` whether the layer is shown before any dropdown selection.
    The available trace properties are listed at
    https://plotly.com/python/reference/#choroplethmapbox
    """
    zmax = rates[column].max()
    return dict(
        type='choroplethmapbox',
        name=name,
        geojson=london_msoa_json,
        locations=rates['MSOA'],
        z=rates[column],
        zmin=0, zmax=zmax,
        featureidkey="properties.MSOA11NM",
        # determines what appears on hover. <extra></extra> prevents the second box from appearing
        hovertemplate="%{location}: %{z}%<extra></extra>",
        colorbar=colorbar,
        autocolorscale=False,
        # green-to-red scale with its tipping point pinned at `midpoint`
        colorscale=midpoint_colorscale(px.colors.diverging.RdYlGn_r, 0, midpoint, zmax),
        visible=visible,
    )


rate_column = '15_16to17_18'

# One layer per age group. Each scale tips at the mean of that age group's full dataset,
# so both layers are read against the same kind of reference.
data = [
    obesity_trace('Reception', reception_london, rate_column, reception_natnl_avg, visible=True),
    # hidden initially; the dropdown switches between the two layers
    obesity_trace('Year 6', y6_london, rate_column, year6[rate_column].mean(), visible=False),
]

In [None]:
# Layout shared by the dropdown figure. The full list of layout properties is at
# https://plotly.com/python-api-reference/generated/plotly.graph_objects.Layout.html#plotly.graph_objects.Layout
layout = go.Layout(
    title_text='Children obesity rates in London MSOAs',
    font=dict(family='Roboto'),
    # no margins: the map fills the figure
    margin=dict(t=0, b=0, l=0, r=0),
    mapbox=dict(
        accesstoken=mapbox_api_key,       # token from Mapbox
        center={"lat": 51.509865, "lon": -0.118092},  # where the map is centred
        zoom=9,                           # default level of zoom
        style='outdoors',                 # default map style
    ),
)

# A single dropdown with one button per trace; each button shows its own trace and hides the other.
# All available properties for updatemenus are at https://plotly.com/python/reference/#layout-updatemenus
dropdown = dict(
    buttons=[
        dict(label='Reception', method='update', args=[{'visible': [True, False]}]),
        dict(label='Year 6', method='update', args=[{'visible': [False, True]}]),
    ],
    # placement of the dropdown menu on the figure
    direction='down',
    x=-0.05, xanchor='left',
    y=1, yanchor='top',
    showactive=True,
)
layout.updatemenus = [dropdown]


In [None]:
# Assemble the traces and the dropdown layout into one figure and render it
interactive_fig = go.Figure(data=data, layout=layout)
interactive_fig.update_layout(title_text="Children obesity rates in London MSOAs")
interactive_fig.show()

NB: The range you decide for zmin and zmax can influence how your data is perceived. e.g. if we used the minimum rate observed as zmin, instead of 0, there would be more MSOAs with a green shade and the problem would appear less widespread. While it initially looks like a detail, it can have a big impact.

#### Add sliders to show evolution over time periods

The next step would be to add sliders to visualise the evolution of the data over time. Unfortunately, combining a dropdown with a slider is not possible with Plotly within the notebook (at least without using widgets).

I will look into implementing this with Javascript for a blog post but in the meantime, we can still remove the dropdown and create a choropleth with a slider for one of the age groups only.

In [None]:
# Column names as an array, to locate the time-period columns used for the slider
reception.columns.values

In [None]:
# First, we need a Choropleth object (trace) for each slider step

# Time-period columns, selected by position
# NOTE(review): assumes columns 3 to 10 of `reception` are the time periods - confirm against the CSV
time_periods = reception.columns.values[3:11]

# One colour range for every step, so that a given colour means the same rate in every period
zmax_all_periods = float(reception_london[list(time_periods)].max().max())

# Plotly ignores `zmid` once zmin and zmax are both set, so the tipping point is encoded in the
# colorscale instead: the green half covers [0, average] and the red half covers [average, max].
# The average is the one computed for '15_16to17_18' and acts as a fixed reference for all periods.
midpoint_position = reception_natnl_avg / zmax_all_periods
if not 0 < midpoint_position < 1:
    raise ValueError("the reference average must lie strictly between 0 and the largest plotted rate")

palette = px.colors.diverging.RdYlGn_r
last_index = len(palette) - 1
slider_colorscale = []
for colour_index, colour in enumerate(palette):
    fraction = colour_index / last_index
    if fraction <= 0.5:
        position = 2 * fraction * midpoint_position
    else:
        position = midpoint_position + (2 * fraction - 1) * (1 - midpoint_position)
    slider_colorscale.append([float(position), colour])
# Plotly requires the end points to be exactly 0 and 1; guard against rounding
slider_colorscale[0][0], slider_colorscale[-1][0] = 0.0, 1.0

# Reception traces: only the first one is visible by default, the slider toggles the rest
data = [
    go.Choroplethmapbox(
        name=time_period,
        geojson=london_msoa_json,
        locations=reception_london['MSOA'],
        z=reception_london[time_period],
        zmin=0, zmax=zmax_all_periods,
        featureidkey="properties.MSOA11NM",
        # determines what appears on hover. <extra></extra> prevents the second box from appearing
        hovertemplate="%{location}: %{z}%<extra></extra>",
        colorbar=colorbar,
        autocolorscale=False,
        colorscale=slider_colorscale,
        visible=(step_index == 0))
    for step_index, time_period in enumerate(time_periods)
]

In [None]:
# The slider figure gets its own layout, identical to the earlier one but without a dropdown
layout = go.Layout(
    title_text='Children obesity rates in London MSOAs',
    font=dict(family='Roboto'),
    # no margins: the map fills the figure
    margin=dict(t=0, b=0, l=0, r=0),
    mapbox=dict(
        accesstoken=mapbox_api_key,       # token from Mapbox
        center={"lat": 51.509865, "lon": -0.118092},  # where the map is centred
        zoom=9,                           # default level of zoom
        style='outdoors',                 # default map style
    ),
)

# One slider step per trace: selecting a step shows that trace and hides all the others
trace_count = len(data)
steps = [
    dict(
        method="update",
        args=[{"visible": [position == shown for position in range(trace_count)]}],
        label=trace['name'],
    )
    for shown, trace in enumerate(data)
]

# The slider itself
layout.sliders = [dict(steps=steps)]

In [None]:
# Combine the per-period traces with the slider layout and render the figure
slider_fig = go.Figure(data=data, layout=layout)
slider_fig.show()