# Altair Choropleths play

Here I load in James' presidential polls dataset from the teaching notebook in this dir and attempt some rudimentary WAYS ideas.

In [None]:
%%capture
# %%capture suppresses the output of everything below in this cell.
# Install the packages listed in requirements.txt into the current Jupyter kernel:
# pip is run through sys.executable, i.e. the interpreter this kernel is using.
import sys
!{sys.executable} -m pip install -r requirements.txt

In [None]:
import geopandas as gpd 
import pandas as pd
import altair as alt

# Geospatial frame read from the GeoJSON in the teaching directory (one polygon per state)
geo_states = gpd.read_file('choropleth_teaching/gz_2010_us_040_00_500k.json')
# 2020 presidential poll averages, read from the CSV in the same directory
df_polls = pd.read_csv('choropleth_teaching/presidential_poll_averages_2020.csv')

In [None]:
# Preview the first rows of the raw poll data
df_polls.head()

On how many dates was the polling done?

In [None]:
# Number of distinct model dates (dropna=False counts a missing value as its own entry)
df_polls['modeldate'].nunique(dropna=False)

Filter our poll data to remove third party candidates.

In [None]:
# Keep only the rows belonging to the two main candidates
df_polls = df_polls[
    df_polls['candidate_name'].isin(['Donald Trump', 'Joseph R. Biden Jr.'])
]

The geo_states variable has polygons for each state.

In [None]:
# Preview the first rows of the state geometries
geo_states.head()

In [None]:
# One poll frame per candidate, selected by an exact match on the candidate name
trump_data = df_polls.loc[df_polls['candidate_name'] == 'Donald Trump']
biden_data = df_polls.loc[df_polls['candidate_name'] == 'Joseph R. Biden Jr.']

Our spatial and poll data have the name of the state in common. We will rename the poll data's state-name column to `NAME` so that it matches the column in our geospatial dataframe.

In [None]:
# Relabel the columns of both candidate frames with one shared list of labels.
# The labels are assigned by position, so the second column becomes NAME.
poll_columns = ['cycle', 'NAME', 'modeldate', 'candidate_name', 'pct_estimate', 'pct_trend_adjusted']
for candidate_frame in (trump_data, biden_data):
    candidate_frame.columns = poll_columns

We can join the geospatial and poll data using the NAME column (the name of the state).

In [None]:
# Build a separate data frame for Trump and for Biden by attaching each
# candidate's poll rows to the state geometries, matched on the NAME column
geo_states_trump = geo_states.merge(trump_data, how='inner', on='NAME')
geo_states_biden = geo_states.merge(biden_data, how='inner', on='NAME')

In [None]:
# How many merged Trump rows fall on 11/03/2020 (True) versus any other date (False)
geo_states_trump['modeldate'].eq('11/03/2020').value_counts()

In [None]:
# Preview the first rows of the merged Biden frame
geo_states_biden.head()

### Create choropleth plot function

In [None]:
import sys; sys.path.insert(0, '..')
from ways_py.ways import meta_hist

@meta_hist
def usa_choro(candidate_geo_states, color, title):
    """Draw a choropleth of the US states coloured by a candidate's poll percentage.

    Parameters
    ----------
    candidate_geo_states
        Data passed to ``alt.Chart``; it must provide the ``NAME`` and
        ``pct_estimate`` fields used by the tooltip.
    color
        Altair color object passed to ``encode`` as the colour channel.
    title
        Title of the chart.

    Returns
    -------
    The 500x300 geoshape chart in the ``albersUsa`` projection.
    """
    base_chart = alt.Chart(candidate_geo_states, title=title)
    state_shapes = base_chart.mark_geoshape()
    encoded = state_shapes.encode(color, tooltip=['NAME', 'pct_estimate'])
    sized = encoded.properties(width=500, height=300)
    return sized.project(type='albersUsa')

## Adding interactivity with Jupyter interact widgets

Here I attempt to offer the user working on the US presidential poll choropleth visualisation some options related to the colour-binning.

In [None]:
from ipywidgets import interact, interact_manual, widgets, HBox, VBox, GridspecLayout

In [None]:
# Chronologically ordered list of the distinct dates (kept as strings) on which polling occurred
import datetime
unsorted_datestrings = list(set(geo_states_biden['modeldate']))
dates = sorted(
    unsorted_datestrings,
    # Parse month/day/year so the order is by calendar date, not by string
    key=lambda datestring: datetime.datetime.strptime(datestring, '%m/%d/%Y'),
)

Take the possible Altair scale types from [the `ScaleType` documentation](https://altair-viz.github.io/user_guide/generated/core/altair.ScaleType.html#altair.ScaleType) and collect them in a list:

In [None]:
# Scale type names offered in the 'Scales' dropdown, in the order they are shown
scales = [
    'linear', 'log', 'pow', 'sqrt',
    'symlog', 'identity', 'sequential',
    'time', 'utc',
    'quantile', 'quantize', 'threshold', 'bin-ordinal',
    'ordinal', 'point', 'band',
]

Get a list of the possible color schemes from [here](https://vega.github.io/vega/docs/schemes/#reference)

In [None]:
# Colour scheme names offered in the 'Scheme' dropdown, grouped as on the Vega
# schemes reference page; the concatenation order is the dropdown order
schemes = (
    # Sequential single-hue
    ['blues', 'tealblues', 'teals', 'greens', 'browns', 'oranges', 'reds', 'purples', 'warmgreys', 'greys']
    # Sequential multi-hue
    + ['viridis', 'magma', 'inferno', 'plasma', 'cividis', 'turbo', 'bluegreen', 'bluepurple', 'goldgreen',
       'goldorange', 'goldred', 'greenblue', 'orangered', 'purplebluegreen', 'purpleblue', 'purplered',
       'redpurple', 'yellowgreenblue', 'yellowgreen', 'yelloworangebrown', 'yelloworangered']
    # Sequential multi-hue for dark backgrounds
    + ['darkblue', 'darkgold', 'darkgreen', 'darkmulti', 'darkred']
    # Sequential multi-hue for light backgrounds
    + ['lightgreyred', 'lightgreyteal', 'lightmulti', 'lightorange', 'lighttealblue']
    # Diverging
    + ['blueorange', 'brownbluegreen', 'purplegreen', 'pinkyellowgreen', 'purpleorange',
       'redblue', 'redgrey', 'redyellowblue', 'redyellowgreen', 'spectral']
    # Cyclical
    + ['rainbow', 'sinebow']
)

In [None]:
# color_picker = widgets.ColorPicker(
#     concise=True,
#     description='Background color:',
#     value='#efefef',
# )
# color_picker

### Create and arrange widgets

In [None]:
# Dropdown that switches between Biden's and Trump's data
candidate = widgets.Dropdown(
    value='Biden',
    options=['Trump', 'Biden'],
    description='Candidate',
)

# Slider that picks which polling date to visualise
date = widgets.SelectionSlider(
    value='10/03/2020',
    options=dates,
    description='Date',
    continuous_update=False,
)

# Every widget destined for the GridBox is collected in this list, in display order
grid_matrix = [candidate, date]

# Checkbox deciding whether the colours are binned - unticked means a continuous colour scheme
bin = widgets.Checkbox(value=True, description='Bin')

# Upper limit on the number of bins
maxbins = widgets.IntSlider(
    value=100,
    min=2,
    max=100,
    step=1,
    description='Max Bins',
    continuous_update=False,
)

# Two-handled slider: the extent of the polling percentage data to plot
extent = widgets.IntRangeSlider(
    value=[0, 100],
    min=0,
    max=100,
    description='Extent',
    continuous_update=False,
)

# The widgets that feed the Bin object are only usable while binning is selected
def bin_options(change):
    """Disable ``maxbins`` and ``extent`` when the observed value turns falsy.

    ``change.new`` is read as the new value of the observed widget; a truthy
    value enables both widgets again.
    """
    binning_off = not change.new
    maxbins.disabled = binning_off
    extent.disabled = binning_off
# Re-evaluate the bin-dependent widgets whenever the checkbox value changes
bin.observe(bin_options, names='value')
grid_matrix.extend([bin, maxbins, extent])

# Dropdown for the kind of scaling to use
scale = widgets.Dropdown(
    value='linear',
    options=scales,
    description='Scales',
)
grid_matrix.append(scale)

# Three widgets exposing the colour options: colorschemetype picks the method,
# and whichever of colorscheme / colorrange it did not pick is greyed out
colorscheme = widgets.Dropdown(
    options=schemes,
    description='Scheme',
)
colorrange = widgets.Dropdown(
    options=['redpurpleblue', 'redorangeyellow'],
    description='Range',
    disabled=True,
)
colorschemetype = widgets.RadioButtons(
    value='Scheme',
    options=['Scheme', 'Range'],
    description='Color Method',
)
def choose_coloring_method(change):
    """Enable the dropdown matching the chosen colour method and disable the other.

    ``change.new`` is expected to be ``'Scheme'`` or ``'Range'``; any other
    value leaves both dropdowns untouched.
    """
    if change.new not in ('Scheme', 'Range'):
        return
    scheme_chosen = change.new == 'Scheme'
    colorscheme.disabled = not scheme_chosen
    colorrange.disabled = scheme_chosen
# Swap the enabled colour dropdown whenever the radio selection changes
colorschemetype.observe(choose_coloring_method, names='value')
grid_matrix.extend([colorschemetype, colorscheme, colorrange])


# The plot is not generated automatically: only the widgets are visible until
# this button (or any other widget) is used
def on_button_clicked(b):
    """Move the date slider to 11/03/2020 so that a widget value changes.

    Changing one of the widgets is what gets the plot to generate.
    NOTE(review): presumably nothing happens if the slider already shows
    11/03/2020, because no value changes then - confirm.
    """
    date.value = '11/03/2020'


generate_button = widgets.Button(description="Generate Plot")
generate_button.on_click(on_button_clicked)
grid_matrix.append(generate_button)

# Arrange the collected widgets in a GridBox with rows of three
grid = widgets.GridBox(
    grid_matrix,
    layout=widgets.Layout(grid_template_columns="repeat(3, 300px)"),
)

# Define interactive plot containing choropleth and metavisualisation histogram
def interact_usa_choro(candidate,
                       date,
                       bin,
                       maxbins,
                       scale,
                       extent,
                       colorschemetype,
                       colorscheme,
                       colorrange
                      ):
    """Build the colour encoding from the widget values and display the choropleth.

    Parameters
    ----------
    candidate : 'Trump' selects the Trump frame; any other value selects Biden's.
    date : polling date string used to filter ``modeldate`` and shown in the title.
    bin : when truthy, colours are binned with ``maxbins`` and ``extent``;
        otherwise the value is passed to ``alt.Color`` unchanged.
    maxbins, extent : forwarded to ``alt.Bin`` (only used when ``bin`` is truthy).
    scale : scale type name forwarded to ``alt.Scale``.
    colorschemetype : 'Scheme' colours by ``colorscheme``; 'Range' colours by
        the colour list that ``colorrange`` names.
    colorscheme : scheme name forwarded to ``alt.Scale``.
    colorrange : 'redpurpleblue' or 'redorangeyellow'.
    """
    # Candidate dataset, narrowed to the rows of the selected polling date
    source_frame = geo_states_trump if candidate == 'Trump' else geo_states_biden
    rows_for_date = source_frame[source_frame.modeldate == date]

    # Title of the choropleth plot
    title = ''.join(('Poll estimate for ', candidate, ' on ', date))

    # Bin object for the colour encoding; a falsy `bin` is handed over as it is
    binning = alt.Bin(maxbins=maxbins, extent=extent) if bin else bin

    # Scale object: either a named scheme or one of the explicit colour ranges.
    # When neither applies, `scale` is passed on unchanged.
    range_colors = {
        'redpurpleblue': ['red', 'purple', 'blue'],
        'redorangeyellow': ['red', 'orange', 'yellow'],
    }
    if colorschemetype == 'Scheme':
        color_scale = alt.Scale(type=scale, scheme=colorscheme)
    elif colorschemetype == 'Range' and colorrange in range_colors:
        color_scale = alt.Scale(type=scale, range=range_colors[colorrange])
    else:
        color_scale = scale

    color = alt.Color("pct_estimate", legend=None, bin=binning, scale=color_scale)

    # Build the chart with usa_choro and show it
    display(usa_choro(rows_for_date, color, title))

# Create the interactive output: each widget is bound to the
# interact_usa_choro argument of the same name
out = widgets.interactive_output(
    interact_usa_choro,
    dict(
        candidate=candidate,
        date=date,
        bin=bin,
        maxbins=maxbins,
        scale=scale,
        extent=extent,
        colorschemetype=colorschemetype,
        colorscheme=colorscheme,
        colorrange=colorrange,
    ),
)

# Show the widget grid followed by the plot output in this cell
display(grid, out)

Notes:
1. Setting the scale to `log` doesn't work unless `bin` is **unchecked**
2. Because the `extent` data range plugs into the `alt.Bin` object, this will only work if `bin` is **checked**
3. There is no way to pre-populate dropdowns with all available options for a given function argument