# Altair Choropleths play

Here I load in James' presidential polls dataset from the teaching notebook in this dir and attempt some rudimentary WAYS ideas.

In [1]:
%%capture
# %%capture has to be the first line of the cell; it suppresses the output of everything below.
# Install the packages listed in requirements.txt into the current Jupyter kernel:
# sys.executable is the interpreter running this kernel, so pip installs into that same environment.
import sys
!{sys.executable} -m pip install -r requirements.txt

In [2]:
import geopandas as gpd 
import pandas as pd
import altair as alt

# State boundary geometries, read with geopandas (previewed with geo_states.head() further down)
geo_states = gpd.read_file('choropleth_teaching/gz_2010_us_040_00_500k.json')
# 2020 presidential poll averages; rows carry state, modeldate, candidate_name and the poll percentages
df_polls = pd.read_csv('choropleth_teaching/presidential_poll_averages_2020.csv')

In [3]:
df_polls.head()

Unnamed: 0,cycle,state,modeldate,candidate_name,pct_estimate,pct_trend_adjusted
0,2020,Wyoming,11/03/2020,Joseph R. Biden Jr.,30.81486,30.82599
1,2020,Wisconsin,11/03/2020,Joseph R. Biden Jr.,52.12642,52.09584
2,2020,West Virginia,11/03/2020,Joseph R. Biden Jr.,33.49125,33.51517
3,2020,Washington,11/03/2020,Joseph R. Biden Jr.,59.34201,59.39408
4,2020,Virginia,11/03/2020,Joseph R. Biden Jr.,53.7412,53.72101


On how many dates was the polling done?

In [4]:
# Number of distinct polling dates (dropna=False counts a missing date as a value, exactly like len(unique()))
df_polls['modeldate'].nunique(dropna=False)

251

Filter our poll data to remove third party candidates.

In [5]:
# Keep only the rows that belong to the two major-party candidates
major_candidates = ['Donald Trump', 'Joseph R. Biden Jr.']
df_polls = df_polls[df_polls['candidate_name'].isin(major_candidates)]

The geo_states variable has polygons for each state.

In [6]:
geo_states.head()

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry
0,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ..."
1,0400000US25,25,Massachusetts,,7800.058,"MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ..."
2,0400000US26,26,Michigan,,56538.901,"MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ..."
3,0400000US30,30,Montana,,145545.801,"POLYGON ((-104.05770 44.99743, -104.25015 44.9..."
4,0400000US32,32,Nevada,,109781.18,"POLYGON ((-114.05060 37.00040, -114.04999 36.9..."


In [7]:
# Boolean row masks, one per candidate
is_trump = df_polls['candidate_name'] == 'Donald Trump'
is_biden = df_polls['candidate_name'] == 'Joseph R. Biden Jr.'

# One poll data frame per candidate
trump_data = df_polls[is_trump]
biden_data = df_polls[is_biden]

Our spatial and poll data have the name of the state in common. We will change the name of the state to NAME to match our geospatial dataframe.

In [8]:
# Rename only the join key so it matches the NAME column of the geospatial frame.
# Overwriting the whole `.columns` list positionally would raise if the number of
# columns ever changed and would silently mislabel them if their order changed.
trump_data = trump_data.rename(columns={'state': 'NAME'})
biden_data = biden_data.rename(columns={'state': 'NAME'})

We can join the geospatial and poll data using the NAME column (the name of the state).

In [9]:
# Build a separate data frame for Trump and for Biden by attaching each
# candidate's poll rows to the state geometries (inner join on the state name)
geo_states_trump = geo_states.merge(right=trump_data, how='inner', on='NAME')
geo_states_biden = geo_states.merge(right=biden_data, how='inner', on='NAME')

In [10]:
# How many merged rows fall on 11/03/2020 (True) versus any other date (False)
geo_states_trump['modeldate'].eq('11/03/2020').value_counts()

False    9972
True       51
Name: modeldate, dtype: int64

In [11]:
geo_states_biden.head()

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry,cycle,modeldate,candidate_name,pct_estimate,pct_trend_adjusted
0,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,11/03/2020,Joseph R. Biden Jr.,53.31518,53.32106
1,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,11/02/2020,Joseph R. Biden Jr.,53.31518,53.32106
2,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,11/01/2020,Joseph R. Biden Jr.,53.70696,53.91672
3,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,10/31/2020,Joseph R. Biden Jr.,53.30098,53.55544
4,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,10/30/2020,Joseph R. Biden Jr.,53.34166,53.64551


### Create choropleth plot function

In [12]:
# Test wrapper, modify me
import functools


def mywrapper(func):
    """Decorate ``func`` so that ' World' is printed after every call.

    The decorated function's return value is handed back to the caller, and
    ``functools.wraps`` keeps its ``__name__`` and docstring on the wrapper.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Run the wrapped function first so its own output comes before ' World'
        result = func(*args, **kwargs)
        print(' World')
        return result
    return wrapper

@mywrapper
def f1():
    """Print 'Hello'; the ``mywrapper`` decorator then prints ' World'."""
    print('Hello')

# Call the decorated function to see both lines of output
f1()

Hello
 World


In [13]:
# Import WAYS
import sys; sys.path.insert(0, '..')
from ways_py.ways import (meta_hist, altair_bin_jupyter_widgets, altair_scale_jupyter_widget,
                          altair_color_jupyter_widgets, get_altair_color_obj)

In [14]:
# @meta_color should create the widgets and then generate the color objects from them and display the chart with widgets

In [15]:
@meta_hist  # the decorator modifies what a call to usa_choro produces, beyond the chart returned below
def usa_choro(candidate_geo_states, color, title):
    """Build a choropleth of the US states coloured by a candidate's poll estimate.

    Args:
        candidate_geo_states: data handed to ``alt.Chart``; the tooltip reads
            its ``NAME`` and ``pct_estimate`` fields.
        color: Altair color object, passed positionally to ``encode``.
        title: title of the chart.

    Returns:
        The geoshape chart, 500x300 with the ``albersUsa`` projection. Callers
        receive whatever ``meta_hist`` makes of it.
    """
    base = alt.Chart(candidate_geo_states, title=title)
    shapes = base.mark_geoshape()
    encoded = shapes.encode(color, tooltip=['NAME', 'pct_estimate'])
    sized = encoded.properties(width=500, height=300)
    return sized.project(type='albersUsa')

## Adding interactivity with Jupyter interact widgets

Here I attempt to offer the user working on the US presidential poll choropleth visualisation some options related to the colour-binning.

In [16]:
from ipywidgets import interact, interact_manual, widgets, HBox, VBox, GridspecLayout

### Create and arrange widgets

These widgets control the parts of the data to be visualised:

In [18]:
# Dropdown for switching between Biden's and Trump's data
candidate = widgets.Dropdown(
    value='Biden',
    options=['Trump', 'Biden'],
    description='Candidate',
)

# Build a chronologically ordered list of the dates (as strings) on which polling occurred
import datetime


def _modeldate_key(datestring):
    """Parse an 'mm/dd/YYYY' date string so the dates sort chronologically."""
    return datetime.datetime.strptime(datestring, '%m/%d/%Y')


unsorted_datestrings = list(set(geo_states_biden['modeldate']))
dates = sorted(unsorted_datestrings, key=_modeldate_key)

# Slider for choosing which polling date to visualise
date = widgets.SelectionSlider(
    value='10/03/2020',
    options=dates,
    description='Date',
    continuous_update=False,
)

### Load and arrange widgets for Trump/Biden polling data choropleth

In [19]:
# Get the plot-option widgets from WAYS
# Bin controls: looked up further down by the keys 'bin', 'maxbins' and 'extent'
bin_widgets = altair_bin_jupyter_widgets()

# Scale control: used further down as a single widget
scale_widget = altair_scale_jupyter_widget()

# Colour controls: looked up further down by 'colorschemetype', 'colorscheme', 'color_box' and 'color_1'..'color_3'
color_widgets = altair_color_jupyter_widgets()

In [20]:
# Collect every widget that goes into the grid: the data widgets first, then the plot widgets
grid_widgets = [
    candidate,
    date,
    *bin_widgets.values(),
    scale_widget,
    color_widgets['colorschemetype'],
    color_widgets['colorscheme'],
    color_widgets['color_box'],
]


def on_button_clicked(b):
    """Set the date slider to '11/03/2020'; changing a widget value is what gets the plot generated."""
    date.value = '11/03/2020'


# The plot does not get generated automatically: only the widgets are visible until
# this button (or any other widget) is used
generate_button = widgets.Button(description="Generate Plot")
generate_button.on_click(on_button_clicked)
grid_widgets.append(generate_button)

# Arrange the widgets into rows of three with a GridBox
grid_layout = widgets.Layout(grid_template_columns="repeat(3, 300px)")
grid = widgets.GridBox(grid_widgets, layout=grid_layout)

In [23]:
# Interactive plot containing the choropleth and the metavisualisation histogram
# @interact_altair
def interact_usa_choro(candidate, date, bin, maxbins, scale, extent,
                       colorschemetype, colorscheme, color_1, color_2, color_3):
    """Display the poll choropleth for one candidate and date with the chosen colour options.

    ``candidate`` selects the data set ('Trump' picks Trump's, anything else
    Biden's) and ``date`` selects the ``modeldate`` rows to keep. The remaining
    arguments are forwarded unchanged to ``get_altair_color_obj``. The chart is
    displayed rather than returned.
    """
    # Pick the candidate's data set; Biden's is the fallback
    polls_by_state = geo_states_trump if candidate == 'Trump' else geo_states_biden

    # Keep only the rows for the requested polling date
    polls_on_date = polls_by_state[polls_by_state.modeldate == date]

    # Title of the choropleth
    title = f'Poll estimate for {candidate} on {date}'

    # Column of the data frame to plot
    column = "pct_estimate"

    # Build the bin, scale and colour settings into one colour object for the plots
    color = get_altair_color_obj(
        bin, maxbins, scale, extent,
        colorschemetype, colorscheme,
        color_1, color_2, color_3,
        column,
    )

    # Display the histogram and choropleth produced by the decorated usa_choro
    display(usa_choro(polls_on_date, color, title))

In [25]:
# Map each argument of interact_usa_choro to the widget that supplies its value
plot_controls = {
    'candidate': candidate,
    'date': date,
    'bin': bin_widgets['bin'],
    'maxbins': bin_widgets['maxbins'],
    'extent': bin_widgets['extent'],
    'scale': scale_widget,
    'colorschemetype': color_widgets['colorschemetype'],
    'colorscheme': color_widgets['colorscheme'],
    'color_1': color_widgets['color_1'],
    'color_2': color_widgets['color_2'],
    'color_3': color_widgets['color_3'],
}

# Create the interactive output from the widgets and the plot function
out = widgets.interactive_output(interact_usa_choro, plot_controls)

# Display the widget grid and the plot output in this cell
display(grid, out)

GridBox(children=(Dropdown(description='Candidate', index=1, options=('Trump', 'Biden'), value='Biden'), Selec…

Output()

Notes:
1. Setting the scale to `log` doesn't work unless `bin` is **unchecked**
2. Because the `extent` data range plugs into the `alt.Bin` object, this will only work if `bin` is **checked**
3. There is no way to pre-populate dropdowns with all available options for a given function argument

TODOs:
- [x] Separate the data widgets from the color and bin object widgets
- [x] Move into WAYS py
- [x] Put color box in color function
- [ ] Understand wrapper functions and what Roly has done
- [ ] Create new functions for WAYS called like altair_color and altair_bin that act like the histogram wrapper
    - Can these be chained so that you can apply multiple wrappers?