# Altair Choropleths play

Here I load in James' presidential polls dataset from the teaching notebook in this dir and attempt some rudimentary WAYS ideas.

In [1]:
%%capture
# capture suppresses output of the below:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install -r requirements.txt

In [2]:
import geopandas as gpd 
import pandas as pd
import altair as alt

geo_states = gpd.read_file('gz_2010_us_040_00_500k.json')
df_polls = pd.read_csv('presidential_poll_averages_2020.csv')

In [3]:
df_polls.head()

Unnamed: 0,cycle,state,modeldate,candidate_name,pct_estimate,pct_trend_adjusted
0,2020,Wyoming,11/03/2020,Joseph R. Biden Jr.,30.81486,30.82599
1,2020,Wisconsin,11/03/2020,Joseph R. Biden Jr.,52.12642,52.09584
2,2020,West Virginia,11/03/2020,Joseph R. Biden Jr.,33.49125,33.51517
3,2020,Washington,11/03/2020,Joseph R. Biden Jr.,59.34201,59.39408
4,2020,Virginia,11/03/2020,Joseph R. Biden Jr.,53.7412,53.72101


On how many dates was the polling done?

In [4]:
len(df_polls.modeldate.unique())

251

Filter our poll data to a specific date.

In [5]:
df_nov = df_polls[
    (df_polls.modeldate == '11/03/2020') # Ed note - changed 3 to 03
]

df_nov_states = df_nov[
    (df_nov.candidate_name == 'Donald Trump') |
    (df_nov.candidate_name == 'Joseph R. Biden Jr.')
]

The geo_states variable has polygons for each state.

In [6]:
geo_states.head()

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry
0,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ..."
1,0400000US25,25,Massachusetts,,7800.058,"MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ..."
2,0400000US26,26,Michigan,,56538.901,"MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ..."
3,0400000US30,30,Montana,,145545.801,"POLYGON ((-104.05770 44.99743, -104.25015 44.9..."
4,0400000US32,32,Nevada,,109781.18,"POLYGON ((-114.05060 37.00040, -114.04999 36.9..."


In [7]:
trump_data = df_nov_states[
    df_nov_states.candidate_name == 'Donald Trump'
]

biden_data = df_nov_states[
    df_nov_states.candidate_name == 'Joseph R. Biden Jr.'
]

Our spatial and poll data have the name of the state in common. We will change the name of the state to NAME to match our geospatial dataframe.

In [8]:
trump_data.columns = ['cycle', 'NAME', 'modeldate', 'candidate_name', 'pct_estimate', 'pct_trend_adjusted']
biden_data.columns = ['cycle', 'NAME', 'modeldate', 'candidate_name', 'pct_estimate', 'pct_trend_adjusted']

We can join the geospatial and poll data using the NAME column (the name of the state).

In [9]:
# Create seperate date frame for trump and biden
# Add the poll data
geo_states_trump = geo_states.merge(trump_data, on='NAME')
geo_states_biden = geo_states.merge(biden_data, on='NAME')

In [10]:
geo_states_trump.head()

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry,cycle,modeldate,candidate_name,pct_estimate,pct_trend_adjusted
0,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,11/03/2020,Donald Trump,40.3441,40.31588
1,0400000US25,25,Massachusetts,,7800.058,"MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ...",2020,11/03/2020,Donald Trump,28.56164,28.86275
2,0400000US26,26,Michigan,,56538.901,"MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ...",2020,11/03/2020,Donald Trump,43.20577,43.23326
3,0400000US30,30,Montana,,145545.801,"POLYGON ((-104.05770 44.99743, -104.25015 44.9...",2020,11/03/2020,Donald Trump,49.74744,49.78661
4,0400000US32,32,Nevada,,109781.18,"POLYGON ((-114.05060 37.00040, -114.04999 36.9...",2020,11/03/2020,Donald Trump,44.32982,44.36094


In [11]:
geo_states_biden.head()

Unnamed: 0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry,cycle,modeldate,candidate_name,pct_estimate,pct_trend_adjusted
0,0400000US23,23,Maine,,30842.923,"MULTIPOLYGON (((-67.61976 44.51975, -67.61541 ...",2020,11/03/2020,Joseph R. Biden Jr.,53.31518,53.32106
1,0400000US25,25,Massachusetts,,7800.058,"MULTIPOLYGON (((-70.83204 41.60650, -70.82373 ...",2020,11/03/2020,Joseph R. Biden Jr.,64.36328,64.62505
2,0400000US26,26,Michigan,,56538.901,"MULTIPOLYGON (((-88.68443 48.11579, -88.67563 ...",2020,11/03/2020,Joseph R. Biden Jr.,51.17806,51.15482
3,0400000US30,30,Montana,,145545.801,"POLYGON ((-104.05770 44.99743, -104.25015 44.9...",2020,11/03/2020,Joseph R. Biden Jr.,45.34418,45.36695
4,0400000US32,32,Nevada,,109781.18,"POLYGON ((-114.05060 37.00040, -114.04999 36.9...",2020,11/03/2020,Joseph R. Biden Jr.,49.62386,49.65657


### Create choropleth plot function

In [12]:
def usa_choro(geo_states, bin, scale):
    """Plot a choropleth of the US states with the candidate vote percentage mapped to the color;
    pass in altair bin and scale objects"""
    title = 'Poll estimate for ' + geo_states['candidate_name'][0] + ' on 11/3/2020'
    chart = alt.Chart(geo_states, title=title).mark_geoshape()
    chart = chart.encode(
        alt.Color('pct_estimate',
                  bin=bin,
                  scale=scale
                 ),
        tooltip=['NAME', 'pct_estimate']
    ).properties(
        width=500,
        height=300
    ).project(
        type='albersUsa'
    )
    return chart

### Create "metavisualisation" histogram

In [13]:
def meta_hist(geo_states, bin, scale):
    """Plot a histogram of the dataset with binning and colouring that matches the choropleth
    if you give it the same bin and scale objects"""
    hist = alt.Chart(geo_states).mark_bar().encode(
        alt.X("pct_estimate", bin=bin),
        y='count()',
    ).encode(
        alt.Color('pct_estimate',
                  bin=bin,
                  scale=scale
                 )
    ).properties(
        width=300,
        height=300
    )
    return hist

## Adding interactivity with Jupyter interact widgets

Here I attempt to offer the user working on the the US presidential poll choropleth visualisation some options related to the colour-binning.

In [14]:
from ipywidgets import interact, widgets

In [15]:
extent_slider = widgets.IntRangeSlider(value=[0,100], min=0, max=100) # create this widget separately to make it a range
@interact
def interact_usa_choro(candidate=['Trump', 'Biden'], bin=True, maxbins=(2, 100, 1), scale=['band', 'linear', 'log', 'bin-ordinal'], extent=extent_slider):
    
    # Chooses the dataset
    geo_states = geo_states_biden
    if candidate == 'Trump':
        geo_states = geo_states_trump
    
    # Creates the bin and scale objects, to be passed to both plots
    if bin: # if bin is False, leave as bool
        bin = alt.Bin(maxbins=maxbins, extent=extent)
    scale = alt.Scale(type=scale)
    
    # Generate both plots and place them horizontally
    return usa_choro(geo_states, bin, scale) | meta_hist(geo_states, bin, scale)

interactive(children=(Dropdown(description='candidate', options=('Trump', 'Biden'), value='Trump'), Checkbox(v…

Notes:
1. Setting the scale to `log` doesn't work unless `bin` is **unchecked**
2. Because the `extent` data range plugs into the `alt.Bin` object, this will only work if `bin` is **checked**

### Can we pre-populate dropdowns with all available options for a given function argument?

In [16]:
example = usa_choro(geo_states_trump)
example.__code__.co_varnames

TypeError: usa_choro() missing 2 required positional arguments: 'bin' and 'scale'

In [None]:
import inspect

In [None]:
print(inspect.signature(alt.Color))
print('')
print(inspect.signature(alt.Chart))

In [None]:
inspect.signature(alt.Scale)

In [None]:
print(inspect.signature(alt.ScaleType))

Find a way to return the enumerated values in [this class](https://altair-viz.github.io/user_guide/generated/core/altair.ScaleType.html#altair.ScaleType)