In [1]:
import altair as alt
import pandas as pd
from vega_datasets import data

# Census state reference table (pipe-separated), used to attach a numeric
# state id to each state name.
# per https://gist.github.com/mbostock/4090848#gistcomment-2102151
ansi = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep='|',
)
ansi.columns = ['id', 'abbr', 'State', 'statens']
# Only the id and the state name are needed downstream.
ansi = ansi.loc[:, ['id', 'State']]

# Readable labels for the bad-drivers CSV columns, in file order.
driver_columns = [
    'State',
    'Fatal Collisions (per billion mi)',
    'Speeding (%)',
    'Alcohol-Impaired (%)',
    'Not Distracted (%)',
    'No Previous Accidents (%)',
    'Insurance Premiums ($)',
    'Losses per Collision ($)',
]
driver_data = pd.read_csv("bad-drivers/bad-drivers.csv")
driver_data.columns = driver_columns

# Inner join on the state name: rows without a match on both sides are dropped.
driver_data = ansi.merge(driver_data, on='State')

# Turn each percentage share into an absolute rate:
# fatal collisions per billion miles * share (%) * 0.01.
fatal_rate = driver_data['Fatal Collisions (per billion mi)']
driver_data = driver_data.assign(**{
    f'{cause} Collisions (per billion mi)': fatal_rate * driver_data[f'{cause} (%)'] * 0.01
    for cause in ('Speeding', 'Alcohol-Impaired')
})
driver_data.head()

Unnamed: 0,id,State,Fatal Collisions (per billion mi),Speeding (%),Alcohol-Impaired (%),Not Distracted (%),No Previous Accidents (%),Insurance Premiums ($),Losses per Collision ($),Speeding Collisions (per billion mi),Alcohol-Impaired Collisions (per billion mi)
0,1,Alabama,18.8,39,30,96,80,784.55,145.08,7.332,5.64
1,2,Alaska,18.1,41,25,90,94,1053.48,133.93,7.421,4.525
2,4,Arizona,18.6,35,28,84,96,899.47,110.35,6.51,5.208
3,5,Arkansas,22.4,18,26,94,95,827.34,142.39,4.032,5.824
4,6,California,12.0,35,28,91,89,878.41,165.63,4.2,3.36


In [10]:
# Linked views of the bad-drivers data: a US choropleth coloured by fatal
# collision rate next to a scatterplot of speeding vs. alcohol-impaired
# collision rates. Clicking states or brushing the scatterplot dims all
# non-selected marks in both views; a dropdown picks which dollar column
# drives the scatterplot's circle size.

# US states background
states = alt.topo_feature(data.us_10m.url, feature='states')

# Columns the dropdown can map to circle size. The same list feeds the fold
# transform below so the dropdown options and folded columns cannot drift apart.
size_options = ["Insurance Premiums ($)", "Losses per Collision ($)"]

size_dropdown = alt.binding_select(options=size_options, name="Select a size variable:")
size_selection = alt.selection(
    type="single",
    fields=['column'],
    bind=size_dropdown,
    init={'column': size_options[0]},
)
drag = alt.selection(type="interval", encodings=["x", "y"])
click = alt.selection(type="multi", fields=['State'])

# Highlight rule shared by both views.
# An empty selection matches every mark by default, so OR-ing the two
# selections keeps everything at full opacity whenever either one is empty
# (i.e. clicking alone or brushing alone would highlight nothing). AND lets
# each selection dim marks on its own and intersects them when both are active.
highlight_opacity = alt.condition(drag & click, alt.value(1), alt.value(0.2))

# NOTE: the variable name is a misspelling of "choropleth"; it is kept as-is
# in case other cells refer to it.
choreopleth = alt.Chart(states).mark_geoshape().encode(
    color = "Fatal Collisions (per billion mi):Q",
    tooltip = ['State:N', 'Fatal Collisions (per billion mi):Q'],
    opacity = highlight_opacity,
).transform_lookup(
    # Attach every driver_data column to the state shape with the same id, so
    # the colour, tooltip and both selections can be evaluated on the map.
    lookup = 'id',
    from_ = alt.LookupData(driver_data, 'id', list(driver_data.columns))
).add_selection(click).project('albersUsa')

scatterplot = alt.Chart(driver_data).mark_circle().encode(
    x = "Speeding Collisions (per billion mi)",
    y = "Alcohol-Impaired Collisions (per billion mi)",
    color = "Fatal Collisions (per billion mi)",
    size = "value:Q",
    tooltip = ["State", "Speeding Collisions (per billion mi)", "Alcohol-Impaired Collisions (per billion mi)"],
    opacity = highlight_opacity,
).transform_fold(
    # Long format: one row per (state, size column); the filter below keeps
    # only the rows for the column currently chosen in the dropdown.
    size_options,
    as_ = ['column', 'value']
).transform_filter(size_selection).add_selection(click, size_selection, drag)

choreopleth | scatterplot