# Average metric decomposition

Data source: county-level US general election returns, published on the Harvard Dataverse: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ

In [1]:
import icanexplain as ice

# Load the popular-vote dataset bundled with icanexplain and preview its first rows.
votes = ice.datasets.load_us_general_election_popular_vote()
votes.head(5)

Unnamed: 0,year,state,state_po,county_name,county_fips,party,votes
0,2012,ALABAMA,AL,AUTAUGA,1001,DEMOCRAT,6363
1,2012,ALABAMA,AL,AUTAUGA,1001,OTHER,190
2,2012,ALABAMA,AL,AUTAUGA,1001,REPUBLICAN,17379
3,2012,ALABAMA,AL,BALDWIN,1003,DEMOCRAT,18424
4,2012,ALABAMA,AL,BALDWIN,1003,OTHER,898


We'll take a look at the evolution of the Republican vote share in the US over time.

In [2]:
# Per (year, state) tally: every vote cast, and the subset cast for the
# Republican candidate. The join is a left join on the (year, state_po) index,
# so a state/year without any Republican rows would end up with NaN.
tally = (
    votes
    .groupby(['year', 'state_po'])
    .agg(total_votes=('votes', 'sum'))
    .join(
        votes
        .query('party == "REPUBLICAN"')
        .groupby(['year', 'state_po'])
        .agg(republican_votes=('votes', 'sum'))
    )
    .reset_index()
)
# Fixed seed so the previewed rows are identical on every Restart & Run All.
tally.sample(5, random_state=0)

Unnamed: 0,year,state_po,total_votes,republican_votes
44,2012,UT,1017440,740600
76,2016,MS,1209357,700714
8,2012,DE,413937,165484
34,2012,NY,7061925,2223397
75,2016,MO,2807381,1594511


In [3]:
# National Republican share per election year, together with its change
# relative to the previous election in the table.
(
    tally
    .groupby('year', as_index=False)
    [['total_votes', 'republican_votes']]
    .sum()
    .assign(
        share=lambda df: df['republican_votes'] / df['total_votes'],
        # `share` is already available here: assign evaluates keywords in order.
        diff=lambda df: df['share'].diff(),
    )
    .loc[:, ['year', 'share', 'diff']]
    .style
    .format({'share': '{:.2%}', 'diff': '{:+.2%}'}, na_rep='')
    .set_caption('Republican share of the popular vote in US general elections')
)

Unnamed: 0,year,share,diff
0,2012,47.01%,
1,2016,46.14%,-0.87%
2,2020,46.83%,+0.69%


In [4]:
# Explain the year-over-year change of the ratio republican_votes / total_votes
# without any grouping dimension.
explainer = ice.MeanExplainer(fact='republican_votes', count='total_votes', period='year')
(
    explainer(tally)
    .style
    .format(dict.fromkeys(['inner', 'mix'], '{:+.2%}'), na_rep='')
)

Unnamed: 0,year,inner,mix
0,2016,-0.87%,+0.00%
1,2020,+0.69%,-0.00%


In [5]:
# Same explainer as before, now broken down by state.
explainer = ice.MeanExplainer(
    fact='republican_votes', count='total_votes', period='year', group='state_po',
)
explanation_by_state = explainer(tally)

# The ten states with the largest `inner` value for 2016.
(
    explanation_by_state
    .loc[lambda df: df['year'] == 2016]
    .sort_values(by='inner', ascending=False)
    .iloc[:10]
    .style
    .format(dict.fromkeys(['inner', 'mix'], '{:+.2%}'), na_rep='')
)

Unnamed: 0,year,state_po,inner,mix
34,2016,NY,+0.28%,-0.03%
35,2016,OH,+0.16%,-0.00%
22,2016,MI,+0.10%,+0.00%
38,2016,PA,+0.08%,+0.00%
24,2016,MO,+0.06%,-0.01%
12,2016,IA,+0.06%,-0.00%
15,2016,IN,+0.06%,-0.00%
49,2016,WV,+0.03%,+0.00%
17,2016,KY,+0.03%,+0.00%
48,2016,WI,+0.03%,+0.00%


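New York (NY) is the largest positive contributor in 2016: its Republican share rose from about 31.5% to 36.5%, as the first check below shows. Ohio (OH) shifted more Republican as well, moving from a win for Obama in 2012 to a win for Trump in 2016.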

In [6]:
# New York's Republican share in 2012 versus 2016.
(
    tally
    .loc[lambda df: df['state_po'].eq('NY') & df['year'].isin([2012, 2016])]
    .assign(share=lambda df: df['republican_votes'] / df['total_votes'])
    .sort_values(by='year')
)

Unnamed: 0,year,state_po,total_votes,republican_votes,share
34,2012,NY,7061925,2223397,0.314843
85,2016,NY,7707363,2814589,0.365182


In [7]:
# Ohio's Republican share in 2012 versus 2016.
(
    tally
    .loc[lambda df: df['state_po'].eq('OH') & df['year'].isin([2012, 2016])]
    .assign(share=lambda df: df['republican_votes'] / df['total_votes'])
    .sort_values(by='year')
)

Unnamed: 0,year,state_po,total_votes,republican_votes,share
35,2012,OH,5580822,2661407,0.476884
86,2016,OH,5496487,2841005,0.516877


In [8]:
# The ten states with the smallest (most negative) `inner` value for 2020.
(
    explanation_by_state
    .loc[lambda df: df['year'] == 2020]
    .sort_values(by='inner')
    .iloc[:10]
    .style
    .format(dict.fromkeys(['inner', 'mix'], '{:+.2%}'), na_rep='')
)

Unnamed: 0,year,state_po,inner,mix
61,2020,GA,-0.05%,+0.01%
71,2020,MD,-0.03%,+0.02%
70,2020,MA,-0.03%,+0.01%
56,2020,CO,-0.03%,-0.00%
57,2020,CT,-0.02%,+0.00%
94,2020,TX,-0.01%,+0.03%
96,2020,VA,-0.01%,+0.00%
72,2020,ME,-0.01%,+0.00%
59,2020,DE,-0.01%,+0.00%
68,2020,KY,-0.01%,-0.01%


Georgia (GA) went from Republican (Donald Trump) in 2016 to Democratic (Joe Biden) in 2020.

In [9]:
# Georgia's Republican share in 2016 versus 2020.
(
    tally
    .loc[lambda df: df['state_po'].eq('GA') & df['year'].isin([2016, 2020])]
    .assign(share=lambda df: df['republican_votes'] / df['total_votes'])
    .sort_values(by='year')
)

Unnamed: 0,year,state_po,total_votes,republican_votes,share
61,2016,GA,4114711,2089104,0.507716
112,2020,GA,4998482,2461837,0.492517


In [10]:
# Sanity check: per year, add up every state's `inner` and `mix` values into a
# single `diff` figure, for comparison with the national table shown earlier.
(
    explanation_by_state
    .groupby('year')
    .agg(inner=('inner', 'sum'), mix=('mix', 'sum'))
    .sum(axis=1)
    .rename('diff')
    .to_frame()
    .style
    .format('{:+.2%}')
)

Unnamed: 0_level_0,diff
year,Unnamed: 1_level_1
2016,-0.87%
2020,+0.69%


In [11]:
import altair as alt
import pandas as pd
from vega_datasets import data

# State shapes, taken from the us_10m topology shipped with vega_datasets
states = alt.topo_feature(data.us_10m.url, 'states')

# Numeric state id -> two-letter postal abbreviation.
# NOTE(review): these look like state FIPS codes, which is presumably what the
# us_10m features use as `id` — confirm against the topology file.
state_id_to_abbr = {
    1: 'AL', 2: 'AK', 4: 'AZ', 5: 'AR', 6: 'CA', 8: 'CO',
    9: 'CT', 10: 'DE', 11: 'DC', 12: 'FL', 13: 'GA', 15: 'HI',
    16: 'ID', 17: 'IL', 18: 'IN', 19: 'IA', 20: 'KS', 21: 'KY',
    22: 'LA', 23: 'ME', 24: 'MD', 25: 'MA', 26: 'MI', 27: 'MN',
    28: 'MS', 29: 'MO', 30: 'MT', 31: 'NE', 32: 'NV', 33: 'NH',
    34: 'NJ', 35: 'NM', 36: 'NY', 37: 'NC', 38: 'ND', 39: 'OH',
    40: 'OK', 41: 'OR', 42: 'PA', 44: 'RI', 45: 'SC', 46: 'SD',
    47: 'TN', 48: 'TX', 49: 'UT', 50: 'VT', 51: 'VA', 53: 'WA',
    54: 'WV', 55: 'WI', 56: 'WY',
}

# Two-column lookup table: `id` and `state_po`
state_abbr_df = (
    pd.Series(state_id_to_abbr, name='state_po')
    .rename_axis('id')
    .reset_index()
)

# Attach each state's explanation rows to its id; the left join keeps every
# id from the lookup table even when no explanation row matches.
merged_df = state_abbr_df.merge(explanation_by_state, how='left', on='state_po')

In [12]:
# One colour scale for both maps, spanning the full range of `inner` across all
# years so that colours are comparable between the two panels.
# NOTE(review): `inner` takes both signs; a diverging scheme centred on zero
# may read better than the sequential 'reds' scheme — left unchanged here.
color_scale = alt.Scale(domain=[merged_df['inner'].min(), merged_df['inner'].max()], scheme='reds')


def inner_effect_map(year):
    """Build a US choropleth of each state's `inner` value for one election year.

    Parameters
    ----------
    year : int
        Value matched against the `year` column of `merged_df`; also used in
        the chart title.

    Returns
    -------
    alt.Chart
        A 400x300 Albers-USA map coloured by `inner`, with the state code and
        the `inner` value in the tooltip.
    """
    year_rows = merged_df[merged_df['year'] == year]
    return (
        alt.Chart(states)
        .mark_geoshape()
        .encode(
            color=alt.Color('inner:Q', scale=color_scale),
            tooltip=['state_po:N', 'inner:Q'],
        )
        # Pull `state_po` and `inner` onto each shape by matching on its `id`.
        .transform_lookup(
            lookup='id',
            from_=alt.LookupData(year_rows, 'id', ['state_po', 'inner']),
        )
        .project(type='albersUsa')
        .properties(width=400, height=300, title=f'Year {year}')
    )


# Side-by-side maps for the two years that have an explanation: 2016 and 2020.
combined_chart = alt.hconcat(
    *(inner_effect_map(year) for year in (2016, 2020))
).resolve_scale(
    color='shared'
)

combined_chart
