# Thinking About An Upcoming Election

In [None]:
# Read the raw poll table, then split the 'Pres-G' rows (presumably
# presidential general-election polls) into the national 'US' rows and
# everything else.
polls = pd.read_csv('raw-polls.csv')
pres_polls_national = polls.loc[(polls['type_detail'] == 'Pres-G') & (polls['location'] == 'US')]
pres_polls_by_state = polls.loc[(polls['type_detail'] == 'Pres-G') & (polls['location'] != 'US')]

In [None]:
def margin_dotplot(df, x, y, figsize=(16,4)):
    """
    Dot plot comparing poll margins with the actual margin, one row per `y` value.

    Three layers are drawn on the same axes:
      * 'margin_actual' as translucent dots, blue ('D') when the margin is
        positive and red ('R') otherwise,
      * every individual value of `x` as grey dots,
      * the mean of `x` within each `y` group as white outlined dots.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `x`, `y` and 'margin_actual'.
    x : str
        Name of the numeric column to plot (e.g. 'margin_poll').
    y : str
        Name of the column used for the categorical axis and for grouping.
    figsize : tuple, optional
        Figure size forwarded to `plt.subplots`.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    data = df.sort_values(by=y)
    fig, ax = plt.subplots(figsize=figsize)

    # Colour key for the actual result: a positive margin is labelled 'D'.
    winner = data['margin_actual'].apply(lambda margin: 'D' if margin > 0 else 'R')
    sns.stripplot(data=data,
                  x='margin_actual', y=y, orient='h', size=15,
                  hue=winner,
                  palette={'D': 'blue', 'R': 'red'},
                  jitter=False, alpha=.25, ax=ax)

    sns.stripplot(data=data,
                  x=x, y=y, orient='h', size=15,
                  jitter=False, alpha=.35, color='grey', ax=ax)

    # Passing a {name: func} dict to SeriesGroupBy.agg was removed in
    # pandas 1.0 (SpecificationError); .mean() yields the same column and
    # groupby already returns the groups sorted by `y`.
    means = data.groupby(y)[x].mean().reset_index()
    sns.stripplot(data=means,
                  x=x, y=y, orient='h', size=15,
                  color='white', linewidth=1, jitter=False, ax=ax)

    # Zero line separates the two sides of the margin.
    ax.axvline(x=0, color='black', linewidth=0.8)
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)

def dotplot(df, x, y, figsize=(16,4)):
    """
    Dot plot of an arbitrary column `x` against a grouping column `y`.

    Every individual value of `x` is drawn as a grey dot, and the mean of
    `x` within each `y` group is overlaid as a white outlined dot.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `x` and `y`.
    x : str
        Name of the numeric column to plot.
    y : str
        Name of the column used for the categorical axis and for grouping.
    figsize : tuple, optional
        Figure size forwarded to `plt.subplots`.

    Returns
    -------
    None
        The figure is drawn as a side effect.
    """
    data = df.sort_values(by=y)
    fig, ax = plt.subplots(figsize=figsize)
    sns.stripplot(data=data,
                  x=x, y=y, orient='h', size=15,
                  jitter=False, alpha=.35, color='grey', ax=ax)

    # Passing a {name: func} dict to SeriesGroupBy.agg was removed in
    # pandas 1.0 (SpecificationError); .mean() yields the same column and
    # groupby already returns the groups sorted by `y`.
    means = data.groupby(y)[x].mean().reset_index()
    sns.stripplot(data=means,
                  x=x, y=y, orient='h', size=15,
                  color='white', linewidth=1, jitter=False, ax=ax)

    # Zero reference line.
    ax.axvline(x=0, color='black', linewidth=0.8)
    ax.xaxis.grid(True)
    ax.yaxis.grid(True)

#### nationwide popular vote: polls versus election results for each year


- plot
- look @ average error in table

In [None]:
# National polls: poll margins vs. the actual margin, one row per year.
margin_dotplot(df=pres_polls_national, x='margin_poll', y='year')

In [None]:
# Mean of the `bias` column for the national polls, per year.
pres_polls_national.groupby('year')[['bias']].mean()

#### state by state polls vs popular vote for 2016

- plot
- look @ average error in table

In [None]:
# Restrict the non-national polls to 2016 and plot them, one row per location.
pres_polls_by_state_2016 = pres_polls_by_state.loc[pres_polls_by_state['year'] == 2016]
margin_dotplot(
    df=pres_polls_by_state_2016,
    x='margin_poll',
    y='location',
    figsize=(8, 25),
)

#### state by state average errors for each year


- plot
- look @ average error in table

In [None]:
# Non-national polls: each poll's `bias` plus the yearly mean, one row per year.
dotplot(df=pres_polls_by_state, x='bias', y='year', figsize=(16, 4))

In [None]:
# Mean of the `bias` column for the non-national polls, per year.
pres_polls_by_state.groupby('year')[['bias']].mean()

#### Step 2: Polling averages and adjustment

* state by state 2016 polls
    1. plot (y axis = state - plot each poll, average of polls, and election result)
    2. plot (y axis = state - plot each *adjusted* poll, *adjusted* average of polls, and election result)
    3. plot (y axis = state - plot average, adjusted average, and election result)
* do the last chart with national 2016 as well as national for all years

https://fivethirtyeight.com/features/how-fivethirtyeight-calculates-pollster-ratings/

In [None]:
# Re-read the raw polls so this cell starts from an unmodified frame each
# time it is run, then load the pollster ratings keyed by pollster name.
polls = pd.read_csv('raw-polls.csv')
pollster_ratings = pd.read_csv('pollster-ratings.csv').set_index('Pollster')

# Explicit renames applied to `polls.pollster` before it is looked up in the
# ratings index (key: name as it appears in the polls, value: name to look up).
pollster_mapping = {
    'Field Research Corporation (Field Poll)': 'Field Research Corp. (Field Poll)',
    'Selzer & Company': 'Selzer & Co.',
    'Ed Renwick/Loyola University': 'Ed Renwick/Loyola University New Orleans',
    'Abt SRBI': 'Abt Associates',
    'Marquette Law School': 'Marquette University Law School',
    'Google Consumer Surveys': 'Google Surveys',
    'Fox News/Anderson Robbins Research/Shaw & Company Research': 'Fox News/Anderson Robbins Research/Shaw & Co. Research',
    'Craciun Research': 'Craciun Research Group',
    'Cygnal Political': 'Cygnal',
    'US News & World Report': 'U.S. News & World Report',
    'Red Racing Horses': 'RRH Elections',
}

# Vectorised clean-up instead of row-wise lambdas: drops the comma before
# 'Inc.' / 'LLC', then applies the exact-match renames. Unlike the lambda
# form, the .str accessor passes missing names through instead of raising.
polls['pollster'] = (
    polls['pollster']
    .str.replace(', Inc.', ' Inc.', regex=False)
    .str.replace(', LLC', ' LLC', regex=False)
    .replace(pollster_mapping)
)

# Look up each poll's grade and mean-reverted bias by pollster name; names
# absent from the ratings index come back as NaN.
# NOTE(review): Series.map needs unique pollster names in
# pollster-ratings.csv -- confirm the 'Pollster' column has no duplicates.
polls['grade'] = polls['pollster'].map(pollster_ratings['538 Grade'])
polls['mrb'] = polls['pollster'].map(pollster_ratings['Mean-Reverted Bias'])
# Turn 'D +x' into +x and 'R +x' into -x so the sign convention matches the margins.
polls['mrb'] = pd.to_numeric(polls['mrb'].str.replace('D +', '', regex=False).str.replace('R +', '-', regex=False))

# Remove the pollster's mean-reverted bias from the poll margin, then
# recompute the signed and absolute miss against the actual margin.
polls['margin_poll_adjusted'] = polls['margin_poll'] - polls['mrb']
polls['bias_adjusted'] = polls['margin_poll_adjusted'] - polls['margin_actual']
polls['error_adjusted'] = np.abs(polls['margin_poll_adjusted'] - polls['margin_actual'])

# Rebuild the national / non-national subsets so they carry the new columns.
pres_polls_national = polls[(polls['type_detail'] == 'Pres-G') & (polls['location'] == 'US')]
pres_polls_by_state = polls[(polls['type_detail'] == 'Pres-G') & (polls['location'] != 'US')]

pres_polls_national_2016 = pres_polls_national[pres_polls_national['year'] == 2016]
pres_polls_by_state_2016 = pres_polls_by_state[pres_polls_by_state['year'] == 2016]

In [None]:
# Mean raw vs. adjusted bias across the 2016 national polls.
pres_polls_national_2016.loc[:, ['bias', 'bias_adjusted']].mean()

In [None]:
# Mean raw vs. adjusted bias across the 2016 non-national polls.
pres_polls_by_state_2016.loc[:, ['bias', 'bias_adjusted']].mean()

In [None]:
# Inspect the individual 2016 polls for DC with their raw and adjusted values.
pres_polls_by_state_2016.loc[
    pres_polls_by_state_2016['location'] == "DC",
    [
        'pollster',
        'grade',
        'margin_actual',
        'margin_poll',
        'margin_poll_adjusted',
        'mrb',
        'bias',
        'bias_adjusted',
    ],
]

In [None]:
# Per-location summary of the 2016 polls: mean raw bias, mean adjusted bias
# and the number of polls (held in the unnamed column 0).
tmpdf = pres_polls_by_state_2016.groupby('location')

tmpdf = pd.concat(
    [
        tmpdf[['bias', 'bias_adjusted']].mean(),
        tmpdf.size().to_frame(),
    ],
    axis=1,
)
# Positive `difference` means the adjusted mean bias is smaller in magnitude.
tmpdf['difference'] = tmpdf['bias'].abs() - tmpdf['bias_adjusted'].abs()
# Show the locations with the most polls first.
tmpdf.sort_values(by=0, ascending=False)

## Answering Questions with Data

Let's prove or disprove some hypotheses:
- As goes Ohio, so goes the nation.
- Texas is going blue
- ...come up with a few more

## Polls Stories

- https://fivethirtyeight.com/features/how-much-the-polls-missed-by-in-every-state/
- https://fivethirtyeight.com/features/pollsters-probably-didnt-talk-to-enough-white-voters-without-college-degrees/
- https://fivethirtyeight.com/features/what-a-difference-2-percentage-points-makes/
- https://fivethirtyeight.com/features/shy-voters-probably-arent-why-the-polls-missed-trump/
- https://fivethirtyeight.com/features/the-polls-missed-trump-we-asked-pollsters-why/
- https://fivethirtyeight.com/features/why-fivethirtyeight-gave-trump-a-better-chance-than-almost-anyone-else/
- https://fivethirtyeight.com/features/the-polls-are-all-right/
- https://fivethirtyeight.com/features/trump-is-just-a-normal-polling-error-behind-clinton/


# The 2018 FiveThirtyEight Forecasts

- https://projects.fivethirtyeight.com/2018-midterm-election-forecast/senate
- https://projects.fivethirtyeight.com/2018-midterm-election-forecast/house

# Some other folks

- [CNN](https://www.cnn.com/election/2018/forecast)
- [Daily Kos](https://elections.dailykos.com/)
- [New York Times - Real Time Polling!](https://www.nytimes.com/interactive/2018/upshot/elections-polls.html)


# Visualizing Uncertainty

- FiveThirtyEight in [2010](https://www.nytimes.com/elections/2010/forecasts/senate.html), [2014](https://fivethirtyeight.com/interactives/senate-forecast/), [2016](https://projects.fivethirtyeight.com/2016-election-forecast/), [2018](https://projects.fivethirtyeight.com/2018-midterm-election-forecast/house/)
    * I think 2010 still works in Safari...
- New York Times
    * The Spinners https://www.nytimes.com/2014/11/01/upshot/how-confirmation-bias-can-lead-to-a-spinning-of-wheels.html
    * The Needle https://www.youtube.com/watch?v=iq5rW6zYeP4
- [Huffpost's](http://elections.huffingtonpost.com/pollster) custom charts.

