## 2016 Presidential Election: Virginia Voting Patterns

You can see this visualization included in the following web app:   
https://virginia-2016-vote-totals.herokuapp.com/

The data source: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/LYWX3D

In [1]:
import plotly as py
import plotly.graph_objs as go
import pandas as pd

In [17]:
# load the precinct-level results straight from the GitHub-hosted CSV
DATA_URL = 'https://raw.githubusercontent.com/austinlasseter/virginia_election_2016/master/Virginia.csv'
df0 = pd.read_csv(DATA_URL)
df0.shape

(16137, 22)

In [22]:
# keep only the five columns needed below; .copy() gives an independent frame
# so later edits to df1 do not touch df0
df1 = df0.loc[
    :, ['county_name', 'jurisdiction', 'precinct', 'candidate', 'votes']
].copy()
df1.shape

(16137, 5)

In [23]:
# list each distinct candidate together with the number of rows they appear in
df1.loc[:, 'candidate'].value_counts()

[Write-in]         2693
Hillary Clinton    2693
Donald Trump       2693
Gary Johnson       2691
Evan McMullin      2687
Jill Stein         2680
Name: candidate, dtype: int64

In [24]:
# relabel every candidate except the two major-party ones as 'Other'
# (this overwrites df1['candidate'] in place)
df1.loc[~df1['candidate'].isin(['Hillary Clinton', 'Donald Trump']),
        'candidate'] = 'Other'
df1['candidate'].value_counts()

Other              10751
Hillary Clinton     2693
Donald Trump        2693
Name: candidate, dtype: int64

In [26]:
# preview the first six rows (one precinct's worth of candidate rows)
df1.head(n=6)

Unnamed: 0,county_name,jurisdiction,precinct,candidate,votes
0,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Hillary Clinton,784
1,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Other,32
2,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Other,8
3,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Donald Trump,644
4,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Other,3
5,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Other,16


In [25]:
# sum the votes within each (county, jurisdiction, precinct, candidate) group;
# the four grouping columns become the row MultiIndex, and the several 'Other'
# rows of a precinct collapse into a single total
df2 = df1.groupby(
    by=['county_name', 'jurisdiction', 'precinct', 'candidate']
).sum()
df2.head(6)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,votes
county_name,jurisdiction,precinct,candidate,Unnamed: 4_level_1
Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Donald Trump,644
Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Hillary Clinton,784
Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,Other,59
Accomack County,ACCOMACK COUNTY,## Provisional,Donald Trump,1
Accomack County,ACCOMACK COUNTY,## Provisional,Hillary Clinton,3
Accomack County,ACCOMACK COUNTY,## Provisional,Other,2


In [29]:
# pivot the innermost index level (candidate) into columns, giving one row
# per precinct and one 'votes' column per candidate
df3 = df2.unstack()
df3.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,votes,votes,votes
Unnamed: 0_level_1,Unnamed: 1_level_1,candidate,Donald Trump,Hillary Clinton,Other
county_name,jurisdiction,precinct,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,644,784,59
Accomack County,ACCOMACK COUNTY,## Provisional,1,3,2
Accomack County,ACCOMACK COUNTY,101 - CHINCOTEAGUE,1188,444,48
Accomack County,ACCOMACK COUNTY,201 - ATLANTIC,553,175,22
Accomack County,ACCOMACK COUNTY,202 - GREENBACKVILLE,697,302,31


In [30]:
# move county_name / jurisdiction / precinct out of the index and back into
# ordinary columns, leaving a plain 0..n-1 row index
df4 = df3.reset_index(drop=False)
df4.head(5)

Unnamed: 0_level_0,county_name,jurisdiction,precinct,votes,votes,votes
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Donald Trump,Hillary Clinton,Other
0,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,644,784,59
1,Accomack County,ACCOMACK COUNTY,## Provisional,1,3,2
2,Accomack County,ACCOMACK COUNTY,101 - CHINCOTEAGUE,1188,444,48
3,Accomack County,ACCOMACK COUNTY,201 - ATLANTIC,553,175,22
4,Accomack County,ACCOMACK COUNTY,202 - GREENBACKVILLE,697,302,31


In [31]:
# look at a single value: 'Other' votes in the first row
print(df4.loc[0, ('votes', 'Other')])

59

In [9]:
# look at a single value: the jurisdiction label of the first row
print(df4['jurisdiction'].loc[0])

'ACCOMACK COUNTY'

In [32]:
# pull the rows of one jurisdiction out into their own dataframe
accomack = df4.loc[df4['jurisdiction'] == 'ACCOMACK COUNTY']
print(accomack.shape)
accomack.head(n=5)

(18, 6)


Unnamed: 0_level_0,county_name,jurisdiction,precinct,votes,votes,votes
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Donald Trump,Hillary Clinton,Other
0,Accomack County,ACCOMACK COUNTY,# AB - Central Absentee Precinct,644,784,59
1,Accomack County,ACCOMACK COUNTY,## Provisional,1,3,2
2,Accomack County,ACCOMACK COUNTY,101 - CHINCOTEAGUE,1188,444,48
3,Accomack County,ACCOMACK COUNTY,201 - ATLANTIC,553,175,22
4,Accomack County,ACCOMACK COUNTY,202 - GREENBACKVILLE,697,302,31


In [34]:
# collect the distinct precinct names of that jurisdiction
# NOTE: value_counts() orders by frequency, so this list is NOT in the same
# order as the rows of `accomack` -- do not pair it positionally with columns
# taken from that dataframe
precinct_counts = accomack['precinct'].value_counts()
precincts = list(precinct_counts.index)
precincts

['803 - WACHAPREAGUE',
 '301 - NEW CHURCH',
 '401 - BLOXOM',
 '602 - TANGIER',
 '403 - SAXIS',
 '701 - NANDUA',
 '101 - CHINCOTEAGUE',
 '## Provisional',
 '# AB - Central Absentee Precinct',
 '801 - BOBTOWN',
 '202 - GREENBACKVILLE',
 '901 - PAINTER',
 '501 - MAPPSVILLE',
 '502 - RUE',
 '802 - MELFA',
 '601 - ACCOMAC',
 '402 - PARKSLEY',
 '201 - ATLANTIC']

In [35]:
# vote totals for one candidate group, one entry per precinct row
list(accomack[('votes', 'Other')])

[59, 2, 48, 22, 31, 33, 13, 29, 5, 23, 17, 41, 5, 75, 23, 22, 9, 38]

In [40]:
# bar colors, indexed by position when the traces are built
virginia_colors = [
    'red',
    'blue',
    'yellow',
]

In [41]:
# Let's display that with plotly.

# Take the x labels from the 'precinct' column itself so they are in the same
# row order as the vote counts used for y. The previous
# value_counts().index ordering did not match the row order, which attached
# vote totals to the wrong precinct names.
precinct_labels = list(accomack['precinct'])

mydata1 = go.Bar(x=precinct_labels,
                 y=list(accomack['votes']['Donald Trump']),
                 marker=dict(color=virginia_colors[0]),
                 name='Trump')
mydata2 = go.Bar(x=precinct_labels,
                 y=list(accomack['votes']['Hillary Clinton']),
                 marker=dict(color=virginia_colors[1]),
                 name='Clinton')
mydata3 = go.Bar(x=precinct_labels,
                 y=list(accomack['votes']['Other']),
                 marker=dict(color=virginia_colors[2]),
                 name='Other')

# The x axis lists precincts (candidates are the separate traces), so label
# it accordingly.
mylayout = go.Layout(
    title='Virginia Voting 2016',
    xaxis=dict(title='Precincts'),
    yaxis=dict(title='Number of Votes')
)
fig = go.Figure(data=[mydata1, mydata2, mydata3], layout=mylayout)
fig

In [46]:
# Now turn that into a function.
def juris_picker(juris_name):
    """Build a per-precinct bar chart of votes for one jurisdiction.

    Filters the notebook-level ``df4`` to the rows whose 'jurisdiction'
    column equals ``juris_name`` and creates one bar trace each for Trump,
    Clinton and Other, with precincts on the x axis.

    Parameters
    ----------
    juris_name : str
        Jurisdiction label exactly as it appears in ``df4['jurisdiction']``
        (for example 'ACCOMACK COUNTY').

    Returns
    -------
    plotly.graph_objs.Figure
        Figure holding the three bar traces and a titled layout.
    """
    juris_df = df4[df4['jurisdiction'] == juris_name]

    # Use the precinct column in row order so every label lines up with the
    # vote counts taken from the same rows. value_counts().index is ordered
    # by frequency rather than by row, which mislabels the bars.
    precinct_labels = list(juris_df['precinct'])

    # (votes column, bar color, legend label) for each trace
    trace_specs = [
        ('Donald Trump', '#122A7F', 'Trump'),
        ('Hillary Clinton', '#008080', 'Clinton'),
        ('Other', '#92A5E8', 'Other'),
    ]
    traces = [
        go.Bar(x=precinct_labels,
               y=list(juris_df['votes'][candidate]),
               marker=dict(color=bar_color),
               name=legend_label)
        for candidate, bar_color, legend_label in trace_specs
    ]

    mylayout = go.Layout(
        title='Votes by candidate for: {}'.format(juris_name),
        xaxis=dict(title='Precincts'),
        yaxis=dict(title='Number of Votes')
    )
    fig = go.Figure(data=traces, layout=mylayout)
    return fig

In [47]:
# draw the chart for one example jurisdiction
jurisdiction_name = 'FAIRFAX CITY'
juris_picker(juris_name=jurisdiction_name)