# PresidentialPoll - Presidential Election Analysis from 1976-2016   

## Read in all of our data and plot Popular Vote Map

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from matplotlib.widgets import Slider, Button, RadioButtons
import matplotlib.gridspec as gridspec
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import plotly.offline as offline

############################################
%matplotlib widget

from matplotlib.widgets import Slider, Button, RadioButtons
import plotly as plotly
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

import heapq
from collections import OrderedDict

init_notebook_mode(connected=True)

# class for storing election info for states in each year
class election_info:
    total_votes = 0
    
    # winner info
    winner_name = ''
    winner_votes = 0
    winner_party = ''
    
    # runnerup/loser info
    runnerup_name = ''
    runnerup_votes = 0
    runnerup_party = ''
    
    # state info
    state_density = 0
    state_population = 0
    state_area = 0
    
    def __init__(self, winner_name, winner_votes, winner_party, total_votes, runnerup_name, runnerup_votes, runnerup_party, state_density, state_population, state_area):
        self.winner_name = winner_name
        self.winner_votes = winner_votes
        self.winner_party = winner_party
        self.total_votes = total_votes
        self.runnerup_name = runnerup_name
        self.runnerup_votes = runnerup_votes
        self.runnerup_party = runnerup_party
        self.state_density = state_density
        self.state_population = state_population
        self.state_area = state_area


# read in election data
my_file = pd.read_csv('data/1976-2016-president.csv', encoding = 'unicode_escape')

# read in states and state abbreviations 
states_df = pd.read_csv('data/states.csv')
states_df = states_df.sort_values(by=['State'])

# set election year, make states lists
election_years = [1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016]
states = list(states_df['State'])
state_codes = list(states_df['Abbreviation'])

# read in population, density, and area data
# note: area is land area
areas_df = pd.read_csv('data/areas.csv', encoding = 'unicode_escape')
densities_df = pd.read_csv('data/densities.csv', encoding = 'unicode_escape')
populations_df = pd.read_csv('data/populations.csv', encoding = 'unicode_escape')

# dict for storing years as key and then dict of states as vals
slides = {}
slides = dict((year,None) for year in election_years)

# since our population and density data is only every 10 years, function to return right time frame
def get_year(x):
    if(x < 1976):
        return 1970
    elif(x < 1986):
        return 1980
    elif(x < 1996):
        return 1990
    elif(x < 2006):
        return 2000
    else:
        return 2010

# Read in data

# go through each election year 
for year in election_years:
    
    # create state dict for that year (key=states, val=election_info class)
    slides[year] = OrderedDict((s,None) for s in states)
    
    # get all data for that year from dfs
    year_info = my_file.loc[my_file['year'] == year]
    for state in states:
        state_info = year_info.loc[year_info['state'] == state]
        
        state_winner = state_info[state_info['candidatevotes']==state_info['candidatevotes'].max()]
        state_runnerup = state_info[state_info['candidatevotes']==heapq.nlargest(2, state_info['candidatevotes'])[1]]
        
        total_votes = int(state_winner['totalvotes'].values[0])
        
        winner_name = str(state_winner['candidate'].values[0])
        winner_votes = int(state_winner['candidatevotes'].values[0])
        winner_party = str(state_winner['party'].values[0])
        if(winner_party == 'democratic-farmer-labor'): winner_party ='democrat'
        
        runnerup_name = str(state_runnerup['candidate'].values[0])
        runnerup_votes = int(state_runnerup['candidatevotes'].values[0])
        runnerup_party = str(state_runnerup['party'].values[0])
        if(runnerup_party == 'democratic-farmer-labor'): runnerup_party ='democrat'
        
        
        state_area = int(areas_df.loc[areas_df['State'] == state].values[0][1].replace(',', ''))
        state_population = populations_df.loc[populations_df['State'] == state][str(get_year(int(year)))].values[0]
        state_density = densities_df.loc[densities_df['State'] == state][str(get_year(int(year)))].values[0]
        
        
        slides[year][state] = election_info(winner_name, winner_votes, winner_party, total_votes, runnerup_name, runnerup_votes, runnerup_party, state_density, state_population, state_area)   

#####################################################################
        
# get total population, total densities, total area for each election year - may or may not be used later
state_population_totals = dict((year,None) for year in election_years)
state_density_totals = dict((year,None) for year in election_years)
state_area_totals = dict((year,None) for year in election_years)
for year, states in slides.items(): 
    pop_tot = 0
    area_tot = 0
    density_tot = 0
    for state, info in states.items():
        pop_tot+=info.state_population
        density_tot+=info.state_density
        area_tot+=info.state_area

    state_population_totals[year]=pop_tot
    state_density_totals[year]=density_tot
    state_area_totals[year]=area_tot


#####################################################################

# Plot the US election popular vote slider from 1976-2016
data_slider = []

scl_cus = [[0.0, '#0015BC'],[1.0, '#DE0100']]

for key, val in slides.items():  
      
   
    data_one_year = dict(
                        type='choropleth',
                        locations = state_codes,
                        z = [0 if x.winner_party == 'democrat' else 1 for x in val.values()],
                        locationmode='USA-states',
 
                        colorscale = scl_cus,
                        text =  [ str(str(y) + '<br>' + str(x.winner_name)+ '<br>' + str(x.winner_party).title() + '<br>' + 'Votes: ' + str("{:,}".format(x.winner_votes)) + " / " + str("{:,}".format(x.total_votes)) + '<br>Population: ' + str("{:,}".format(x.state_population)) + '<br>Density: ' + str("{:,} ppsm".format(x.state_density)) + '<br>Land Area: ' + str("{:,} mi²".format(x.state_area)) + '<br>')  for y,x in val.items()],
                        
                        showlegend = False,
                        showscale = False,

                        hovertemplate='%{text}<extra></extra>'
                        )
    data_slider.append(data_one_year)



steps = []

for i in range(len(data_slider)):
    step = dict(method='restyle',
                args=['visible', [False] * len(data_slider)],
                label='Year {}'.format(i*4 + 1976)) # label to be displayed for each step (year)
    step['args'][1][i] = True
    steps.append(step)


sliders = [dict(active=10, pad={"t": 1}, steps=steps)]  



layout = dict(title = 'US Popular Vote Election Map 1976-2016',title_x=0.5, geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=sliders)
fig = dict(data=data_slider, layout=layout) 


# Make plots directory if doesn't exist
import os
if not os.path.exists("plots"):
    os.mkdir("plots")
if not os.path.exists("plots/html_plots"):
    os.mkdir("plots/html_plots")
    
plotly.offline.iplot(fig)
offline.plot(fig, auto_open=True, filename='plots/html_plots/US Popular Vote Election Map 1976-2016.html', validate=True)


'plots/html_plots/US Popular Vote Election Map 1976-2016.html'

## Plot Vote Difference Between Winner and Runnerup and Vote Distribution

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import ipywidgets as widgets
from IPython.display import display
import seaborn as sns
import matplotlib.pyplot as plt

# winners for each election year
winners = {
    1976:"democrat",
    1980:"republican",
    1984:"republican",
    1988:"republican",
    1992:"democrat",
    1996:"democrat",
    2000:"republican",
    2004:"republican",
    2008:"democrat",
    2012:"democrat",
    2016:"republican" 
}

# Count up total votes for election winner(via checking dem/rep), runnerup total, other votes total, and total votes
winner_totals = {}
runnerup_totals = {}
other_totals = {}
total_votes = {}
for YEAR, STATES in slides.items():
    winner_total = 0
    runnerup_total = 0
    other_total = 0
    tvc = 0
    for state, election_info in STATES.items():
        if(winners[YEAR] == "democrat" and election_info.winner_party == "democrat"):
            winner_total += election_info.winner_votes
            runnerup_total +=election_info.runnerup_votes
        elif(winners[YEAR] == "republican" and election_info.winner_party == "republican"):
            winner_total += election_info.winner_votes
            runnerup_total +=election_info.runnerup_votes
        else:
            winner_total +=election_info.runnerup_votes
            runnerup_total += election_info.winner_votes
            
        other_total += election_info.total_votes - election_info.winner_votes - election_info.runnerup_votes
        tvc+=election_info.total_votes
        
    winner_totals[YEAR]=winner_total
    runnerup_totals[YEAR] = runnerup_total
    other_totals[YEAR] = other_total
    total_votes[YEAR] = tvc
    

# plot 1 - Plot vote difference between winner and runnerup for each election year
fig = plt.figure(1)

axdf = pd.DataFrame()
axdf['diff'] = [winner_totals[i] - runnerup_totals[i] for i in election_years]
ax = sns.lineplot(data=axdf,color='b')
ax.set_xticks(range(len(axdf)))
ax.set_xticklabels(election_years)
legend = ax.legend()
legend.texts[0].set_text("Difference")
ax.set(xlabel='Year', ylabel='Difference')
ax.axhline(0, ls='--', color='r')
plt.title("Vote Difference Between Winner and Runnerup")


import matplotlib.ticker as ticker
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '%1.1fM' % (x * 1e-6)))

plt.tight_layout()
plt.plot(1)
plt.savefig('plots/Vote Difference Between Winner and Runnerup From 1976-2016.png')
plt.show()



# plot 2 - Plot percentage distribution of votes for each election year
fig = plt.figure(2)

# Create subplots
ax1 = fig.add_subplot(341)
ax2 = fig.add_subplot(342)
ax3 = fig.add_subplot(343)
ax4 = fig.add_subplot(344)
ax5 = fig.add_subplot(345)
ax6 = fig.add_subplot(346)
ax7 = fig.add_subplot(347)
ax8 = fig.add_subplot(348)
ax9 = fig.add_subplot(349)
ax10 = fig.add_subplot(3,4,10)
ax11 = fig.add_subplot(3,4,11)
ax12 = fig.add_subplot(3,4,12)


labels = 'Winner', 'Runnerup', 'Other'
colors = 'tab:blue', 'tab:orange', 'tab:green'


sizes = [winner_totals[election_years[0]], runnerup_totals[election_years[0]], other_totals[election_years[0]]]
ax1.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax1.set_title(election_years[0])


sizes = [winner_totals[election_years[1]], runnerup_totals[election_years[1]], other_totals[election_years[1]]]
ax2.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax2.set_title(election_years[1])

sizes = [winner_totals[election_years[2]], runnerup_totals[election_years[2]], other_totals[election_years[2]]]
ax3.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax3.set_title(election_years[2])

# set ax4 as legend, no a plot
handles, labels = ax1.get_legend_handles_labels()
ax4.axis('off')
ax4.legend(handles, labels,
          title="Votes",
         loc="lower right",
          bbox_to_anchor=(.5, 0, 0.5, .5))

sizes = [winner_totals[election_years[3]], runnerup_totals[election_years[3]], other_totals[election_years[3]]]
ax5.pie(sizes, labels=labels, autopct='%1.1f%%',
         shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax5.set_title(election_years[3])

sizes = [winner_totals[election_years[4]], runnerup_totals[election_years[4]], other_totals[election_years[4]]]
ax6.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax6.set_title(election_years[4])

sizes = [winner_totals[election_years[5]], runnerup_totals[election_years[5]], other_totals[election_years[5]]]
ax7.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax7.set_title(election_years[5])

sizes = [winner_totals[election_years[6]], runnerup_totals[election_years[6]], other_totals[election_years[6]]]
ax8.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax8.set_title(election_years[6])

sizes = [winner_totals[election_years[7]], runnerup_totals[election_years[7]], other_totals[election_years[7]]]
ax9.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax9.set_title(election_years[7])

sizes = [winner_totals[election_years[8]], runnerup_totals[election_years[8]], other_totals[election_years[8]]]
ax10.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax10.set_title(election_years[8])

sizes = [winner_totals[election_years[9]], runnerup_totals[election_years[9]], other_totals[election_years[9]]]
ax11.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax11.set_title(election_years[9])

sizes = [winner_totals[election_years[10]], runnerup_totals[election_years[10]], other_totals[election_years[10]]]
ax12.pie(sizes, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90, textprops={'fontsize': 6}, colors=colors)
ax12.set_title(election_years[10])



fig.suptitle('Vote Distribution From 1976-2016', fontsize=16)

plt.tight_layout()
plt.plot(2)
plt.savefig('plots/Vote Distribution From 1976-2016.png')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Plot Total Votes, Winner Votes, and Runnerup Votes Over The Years

In [3]:

# plot 3 - Total Votes over the years
fig = plt.figure(3)

axdf = pd.DataFrame()
axdf['Year'] = total_votes.keys()
axdf['Total Votes'] = total_votes.values()
ax = sns.barplot(data=axdf, x='Year', y='Total Votes', color='#f77f00')
plt.title("Total Votes From 1976-2016")
import matplotlib.ticker as ticker
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '%1.1fM' % (x * 1e-6)))
plt.plot(3)
plt.savefig('plots/Total Votes From 1976-2016.png')
plt.show()

# plot 4 - Total Winner votes over the years
fig = plt.figure(4)

axdf = pd.DataFrame()
axdf['Year'] = winner_totals.keys()
axdf['Winner Votes'] = winner_totals.values()
ax = sns.barplot(data=axdf, x='Year', y='Winner Votes', color='#f77f00')
plt.title("Winner's Votes From 1976-2016")
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '%1.1fM' % (x * 1e-6)))
plt.plot(4)
plt.savefig('plots/Winner\'s Votes From 1976-2016.png')
plt.show()

# plot 5 - total runnerup votes over the years
fig = plt.figure(5)

axdf = pd.DataFrame()
axdf['Year'] = runnerup_totals.keys()
axdf['Runnerup Votes'] = runnerup_totals.values()
ax = sns.barplot(data=axdf, x='Year', y='Runnerup Votes', color='#f77f00')
plt.title("Runnerup's Votes From 1976-2016")
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '%1.1fM' % (x * 1e-6)))
plt.plot(5)
plt.savefig('plots/Runnerup\'s Votes From 1976-2016.png')
plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Plot State Leanings Based on Total Votes

In [4]:
# sum total states votes for each party and classify via majority vote 
from collections import OrderedDict 
state_leans = OrderedDict((state,[0,0]) for state in states)

# state_leans[state][0] is dem [1] is rep
for YEAR, STATES in slides.items():
    for state, election_info in STATES.items():
        if(election_info.winner_party == "democrat"):
            state_leans[state][0] += election_info.winner_votes
            state_leans[state][1] += election_info.runnerup_votes
        else:
            state_leans[state][0] += election_info.runnerup_votes
            state_leans[state][1] += election_info.winner_votes
            
            
# plot US map with state leanings from 1976 - 2016 (which party had more overall total votes)
data = dict(
                        type='choropleth',
                        locations = state_codes,

                        z = [0 if x[0] > x[1] else 1 for x in state_leans.values()],
                        locationmode='USA-states',

 
                        colorscale = scl_cus,
                        text =  [ str(str(y) + '<br>' + 'Democratic Lean <br>Dem: ' + str("{:,}".format(x[0]))+ ' votes' + '<br>' + "Rep: " + str("{:,}".format(x[1])) + " votes") if x[0] > x[1] else str( str(y) + '<br>' 'Republican Lean <br>Rep: ' + str("{:,}".format(x[1]))+ ' votes' + '<br>' + "Dem: " + str("{:,}".format(x[0])) + " votes")  for y,x in state_leans.items()],
                        
                        showlegend = False,
                        showscale = False,
    
                        hovertemplate='%{text}<extra></extra>'
                        
                        )
    


layout = dict(
        title = 'State Leanings(Comparing Total Votes) 1976-2016', title_x =.5,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ), 
            ),
             )
fig = dict( data=data, layout=layout )
plotly.offline.iplot(fig)
offline.plot(fig, auto_open=True, filename='plots/html_plots/State Leanings(Comparing Total Votes) 1976-2016.html', validate=True)



'plots/html_plots/State Leanings(Comparing Total Votes) 1976-2016.html'

## Plot State Leanings Based on Comparing Elections Won

In [5]:
# sum states vots over time for each party and classify them as rep or dem
from collections import OrderedDict 

# state_leans_elections[state][0] is dem [1] is rep
state_leans_elections = OrderedDict((state,[0,0]) for state in states)

for YEAR, STATES in slides.items():
    for state, election_info in STATES.items():
        if(election_info.winner_party == "democrat"):
            state_leans_elections[state][0] += 1
        else:
            state_leans_elections[state][1] += 1

# plot US map with state leanings from 1976 - 2016 (which party won more elections for that state)
data = dict(
                        type='choropleth',
                        locations = state_codes,

                        z = [0 if x[0] > x[1] else 1 for x in state_leans_elections.values()],
                        locationmode='USA-states',

 
                        colorscale = scl_cus,
                        text =  [ str(str(y) + '<br>' + 'Democratic Lean <br>Dem: ' + str("{:,}".format(x[0]))+ ' elections' + '<br>' + "Rep: " + str("{:,}".format(x[1])) + " elections") if x[0] > x[1] else str( str(y) + '<br>' 'Republican Lean <br>Rep: ' + str("{:,}".format(x[1]))+ ' elections' + '<br>' + "Dem: " + str("{:,}".format(x[0])) + " elections")  for y,x in state_leans_elections.items()],
                        
                        showlegend = False,
                        showscale = False,
                        hovertemplate='%{text}<extra></extra>'
                        )
    


layout = dict(
        title = 'State Leanings(Comparing Number of Elections Won) 1976-2016', title_x =.5,
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ), 
            ),
             )
fig = dict( data=data, layout=layout )
plotly.offline.iplot(fig)
offline.plot(fig, auto_open=True, filename='plots/html_plots/State Leanings(Comparing Number of Elections Won) 1976-2016.html', validate=True)



'plots/html_plots/State Leanings(Comparing Number of Elections Won) 1976-2016.html'

## Plot Voting Metrics

In [6]:
import plotly as px
import plotly.graph_objects as go

fig = go.Figure(data=[
    go.Bar(name='Estimated Population', x=election_years, y=[state_population_totals[yr] for yr in election_years]),
    go.Bar(name='Voter Turnout', x=election_years, y=[total_votes[yr] for yr in election_years]),
    go.Bar(name='Winner Votes', x=election_years, y=[winner_totals[yr] for yr in election_years]),
    go.Bar(name='Runnerup Votes', x=election_years, y=[runnerup_totals[yr] for yr in election_years]),
    go.Bar(name='Other Votes', x=election_years, y=[other_totals[yr] for yr in election_years])
    
])
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title="Voting Metrics From 1976-2016",title_x=0.5,xaxis_title="Year",
yaxis_title="Votes")

fig.update_xaxes( tickvals = list(range(1976,2017,4)) ) 

    
fig.show()

## Plot Total Elections Won By Party for Each State

In [7]:
#state_leans

import plotly as px
import plotly.graph_objects as go


#List meant to hold "Democrat" or "Republican" labels for each state
#elections won each year, used for a linear regression
#Y = []
#Y2 = []


# 0 is dem 1 is rep
state_lean_by_winners = OrderedDict((state,[0,0]) for state in states)

for YEAR, STATES in slides.items():
    for state, election_info in STATES.items():
        if(election_info.winner_party == "democrat"):
            state_lean_by_winners[state][0] += 1
            #appends 0 if democrat
            #Y.append(0)
        else:
            state_lean_by_winners[state][1] += 1
            #appends 1 if republican
            #Y.append(1)


fig = go.Figure(data=[
    go.Bar(name='Democratic Elections Won', x=state_codes, y=[x[0] for x in state_lean_by_winners.values()]),
    go.Bar(name='Republican Elections Won', x=state_codes, y=[x[1] for x in state_lean_by_winners.values()]),
    
    
])
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title="Total Elections Won by Party vs State 1976-2016",title_x=0.5)
fig['layout']['yaxis1'].update( range=[0, 12], dtick=1, autorange=False)
fig['layout']['xaxis1'].update(tickmode='linear')
fig.update_layout(
    margin=dict(l=30, r=30, t=30, b=30),
    xaxis_title="State",
    yaxis_title="Number of Elections Won"

)
fig.show()
# download these plots from jupyter since plotly write_image messes up plot 


## Plot Total Votes by Party for Each State

In [8]:
import plotly as px
import plotly.graph_objects as go


fig = go.Figure(data=[
    go.Bar(name='Total Democrat Votes', x=state_codes, y=[x[0] for x in state_leans.values()]),
    go.Bar(name='Total Republican Votes', x=state_codes, y=[x[1] for x in state_leans.values()]),
    
])
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', title="Total Votes by Party vs State 1976-2016",title_x=0.5)

fig['layout']['xaxis1'].update(tickmode='linear')
fig.update_layout(
    margin=dict(l=30, r=30, t=30, b=30),
    xaxis_title="State",
    yaxis_title="Total Votes"
)
fig.show()


## Plot Population vs Election Results

In [9]:
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

# Create figure
fig = go.Figure()

# Add traces, one for each slider step
for year in election_years:
    state_decisions = []
    state_pops = []

    for state, election_info in slides[year].items():
        state_decisions.append(election_info.winner_party.title())
        state_pops.append(election_info.state_population)

    
    fig.add_trace(
        go.Scatter(visible=False, x=state_codes, y=state_pops,mode='markers',marker_color=['#0015BC' if x == 'Democrat' else '#DE0100' for x in state_decisions], customdata=state_decisions))

fig.data[0].visible = True
fig.update_traces(hovertemplate='%{x}<br>Pop: %{y}<br>%{customdata}<extra></extra>')

# Create and add slider
steps = []

for i,yr in zip(range(len(fig.data)), election_years):
    step = dict(
        method="update",
        args=[{"visible": [False] * len(fig.data)},
              {"title": "Population vs Election Results - " + str(yr)},{'title_x':0.5}],
        label='Year {}'.format(i*4 + 1976),# layout attribute
       
    )
    step["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    steps.append(step)

sliders = [dict(
    active=0,
    pad={"t": 50},

    steps=steps
)]

fig.update_layout(
     yaxis=dict(range=[0,40000000]),
    title= "Population vs Election Results - " + str(1976), title_x=.5,
    sliders=sliders,
    xaxis_title="State",
    yaxis_title="Population"
)

fig.show()
offline.plot(fig, auto_open=True, filename='plots/html_plots/Population vs Election Results.html', validate=True)


'plots/html_plots/Population vs Election Results.html'

## Plot Density vs Election Results

In [10]:
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

# Create figure
fig = go.Figure()

# Add traces, one for each slider step
for year in election_years:
    state_decisions = []
    state_dens = []

    for state, election_info in slides[year].items():
        state_decisions.append(election_info.winner_party.title())
        state_dens.append(election_info.state_density)
   

    
    fig.add_trace(
        go.Scatter(visible=False, x=state_codes, y=state_dens,mode='markers',marker_color=['#0015BC' if x == 'Democrat' else '#DE0100' for x in state_decisions], customdata=state_decisions))

fig.data[0].visible = True
fig.update_traces(hovertemplate='%{x}<br>Density: %{y}<br>%{customdata}<extra></extra>')


# Create and add slider
steps = []
for i,yr in zip(range(len(fig.data)), election_years):
    step = dict(
        method="update",
        args=[{"visible": [False] * len(fig.data)},
              {"title": "Density vs Election Results - " + str(yr)},{'title_x':0.5}],
        label='Year {}'.format(i*4 + 1976),# layout attribute,
       
    )
    step["args"][0]["visible"][i] = True  # Toggle i'th trace to "visible"
    steps.append(step)

sliders = [dict(
    active=0,
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
     yaxis=dict(range=[0,1500]),
    title= "Density vs Election Results - " + str(1976), title_x=.5,
    sliders=sliders,
    xaxis_title="State",
    yaxis_title="Density (ppsm)"
)

fig.show()
offline.plot(fig, auto_open=True, filename='plots/html_plots/Density vs Election Results.html', validate=True)

'plots/html_plots/Density vs Election Results.html'

## Plot Area vs Overall State Lean (via Total Votes)

In [11]:
import plotly.express as px
axdf = pd.DataFrame()
axdf['party'] = ['Democrat' if x[0] > x[1] else 'Republican' for x in state_leans.values()]
axdf['states'] = state_codes
axdf['text'] = states
axdf['area'] = [int(areas_df.loc[areas_df['State'] == state].values[0][1].replace(',', '')) for state in states]

fig = px.strip(axdf, x='states', y='area', color='party', color_discrete_map={
                                'Democrat':'#0015BC',
                                'Republican':'#DE0100'},category_orders = {'states':state_codes}, title='Land Area vs Overeall State Lean(Total Votes)')
fig.update_layout(
    xaxis_type = 'category',
    xaxis_title="State",
    yaxis_title="Land Area (mi²)",
    title_x=.5
)

fig.show()


In [12]:

import pandas as pd
import random
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib import pyplot

# This is where we put density, area, and population into a df
pdf = pd.read_csv("plots/State_Populations.csv")
sdf = pd.read_csv("plots/State_Densities.csv")
adf = pd.read_csv("plots/Areas.csv")

dens1970 = []
dens1980 = []
dens1990 = []
dens2000 = []
dens2010 = []

statePop1970 = []
statePop1980 = []
statePop1990 = []
statePop2000 = []
statePop2010 = []

area = []
stateName = []

for i in range(0, len(pdf)):
    statePop1970.append(pdf.iloc[i, 0])
    statePop1980.append(pdf.iloc[i, 1])
    statePop1990.append(pdf.iloc[i, 2])
    statePop2000.append(pdf.iloc[i, 3])
    statePop2010.append(pdf.iloc[i, 4])

    dens1970.append(sdf.iloc[i, 0])
    dens1980.append(sdf.iloc[i, 1])
    dens1990.append(sdf.iloc[i, 2])
    dens2000.append(sdf.iloc[i, 3])
    dens2010.append(sdf.iloc[i, 4])

    area.append(adf.iloc[i, 0])

    stateName.append(adf.iloc[i, 1])

s1970 = {
    'statePop': statePop1970,
    'stateDens': dens1970,
    'area': area,
    'stateName': stateName
}

s1980 = {
    'statePop': statePop1980,
    'stateDens': dens1980,
    'area': area,
    'stateName': stateName
}
s1990 = {
    'statePop': statePop1990,
    'stateDens': dens1990,
    'area': area,
    'stateName': stateName
}
s2000 = {
    'statePop': statePop2000,
    'stateDens': dens2000,
    'area': area,
    'stateName': stateName
}
s2010 = {
    'statePop': statePop2010,
    'stateDens': dens2010,
    'area': area,
    'stateName': stateName
}

df1970 = pd.DataFrame(s1970, columns=['statePop', 'stateDens', 'area', 'stateName'])
df1980 = pd.DataFrame(s1980, columns=['statePop', 'stateDens', 'area', 'stateName'])
df1990 = pd.DataFrame(s1990, columns=['statePop', 'stateDens', 'area', 'stateName'])
df2000 = pd.DataFrame(s2000, columns=['statePop', 'stateDens', 'area', 'stateName'])
df2010 = pd.DataFrame(s2010, columns=['statePop', 'stateDens', 'area', 'stateName'])

# Change the following code to include every election using the corresponding winner values
#1 = republican state
#0 = democrat state

Y = []
#Y = [1,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,1,1,1,0,0,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,1,0,1]
Y2 = []

#print("length of Y = {p1}".format(p1 = len(Y)))

#print("len of Y: {p1}".format(p1 = len(Y)))
Y2 = Y
#print("len of Y2: {p1}".format(p1 = len(Y2)))

for year in election_years:
    print("YEAR: {p1}".format(p1 = year))

    #resets Y and Y2
    Y = []
    Y2 = []

    for state, election_info in slides[year].items():
        #print("\tstate: {p1}, winner_party: {p2}".format(p1 = state,p2 = election_info.winner_party.title()))

        if election_info.winner_party.title() == "Republican":
            #appends Republican as a y value
            Y.append(50)
        else:
            #appends Democrat as a y value
            Y.append(0)
            
        Y2 = Y
    # 0 is dem 1 is rep

    X_train = df1970.iloc[:, 0:3]
    #X_train = df2010.iloc[:, 0:3]

    #print("X_train length: {p1}".format(p1 = len(X_train)))

    if year < 1980:
        X_train = df1970.iloc[:, 0:3]
    elif year < 1990:
        X_train = df1980.iloc[:, 0:3]
    elif year < 2000:
        X_train = df1990.iloc[:, 0:3]
    elif year < 2010:
        X_train = df2000.iloc[:, 0:3]
    else:
        X_train = df2010.iloc[:, 0:3]

    #linear regression fit
    regr = linear_model.LinearRegression()
    regr.fit(X_train, Y)


    importance = regr.coef_
    for i,v in enumerate(importance):
	    print('Feature: %0d, Score: %.5f' % (i,v))

    # plot feature importance
    pyplot.bar([x for x in range(len(importance))], importance)
    pyplot.show()

    X_test = df1980.iloc[:, 0:3]
    y_pred = regr.predict(X_test)


    print('Coefficients: \n', regr.coef_)
    # The mean squared error
    print('Mean squared error: %.2f'
        % mean_squared_error(Y2, y_pred))
    # The coefficient of determination: 1 is perfect prediction
    print('Coefficient of determination: %.2f'
        % r2_score(Y2, y_pred))
    
    #whitespace buffer print
    print()

YEAR: 1976
Feature: 0, Score: -0.00000
Feature: 1, Score: -0.00358
Feature: 2, Score: 0.00007
Coefficients: 
 [-1.21184727e-06 -3.57633773e-03  6.79342545e-05]
Mean squared error: 542.32
Coefficient of determination: 0.13

YEAR: 1980
Feature: 0, Score: -0.00000
Feature: 1, Score: 0.00067
Feature: 2, Score: 0.00005
Coefficients: 
 [-4.91703447e-08  6.69362733e-04  4.64781603e-05]
Mean squared error: 315.63
Coefficient of determination: 0.05

YEAR: 1984
Feature: 0, Score: 0.00000
Feature: 1, Score: 0.00033
Feature: 2, Score: 0.00001
Coefficients: 
 [1.86861294e-07 3.34377262e-04 8.03679135e-06]
Mean squared error: 92.89
Coefficient of determination: 0.01

YEAR: 1988
Feature: 0, Score: 0.00000
Feature: 1, Score: 0.00107
Feature: 2, Score: 0.00005
Coefficients: 
 [8.95289106e-08 1.06818310e-03 4.63605022e-05]
Mean squared error: 407.32
Coefficient of determination: 0.04

YEAR: 1992
Feature: 0, Score: -0.00000
Feature: 1, Score: -0.00204
Feature: 2, Score: 0.00009
Coefficients: 
 [-6.182642