# Election Analysis

## Import dependencies and data files

In [2]:
#Import dependencies
import pandas as pd
import numpy as np

In [3]:
#import data files from CSV file in module and as a variable.
election_df = pd.read_csv('../Resources/election_data.csv')

In [4]:
#check dataframe was created correctly and all information was translated to jupyter notebook.
election_df

Unnamed: 0,Ballot ID,County,Candidate
0,1323913,Jefferson,Charles Casper Stockham
1,1005842,Jefferson,Charles Casper Stockham
2,1880345,Jefferson,Charles Casper Stockham
3,1600337,Jefferson,Charles Casper Stockham
4,1835994,Jefferson,Charles Casper Stockham
...,...,...,...
369706,4714953,Arapahoe,Raymon Anthony Doane
369707,4497542,Arapahoe,Raymon Anthony Doane
369708,4085849,Arapahoe,Raymon Anthony Doane
369709,4592018,Arapahoe,Raymon Anthony Doane


## Clean Data For Further Analysis

In [5]:
#analyze the records to calculate each of the required values.
#First, the total number of votes cast. 

election_df.sort_values('Ballot ID').count()

Ballot ID    369711
County       369711
Candidate    369711
dtype: int64

In [6]:
#Show a better visualization of the total amount of months as a pivot table. 
election_df.pivot_table('Ballot ID', 'Candidate')

Unnamed: 0_level_0,Ballot ID
Candidate,Unnamed: 1_level_1
Charles Casper Stockham,2461479.0
Diana DeGette,6055045.0
Raymon Anthony Doane,3372827.0


In [60]:
#Show a complete list of candidates who received votes.
election_df.sort_values('Candidate')

Unnamed: 0,Ballot ID,County,Candidate,% Percent Change
0,1323913,Jefferson,Charles Casper Stockham,
67245,2871495,Denver,Charles Casper Stockham,43.0
67244,2002887,Denver,Charles Casper Stockham,-24.0
67243,2650977,Denver,Charles Casper Stockham,9.0
67242,2426080,Denver,Charles Casper Stockham,3.0
...,...,...,...,...
339357,3253513,Denver,Raymon Anthony Doane,-17.0
339358,3572325,Denver,Raymon Anthony Doane,10.0
339359,3115551,Denver,Raymon Anthony Doane,-13.0
339350,3704663,Denver,Raymon Anthony Doane,15.0


In [17]:
#Percent change by candidate.

election_df['% Percent Change'] = election_df['Ballot ID'].pct_change()
election_df['% Percent Change'] = election_df['% Percent Change'].round(2)
election_df['% Percent Change'] = election_df['% Percent Change'] *100
election_df

Unnamed: 0,Ballot ID,County,Candidate,% Percent Change
0,1323913,Jefferson,Charles Casper Stockham,
1,1005842,Jefferson,Charles Casper Stockham,-24.0
2,1880345,Jefferson,Charles Casper Stockham,87.0
3,1600337,Jefferson,Charles Casper Stockham,-15.0
4,1835994,Jefferson,Charles Casper Stockham,15.0
...,...,...,...,...
369706,4714953,Arapahoe,Raymon Anthony Doane,2.0
369707,4497542,Arapahoe,Raymon Anthony Doane,-5.0
369708,4085849,Arapahoe,Raymon Anthony Doane,-9.0
369709,4592018,Arapahoe,Raymon Anthony Doane,12.0


In [31]:
# Groupby columns Candidate and Ballot ID to show total number of votes each candidate won.

election_df.groupby(['Candidate'])['Ballot ID'].sum()


Candidate
Charles Casper Stockham     209750048043
Diana DeGette              1652373440459
Raymon Anthony Doane         39145031540
Name: Ballot ID, dtype: int64

In [54]:
# Show the total votes per percentage of each candidate.
df2 = election_df.groupby(['Candidate', '% Percent Change']).agg({'% Percent Change': 'sum'})
print(df2)


                                          % Percent Change
Candidate               % Percent Change                  
Charles Casper Stockham -50.0                       -100.0
                        -49.0                       -637.0
                        -48.0                      -1440.0
                        -47.0                      -1974.0
                        -46.0                      -3036.0
...                                                    ...
Raymon Anthony Doane     85.0                        170.0
                         86.0                        172.0
                         87.0                         87.0
                         88.0                        176.0
                         93.0                         93.0

[434 rows x 1 columns]


In [55]:
# Percentage by lambda and election_df.apply() method. 

df3 = df2.groupby(level=0).apply(lambda x:100 * x / float(x.sum()))
print(df3)

                                          % Percent Change
Candidate               % Percent Change                  
Charles Casper Stockham -50.0                    -0.062135
                        -49.0                    -0.395800
                        -48.0                    -0.894743
                        -47.0                    -1.226544
                        -46.0                    -1.886417
...                                                    ...
Raymon Anthony Doane     85.0                     1.465391
                         86.0                     1.482631
                         87.0                     0.749935
                         88.0                     1.517111
                         93.0                     0.801655

[434 rows x 1 columns]
