In [26]:
import pandas as pd

# Extracting enrolment division info and turnout data from CSV

In [27]:
# header=1 tells pandas to take the column names from row 1 (zero-based)
# and to ignore whatever precedes that row in the file.
source_csv = 'Resources/HouseVotesCountedByDivisionDownload-20499.csv'
df = pd.read_csv(source_csv, header=1)

In [28]:
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,DivisionID,DivisionNm,StateAb,Enrolment,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes,TotalPercentage
0,179,Adelaide,SA,109217,79148,6869,686,3444,8515,98662,90.34
1,197,Aston,VIC,96043,72395,3756,292,3090,9578,89111,92.78
2,198,Ballarat,VIC,110755,87377,3439,783,3118,8451,103168,93.15
3,103,Banks,NSW,104891,81420,4109,469,3395,7095,96488,91.99
4,180,Barker,SA,105600,85870,3019,245,2877,6662,98673,93.44


# Transforming the dataframe to look at the turnout of the 2016 federal election

In [29]:
# List the column labels available in the loaded frame
df.columns

Index(['DivisionID', 'DivisionNm', 'StateAb', 'Enrolment', 'OrdinaryVotes',
       'AbsentVotes', 'ProvisionalVotes', 'PrePollVotes', 'PostalVotes',
       'TotalVotes', 'TotalPercentage'],
      dtype='object')

In [30]:
# Show the dtype pandas inferred for each column
df.dtypes

DivisionID            int64
DivisionNm           object
StateAb              object
Enrolment             int64
OrdinaryVotes         int64
AbsentVotes           int64
ProvisionalVotes      int64
PrePollVotes          int64
PostalVotes           int64
TotalVotes            int64
TotalPercentage     float64
dtype: object

In [31]:
# Confirm that no DivisionID value occurs more than once
not df['DivisionID'].duplicated().any()

True

In [32]:
# Row count of the loaded frame, taken before the dropna step below
len(df)

150

In [33]:
# Remove every row that has at least one missing value, then report how many rows remain
df = df.dropna(axis='index', how='any')
df.shape[0]

150

In [34]:
# Keep only the division identifier plus the enrolment, vote-count and percentage columns
columns = ['DivisionID', 'Enrolment', 'TotalVotes', 'TotalPercentage']
turnout_df = df.loc[:, columns]
turnout_df

Unnamed: 0,DivisionID,Enrolment,TotalVotes,TotalPercentage
0,179,109217,98662,90.34
1,197,96043,89111,92.78
2,198,110755,103168,93.15
3,103,104891,96488,91.99
4,180,105600,98673,93.44
...,...,...,...,...
145,153,108557,97942,90.22
146,150,112051,104069,92.88
147,178,102856,94617,91.99
148,234,113851,100915,88.64


In [35]:
# Rename the source column names to snake_case. An explicit old -> new mapping
# (rather than assigning a positional list to .columns) keeps each label tied
# to its source column even if the selection order changes, and returns a new
# frame instead of mutating the existing one in place. Re-running the cell is
# harmless: labels that are no longer present are simply left alone.
turnout_df = turnout_df.rename(
    columns={
        'DivisionID': 'division_id',
        'Enrolment': 'total_enrolled',
        'TotalVotes': 'total_votes',
        'TotalPercentage': 'turnout%',
    }
)
turnout_df.head()

Unnamed: 0,division_id,total_enrolled,total_votes,turnout%
0,179,109217,98662,90.34
1,197,96043,89111,92.78
2,198,110755,103168,93.15
3,103,104891,96488,91.99
4,180,105600,98673,93.44


In [36]:
# Use division_id as the index. The guard makes the cell safe to re-run:
# once division_id has been moved into the index it is no longer a column,
# so calling set_index a second time would raise KeyError.
if turnout_df.index.name != "division_id":
    turnout_df = turnout_df.set_index("division_id")
turnout_df.head()

Unnamed: 0_level_0,total_enrolled,total_votes,turnout%
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
179,109217,98662,90.34
197,96043,89111,92.78
198,110755,103168,93.15
103,104891,96488,91.99
180,105600,98673,93.44


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column
turnout_df.describe()

Unnamed: 0,total_enrolled,total_votes,turnout%
count,150.0,150.0,150.0
mean,104477.006667,95080.106667,90.943867
std,10054.470041,9944.933948,2.5808
min,63131.0,46525.0,73.7
25%,100363.25,90801.75,89.7225
50%,104874.0,95891.5,91.67
75%,109182.0,99188.75,92.625
max,143231.0,132310.0,94.19


# Export the turnout dataframe to CSV

In [38]:
# Write the turnout table to disk; with pandas' defaults the index is written
# as the first CSV column.
# NOTE(review): the folder name is spelled "ouput" — confirm it matches the
# directory on disk before correcting it.
turnout_csv_path = "04-ouput_election_turnout/election_turnout.csv"
turnout_df.to_csv(turnout_csv_path)