In [1]:
from pathlib import Path

import pandas as pd

# Extraction of count and type of votes

In [2]:
# header=1: column names are taken from the second line of the file;
# the line above it is skipped by the parser.
df = pd.read_csv(
    'Resources/HouseVotesCountedByDivisionDownload-20499.csv',
    header=1,
)

In [3]:
# Preview the first rows to confirm the file parsed with the expected columns.
df.head()

Unnamed: 0,DivisionID,DivisionNm,StateAb,Enrolment,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes,TotalPercentage
0,179,Adelaide,SA,109217,79148,6869,686,3444,8515,98662,90.34
1,197,Aston,VIC,96043,72395,3756,292,3090,9578,89111,92.78
2,198,Ballarat,VIC,110755,87377,3439,783,3118,8451,103168,93.15
3,103,Banks,NSW,104891,81420,4109,469,3395,7095,96488,91.99
4,180,Barker,SA,105600,85870,3019,245,2877,6662,98673,93.44


In [4]:
# List the column labels read from the CSV header row.
df.columns

Index(['DivisionID', 'DivisionNm', 'StateAb', 'Enrolment', 'OrdinaryVotes',
       'AbsentVotes', 'ProvisionalVotes', 'PrePollVotes', 'PostalVotes',
       'TotalVotes', 'TotalPercentage'],
      dtype='object')

In [5]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

DivisionID            int64
DivisionNm           object
StateAb              object
Enrolment             int64
OrdinaryVotes         int64
AbsentVotes           int64
ProvisionalVotes      int64
PrePollVotes          int64
PostalVotes           int64
TotalVotes            int64
TotalPercentage     float64
dtype: object

In [6]:
# Checking for duplicates: True means every DivisionID value occurs exactly once.
df['DivisionID'].is_unique

True

In [7]:
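# drop n/a (currently disabled)
# NOTE: never assign the result of dropna(..., inplace=True) -- it returns None and would overwrite df.
# df = df.dropna(how='any')
# df.head()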

# Transforming the dataframe to look at the turnout of the 2016 federal election

In [8]:
# Raw columns needed for the turnout analysis.
turnout_columns = [
    'DivisionID',
    'DivisionNm',
    'StateAb',
    'Enrolment',
    'TotalVotes',
    'TotalPercentage',
]
turnout_df = df[turnout_columns]
turnout_df

Unnamed: 0,DivisionID,DivisionNm,StateAb,Enrolment,TotalVotes,TotalPercentage
0,179,Adelaide,SA,109217,98662,90.34
1,197,Aston,VIC,96043,89111,92.78
2,198,Ballarat,VIC,110755,103168,93.15
3,103,Banks,NSW,104891,96488,91.99
4,180,Barker,SA,105600,98673,93.44
...,...,...,...,...,...,...
145,153,Werriwa,NSW,108557,97942,90.22
146,150,Whitlam,NSW,112051,104069,92.88
147,178,Wide Bay,QLD,102856,94617,91.99
148,234,Wills,VIC,113851,100915,88.64


In [9]:
# Renaming columns with an explicit old -> new mapping instead of assigning a
# positional list: the result no longer depends on column order, and the cell
# can be re-run safely (labels that are already renamed are simply left alone).
turnout_df = turnout_df.rename(
    columns={
        'DivisionID': 'division_id',
        'DivisionNm': 'electoral_division',
        'StateAb': 'state',
        'Enrolment': 'no_enrolled',
        'TotalVotes': 'total_votes',
        'TotalPercentage': 'turnout%',
    }
)
turnout_df.head()

Unnamed: 0,division_id,electoral_division,state,no_enrolled,total_votes,turnout%
0,179,Adelaide,SA,109217,98662,90.34
1,197,Aston,VIC,96043,89111,92.78
2,198,Ballarat,VIC,110755,103168,93.15
3,103,Banks,NSW,104891,96488,91.99
4,180,Barker,SA,105600,98673,93.44


In [10]:
# TODO: confirm that division_id should be the index.
# The guard keeps this cell re-runnable: once division_id has been moved into
# the index, calling set_index again would raise KeyError. If the column is
# missing for any other reason, set_index still raises as before.
if turnout_df.index.name != "division_id":
    turnout_df = turnout_df.set_index("division_id")
turnout_df.head()

Unnamed: 0_level_0,electoral_division,state,no_enrolled,total_votes,turnout%
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
179,Adelaide,SA,109217,98662,90.34
197,Aston,VIC,96043,89111,92.78
198,Ballarat,VIC,110755,103168,93.15
103,Banks,NSW,104891,96488,91.99
180,Barker,SA,105600,98673,93.44


In [11]:
# TODO: decide whether to order the states the same way as the postal survey
# divisions (NSW, VIC, QLD, SA, WA, TAS, NT, ACT). unique() returns the values
# in order of first appearance.
turnout_df['state'].unique()

array(['SA', 'VIC', 'NSW', 'TAS', 'QLD', 'WA', 'ACT', 'NT'], dtype=object)

In [13]:
# export this, to a folder named same as file '02-...' to follow same format as Ryan's repos
# Create the output folder first: to_csv raises OSError when the target
# directory does not exist (e.g. on a fresh checkout).
turnout_csv = Path("02-output_2016_fedelection_turnout_votetype/fedelect_turnout.csv")
turnout_csv.parent.mkdir(parents=True, exist_ok=True)
turnout_df.to_csv(turnout_csv)

# Transforming the dataframe to look at the vote types of the 2016 federal election

In [14]:
# Raw columns needed for the vote-type breakdown.
votetype_columns = [
    'DivisionID',
    'DivisionNm',
    'StateAb',
    'OrdinaryVotes',
    'AbsentVotes',
    'ProvisionalVotes',
    'PrePollVotes',
    'PostalVotes',
    'TotalVotes',
]
votetypes_df = df[votetype_columns]
votetypes_df

Unnamed: 0,DivisionID,DivisionNm,StateAb,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes
0,179,Adelaide,SA,79148,6869,686,3444,8515,98662
1,197,Aston,VIC,72395,3756,292,3090,9578,89111
2,198,Ballarat,VIC,87377,3439,783,3118,8451,103168
3,103,Banks,NSW,81420,4109,469,3395,7095,96488
4,180,Barker,SA,85870,3019,245,2877,6662,98673
...,...,...,...,...,...,...,...,...,...
145,153,Werriwa,NSW,86095,4673,508,1946,4720,97942
146,150,Whitlam,NSW,93549,3597,358,1991,4574,104069
147,178,Wide Bay,QLD,79037,3177,131,3560,8712,94617
148,234,Wills,VIC,79320,5864,374,5684,9673,100915


In [15]:
# Renaming columns with an explicit old -> new mapping instead of assigning a
# positional list: the result no longer depends on column order, and the cell
# can be re-run safely (labels that are already renamed are simply left alone).
votetypes_df = votetypes_df.rename(
    columns={
        'DivisionID': 'division_id',
        'DivisionNm': 'electoral_division',
        'StateAb': 'state',
        'OrdinaryVotes': 'ordinary_votes',
        'AbsentVotes': 'absent_votes',
        'ProvisionalVotes': 'provisional_votes',
        'PrePollVotes': 'prepoll_votes',
        'PostalVotes': 'postal_votes',
        'TotalVotes': 'total_votes',
    }
)
votetypes_df.head()

Unnamed: 0,division_id,electoral_division,state,ordinary_votes,absent_votes,provisional_votes,prepoll_votes,postal_votes,total_votes
0,179,Adelaide,SA,79148,6869,686,3444,8515,98662
1,197,Aston,VIC,72395,3756,292,3090,9578,89111
2,198,Ballarat,VIC,87377,3439,783,3118,8451,103168
3,103,Banks,NSW,81420,4109,469,3395,7095,96488
4,180,Barker,SA,85870,3019,245,2877,6662,98673


In [16]:
# TODO: confirm that division_id should be the index.
# The guard keeps this cell re-runnable: once division_id has been moved into
# the index, calling set_index again would raise KeyError. If the column is
# missing for any other reason, set_index still raises as before.
if votetypes_df.index.name != "division_id":
    votetypes_df = votetypes_df.set_index("division_id")
votetypes_df.head()

Unnamed: 0_level_0,electoral_division,state,ordinary_votes,absent_votes,provisional_votes,prepoll_votes,postal_votes,total_votes
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
179,Adelaide,SA,79148,6869,686,3444,8515,98662
197,Aston,VIC,72395,3756,292,3090,9578,89111
198,Ballarat,VIC,87377,3439,783,3118,8451,103168
103,Banks,NSW,81420,4109,469,3395,7095,96488
180,Barker,SA,85870,3019,245,2877,6662,98673


In [18]:
# export this, to a folder named same as file '02-...' to follow same format as Ryan's repos
# Create the output folder first: to_csv raises OSError when the target
# directory does not exist (e.g. on a fresh checkout).
votetypes_csv = Path("02-output_2016_fedelection_turnout_votetype/fedelect_votetypes.csv")
votetypes_csv.parent.mkdir(parents=True, exist_ok=True)
votetypes_df.to_csv(votetypes_csv)