In [4]:
from pathlib import Path

import pandas as pd

# Extracting enrolment division info and vote types data from CSV

In [5]:
# Load the House votes-counted-by-division CSV. header=1 takes the column names
# from the second line of the file; the line above it is not parsed as data.
df = pd.read_csv(
    'Resources/HouseVotesCountedByDivisionDownload-20499.csv',
    header=1,
)

In [6]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,DivisionID,DivisionNm,StateAb,Enrolment,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes,TotalPercentage
0,179,Adelaide,SA,109217,79148,6869,686,3444,8515,98662,90.34
1,197,Aston,VIC,96043,72395,3756,292,3090,9578,89111,92.78
2,198,Ballarat,VIC,110755,87377,3439,783,3118,8451,103168,93.15
3,103,Banks,NSW,104891,81420,4109,469,3395,7095,96488,91.99
4,180,Barker,SA,105600,85870,3019,245,2877,6662,98673,93.44


# Transforming the dataframe to look at the vote types of the 2016 federal election

In [7]:
# Show the column labels of the loaded frame
df.columns

Index(['DivisionID', 'DivisionNm', 'StateAb', 'Enrolment', 'OrdinaryVotes',
       'AbsentVotes', 'ProvisionalVotes', 'PrePollVotes', 'PostalVotes',
       'TotalVotes', 'TotalPercentage'],
      dtype='object')

In [8]:
# Show the dtype pandas inferred for each column
df.dtypes

DivisionID            int64
DivisionNm           object
StateAb              object
Enrolment             int64
OrdinaryVotes         int64
AbsentVotes           int64
ProvisionalVotes      int64
PrePollVotes          int64
PostalVotes           int64
TotalVotes            int64
TotalPercentage     float64
dtype: object

In [9]:
# Duplicate check: evaluates to True only when no DivisionID value is repeated
df["DivisionID"].is_unique

True

In [10]:
# Number of rows before missing values are dropped (compare with the count
# shown after dropna)
len(df)

150

In [11]:
# Discard every row that has a missing value in any column, then show how many
# rows remain
df = df.dropna(axis=0, how="any")
len(df)

150

In [12]:
# Keep only the division identifier and the per-type / total vote counts
columns = [
    'DivisionID',
    'OrdinaryVotes',
    'AbsentVotes',
    'ProvisionalVotes',
    'PrePollVotes',
    'PostalVotes',
    'TotalVotes',
]
votetypes_df = df[columns]
votetypes_df

Unnamed: 0,DivisionID,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes
0,179,79148,6869,686,3444,8515,98662
1,197,72395,3756,292,3090,9578,89111
2,198,87377,3439,783,3118,8451,103168
3,103,81420,4109,469,3395,7095,96488
4,180,85870,3019,245,2877,6662,98673
...,...,...,...,...,...,...,...
145,153,86095,4673,508,1946,4720,97942
146,150,93549,3597,358,1991,4574,104069
147,178,79037,3177,131,3560,8712,94617
148,234,79320,5864,374,5684,9673,100915


In [13]:
# renaming columns
# Use an explicit old -> new mapping instead of assigning a positional list to
# `.columns`: a positional list silently mislabels the data if the column order
# ever changes, and it fails with a length mismatch when this cell is re-run
# after division_id has been moved into the index. `rename` skips labels that
# are not present, so re-running this cell is a harmless no-op.
column_renames = {
    'DivisionID': 'division_id',
    'OrdinaryVotes': 'ordinary_votes',
    'AbsentVotes': 'absent_votes',
    'ProvisionalVotes': 'provisional_votes',
    'PrePollVotes': 'prepoll_votes',
    'PostalVotes': 'postal_votes',
    'TotalVotes': 'total_votes',
}
votetypes_df = votetypes_df.rename(columns=column_renames)
votetypes_df.head()

Unnamed: 0,division_id,ordinary_votes,absent_votes,provisional_votes,prepoll_votes,postal_votes,total_votes
0,179,79148,6869,686,3444,8515,98662
1,197,72395,3756,292,3090,9578,89111
2,198,87377,3439,783,3118,8451,103168
3,103,81420,4109,469,3395,7095,96488
4,180,85870,3019,245,2877,6662,98673


In [14]:
# set the division_id as index
# Guarded so the cell can be re-run: once division_id is the index it is no
# longer a column, and an unconditional set_index would raise KeyError.
if "division_id" in votetypes_df.columns:
    # verify_integrity makes pandas raise if the new index contains duplicates
    votetypes_df = votetypes_df.set_index("division_id", verify_integrity=True)
votetypes_df.head()

Unnamed: 0_level_0,ordinary_votes,absent_votes,provisional_votes,prepoll_votes,postal_votes,total_votes
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
179,79148,6869,686,3444,8515,98662
197,72395,3756,292,3090,9578,89111
198,87377,3439,783,3118,8451,103168
103,81420,4109,469,3395,7095,96488
180,85870,3019,245,2877,6662,98673


In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for each
# vote-count column
votetypes_df.describe()

Unnamed: 0,ordinary_votes,absent_votes,provisional_votes,prepoll_votes,postal_votes,total_votes
count,150.0,150.0,150.0,150.0,150.0,150.0
mean,78772.72,4390.073333,367.346667,3396.506667,8153.46,95080.106667
std,9976.15196,1375.398354,151.943428,1031.496435,2755.236756,9944.933948
min,42399.0,748.0,130.0,1640.0,1509.0,46525.0
25%,72555.0,3575.25,257.75,2723.25,6078.0,90801.75
50%,78913.5,4409.0,333.0,3236.0,7514.0,95891.5
75%,84086.75,5280.25,457.25,3935.75,9681.25,99188.75
max,117248.0,8118.0,803.0,6901.0,15106.0,132310.0


# Exporting to CSV

In [16]:
# export df to csv
# Create the output folder first: to_csv does not create missing directories
# and raises OSError when the target folder is absent (e.g. a fresh checkout).
output_path = Path("03-output_2016_federal_election_votetypes/fedelect_votetypes.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
# The frame's index is written as the first CSV column (to_csv default)
votetypes_df.to_csv(output_path)