In [1]:
from pathlib import Path

import pandas as pd

# Extracting electoral division data from CSV

In [2]:
# header=1: the column names sit on the second line of the file, so the
# line above them is skipped instead of being parsed as data.
df = pd.read_csv(
    "Resources/HouseVotesCountedByDivisionDownload-20499.csv",
    header=1,
)

In [3]:
# Preview the first five rows to sanity-check the parse.
df.head(n=5)

Unnamed: 0,DivisionID,DivisionNm,StateAb,Enrolment,OrdinaryVotes,AbsentVotes,ProvisionalVotes,PrePollVotes,PostalVotes,TotalVotes,TotalPercentage
0,179,Adelaide,SA,109217,79148,6869,686,3444,8515,98662,90.34
1,197,Aston,VIC,96043,72395,3756,292,3090,9578,89111,92.78
2,198,Ballarat,VIC,110755,87377,3439,783,3118,8451,103168,93.15
3,103,Banks,NSW,104891,81420,4109,469,3395,7095,96488,91.99
4,180,Barker,SA,105600,85870,3019,245,2877,6662,98673,93.44


# Transforming the DataFrame to pull out electoral division data

In [4]:
# Column labels as parsed from the file's header row.
df.columns

Index(['DivisionID', 'DivisionNm', 'StateAb', 'Enrolment', 'OrdinaryVotes',
       'AbsentVotes', 'ProvisionalVotes', 'PrePollVotes', 'PostalVotes',
       'TotalVotes', 'TotalPercentage'],
      dtype='object')

In [5]:
# dtype pandas inferred for each column.
df.dtypes

DivisionID            int64
DivisionNm           object
StateAb              object
Enrolment             int64
OrdinaryVotes         int64
AbsentVotes           int64
ProvisionalVotes      int64
PrePollVotes          int64
PostalVotes           int64
TotalVotes            int64
TotalPercentage     float64
dtype: object

In [6]:
# Row count before any cleaning, for comparison with the count after dropna.
len(df.index)

150

In [7]:
# Remove every row that has a missing value in any column, then report how
# many rows are left.
df = df.dropna(axis="index", how="any")
len(df.index)

150

In [8]:
# This column (renamed to division_id) becomes the index of the exported
# table further down, so a duplicate is an error to stop on, not just a
# False to notice in the output.
assert df["DivisionID"].is_unique, "DivisionID contains duplicate values"
df["DivisionID"].is_unique

True

In [9]:
# Only the identifying columns are carried forward; the vote-count columns
# are left behind in df.
division_columns = ["DivisionID", "DivisionNm", "StateAb"]
electorate_df = df[division_columns]
electorate_df.head()

Unnamed: 0,DivisionID,DivisionNm,StateAb
0,179,Adelaide,SA
1,197,Aston,VIC
2,198,Ballarat,VIC
3,103,Banks,NSW
4,180,Barker,SA


In [10]:
# Map each source column to its output name explicitly. Renaming by name
# (rather than assigning a positional list to .columns) keeps the labels
# correct even if the column selection is reordered, and it returns a new
# frame instead of mutating the selection in place.
electorate_df = electorate_df.rename(
    columns={
        "DivisionID": "division_id",
        "DivisionNm": "electoral_division",
        "StateAb": "state",
    }
)
electorate_df.head()

Unnamed: 0,division_id,electoral_division,state
0,179,Adelaide,SA
1,197,Aston,VIC
2,198,Ballarat,VIC
3,103,Banks,NSW
4,180,Barker,SA


In [11]:
# Make division_id the index. The guard keeps the cell re-runnable: once the
# column has been moved into the index, a second set_index would raise
# KeyError. verify_integrity=True makes pandas raise if any division_id is
# duplicated.
if electorate_df.index.name != "division_id":
    electorate_df = electorate_df.set_index("division_id", verify_integrity=True)
electorate_df.head()

Unnamed: 0_level_0,electoral_division,state
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1
179,Adelaide,SA
197,Aston,VIC
198,Ballarat,VIC
103,Banks,NSW
180,Barker,SA


# Exporting DataFrame to CSV 

In [12]:
# Create the output folder first: to_csv does not create missing parent
# directories, so a fresh checkout without the folder would fail here.
output_path = Path("01-output_electoral_division/electoral_division.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
electorate_df.to_csv(output_path)