In [72]:
import pandas as pd

In [73]:
# load the per-electorate results from the CSV file next to this notebook
df = pd.read_csv(filepath_or_buffer='electorate-results.csv')

# Transformation
## Checking for empty values

In [74]:
# remove every row that has a missing value in any column
# (entire rows are dropped, not individual cells)
df = df.dropna(axis=0, how='any')

In [75]:
# preview the first five rows
df.head(n=5)

Unnamed: 0,electoral_division,state,yes,no,response_clear,response_unclear,nonresponding
0,Banks,New South Wales,37736,46343,84079,247,20928
1,Barton,New South Wales,37153,47984,85137,226,24008
2,Bennelong,New South Wales,42943,43215,86158,244,19973
3,Berowra,New South Wales,48471,40369,88840,212,16038
4,Blaxland,New South Wales,20406,57926,78332,220,25883


In [76]:
# show the data type of each column (text columns appear as `object`)
df.dtypes

electoral_division    object
state                 object
yes                    int64
no                     int64
response_clear         int64
response_unclear       int64
nonresponding          int64
dtype: object

In [77]:
# number of rows in the table (first element of the frame's shape)
df.shape[0]

150

In [78]:
# Keep only the first row for each electoral_division and discard later repeats.
# The result is assigned back instead of using inplace=True, so the cell
# produces a new frame rather than mutating the existing object in place.
df = df.drop_duplicates(subset='electoral_division', keep='first')

In [79]:
# row count after de-duplication; compare it with the row count shown before
# the duplicates were dropped to see how many rows were removed
df.shape[0]

150

In [80]:
# summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,yes,no,response_clear,response_unclear,nonresponding
count,150.0,150.0,150.0,150.0,150.0
mean,52114.98,32493.25,84608.23,244.57,21855.07
std,12315.11,8262.79,10318.85,55.9,4197.47
min,19026.0,14860.0,34924.0,106.0,13092.0
25%,44619.75,28452.25,80220.75,207.25,18951.0
50%,51782.5,31653.5,85726.5,240.0,21416.5
75%,59878.75,36726.75,90369.75,276.0,24513.25
max,89590.0,57926.0,120951.0,377.0,35841.0


In [81]:
# Pairwise correlation between the numeric columns of the table.
# The text columns (electoral_division, state) are excluded explicitly:
# pandas >= 2.0 no longer drops them silently and raises a ValueError when
# corr() is called on a frame that still contains them.
df.select_dtypes(include='number').corr()

Unnamed: 0,yes,no,response_clear,response_unclear,nonresponding
yes,1.0,-0.56,0.75,0.12,-0.37
no,-0.56,1.0,0.14,0.42,0.44
response_clear,0.75,0.14,1.0,0.48,-0.09
response_unclear,0.12,0.42,0.48,1.0,0.15
nonresponding,-0.37,0.44,-0.09,0.15,1.0


In [82]:
# inspect the state column (this only selects it; no grouping happens here)
df.loc[:, 'state']

0                   New South Wales
1                   New South Wales
2                   New South Wales
3                   New South Wales
4                   New South Wales
                   ...             
145                        Tasmania
146              Northern Territory
147              Northern Territory
148    Australian Capital Territory
149    Australian Capital Territory
Name: state, Length: 150, dtype: object

In [83]:
# number of electoral divisions per state, largest count first
df['state'].value_counts(sort=True, ascending=False)

New South Wales                 47
Victoria                        37
Queensland                      30
Western Australia               16
South Australia                 11
Tasmania                         5
Australian Capital Territory     2
Northern Territory               2
Name: state, dtype: int64

In [84]:
# Sum the vote counts of all electoral divisions within each state.
# numeric_only=True keeps the text column electoral_division out of the
# aggregation; pandas >= 2.0 no longer drops it automatically and would
# otherwise concatenate its strings into the result.
state_electorial_groups = df.groupby(['state']).sum(numeric_only=True)

In [85]:
# display the per-state totals
state_electorial_groups

Unnamed: 0_level_0,yes,no,response_clear,response_unclear,nonresponding
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Australian Capital Territory,175459,61520,236979,534,50595
New South Wales,2374362,1736838,4111200,11036,1065445
Northern Territory,48686,31690,80376,229,57496
Queensland,1487060,961015,2448075,7088,695710
South Australia,592528,356247,948775,2778,242027
Tasmania,191948,109655,301603,805,77020
Victoria,2145629,1161098,3306727,11028,743634
Western Australia,801575,455924,1257499,3188,346333


In [86]:
# clear votes per state = yes votes + no votes
state_electorial_groups['Total_Clear_Votes'] = (
    state_electorial_groups['yes'].add(state_electorial_groups['no'])
)

In [87]:
# display the per-state totals including the new Total_Clear_Votes column
state_electorial_groups

Unnamed: 0_level_0,yes,no,response_clear,response_unclear,nonresponding,Total_Clear_Votes
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Australian Capital Territory,175459,61520,236979,534,50595,236979
New South Wales,2374362,1736838,4111200,11036,1065445,4111200
Northern Territory,48686,31690,80376,229,57496,80376
Queensland,1487060,961015,2448075,7088,695710,2448075
South Australia,592528,356247,948775,2778,242027,948775
Tasmania,191948,109655,301603,805,77020,301603
Victoria,2145629,1161098,3306727,11028,743634,3306727
Western Australia,801575,455924,1257499,3188,346333,1257499


In [88]:

# total number of yes votes across all states
state_electorial_groups['yes'].sum()

7817247

In [89]:
# total yes votes across all states, kept for the share calculation
sum_yes = state_electorial_groups.loc[:, 'yes'].sum()

In [90]:
# each state's share of the total yes votes across all states
# NOTE: despite the column name, the values are fractions between 0 and 1
# (e.g. 0.3), not percentages
state_electorial_groups['Percentage_yes'] = state_electorial_groups['yes'].div(sum_yes)

In [91]:
# display the per-state totals including the Percentage_yes column
state_electorial_groups

Unnamed: 0_level_0,yes,no,response_clear,response_unclear,nonresponding,Total_Clear_Votes,Percentage_yes
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Australian Capital Territory,175459,61520,236979,534,50595,236979,0.02
New South Wales,2374362,1736838,4111200,11036,1065445,4111200,0.3
Northern Territory,48686,31690,80376,229,57496,80376,0.01
Queensland,1487060,961015,2448075,7088,695710,2448075,0.19
South Australia,592528,356247,948775,2778,242027,948775,0.08
Tasmania,191948,109655,301603,805,77020,301603,0.02
Victoria,2145629,1161098,3306727,11028,743634,3306727,0.27
Western Australia,801575,455924,1257499,3188,346333,1257499,0.1


# End of Transform