In [12]:
# Imports
from pathlib import Path

import pandas as pd

# Extracting 

### Extracting Labor and Liberal vote counts

In [13]:
# Reading in the csv; header=1 takes the column names from the second line
# of the file, so the first line is not used
source_csv = 'Resources/labor_liberal_vote_counts.csv'
df = pd.read_csv(source_csv, header=1)
df.head()

Unnamed: 0,DivisionNm,DivisionID,StateAb,PartyAb,Liberal/National Coalition Votes,Liberal/National Coalition Percentage,Australian Labor Party Votes,Australian Labor Party Percentage,TotalVotes,Swing
0,Adelaide,179,SA,ALP,43325,45.35,52219,54.65,95544,-0.7
1,Aston,197,VIC,LP,50142,58.59,35444,41.41,85586,0.39
2,Ballarat,198,VIC,ALP,41705,42.68,56002,57.32,97707,-2.43
3,Banks,103,NSW,LP,46210,51.44,43622,48.56,89832,-1.36
4,Barker,180,SA,LP,61566,65.19,32879,34.81,94445,-1.36


# Transforming

### Removing unwanted columns and renaming

In [14]:
# Dropping the division name, state, party, total votes and swing columns
unwanted_columns = ["DivisionNm", "StateAb", "PartyAb", "TotalVotes", "Swing"]
df = df.drop(columns=unwanted_columns)
df.head()

Unnamed: 0,DivisionID,Liberal/National Coalition Votes,Liberal/National Coalition Percentage,Australian Labor Party Votes,Australian Labor Party Percentage
0,179,43325,45.35,52219,54.65
1,197,50142,58.59,35444,41.41
2,198,41705,42.68,56002,57.32
3,103,46210,51.44,43622,48.56
4,180,61566,65.19,32879,34.81


In [15]:
# Renaming the columns by name rather than by position, so each new label
# stays attached to the right data even if the column order changes upstream
column_names = {
    'DivisionID': 'division_id',
    'Liberal/National Coalition Votes': 'liberal_votes',
    'Liberal/National Coalition Percentage': 'liberal_percent',
    'Australian Labor Party Votes': 'labor_votes',
    'Australian Labor Party Percentage': 'labor_percent',
}
df = df.rename(columns=column_names)

# rename skips labels it cannot find, so check here that every expected
# column is present instead of failing later with a less obvious error
missing_columns = set(column_names.values()) - set(df.columns)
assert not missing_columns, "missing expected columns: {}".format(sorted(missing_columns))
df.head()

Unnamed: 0,division_id,liberal_votes,liberal_percent,labor_votes,labor_percent
0,179,43325,45.35,52219,54.65
1,197,50142,58.59,35444,41.41
2,198,41705,42.68,56002,57.32
3,103,46210,51.44,43622,48.56
4,180,61566,65.19,32879,34.81


### Checking for the presence of NaN values

In [17]:
# Counting the missing values in each column (isna is the same check as isnull)
missing_per_column = df.isna().sum()
print(missing_per_column)

division_id        0
liberal_votes      0
liberal_percent    0
labor_votes        0
labor_percent      0
dtype: int64


In [18]:
# Showing the last five rows of the dataframe
df.tail(n=5)

Unnamed: 0,division_id,liberal_votes,liberal_percent,labor_votes,labor_percent
145,153,37356,41.8,52005,58.2
146,150,35711,36.28,62730,63.72
147,178,52264,58.14,37625,41.86
148,234,27071,28.77,67037,71.23
149,316,52651,59.62,35667,40.38


### Checking the datatypes

In [19]:
# Check the datatype pandas inferred for each column
df.dtypes

division_id          int64
liberal_votes        int64
liberal_percent    float64
labor_votes          int64
labor_percent      float64
dtype: object

### No merge with a second dataframe is needed because the division id is already present

In [20]:
# Making division_id the row index instead of an ordinary column
df = df.set_index(keys='division_id')
df.head()

Unnamed: 0_level_0,liberal_votes,liberal_percent,labor_votes,labor_percent
division_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
179,43325,45.35,52219,54.65
197,50142,58.59,35444,41.41
198,41705,42.68,56002,57.32
103,46210,51.44,43622,48.56
180,61566,65.19,32879,34.81


# Exploratory Data Analysis - in the interest of project 2

In [27]:
# Smallest and largest Liberal percentage, i.e. the range of values the
# bubble chart will have to cover
liberal_share = df['liberal_percent']
lib_min, lib_max = liberal_share.min(), liberal_share.max()
print(lib_min, lib_max)

27.64 74.9


In [28]:
# Smallest and largest Labor percentage
labor_share = df['labor_percent']
lab_min, lab_max = labor_share.min(), labor_share.max()
print(lab_min, lab_max)

25.1 72.36


# Exporting to csv

In [29]:
# Exporting dataframe to csv. to_csv does not create missing folders, so the
# output folder is created first (nothing happens if it already exists);
# this keeps the cell working on a fresh checkout
output_csv = "11-output_labor_liberal_vote_counts/11-labor_liberal_votes.csv"
Path(output_csv).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_csv)