In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
# Read into csv file
schools_csv_file = "./Resources/dv279-schoollocations2019.csv"
schools_df = pd.read_csv(schools_csv_file)
schools_df.head()

Unnamed: 0,Education_Sector,Entity_Type,School_No,School_Name,School_Type,School_Status,Address_Line_1,Address_Line_2,Address_Town,Address_State,...,Postal_Address_Line_1,Postal_Address_Line_2,Postal_Town,Postal_State,Postal_Postcode,Full_Phone_No,LGA_ID,LGA_Name,X,Y
0,Government,1,1,Alberton Primary School,Primary,O,21 Thomson Street,,Alberton,VIC,...,21 Thomson Street,,ALBERTON,VIC,3971,03 5183 2412,681,Wellington (S),146.666601,-38.617713
1,Government,1,3,Allansford and District Primary School,Primary,O,Frank Street,,Allansford,VIC,...,Frank Street,,ALLANSFORD,VIC,3277,03 5565 1382,673,Warrnambool (C),142.590393,-38.386281
2,Government,1,4,Avoca Primary School,Primary,O,118 Barnett Street,,Avoca,VIC,...,P O Box 12,,AVOCA,VIC,3467,03 5465 3176,599,Pyrenees (S),143.475649,-37.084502
3,Government,1,8,Avenel Primary School,Primary,O,40 Anderson Street,,Avenel,VIC,...,40 Anderson Street,,AVENEL,VIC,3664,03 5796 2264,643,Strathbogie (S),145.234722,-36.901368
4,Government,1,12,Warrandyte Primary School,Primary,O,5-11 Forbes Street,,Warrandyte,VIC,...,5-11 Forbes Street,,WARRANDYTE,VIC,3113,03 9844 3537,421,Manningham (C),145.21398,-37.742675


In [3]:
# Ensuring all schools are in VIC
schools_df["Postal_State"].unique()

array(['VIC'], dtype=object)

In [4]:
# Drop unwanted columns
to_drop = ["School_Status",
          "Address_Line_1",
          "Address_Line_2",
          "Address_Town",
          "Address_State",
          "Postal_Address_Line_2",
          "Full_Phone_No",
          "Address_Postcode",
          "Postal_State",
          "LGA_ID"]

schools_df.drop(columns=to_drop, inplace=True)

In [5]:
schools_df.head()

Unnamed: 0,Education_Sector,Entity_Type,School_No,School_Name,School_Type,Postal_Address_Line_1,Postal_Town,Postal_Postcode,LGA_Name,X,Y
0,Government,1,1,Alberton Primary School,Primary,21 Thomson Street,ALBERTON,3971,Wellington (S),146.666601,-38.617713
1,Government,1,3,Allansford and District Primary School,Primary,Frank Street,ALLANSFORD,3277,Warrnambool (C),142.590393,-38.386281
2,Government,1,4,Avoca Primary School,Primary,P O Box 12,AVOCA,3467,Pyrenees (S),143.475649,-37.084502
3,Government,1,8,Avenel Primary School,Primary,40 Anderson Street,AVENEL,3664,Strathbogie (S),145.234722,-36.901368
4,Government,1,12,Warrandyte Primary School,Primary,5-11 Forbes Street,WARRANDYTE,3113,Manningham (C),145.21398,-37.742675


In [6]:
# Split LGA Name
schools_df["LGA_Name"] = schools_df.LGA_Name.apply(lambda x: pd.Series(str(x).split(" "))) 

In [7]:
# Capitalise and lower postal town names
schools_df["Postal_Town"] = schools_df.Postal_Town.apply(lambda x: pd.Series(str(x).lower().capitalize()))

In [8]:
schools_df.head()

Unnamed: 0,Education_Sector,Entity_Type,School_No,School_Name,School_Type,Postal_Address_Line_1,Postal_Town,Postal_Postcode,LGA_Name,X,Y
0,Government,1,1,Alberton Primary School,Primary,21 Thomson Street,Alberton,3971,Wellington,146.666601,-38.617713
1,Government,1,3,Allansford and District Primary School,Primary,Frank Street,Allansford,3277,Warrnambool,142.590393,-38.386281
2,Government,1,4,Avoca Primary School,Primary,P O Box 12,Avoca,3467,Pyrenees,143.475649,-37.084502
3,Government,1,8,Avenel Primary School,Primary,40 Anderson Street,Avenel,3664,Strathbogie,145.234722,-36.901368
4,Government,1,12,Warrandyte Primary School,Primary,5-11 Forbes Street,Warrandyte,3113,Manningham,145.21398,-37.742675


In [10]:
# Rename postcode column
schools_df = schools_df.rename(columns={"Postal_Postcode": "Postcode",
                                       "Postal_Town": "Suburb",
                                       "X": "Lat",
                                       "Y": "Lng"})
schools_df

Unnamed: 0,Education_Sector,Entity_Type,School_No,School_Name,School_Type,Postal_Address_Line_1,Suburb,Postcode,LGA_Name,Lat,Lng
0,Government,1,1,Alberton Primary School,Primary,21 Thomson Street,Alberton,3971,Wellington,146.666601,-38.617713
1,Government,1,3,Allansford and District Primary School,Primary,Frank Street,Allansford,3277,Warrnambool,142.590393,-38.386281
2,Government,1,4,Avoca Primary School,Primary,P O Box 12,Avoca,3467,Pyrenees,143.475649,-37.084502
3,Government,1,8,Avenel Primary School,Primary,40 Anderson Street,Avenel,3664,Strathbogie,145.234722,-36.901368
4,Government,1,12,Warrandyte Primary School,Primary,5-11 Forbes Street,Warrandyte,3113,Manningham,145.213980,-37.742675
...,...,...,...,...,...,...,...,...,...,...,...
2249,Catholic,2,2222,Our Lady Star of the Sea Catholic Primary School,Primary,6 Cowes-Rhyll Road,Cowes,3922,Bass,145.239805,-38.463698
2250,Independent,2,2223,Maxwell Creative School,Primary,6 Rainy Hill Road,Cockatoo,3781,Cardinia,145.518028,-37.923946
2251,Catholic,2,2224,St Anne's College,Pri/Sec,72 Wendouree Drive,Kialla,3631,Greater,145.400337,-36.436121
2252,Catholic,2,2225,Holy Trinity Catholic Primary School,Primary,145 Mitchells Lane,Sunbury,3429,Hume,144.705316,-37.584791


In [15]:
# Drop any null values
schools_df = schools_df.dropna()
schools_df.count()

Education_Sector         2254
Entity_Type              2254
School_No                2254
School_Name              2254
School_Type              2254
Postal_Address_Line_1    2254
Suburb                   2254
Postcode                 2254
LGA_Name                 2254
Lat                      2254
Lng                      2254
dtype: int64

In [16]:
# Retrieve postcode and suburbs
postcode_suburb_df = schools_df[["Postcode", "Suburb"]]
postcode_suburb_df = postcode_suburb_df.drop_duplicates()

# Export into a csv file
postcode_suburb_df.to_csv("./output/postcode_suburb_df.csv", index=False)

In [17]:
# Read into created csv
postcode_suburb = pd.read_csv("./output/postcode_suburb_df.csv")
postcode_suburb

Unnamed: 0,Postcode,Suburb
0,3971,Alberton
1,3277,Allansford
2,3467,Avoca
3,3664,Avenel
4,3113,Warrandyte
...,...,...
953,3954,Koonwarra
954,3076,Epping north
955,3030,Werribee plaza
956,3029,Truganina south


In [18]:
postcode_suburb.loc[postcode_suburb["Suburb"] == "Melbourne"]

Unnamed: 0,Postcode,Suburb
827,3004,Melbourne
943,3000,Melbourne
