# Park data
This notebook is to analyse the park data for each suburb.

### Import packages

In [4]:
import geopandas as gpd
from shapely.geometry import Point
import folium
import pandas as pd

### Read files

In [1]:
# read files
csv_file_path = '../data/raw/parks-and-reserves1.csv'
park = pd.read_csv(csv_file_path)

### Cleaning the data

In [2]:
# drop unnecessary columns
columns_to_drop = ['geo_point_2d', 'postcode', 'ward_name','easting','northing','projection','googlemaps_drive_to','area']
park_drop = park.drop(columns=columns_to_drop)

Unnamed: 0,name,latitude,longitude,address,suburb_name
0,Glasscocks Road from Norwegian Way to Chatswoo...,-38.068252,145.288488,763P Glasscocks Road NARRE WARREN SOUTH,NARRE WARREN SOUTH
1,Claret Street Undeveloped,-37.993916,145.230284,Claret Street DOVETON,DANDENONG
2,Brookvale Close Reserve,-38.057068,145.368004,50-52I Brookvale Close BEACONSFIELD,BEACONSFIELD
3,Narre Warren North Road at Fox Road,-37.993186,145.313325,Narre Warren North Road NARRE WARREN NORTH,NARRE WARREN NORTH
4,Ryelands Drive and Hillcrest Avenue Tree Reserve,-38.017290,145.318246,Ryelands Drive BERWICK,BERWICK
...,...,...,...,...,...
3259,Pattersons Median west of Orana,-38.124433,145.373448,Pattersons Road CLYDE NORTH,CLYDE
3260,Charleston Chase - Masquerade Road tree reserve,-38.076620,145.363471,Bells Road BERWICK,BERWICK
3261,Swanston Street Reserve,-38.128512,145.328783,110S Twyford Road CLYDE,CLYDE
3262,Copley Street Reserve,-38.115009,145.259665,Copley Street CRANBOURNE WEST,CRANBOURNE


In [3]:
# drop null
park_drop = park_drop.dropna(subset=['name'])
filtered_park = park_drop[park_drop['name'].str.contains('garden|reserve', case=False, regex=True)]


In [17]:
filtered_park

Unnamed: 0,name,latitude,longitude,address,suburb_name
2,Brookvale Close Reserve,-38.057068,145.368004,50-52I Brookvale Close BEACONSFIELD,BEACONSFIELD
4,Ryelands Drive and Hillcrest Avenue Tree Reserve,-38.017290,145.318246,Ryelands Drive BERWICK,BERWICK
5,Hinrichsen Drive Reserve,-38.014240,145.283707,127-129 Hinrichsen Drive HALLAM,HALLAM
6,Lawson Way Reserve,-37.976977,145.266181,78-92I Heatherton Road ENDEAVOUR HILLS,ENDEAVOUR HILLS
8,Western Way Tree Reserve,-38.022186,145.296446,401I Princes Highway NARRE WARREN,NARRE WARREN
...,...,...,...,...,...
3255,Artois Road Reserve,-38.048459,145.323247,"Artois Road, BERWICK",BERWICK
3260,Charleston Chase - Masquerade Road tree reserve,-38.076620,145.363471,Bells Road BERWICK,BERWICK
3261,Swanston Street Reserve,-38.128512,145.328783,110S Twyford Road CLYDE,CLYDE
3262,Copley Street Reserve,-38.115009,145.259665,Copley Street CRANBOURNE WEST,CRANBOURNE


### Read shapefile to map the park attribute to suburb

In [13]:
# Get the shpaefile for SA2 region
suburbs = gpd.read_file("../data/landing/SAL_2021_AUST_GDA2020_SHP/SAL_2021_AUST_GDA2020.shp")


Unnamed: 0,SAL_CODE21,SAL_NAME21,STE_CODE21,STE_NAME21,AUS_CODE21,AUS_NAME21,AREASQKM21,LOCI_URI21,SHAPE_Leng,SHAPE_Area,geometry
0,10001,Aarons Pass,1,New South Wales,AUS,Australia,82.7639,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.554241,0.007975,"POLYGON ((149.82477 -32.84384, 149.83271 -32.8..."
1,10002,Abbotsbury,1,New South Wales,AUS,Australia,4.9788,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.123051,0.000485,"POLYGON ((150.86523 -33.88264, 150.86479 -33.8..."
2,10003,Abbotsford (NSW),1,New South Wales,AUS,Australia,1.0180,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.053423,0.000099,"POLYGON ((151.13472 -33.85492, 151.13445 -33.8..."
3,10004,Abercrombie,1,New South Wales,AUS,Australia,2.9775,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.097338,0.000289,"POLYGON ((149.55192 -33.39280, 149.55148 -33.3..."
4,10005,Abercrombie River,1,New South Wales,AUS,Australia,127.1701,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.848903,0.012397,"POLYGON ((149.25562 -33.96535, 149.25563 -33.9..."
...,...,...,...,...,...,...,...,...,...,...,...
15348,90004,Norfolk Island,9,Other Territories,AUS,Australia,38.6510,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.629774,0.003580,"MULTIPOLYGON (((167.94051 -29.06260, 167.94046..."
15349,90005,West Island,9,Other Territories,AUS,Australia,5.9276,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.416115,0.000492,"MULTIPOLYGON (((96.82779 -12.17627, 96.82773 -..."
15350,99494,No usual address (OT),9,Other Territories,AUS,Australia,0.0000,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.000000,0.000000,
15351,99797,Migratory - Offshore - Shipping (OT),9,Other Territories,AUS,Australia,0.0000,http://linked.data.gov.au/dataset/asgsed3/SAL/...,0.000000,0.000000,


In [11]:
# select the subset that we care about
suburbs = suburbs.loc[suburbs['STE_NAME21']=='Victoria']
suburbs = suburbs[['SAL_CODE21','SAL_NAME21','geometry']]

In [16]:
suburbs

Unnamed: 0,SAL_CODE21,SAL_NAME21,geometry
0,10001,Aarons Pass,"POLYGON ((149.82477 -32.84384, 149.83271 -32.8..."
1,10002,Abbotsbury,"POLYGON ((150.86523 -33.88264, 150.86479 -33.8..."
2,10003,Abbotsford (NSW),"POLYGON ((151.13472 -33.85492, 151.13445 -33.8..."
3,10004,Abercrombie,"POLYGON ((149.55192 -33.39280, 149.55148 -33.3..."
4,10005,Abercrombie River,"POLYGON ((149.25562 -33.96535, 149.25563 -33.9..."
...,...,...,...
15348,90004,Norfolk Island,"MULTIPOLYGON (((167.94051 -29.06260, 167.94046..."
15349,90005,West Island,"MULTIPOLYGON (((96.82779 -12.17627, 96.82773 -..."
15350,99494,No usual address (OT),
15351,99797,Migratory - Offshore - Shipping (OT),


In [18]:
# Convert the dataframe to a geodataframe
geometry = [Point(xy) for xy in zip(filtered_park['longitude'], filtered_park['latitude'])]
filtered_park_gdf = gpd.GeoDataFrame(filtered_park, geometry=geometry)

In [21]:
# Use sjoin to find out which suburb each point belongs to
joined = gpd.sjoin(filtered_park_gdf, suburbs, how="left", op="within")

# If your suburbs shapefile has a column named 'suburb_name' that specifies the name of the suburb
filtered_park_gdf['SAL_NAME'] = joined['SAL_NAME21']
filtered_park_gdf['SAL_CODE'] = joined['SAL_CODE21']

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:7844

  joined = gpd.sjoin(filtered_park_gdf, suburbs, how="left", op="within")


In [22]:
filtered_park_gdf

Unnamed: 0,name,latitude,longitude,address,suburb_name,geometry,SA2_NAME,SA2_CODE,SAL_NAME,SAL_CODE
2,Brookvale Close Reserve,-38.057068,145.368004,50-52I Brookvale Close BEACONSFIELD,BEACONSFIELD,POINT (145.36800 -38.05707),Beaconsfield (Vic.),20175,Beaconsfield (Vic.),20175
4,Ryelands Drive and Hillcrest Avenue Tree Reserve,-38.017290,145.318246,Ryelands Drive BERWICK,BERWICK,POINT (145.31825 -38.01729),Berwick,20224,Berwick,20224
5,Hinrichsen Drive Reserve,-38.014240,145.283707,127-129 Hinrichsen Drive HALLAM,HALLAM,POINT (145.28371 -38.01424),Hallam,21125,Hallam,21125
6,Lawson Way Reserve,-37.976977,145.266181,78-92I Heatherton Road ENDEAVOUR HILLS,ENDEAVOUR HILLS,POINT (145.26618 -37.97698),Endeavour Hills,20871,Endeavour Hills,20871
8,Western Way Tree Reserve,-38.022186,145.296446,401I Princes Highway NARRE WARREN,NARRE WARREN,POINT (145.29645 -38.02219),Narre Warren,21893,Narre Warren,21893
...,...,...,...,...,...,...,...,...,...,...
3255,Artois Road Reserve,-38.048459,145.323247,"Artois Road, BERWICK",BERWICK,POINT (145.32325 -38.04846),Berwick,20224,Berwick,20224
3260,Charleston Chase - Masquerade Road tree reserve,-38.076620,145.363471,Bells Road BERWICK,BERWICK,POINT (145.36347 -38.07662),Berwick,20224,Berwick,20224
3261,Swanston Street Reserve,-38.128512,145.328783,110S Twyford Road CLYDE,CLYDE,POINT (145.32878 -38.12851),Clyde (Vic.),20581,Clyde (Vic.),20581
3262,Copley Street Reserve,-38.115009,145.259665,Copley Street CRANBOURNE WEST,CRANBOURNE,POINT (145.25966 -38.11501),Cranbourne,20662,Cranbourne,20662


### Save the data

In [23]:
filtered_park_gdf.to_csv("../data/curated/park_suburbs_SA2.csv", index=False)

In [24]:
filtered_park_gdf.to_file("../data/curated/park_suburbs_SA2.shp")

  pd.Int64Index,
  filtered_park_gdf.to_file("../data/curated/park_suburbs_SA2.shp")


In [25]:
# Group the data by 'SA2_NAME21' and count the number of 'park_Name' for each group
SA2_park_count = filtered_park_gdf.groupby(['SAL_NAME', 'SAL_CODE'])['name'].count().reset_index()

# Rename the 'School_Name' column to 'Count' for clarity
SA2_park_count = SA2_park_count.rename(columns={'name': 'Park_Count'})
SA2_park_count

Unnamed: 0,SAL_NAME,SAL_CODE,Park_Count
0,Beaconsfield (Vic.),20175,5
1,Berwick,20224,156
2,Blind Bight,20260,2
3,Botanic Ridge,20307,28
4,Cannons Creek,20466,1
5,Clyde (Vic.),20581,35
6,Clyde North,20582,104
7,Cranbourne,20662,67
8,Cranbourne East,20663,47
9,Cranbourne North,20664,51


In [26]:
# Check for duplicate values in 'School_Name'
duplicates = SA2_park_count[SA2_park_count['SAL_NAME'].duplicated(keep=False)]

# If 'duplicates' DataFrame is not empty, it means there are two or more rows with the same value in 'Column1'
if not duplicates.empty:
    print("There is duplicate in SAL_NAME")
else:
    print("There are no duplicate in SAL_NAME")

There are no duplicate in SAL_NAME


In [27]:
SA2_park_count.to_csv('../data/curated/Park_count_with_SA2.csv',index=False)

### Merge with the listing property data

In [146]:
SA2_school_count = pd.read_csv("../data/curated/School_count_with_SA2.csv")
listings_suburbs = pd.read_csv("../data/curated/PTV_count_with_SA2_with_PTV_count.csv")
listings_suburbs 

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,SAL_CODE,SA2_NAME21,SA2_CODE21,METRO_TRAIN,REGIONAL_TRAIN,Tram_Count,METRO_BUS,REGIONAL_BUS,REGIONAL_COACH,SKYBUS
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,21640.0,Melbourne CBD West,206041505.0,2.0,1.0,42.0,24.0,0.0,1.0,0.0
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,21640.0,Melbourne CBD West,206041505.0,2.0,1.0,42.0,24.0,0.0,1.0,0.0
2,512/118 Franklin Street Melbourne VIC 3000,Apartment / Unit / Flat,$430,,29.0,16.0,VIC,1.0,1.0,0.0,...,21640.0,Melbourne CBD North,206041504.0,1.0,0.0,18.0,13.0,0.0,0.0,0.0
3,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,21640.0,Melbourne CBD West,206041505.0,2.0,1.0,42.0,24.0,0.0,1.0,0.0
4,1.2/187 Collins Street Melbourne VIC 3000,Apartment / Unit / Flat,$475 pw,37.0,49.0,51.0,VIC,1.0,1.0,0.0,...,21640.0,Melbourne CBD East,206041503.0,1.0,0.0,25.0,6.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12043,16 Nautilus Road Inverloch VIC 3996,House,$400,47.0,72.0,61.0,VIC,3.0,1.0,1.0,...,21235.0,Wonthaggi Inverloch,205031093.0,0.0,0.0,0.0,0.0,122.0,32.0,0.0
12044,1/59 Williams Street Inverloch VIC 3996,Townhouse,$460 per week,63.0,57.0,50.0,VIC,3.0,2.0,1.0,...,21235.0,Wonthaggi Inverloch,205031093.0,0.0,0.0,0.0,0.0,122.0,32.0,0.0
12045,10 Beacon Crt Inverloch Inverloch VIC 3996,House,$410,52.0,81.0,65.0,VIC,2.0,1.0,1.0,...,21235.0,Wonthaggi Inverloch,205031093.0,0.0,0.0,0.0,0.0,122.0,32.0,0.0
12046,10 Blue Wren Way Inverloch VIC 3996,House,$600,,79.0,73.0,VIC,3.0,2.0,2.0,...,21235.0,Wonthaggi Inverloch,205031093.0,0.0,0.0,0.0,0.0,122.0,32.0,0.0


In [147]:
SA2_school_count
SA2_park_count

Unnamed: 0,SAL_NAME,SAL_CODE,Park_Count
0,Beaconsfield (Vic.),20175,5
1,Berwick,20224,156
2,Blind Bight,20260,2
3,Botanic Ridge,20307,28
4,Cannons Creek,20466,1
5,Clyde (Vic.),20581,35
6,Clyde North,20582,104
7,Cranbourne,20662,67
8,Cranbourne East,20663,47
9,Cranbourne North,20664,51


In [148]:
print(SA2_school_count['SAL_NAME'].dtype)
print(SA2_school_count['SAL_CODE'].dtype)
print(SA2_park_count['SAL_NAME'].dtype)
print(SA2_park_count['SAL_CODE'].dtype)


object
int64
object
int64


In [149]:
import numpy as np
SA2_park_count['SAL_CODE'] = SA2_park_count['SAL_CODE'].astype(np.int64)

aaa=SA2_park_count[SA2_park_count['SAL_NAME']=='Botanic Ridge']['Park_Count']
aaa

3    28
Name: Park_Count, dtype: int64

In [150]:
# Merge the dataframes on 'SA2_NAME21' with an outer join
SA2_school_park_count = SA2_school_count.merge(SA2_park_count, on=['SAL_NAME', 'SAL_CODE'], how='outer')

# Fill NaN values with 0 for tram and bus counts
SA2_school_park_count = SA2_school_park_count.fillna(0)
SA2_school_park_count['Park_Count']

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
922    1.0
923    7.0
924    7.0
925    2.0
926    1.0
Name: Park_Count, Length: 927, dtype: float64

In [151]:
print(listings_suburbs['SAL_CODE'].dtype)
print(SA2_school_count['SAL_CODE'].dtype)


float64
int64


In [152]:
#listings_suburbs['SAL_CODE'] = listings_suburbs['SAL_CODE'].astype(str)
#SA2_school_park_count['SAL_CODE'] = SA2_school_park_count['SAL_CODE'].astype(str)


### Save the final dataset

In [153]:
# join based on SAL_NAME21 in both df
listings_suburbs = listings_suburbs .merge(SA2_school_park_count, left_on='SAL_CODE', right_on='SAL_CODE', how='left')

In [154]:
listings_suburbs

Unnamed: 0,Location,type_property,price,LT_resident_pcg,owner_pcg,family_pcg,state,rooms,bath,parking,...,METRO_TRAIN,REGIONAL_TRAIN,Tram_Count,METRO_BUS,REGIONAL_BUS,REGIONAL_COACH,SKYBUS,SAL_NAME,School_Count,Park_Count
0,4506/33 Rose Lane Melbourne VIC 3000,Apartment / Unit / Flat,$520 per week,12.0,28.0,27.0,VIC,1.0,1.0,0.0,...,2.0,1.0,42.0,24.0,0.0,1.0,0.0,Melbourne,8.0,0.0
1,1715/220 Spencer Street Melbourne VIC 3000,Apartment / Unit / Flat,$750,12.0,28.0,27.0,VIC,2.0,2.0,1.0,...,2.0,1.0,42.0,24.0,0.0,1.0,0.0,Melbourne,8.0,0.0
2,512/118 Franklin Street Melbourne VIC 3000,Apartment / Unit / Flat,$430,,29.0,16.0,VIC,1.0,1.0,0.0,...,1.0,0.0,18.0,13.0,0.0,0.0,0.0,Melbourne,8.0,0.0
3,5801/648 Lonsdale Street Melbourne VIC 3000,Apartment / Unit / Flat,$800,12.0,20.0,34.0,VIC,2.0,2.0,0.0,...,2.0,1.0,42.0,24.0,0.0,1.0,0.0,Melbourne,8.0,0.0
4,1.2/187 Collins Street Melbourne VIC 3000,Apartment / Unit / Flat,$475 pw,37.0,49.0,51.0,VIC,1.0,1.0,0.0,...,1.0,0.0,25.0,6.0,0.0,0.0,0.0,Melbourne,8.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12043,16 Nautilus Road Inverloch VIC 3996,House,$400,47.0,72.0,61.0,VIC,3.0,1.0,1.0,...,0.0,0.0,0.0,0.0,122.0,32.0,0.0,Inverloch,1.0,0.0
12044,1/59 Williams Street Inverloch VIC 3996,Townhouse,$460 per week,63.0,57.0,50.0,VIC,3.0,2.0,1.0,...,0.0,0.0,0.0,0.0,122.0,32.0,0.0,Inverloch,1.0,0.0
12045,10 Beacon Crt Inverloch Inverloch VIC 3996,House,$410,52.0,81.0,65.0,VIC,2.0,1.0,1.0,...,0.0,0.0,0.0,0.0,122.0,32.0,0.0,Inverloch,1.0,0.0
12046,10 Blue Wren Way Inverloch VIC 3996,House,$600,,79.0,73.0,VIC,3.0,2.0,2.0,...,0.0,0.0,0.0,0.0,122.0,32.0,0.0,Inverloch,1.0,0.0


In [155]:
listings_suburbs.to_csv('../data/curated/SCHOOL&PARK_count_with_SA2_with_SCHOOL&PARK_count.csv',index=False)