# Public transport visualisation 

This notebook will generate a visualisation of public transport station count within each suburb.

This will include total number of train stops, bus stop and tram stops in each suburb


In [1]:
import pandas as pd
import geopandas as gpd
import folium

## read SAL geometry region file

In [2]:
# Read SAL gdf
SAL_gdf = gpd.read_file("../data/raw/victoria_region_gdf/SAL_region_gdf.geojson")
# remove null
SAL_gdf = SAL_gdf.dropna()

In [3]:
# Find all SAL regions name and id in victoria 
Full_SAL = SAL_gdf[['SAL_NAME21', 'SAL_CODE21']]
# convert the value type to int
Full_SAL['SAL_CODE21'] = Full_SAL['SAL_CODE21'].astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Full_SAL['SAL_CODE21'] = Full_SAL['SAL_CODE21'].astype(int)


## Import the data for public transport (station) count within each SAL/suburb 

For this project, as our target is rental market within Victoria regions, we believe that regional transportation has negligible effect on the rental market for individual suburb. Hence, here we only consider total public transport count excluding regionals

In [4]:
#read PTV_count_with_SAL
PTV_count = pd.read_csv('../data/curated/PTV_count_with_SAL.csv')

In [5]:
# Select feature of interest 
PTV_count = PTV_count[['SAL_NAME21', 'SAL_CODE21', 'Total_Public_Transport_Count_No_Regional']]

PTV_count['SAL_CODE21'] = PTV_count['SAL_CODE21'].astype(int)


In [6]:
PTV_count

Unnamed: 0,SAL_NAME21,SAL_CODE21,Total_Public_Transport_Count_No_Regional
0,Abbotsford,20002,20.0
1,Albion,20021,20.0
2,Alphington,20034,29.0
3,Altona,20035,67.0
4,Ararat,20053,0.0
...,...,...,...
1137,Yarroweyah,22921,0.0
1138,Yea,22924,0.0
1139,Yendon,22927,0.0
1140,Yinnar,22934,0.0


While some suburb appears to have 0 public transport which seems unreasonable, we believe that nowaday every suburb should be able to have an access to at least 1 public transport. Hence, we will add 1 to every location's public transport count 

## Merge the PTV count with every SAL region, including those missing in ptv data 

In [7]:
# Join by SAL_CODE21
merged_full = Full_SAL.merge(PTV_count, left_on='SAL_CODE21', right_on='SAL_CODE21', how='left')

# fill null value of Total_Public_Transport_Count by 0 
merged_full['Total_Public_Transport_Count_No_Regional'].fillna(0, inplace=True)


# add extra 1 count to every SAL location# Hence we will add 1 count for every SAL location
merged_full['Total_Public_Transport_Count_No_Regional'] = merged_full['Total_Public_Transport_Count_No_Regional'] + 1

# rename column name for SAL_NAME21_x
merged_full.rename(columns={'SAL_NAME21_x': 'SAL_NAME21'}, inplace=True)


# Select desire feature 
merged_full = merged_full[['SAL_NAME21', 'SAL_CODE21', 'Total_Public_Transport_Count_No_Regional']]
# Rename the column for public transport count
merged_full.rename(columns={'Total_Public_Transport_Count_No_Regional': 'Total_Public_Transport_Count'}, inplace=True)

merged_full

Unnamed: 0,SAL_NAME21,SAL_CODE21,Total_Public_Transport_Count
0,Abbeyard,20001,1.0
1,Abbotsford,20002,21.0
2,Aberfeldie,20003,21.0
3,Aberfeldy,20004,1.0
4,Acheron,20005,3.0
...,...,...,...
2939,Yundool,22940,1.0
2940,Yuroke,22941,1.0
2941,Yuulong,22942,1.0
2942,Zeerust,22943,1.0


# Visualise it !

In [8]:
# create a JSON 
geoJSON = SAL_gdf[['SAL_CODE21', 'geometry']].to_json()
# print the first 300 chars of the json
print(geoJSON[:300])

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"SAL_CODE21": "20001"}, "geometry": {"type": "Polygon", "coordinates": [[[146.89823840500003, -37.046023841999954], [146.89946546400006, -37.048662812999964], [146.8993791040001, -37.048770832999935], [146.89915


In [9]:
m = folium.Map(location=[-36.9848, 143.3906], tiles="Stamen Terrain", zoom_start=7)

# refer to the folium documentations on more information on how to plot aggregated data.
c = folium.Choropleth(
    geo_data=geoJSON, # geoJSON 
    name='choropleth', # name of plot
    data=merged_full.reset_index(), # data source
    columns=['SAL_CODE21','Total_Public_Transport_Count'], # the columns required
    key_on='properties.SAL_CODE21', # this is from the geoJSON's properties
    fill_color='YlOrRd', # color scheme
    nan_fill_color='black',
    legend_name='Public Transport Station Count'
)

c.add_to(m)
# I will not display this as it will make this file toooooo big to push up to git 
#m


<folium.features.Choropleth at 0x7fde9e1e13c0>

## Label the Top 5 suburub with most public transport 

In [10]:
sorted_df = merged_full.sort_values(by='Total_Public_Transport_Count', ascending=False)
sorted_df.head(10)

Unnamed: 0,SAL_NAME21,SAL_CODE21,Total_Public_Transport_Count
2160,Reservoir,22161,327.0
2390,Sunbury,22391,229.0
2749,Werribee,22750,228.0
946,Frankston,20947,217.0
223,Berwick,20224,205.0
2120,Preston,22121,204.0
1639,Melbourne,21640,189.0
1202,Hoppers Crossing,21203,177.0
1012,Glen Waverley,21013,174.0
877,Epping,20878,162.0


### Create centroid for each region

In [11]:
# (y, x) since we want (lat, long)
SAL_gdf['centroid'] = SAL_gdf['geometry'].apply(lambda x: (x.centroid.y, x.centroid.x))
SAL_gdf[['SAL_NAME21', 'SAL_CODE21', 'centroid']].head()

Unnamed: 0,SAL_NAME21,SAL_CODE21,centroid
0,Abbeyard,20001,"(-37.01941875010008, 146.7691121313647)"
1,Abbotsford,20002,"(-37.80458484450762, 144.99976768739876)"
2,Aberfeldie,20003,"(-37.75957575055507, 144.89740380057304)"
3,Aberfeldy,20004,"(-37.699788425368155, 146.37843544613273)"
4,Acheron,20005,"(-37.2689324082219, 145.69931893264342)"


In [12]:
# Assuming you have a GeoPandas DataFrame called gdf
values_to_find = ['22161', '22391', '22750', '20947', '20224', '22121', '21640', '21203', '21013', '20878']

# Use boolean indexing to filter rows where column 'x' contains the values from the list
filtered_rows = SAL_gdf[SAL_gdf['SAL_CODE21'].isin(values_to_find)]
filtered_rows

Unnamed: 0,SAL_NAME21,SAL_CODE21,SHAPE_Area,geometry,centroid
223,Berwick,20224,0.003407,"POLYGON ((145.35419 -38.07681, 145.34943 -38.0...","(-38.03991103966851, 145.34856811986464)"
877,Epping,20878,0.003601,"POLYGON ((145.02904 -37.66070, 145.02859 -37.6...","(-37.63893797928744, 145.00908477269124)"
946,Frankston,20947,0.002033,"POLYGON ((145.16317 -38.13236, 145.16351 -38.1...","(-38.14814421784339, 145.14568762990515)"
1012,Glen Waverley,21013,0.001725,"POLYGON ((145.16448 -37.86142, 145.16514 -37.8...","(-37.881591838972255, 145.16818290050003)"
1202,Hoppers Crossing,21203,0.001835,"POLYGON ((144.71212 -37.85117, 144.71243 -37.8...","(-37.86666874663791, 144.69504495097428)"
1639,Melbourne,21640,0.000674,"POLYGON ((144.96885 -37.80728, 144.97094 -37.8...","(-37.824913473838265, 144.97149704743745)"
2120,Preston,22121,0.001166,"POLYGON ((145.00466 -37.73074, 145.00468 -37.7...","(-37.741774902996454, 145.00794974871872)"
2160,Reservoir,22161,0.001944,"POLYGON ((145.02209 -37.71516, 145.02201 -37.7...","(-37.7121549498847, 145.0068399920836)"
2390,Sunbury,22391,0.013481,"POLYGON ((144.71079 -37.48236, 144.71112 -37.4...","(-37.54706639426133, 144.71495357301734)"
2749,Werribee,22750,0.004393,"POLYGON ((144.67539 -37.91386, 144.67262 -37.9...","(-37.90692095289865, 144.64201235607825)"


In [13]:
for SAL_NAME21, coord in filtered_rows[['SAL_NAME21', 'centroid']].values:
    folium.Marker(
        location=coord, 
        tooltip=folium.Tooltip(SAL_NAME21, permanent=True, direction="below")
    ).add_to(m)

# file will be too large to display the map 
#m

In [14]:
# Save to plot 
m.save('../plots/Public_transport_count_SAL.html')
