# Identifying potential bus routes - combined analysis with geospatial and ridership data
We will combine both the geosptial and ridership analysis to draw our recommendation considersation

In [32]:
import pandas as pd
import numpy as np

In [33]:
ridership_df = pd.read_csv('data/analysis/ridership_analysis_final.csv')
geospatial_analysis_df = pd.read_csv('data/analysis/BusMRTOverlap.csv')
bus_category_df = pd.read_csv('data/cleaned/bus_route_trips.csv')

In [34]:
# Drop any rows with NaN in 'ServiceNo' if these were added accidentally as data rows
ridership = ridership_df.dropna().reset_index(drop=True)
ridership

geospatial = geospatial_analysis_df.dropna().reset_index(drop=True)
geospatial

Unnamed: 0,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
0,Sengkang LRT,374,6668.132696,6223.689276,42.857143,85.714286,6.0,65.3,64.488571
1,Bukit Panjang LRT,991B,1481.883585,1481.883585,33.333333,100.000000,2.0,33.5,60.033333
2,Bukit Panjang LRT,973A,1249.539533,1249.539533,16.666667,100.000000,2.0,30.7,52.806667
3,North-South,302A,1929.302493,1929.302493,4.761905,100.000000,2.0,40.4,49.984762
4,Bukit Panjang LRT,976,7464.432230,7163.349917,50.000000,37.500000,3.0,59.1,46.820000
...,...,...,...,...,...,...,...,...,...
448,East-West,40,20452.545407,7176.035422,5.660377,4.761905,1.0,19.4,8.048913
449,East-West,98,22507.716192,7141.653055,5.660377,4.347826,1.0,19.2,7.843281
450,East-West,247,18530.465021,5808.141066,3.773585,5.263158,1.0,20.2,7.654697
451,East-West,182M,27034.860320,7337.964234,3.773585,3.571429,1.0,20.5,7.038005


For this analysis we will only consider Trunk services, and be excluding all other types of Bus Routes

In [35]:
# Get list of trunk services
bus_category_trunk = bus_category_df[bus_category_df["Category"] == "TRUNK"]

bus_category_trunk  =  bus_category_trunk[['ServiceNo', "Category"]] 

bus_category_trunk




Unnamed: 0,ServiceNo,Category
0,118,TRUNK
1,118,TRUNK
2,118,TRUNK
3,118,TRUNK
4,118,TRUNK
...,...,...
713499,992,TRUNK
713500,992,TRUNK
713501,992,TRUNK
713502,992,TRUNK


In [36]:
# Filter all trunk services for ridership
ridership = ridership[ridership['ServiceNo'].isin(bus_category_trunk["ServiceNo"])]
ridership.reset_index(drop=True)
ridership


Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed
1,7A,97.142857,94.285714
3,196A,97.058824,85.294118
4,3A,96.428571,85.714286
5,73T,96.428571,82.142857
6,966A,96.296296,92.592593
...,...,...,...
536,79,5.263158,1.754386
540,254,4.761905,4.761905
543,983,3.333333,3.333333
544,975,3.260870,1.086957


In [37]:
geospatial = geospatial[geospatial["Bus_ServiceNo"].isin(bus_category_trunk["ServiceNo"])]
geospatial = geospatial.reset_index(drop=True)

In [38]:
geospatial.head()

Unnamed: 0,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
0,Bukit Panjang LRT,991B,1481.883585,1481.883585,33.333333,100.0,2.0,33.5,60.033333
1,Bukit Panjang LRT,973A,1249.539533,1249.539533,16.666667,100.0,2.0,30.7,52.806667
2,Bukit Panjang LRT,976,7464.43223,7163.349917,50.0,37.5,3.0,59.1,46.82
3,Bukit Panjang LRT,974A,17749.950298,6847.603315,33.333333,38.888889,7.0,62.5,41.388889
4,Punggol LRT,83T,5486.421431,3593.028154,33.333333,33.333333,2.0,56.8,38.026667


## 1. Limit the scope from geospatial analysis to existing lines
We will consider only MRT & exisitng lines in service. This means we will drop
- Jurong Regional Line (~2027)
- Cross Island Line (~2032)  
- LRT

Within this scope, we identify the top 20 buses identified that have a high degree of overlap with new MRT Lines, based on a weighted scoring system.  
A high `Weighted_Average_Score` refers to a bus route having many stops that are close to an MRT line, and have a general route trajectory similar to the given MRT line

In [81]:
identified_buses_df = geospatial_analysis_df[~geospatial_analysis_df['MRT_Line'].str.contains("LRT") \
    & ((~geospatial_analysis_df['MRT_Line'].str.contains("Jurong Region")) \
       & (~geospatial_analysis_df['MRT_Line'].str.contains("Cross Island"))
) 
    & (geospatial_analysis_df['Max_Consecutive_Segments'] > 4)


] 
identified_buses_df

Unnamed: 0,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
38,Circle,10,29465.954967,14530.777472,24.324324,23.333333,7.0,55.7,30.203063
43,Circle,106,19535.987287,8031.290719,16.216216,30.0,6.0,47.6,28.006486
48,Downtown,67,30761.463662,19834.301859,17.5,19.354839,6.0,63.7,27.481935
50,Circle,57,19299.120039,8342.500309,16.216216,25.0,5.0,54.3,27.346486
56,Circle,30,32624.14942,13433.418209,21.621622,18.181818,6.0,51.3,26.181376
64,Downtown,852,21916.782849,6490.149538,7.5,31.818182,7.0,46.2,24.967273
72,Circle,143,25253.551959,8303.883876,13.513514,19.230769,5.0,54.5,23.997713
79,North-South,167,27389.091885,9231.35341,11.904762,17.857143,5.0,56.7,23.244762
82,Circle,502A,24555.671286,6043.642827,13.513514,20.0,5.0,47.3,22.865405


In [82]:
identified_bus_routes = identified_buses_df['Bus_ServiceNo'].unique()
identified_bus_routes

array(['10', '106', '67', '57', '30', '852', '143', '167', '502A'],
      dtype=object)

## 2. Further narrow down the list using ridership data
We want to further supplement our choices with ridership data - for the routes identified via geospatial analysis, we re-rank them based on increasing ridership.  

In our ridership analysis, for each bus route, we counted the number of stops that have low passenger volume for the majority of the day (more details in ridership analysis).  
This allows us to infer which bus routes have low ridership.  
A high `%_Exceed` means that the bus route has a high number of stops that experience low passenger volume in majority (threshold=6hrs) of the day. 

After re-ranking, we take the top 10 bus routes with low ridership.

In [83]:
# Filter ridership data to only include identified bus routes
filtered_ridership_df = ridership[ridership['ServiceNo'].isin(identified_bus_routes)]

filtered_ridership_df


Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed
204,167,39.43662,14.084507
233,852,32.653061,14.285714
255,67,30.0,10.0
279,106,27.659574,10.638298
378,30,19.230769,3.846154
399,57,18.0,4.0
426,143,16.216216,8.108108
456,10,13.513514,2.702703


Note: It turns out that most of the bus routes identified by geospatial anlaysis happen to only service 1 direction.

In [84]:
merged_df = filtered_ridership_df.merge(identified_buses_df, left_on='ServiceNo', right_on='Bus_ServiceNo')
merged_df

Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
0,167,39.43662,14.084507,North-South,167,27389.091885,9231.35341,11.904762,17.857143,5.0,56.7,23.244762
1,852,32.653061,14.285714,Downtown,852,21916.782849,6490.149538,7.5,31.818182,7.0,46.2,24.967273
2,67,30.0,10.0,Downtown,67,30761.463662,19834.301859,17.5,19.354839,6.0,63.7,27.481935
3,106,27.659574,10.638298,Circle,106,19535.987287,8031.290719,16.216216,30.0,6.0,47.6,28.006486
4,30,19.230769,3.846154,Circle,30,32624.14942,13433.418209,21.621622,18.181818,6.0,51.3,26.181376
5,57,18.0,4.0,Circle,57,19299.120039,8342.500309,16.216216,25.0,5.0,54.3,27.346486
6,143,16.216216,8.108108,Circle,143,25253.551959,8303.883876,13.513514,19.230769,5.0,54.5,23.997713
7,10,13.513514,2.702703,Circle,10,29465.954967,14530.777472,24.324324,23.333333,7.0,55.7,30.203063


In [85]:
merged_df = merged_df.drop(columns=['Bus_ServiceNo'])

# Reorder columns
merged_df = merged_df[['ServiceNo',  'MRT_Line', 'Weighted_Average_Score','Max_Consecutive_Segments','Consecutive_Coverage_Percentage','Bus_Route_Length_m', 'Overlap_Length_m', 'Coverage_Percentage',	'Weighted_Average_Angle','Weekday_Percentage_Exceed','Weekend_Percentage_Exceed'	  ]]
merged_df

Unnamed: 0,ServiceNo,MRT_Line,Weighted_Average_Score,Max_Consecutive_Segments,Consecutive_Coverage_Percentage,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Weighted_Average_Angle,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed
0,167,North-South,23.244762,5.0,17.857143,27389.091885,9231.35341,11.904762,56.7,39.43662,14.084507
1,852,Downtown,24.967273,7.0,31.818182,21916.782849,6490.149538,7.5,46.2,32.653061,14.285714
2,67,Downtown,27.481935,6.0,19.354839,30761.463662,19834.301859,17.5,63.7,30.0,10.0
3,106,Circle,28.006486,6.0,30.0,19535.987287,8031.290719,16.216216,47.6,27.659574,10.638298
4,30,Circle,26.181376,6.0,18.181818,32624.14942,13433.418209,21.621622,51.3,19.230769,3.846154
5,57,Circle,27.346486,5.0,25.0,19299.120039,8342.500309,16.216216,54.3,18.0,4.0
6,143,Circle,23.997713,5.0,19.230769,25253.551959,8303.883876,13.513514,54.5,16.216216,8.108108
7,10,Circle,30.203063,7.0,23.333333,29465.954967,14530.777472,24.324324,55.7,13.513514,2.702703


## Updated Weighted scoring.

We will examine the following metrics:

Max_Consecutive_Segments: Indicates the number of consecutive stations on the route, aiding in the assessment of connectivity and coverage.
Weighted_Average_Score: Based on parallel routes, this score provides insights into overlapping services and potential redundancies.
Weekday_Percentage_Exceed: Reflects the percentage of ridership exceeding typical weekday averages, particularly impactful in high-traffic city areas.
Weekend_Percentage_Exceed: Measures the percentage of ridership exceeding typical weekend averages, which helps identify demand on non-work days.
By adjusting the weights of these metrics, we will attempt to identify the best routes to recommend. This evaluation will be conducted in two ways:

Ensuring that Route 167 ranks within the top 3.
Conducting a manual inspection with final recommendations left to group discretion.

In [86]:
# Define weights for each column
weights = {
    'Max_Consecutive_Segments': 0.25,
    'Weighted_Average_Score': 0.50,
    'Weekday_Percentage_Exceed': 0.15,
    'Weekend_Percentage_Exceed': 0.10
}

# Calculate the weighted score for each row in merged_df
merged_df['Weighted_Total_Score'] = (
    merged_df['Max_Consecutive_Segments'] * weights['Max_Consecutive_Segments'] +
    merged_df['Weighted_Average_Score'] * weights['Weighted_Average_Score'] +
    merged_df['Weekday_Percentage_Exceed'] * weights['Weekday_Percentage_Exceed'] +
    merged_df['Weekend_Percentage_Exceed'] * weights['Weekend_Percentage_Exceed'] 
)

# Sort by the new weighted total score in descending order
merged_df_sorted = merged_df.sort_values(by='Weighted_Total_Score', ascending=False).reset_index(drop=True)
merged_df_sorted

Unnamed: 0,ServiceNo,MRT_Line,Weighted_Average_Score,Max_Consecutive_Segments,Consecutive_Coverage_Percentage,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Weighted_Average_Angle,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed,Weighted_Total_Score
0,67,Downtown,27.481935,6.0,19.354839,30761.463662,19834.301859,17.5,63.7,30.0,10.0,20.740968
1,106,Circle,28.006486,6.0,30.0,19535.987287,8031.290719,16.216216,47.6,27.659574,10.638298,20.716009
2,852,Downtown,24.967273,7.0,31.818182,21916.782849,6490.149538,7.5,46.2,32.653061,14.285714,20.560167
3,167,North-South,23.244762,5.0,17.857143,27389.091885,9231.35341,11.904762,56.7,39.43662,14.084507,20.196325
4,10,Circle,30.203063,7.0,23.333333,29465.954967,14530.777472,24.324324,55.7,13.513514,2.702703,19.148829
5,57,Circle,27.346486,5.0,25.0,19299.120039,8342.500309,16.216216,54.3,18.0,4.0,18.023243
6,30,Circle,26.181376,6.0,18.181818,32624.14942,13433.418209,21.621622,51.3,19.230769,3.846154,17.859919
7,143,Circle,23.997713,5.0,19.230769,25253.551959,8303.883876,13.513514,54.5,16.216216,8.108108,16.4921


In [45]:
# Lets look at JRL 
identified_buses_JRL_df = geospatial_analysis_df[~geospatial_analysis_df['MRT_Line'].str.contains("LRT") \
    & ((geospatial_analysis_df['MRT_Line'].str.contains("Jurong Region")) \
) & (~geospatial_analysis_df['Bus_ServiceNo'].str.contains('[A-Za-z]', regex=True))


] \
    [:20] # we are interested in the top 20 lines
identified_buses_JRL_df


Unnamed: 0,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
60,Jurong Region,181,5074.947432,4803.392522,10.526316,33.333333,2.0,38.1,25.16386
62,Jurong Region,333,6560.938008,5162.935426,10.526316,28.571429,2.0,47.5,25.139098
69,Jurong Region,179,7984.510855,3322.358191,15.789474,25.0,2.0,41.8,24.675789
74,Jurong Region,199,9731.601788,6858.183172,21.052632,20.0,2.0,36.2,23.661053
125,Jurong Region,242,4141.560873,3630.31131,10.526316,20.0,1.0,38.5,19.910526
133,Jurong Region,172,12722.528338,3466.796182,21.052632,7.692308,1.0,39.1,19.317976
140,Jurong Region,79,20226.728876,7891.107157,15.789474,9.52381,2.0,44.7,19.065313
172,Jurong Region,993,7336.205499,4309.641613,5.263158,12.5,1.0,53.0,17.705263
173,Jurong Region,249,14592.396079,7071.070003,15.789474,6.666667,1.0,43.6,17.702456
185,Jurong Region,194,6081.198809,4541.941224,5.263158,14.285714,1.0,47.9,17.399549


# 3. Analysis of potential future lines to remove.

We are looking at Jurong Regional Line based on our algorithm to identify future lines to remove. 

In [46]:
# Identify JRL routes
identified_JRL_bus_routes = identified_buses_JRL_df['Bus_ServiceNo'].unique()
identified_JRL_bus_routes

array(['181', '333', '179', '199', '242', '172', '79', '993', '249',
       '194', '258', '78', '405', '301', '300'], dtype=object)

In [47]:
# Filter ridership data to only include identified bus routes
filtered_ridership_JRL_df = ridership[ridership['ServiceNo'].isin(identified_JRL_bus_routes)]

filtered_ridership_JRL_df

Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed
302,405,25.714286,17.142857
367,993,20.0,8.0
386,199,18.518519,7.407407
495,258,9.756098,4.878049
511,179,8.333333,4.166667
524,78,6.896552,3.448276
529,172,6.060606,3.030303
531,181,5.555556,5.555556
532,194,5.555556,5.555556
536,79,5.263158,1.754386


In [48]:
merged_JRL_df = filtered_ridership_JRL_df.merge(identified_buses_JRL_df, left_on='ServiceNo', right_on='Bus_ServiceNo')
merged_JRL_df

Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score
0,405,25.714286,17.142857,Jurong Region,405,13325.297933,2899.201737,10.526316,7.142857,1.0,38.5,14.767669
1,993,20.0,8.0,Jurong Region,993,7336.205499,4309.641613,5.263158,12.5,1.0,53.0,17.705263
2,199,18.518519,7.407407,Jurong Region,199,9731.601788,6858.183172,21.052632,20.0,2.0,36.2,23.661053
3,258,9.756098,4.878049,Jurong Region,258,14324.916072,4668.433243,5.263158,13.333333,2.0,44.0,16.238596
4,179,8.333333,4.166667,Jurong Region,179,7984.510855,3322.358191,15.789474,25.0,2.0,41.8,24.675789
5,78,6.896552,3.448276,Jurong Region,78,21310.996564,4726.359028,5.263158,9.090909,2.0,47.8,15.301627
6,172,6.060606,3.030303,Jurong Region,172,12722.528338,3466.796182,21.052632,7.692308,1.0,39.1,19.317976
7,181,5.555556,5.555556,Jurong Region,181,5074.947432,4803.392522,10.526316,33.333333,2.0,38.1,25.16386
8,194,5.555556,5.555556,Jurong Region,194,6081.198809,4541.941224,5.263158,14.285714,1.0,47.9,17.399549
9,79,5.263158,1.754386,Jurong Region,79,20226.728876,7891.107157,15.789474,9.52381,2.0,44.7,19.065313


In [49]:
# Define weights for each column
weights = {
    'Max_Consecutive_Segments': 0.25,
    'Weighted_Average_Score': 0.50,
    'Weekday_Percentage_Exceed': 0.15,
    'Weekend_Percentage_Exceed': 0.10
}

# Calculate the weighted score for each row in merged_df
merged_JRL_df['Weighted_Total_Score'] = (
    merged_JRL_df['Max_Consecutive_Segments'] * weights['Max_Consecutive_Segments'] +
    merged_JRL_df['Weighted_Average_Score'] * weights['Weighted_Average_Score'] +
    merged_JRL_df['Weekday_Percentage_Exceed'] * weights['Weekday_Percentage_Exceed'] +
    merged_JRL_df['Weekend_Percentage_Exceed'] * weights['Weekend_Percentage_Exceed'] 
)

# Sort by the new weighted total score in descending order
merged_JRL_df_sorted = merged_JRL_df.sort_values(by='Weighted_Total_Score', ascending=False).reset_index(drop=True)
merged_JRL_df_sorted

Unnamed: 0,ServiceNo,Weekday_Percentage_Exceed,Weekend_Percentage_Exceed,MRT_Line,Bus_ServiceNo,Bus_Route_Length_m,Overlap_Length_m,Coverage_Percentage,Consecutive_Coverage_Percentage,Max_Consecutive_Segments,Weighted_Average_Angle,Weighted_Average_Score,Weighted_Total_Score
0,199,18.518519,7.407407,Jurong Region,199,9731.601788,6858.183172,21.052632,20.0,2.0,36.2,23.661053,15.849045
1,179,8.333333,4.166667,Jurong Region,179,7984.510855,3322.358191,15.789474,25.0,2.0,41.8,24.675789,14.504561
2,181,5.555556,5.555556,Jurong Region,181,5074.947432,4803.392522,10.526316,33.333333,2.0,38.1,25.16386,14.470819
3,405,25.714286,17.142857,Jurong Region,405,13325.297933,2899.201737,10.526316,7.142857,1.0,38.5,14.767669,13.205263
4,993,20.0,8.0,Jurong Region,993,7336.205499,4309.641613,5.263158,12.5,1.0,53.0,17.705263,12.902632
5,172,6.060606,3.030303,Jurong Region,172,12722.528338,3466.796182,21.052632,7.692308,1.0,39.1,19.317976,11.121109
6,79,5.263158,1.754386,Jurong Region,79,20226.728876,7891.107157,15.789474,9.52381,2.0,44.7,19.065313,10.997569
7,258,9.756098,4.878049,Jurong Region,258,14324.916072,4668.433243,5.263158,13.333333,2.0,44.0,16.238596,10.570518
8,194,5.555556,5.555556,Jurong Region,194,6081.198809,4541.941224,5.263158,14.285714,1.0,47.9,17.399549,10.338663
9,78,6.896552,3.448276,Jurong Region,78,21310.996564,4726.359028,5.263158,9.090909,2.0,47.8,15.301627,9.530124


## 4. Plot the bus routes and stations for visual inspection
For making choosing the routes for final recommendations, we shall plot these routes along with the relevant MRT lines for visual inspection and a sanity check.  

From the previous table, it looks like bus 98A might be a good candidate - high weighted avg score, and a high number of stops with low ridership. It has also high consecutive coverage by MRT stations.

In [50]:
import geopandas as gpd

bus_stops_df = gpd.read_file('data/cleaned/BusStops.geojson')
bus_routes_df = pd.read_json('data/cleaned/BusRoutes.json', lines=True)
rail_stations_df = gpd.read_file('data/cleaned/RailStationsMerged.geojson')

In [51]:
bus_stops_df.head()

Unnamed: 0,BUS_STOP_N,BUS_ROOF_N,LOC_DESC,geometry
0,65059,B12,ST ANNE'S CH,POINT (103.9013 1.39303)
1,16171,B06,YUSOF ISHAK HSE,POINT (103.77437 1.29892)
2,61101,NIL,BLK 120,POINT (103.8637 1.33564)
3,1239,B01,SULTAN PLAZA,POINT (103.86165 1.30285)
4,17269,B01,BLK 730,POINT (103.76264 1.30492)


In [52]:
bus_routes_df.head()

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309


In [53]:
rail_stations_df.head()

Unnamed: 0,Name,Description,StationType,StationName,StationCode,StationLine,geometry
0,kml_105,<center><table><tr><th colspan='2' align='cent...,MRT,Admiralty,NS10,North-South,"POLYGON Z ((103.80013 1.44004 0, 103.80003 1.4..."
1,kml_154,<center><table><tr><th colspan='2' align='cent...,MRT,Aljunied,EW9,East-West,"POLYGON Z ((103.88373 1.31643 0, 103.88374 1.3..."
2,kml_1,<center><table><tr><th colspan='2' align='cent...,MRT,Ang Mo Kio,NS16,North-South,"POLYGON Z ((103.84988 1.36925 0, 103.84976 1.3..."
3,kml_163,<center><table><tr><th colspan='2' align='cent...,MRT,Aviation Park,CR2,Cross Island,"POLYGON Z ((104.0021 1.37095 0, 104.00237 1.37..."
4,kml_243,<center><table><tr><th colspan='2' align='cent...,MRT,Bahar Junction,JS7,Jurong Region,"POLYGON Z ((103.70444 1.34699 0, 103.70445 1.3..."


In [54]:
rail_stations_df.StationLine.unique()

array(['North-South', 'East-West', 'Cross Island', 'Jurong Region',
       'Sengkang LRT', 'Bukit Panjang LRT', 'Circle', 'Downtown',
       'Thomson East Coast', 'North-East', 'Punggol LRT'], dtype=object)

In [55]:
def get_line_color(stn_code):
    if pd.isna(stn_code):  # Handle NaN cases
        return 'gray'  # Default color for missing station code
    if stn_code.startswith('NS'):
        return 'lightred'
    elif stn_code.startswith('EW') or stn_code.startswith('CG'):
        return 'green'
    elif stn_code.startswith('NE'):
        return 'purple'
    elif stn_code.startswith('CC'):
        return 'orange'
    elif stn_code.startswith('DT'):
        return 'blue'
    elif stn_code.startswith('TE'):
        return 'darkred'
    elif stn_code.startswith('J'):
        return 'lightgreen'
    else:
        return 'gray' 

In [56]:
import folium
from matplotlib import cm
from matplotlib.colors import to_hex

PLOT_LOCATION = [1.3521, 103.8198]

# Define a list of named colors to cycle through for each bus route
bus_route_colors = [
    'blue', 'green', 'purple' , 'red', 'darkblue', 'darkgreen',
    'darkpurple', 'lightblue', 'lightgreen', 'lightred', 'pink', 'cadetblue', 
     'lightgray', 'black'
]

def plot_bus_routes_and_mrt_lines(bus_routes: list[str], mrt_lines: list[str], exclude_routes: list[str] = []):
    # Filter out excluded routes
    bus_routes = [route for route in bus_routes if route not in exclude_routes]

    # Set up the map centered on Singapore
    m = folium.Map(location=[1.3521, 103.8198], zoom_start=12)
    
    # Assign unique colors to each bus route from the predefined list, cycling if there are more routes than colors
    route_colors = {route: bus_route_colors[i % len(bus_route_colors)] for i, route in enumerate(bus_routes)}

    # Plot each bus route with unique colors
    for bus_route in bus_routes:
        chosen_bus_stops = bus_routes_df[bus_routes_df['ServiceNo'] == bus_route]['BusStopCode']
        chosen_bus_stops_geo = bus_stops_df[bus_stops_df['BUS_STOP_N'].isin(chosen_bus_stops)]
        
        print(f"Number of bus stops for route {bus_route}: {len(chosen_bus_stops_geo)}")
        route_color = route_colors[bus_route]

        # Plot bus stops for each route with different colors
        for _, stop in chosen_bus_stops_geo.iterrows():
            folium.Marker(
                [stop['geometry'].y, stop['geometry'].x], 
                tooltip=f"Bus Stop: {stop['LOC_DESC']} (Route: {bus_route})", 
                icon=folium.Icon(color=route_color)
            ).add_to(m)

    # Plot MRT stations for the specified MRT lines
    mrt_stations_geo = rail_stations_df[rail_stations_df['StationLine'].isin(mrt_lines)]
    
    print(f"Number of MRT stations: {len(mrt_stations_geo)}")
    for _, station in mrt_stations_geo.iterrows():
        tooltip = f"Station: {station['StationCode']} {station['StationName']}"
        
        if station['geometry'] is None:
            print(f"Station {station['StationCode']} has no geometry")
            continue
        elif station['geometry'].geom_type == 'Point':
            loc = [station['geometry'].y, station['geometry'].x]
        else:
            loc = station['geometry'].centroid

        # Use predefined colors for MRT lines based on station codes
        station_color = get_line_color(station['StationCode'])
        folium.Marker(
            [loc.y, loc.x], 
            tooltip=tooltip, 
            icon=folium.Icon(color=station_color)
        ).add_to(m)

    return m

### Bus 98A
This one is v sus

In [57]:
plot_bus_routes_and_mrt_lines([ "106", "10", "57", "30", "143"], ["Circle"])

Number of bus stops for route 106: 96
Number of bus stops for route 10: 146
Number of bus stops for route 57: 96
Number of bus stops for route 30: 150
Number of bus stops for route 143: 148
Number of MRT stations: 48
Station CC23 has no geometry


In [58]:
plot_bus_routes_and_mrt_lines(['179', "181",'199'], ["Jurong Region"])

Number of bus stops for route 179: 24
Number of bus stops for route 181: 17
Number of bus stops for route 199: 26
Number of MRT stations: 21
