Install Packages

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import matplotlib.pyplot as plt
import json




In [2]:
data = pd.read_json("dsa4264-lta-geospatial/data/cleaned/BusRoutes.json", encoding="utf-8", lines=True, chunksize=1000)

# Combine chunks into a dataframe
df_list = []
for chunk in data:
    df_list.append(chunk)

# Concatenate all chunks into a single dataframe
bus_routes = pd.concat(df_list, ignore_index=True)

print(bus_routes.head())

  ServiceNo Operator  Direction  StopSequence  BusStopCode  Distance  \
0        10     SBST          1             1        75009       0.0   
1        10     SBST          1             2        76059       0.6   
2        10     SBST          1             3        76069       1.1   
3        10     SBST          1             4        96289       2.3   
4        10     SBST          1             5        96109       2.7   

  WD_FirstBus WD_LastBus SAT_FirstBus SAT_LastBus SUN_FirstBus SUN_LastBus  
0         500       2300         0500        2300         0500        2300  
1         502       2302         0502        2302         0502        2302  
2         504       2304         0504        2304         0503        2304  
3         508       2308         0508        2309         0507        2308  
4         509       2310         0509        2311         0508        2309  


In [3]:
# Load BusStops.geojson using GeoPandas
bus_stops = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/BusStops.geojson')
print(bus_stops.head())


   BUS_STOP_N BUS_ROOF_N         LOC_DESC                   geometry
0       65059        B12     ST ANNE'S CH  POINT (103.90130 1.39303)
1       16171        B06  YUSOF ISHAK HSE  POINT (103.77437 1.29892)
2       61101        NIL          BLK 120  POINT (103.86370 1.33564)
3        1239        B01     SULTAN PLAZA  POINT (103.86165 1.30285)
4       17269        B01          BLK 730  POINT (103.76264 1.30492)


In [4]:
rail_line = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/RailLines.geojson')
print(rail_line.head())

    Name                                        Description  GroundLevel  \
0  kml_1  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
1  kml_2  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
2  kml_3  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
3  kml_4  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
4  kml_5  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   

  RailType                                           geometry  
0      MRT  LINESTRING Z (103.73650 1.35301 0.00000, 103.7...  
1      MRT  LINESTRING Z (103.73708 1.35164 0.00000, 103.7...  
2      MRT  LINESTRING Z (103.71328 1.35252 0.00000, 103.7...  
3      MRT  LINESTRING Z (103.71320 1.35265 0.00000, 103.7...  
4      MRT  LINESTRING Z (103.70767 1.34888 0.00000, 103.7...  


In [66]:
rail_stn = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/RailStationsMerged.geojson')
print(rail_stn.head())


    Name                                        Description StationType  \
0  kml_1  <center><table><tr><th colspan='2' align='cent...         MRT   
1  kml_2  <center><table><tr><th colspan='2' align='cent...         MRT   
2  kml_3  <center><table><tr><th colspan='2' align='cent...         LRT   
3  kml_4  <center><table><tr><th colspan='2' align='cent...         LRT   
4  kml_5  <center><table><tr><th colspan='2' align='cent...         LRT   

  StationName StationCode   StationLine  \
0  Ang Mo Kio        NS16   North-South   
1    Buangkok        NE15    North-East   
2       Bakau         SE3  Sengkang LRT   
3     Riviera         PE4   Punggol LRT   
4    Fernvale         SW5  Sengkang LRT   

                                            geometry  
0  POLYGON Z ((103.84988 1.36925 0.00000, 103.849...  
1  POLYGON Z ((103.89304 1.38166 0.00000, 103.892...  
2  POLYGON Z ((103.90538 1.38786 0.00000, 103.905...  
3  POLYGON Z ((103.91600 1.39444 0.00000, 103.916...  
4  POLYGON Z ((

In [5]:
## Step 2: Merge Bus Stop Coordinates with Bus Routes
# Ensure that BusStopCode is of the same type for both DataFrames (string)
bus_stops['BusStopCode'] = bus_stops['BUS_STOP_N'].astype(str)
bus_routes['BusStopCode'] = bus_routes['BusStopCode'].astype(str)

# Merge bus_routes with bus_stops to get geometry for each bus stop
merged_bus_data = bus_routes.merge(bus_stops[['BusStopCode', 'geometry']], on='BusStopCode', how='left')

# Drop rows without geometry
merged_bus_data = merged_bus_data.dropna(subset=['geometry'])

# Convert to GeoDataFrame
bus_routes_geo = gpd.GeoDataFrame(merged_bus_data, geometry='geometry')

In [6]:
### Set Coordinate Reference System (CRS)
bus_routes_geo.set_crs(epsg=4326, inplace=True)

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,geometry
0,10,SBST,1,1,75009,0.0,500,2300,0500,2300,0500,2300,POINT (103.94364 1.35480)
1,10,SBST,1,2,76059,0.6,502,2302,0502,2302,0502,2302,POINT (103.94164 1.35303)
2,10,SBST,1,3,76069,1.1,504,2304,0504,2304,0503,2304,POINT (103.94210 1.34861)
3,10,SBST,1,4,96289,2.3,508,2308,0508,2309,0507,2308,POINT (103.94836 1.34007)
4,10,SBST,1,5,96109,2.7,509,2310,0509,2311,0508,2309,POINT (103.95069 1.33732)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25589,9B,SBST,1,25,95091,9.5,741,817,-,-,-,-,POINT (103.99968 1.38721)
25590,9B,SBST,1,26,95131,9.7,742,818,-,-,-,-,POINT (103.99977 1.38517)
25591,9B,SBST,1,27,95141,10.2,744,820,-,-,-,-,POINT (103.99816 1.38135)
25592,9B,SBST,1,28,95061,10.6,745,821,-,-,-,-,POINT (103.99649 1.37743)


In [8]:
## Step 3: Create LineStrings for Entire Bus Routes
# Group bus stops by ServiceNo and Direction and create LineStrings for each bus route
bus_routes_lines = (
    bus_routes_geo.groupby(['ServiceNo', 'Direction'])
    .apply(lambda x: LineString(x.sort_values('StopSequence')['geometry'].tolist()))
    .reset_index()
    .rename(columns={0: 'geometry'})
)
# Convert to GeoDataFrame
bus_routes_lines = gpd.GeoDataFrame(bus_routes_lines, geometry='geometry')

In [10]:
## Step 4: Buffer MRT Lines to Define Areas of Influence
rail_line_buffer = rail_line.buffer(800)


  


In [11]:
## Step 5: Identify Bus Routes that Overlap with MRT Lines
# Find bus routes that intersect with the MRT buffer
overlapping_bus_routes = bus_routes_lines[bus_routes_lines.intersects(rail_line_buffer.unary_union)]

### Calculate Overlap Length
overlapping_bus_routes['overlap_length'] = overlapping_bus_routes.intersection(rail_line_buffer.unary_union).length

In [12]:
## Step 6: Output Results
output_df = overlapping_bus_routes[['ServiceNo', 'Direction', 'overlap_length']]

# Display the output
print(output_df)

    ServiceNo  Direction  overlap_length
0          10          1        0.265364
1          10          2        0.265041
2         100          1        0.196837
3         100          2        0.190908
4        100A          1        0.039348
..        ...        ...             ...
717       992          1        0.016189
718       992          2        0.020850
719       993          1        0.066255
720        9A          1        0.062940
721        9B          1        0.094024

[722 rows x 3 columns]
