Install Packages

In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString
import matplotlib.pyplot as plt
import json
import io
import subprocess




In [2]:
# Get file from the branch general-exploration
BusRoutes = subprocess.run(
    ["git", "show", "general-exploration:data/cleaned/BusRoutes.json"],
    capture_output=True,
    text=True,
    check=True
)

data = pd.read_json(io.StringIO(BusRoutes.stdout), encoding="utf-8", lines=True, chunksize=1000)
# data = pd.read_json("dsa4264-lta-geospatial/data/cleaned/BusRoutes.json", encoding="utf-8", lines=True, chunksize=1000)

# Combine chunks into a dataframe
df_list = []
for chunk in data:
    df_list.append(chunk)

# Concatenate all chunks into a single dataframe
bus_routes = pd.concat(df_list, ignore_index=True)

print(bus_routes.head())

  ServiceNo Operator  Direction  StopSequence  BusStopCode  Distance  \
0        10     SBST          1             1        75009       0.0   
1        10     SBST          1             2        76059       0.6   
2        10     SBST          1             3        76069       1.1   
3        10     SBST          1             4        96289       2.3   
4        10     SBST          1             5        96109       2.7   

  WD_FirstBus WD_LastBus SAT_FirstBus SAT_LastBus SUN_FirstBus SUN_LastBus  
0         500       2300         0500        2300         0500        2300  
1         502       2302         0502        2302         0502        2302  
2         504       2304         0504        2304         0503        2304  
3         508       2308         0508        2309         0507        2308  
4         509       2310         0509        2311         0508        2309  


In [3]:
# Get file from the branch general-exploration
BusStops = subprocess.run(
    ["git", "show", "general-exploration:data/cleaned/BusStops.geojson"],
    capture_output=True,
    text=True,
    check=True
)

bus_stops = gpd.read_file(io.StringIO(BusStops.stdout)).to_crs(3857)

# Load BusStops.geojson using GeoPandas
# bus_stops = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/BusStops.geojson')
print(bus_stops.head())


   BUS_STOP_N BUS_ROOF_N         LOC_DESC                         geometry
0       65059        B12     ST ANNE'S CH  POINT (11566240.032 155086.389)
1       16171        B06  YUSOF ISHAK HSE  POINT (11552110.389 144607.564)
2       61101        NIL          BLK 120   POINT (11562054.12 148695.893)
3        1239        B01     SULTAN PLAZA  POINT (11561825.506 145045.114)
4       17269        B01          BLK 730  POINT (11550804.326 145275.915)


In [4]:
# Get file from the branch general-exploration
RailLine = subprocess.run(
    ["git", "show", "general-exploration:data/cleaned/RailLines.geojson"],
    capture_output=True,
    text=True,
    check=True
)

rail_line = gpd.read_file(io.StringIO(RailLine.stdout))
rail_line = rail_line.to_crs(3857)
# rail_line = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/RailLines.geojson')
print(rail_line.head())

    Name                                        Description  GroundLevel  \
0  kml_1  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
1  kml_2  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
2  kml_3  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
3  kml_4  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   
4  kml_5  <center><table><tr><th colspan='2' align='cent...  ABOVEGROUND   

  RailType                                           geometry  
0      MRT  LINESTRING Z (11547894.417 150630.303 0, 11547...  
1      MRT  LINESTRING Z (11547959.257 150478.363 0, 11548...  
2      MRT  LINESTRING Z (11545309.779 150576.259 0, 11545...  
3      MRT  LINESTRING Z (11545300.817 150589.927 0, 11545...  
4      MRT  LINESTRING Z (11544684.799 150170.778 0, 11544...  


In [5]:
# Get file from the branch general-exploration
RailStation = subprocess.run(
    ["git", "show", "general-exploration:data/cleaned/RailStationsMerged.geojson"],
    capture_output=True,
    text=True,
    check=True
)

rail_stn = gpd.read_file(io.StringIO(RailStation.stdout)).to_crs(3857)
# rail_stn = gpd.read_file('dsa4264-lta-geospatial/data/cleaned/RailStationsMerged.geojson')
print(rail_stn.head())


    Name                                        Description StationType  \
0  kml_1  <center><table><tr><th colspan='2' align='cent...         MRT   
1  kml_2  <center><table><tr><th colspan='2' align='cent...         MRT   
2  kml_3  <center><table><tr><th colspan='2' align='cent...         LRT   
3  kml_4  <center><table><tr><th colspan='2' align='cent...         LRT   
4  kml_5  <center><table><tr><th colspan='2' align='cent...         LRT   

  StationName StationCode   StationLine  \
0  Ang Mo Kio        NS16   North-South   
1    Buangkok        NE15    North-East   
2       Bakau         SE3  Sengkang LRT   
3     Riviera         PE4   Punggol LRT   
4    Fernvale         SW5  Sengkang LRT   

                                            geometry  
0  POLYGON Z ((11560516.161 152438.322 0, 1156050...  
1  POLYGON Z ((11565320.289 153820.46 0, 11565296...  
2  POLYGON Z ((11566694.327 154511.376 0, 1156668...  
3  POLYGON Z ((11567875.886 155243.313 0, 1156791...  
4  POLYGON Z ((

In [6]:
## Step 2: Merge Bus Stop Coordinates with Bus Routes
# Ensure that BusStopCode is of the same type for both DataFrames (string)
bus_stops['BusStopCode'] = bus_stops['BUS_STOP_N'].astype(str)
bus_routes['BusStopCode'] = bus_routes['BusStopCode'].astype(str)

# Merge bus_routes with bus_stops to get geometry for each bus stop
merged_bus_data = bus_routes.merge(bus_stops[['BusStopCode', 'geometry']], on='BusStopCode', how='left')

# Drop rows without geometry
merged_bus_data = merged_bus_data.dropna(subset=['geometry'])

# Convert to GeoDataFrame
bus_routes_geo = gpd.GeoDataFrame(merged_bus_data, geometry='geometry')
bus_routes_geo.head()

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,geometry
0,10,SBST,1,1,75009,0.0,500,2300,500,2300,500,2300,POINT (11570953.542 150829.913)
1,10,SBST,1,2,76059,0.6,502,2302,502,2302,502,2302,POINT (11570730.671 150633.094)
2,10,SBST,1,3,76069,1.1,504,2304,504,2304,503,2304,POINT (11570781.667 150140.182)
3,10,SBST,1,4,96289,2.3,508,2308,508,2309,507,2308,POINT (11571478.676 149189.976)
4,10,SBST,1,5,96109,2.7,509,2310,509,2311,508,2309,POINT (11571737.431 148883.559)


In [7]:
### Set Coordinate Reference System (CRS)
bus_routes_geo.to_crs(3857)
# bus_routes_geo.set_crs(epsg=4326, inplace=True)

Unnamed: 0,ServiceNo,Operator,Direction,StopSequence,BusStopCode,Distance,WD_FirstBus,WD_LastBus,SAT_FirstBus,SAT_LastBus,SUN_FirstBus,SUN_LastBus,geometry
0,10,SBST,1,1,75009,0.0,500,2300,0500,2300,0500,2300,POINT (11570953.542 150829.913)
1,10,SBST,1,2,76059,0.6,502,2302,0502,2302,0502,2302,POINT (11570730.671 150633.094)
2,10,SBST,1,3,76069,1.1,504,2304,0504,2304,0503,2304,POINT (11570781.667 150140.182)
3,10,SBST,1,4,96289,2.3,508,2308,0508,2309,0507,2308,POINT (11571478.676 149189.976)
4,10,SBST,1,5,96109,2.7,509,2310,0509,2311,0508,2309,POINT (11571737.431 148883.559)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25589,9B,SBST,1,25,95091,9.5,741,817,-,-,-,-,POINT (11577191.56 154438.931)
25590,9B,SBST,1,26,95131,9.7,742,818,-,-,-,-,POINT (11577201.068 154211.148)
25591,9B,SBST,1,27,95141,10.2,744,820,-,-,-,-,POINT (11577021.87 153786.577)
25592,9B,SBST,1,28,95061,10.6,745,821,-,-,-,-,POINT (11576836.446 153349.234)


In [8]:
## Step 3: Create LineStrings for Entire Bus Routes
# Group bus stops by ServiceNo and Direction and create LineStrings for each bus route
bus_routes_lines = (
    bus_routes_geo.groupby(['ServiceNo', 'Direction'])
    .apply(lambda x: LineString(x.sort_values('StopSequence')['geometry'].tolist()))
    .reset_index()
    .rename(columns={0: 'geometry'})
)
# Convert to GeoDataFrame
bus_routes_lines = gpd.GeoDataFrame(bus_routes_lines, geometry='geometry', crs = "EPSG:3857")
bus_routes_lines

  bus_routes_geo.groupby(['ServiceNo', 'Direction'])


Unnamed: 0,ServiceNo,Direction,geometry
0,10,1,"LINESTRING (11570953.542 150829.913, 11570730...."
1,10,2,"LINESTRING (11551606.395 144079.537, 11551529...."
2,100,1,"LINESTRING (11562949.156 150346.323, 11562981...."
3,100,2,"LINESTRING (11553772.221 145957.517, 11553771...."
4,100A,1,"LINESTRING (11562949.156 150346.323, 11562981...."
...,...,...,...
717,992,1,"LINESTRING (11549515.54 150324.826, 11549377.6..."
718,992,2,"LINESTRING (11547899.694 150954.062, 11548141...."
719,993,1,"LINESTRING (11548491.077 148423.92, 11548156.4..."
720,9A,1,"LINESTRING (11569396.253 147495.238, 11569449...."


In [31]:
bus_routes_lines['buffer'] = bus_routes_lines.geometry.buffer(500)
bus_routes_lines['area'] = bus_routes_lines['buffer'].area
bus_routes_buffer = bus_routes_lines.drop(columns={'geometry', 'area'}).rename(columns={'buffer':'geometry'})
bus_routes_buffer

Unnamed: 0,ServiceNo,Direction,geometry
0,10,1,"POLYGON ((11552097.191 144173.432, 11552153.59..."
1,10,2,"POLYGON ((11552093.14 144191.336, 11552124.357..."
2,100,1,"POLYGON ((11553792.797 146466.948, 11553832.33..."
3,100,2,"POLYGON ((11553272.837 145930.669, 11553271.59..."
4,100A,1,"POLYGON ((11563364.696 149390.713, 11563372.70..."
...,...,...,...
717,992,1,"POLYGON ((11548880.711 151112.927, 11549149.31..."
718,992,2,"POLYGON ((11547734.91 151583.528, 11547764.632..."
719,993,1,"POLYGON ((11548370.186 149449.918, 11548438.32..."
720,9A,1,"POLYGON ((11569605.778 147948.574, 11569909.74..."


In [22]:
## Step 4: Buffer MRT Lines to Define Areas of Influence
rail_line_buffer = rail_line.to_crs(3857).buffer(100)
rail_line_buffer.head()


0    POLYGON ((11547797.949 150603.733, 11547796.79...
1    POLYGON ((11548121.352 150295.625, 11548122.35...
2    POLYGON ((11545361.474 150490.652, 11545361.38...
3    POLYGON ((11545360.251 150509.475, 11545358.64...
4    POLYGON ((11544406.903 150100.902, 11544417.34...
dtype: geometry

In [37]:
## Step 5: Identify Bus Routes that Overlap with MRT Lines
# Find bus routes that intersect with the MRT buffer
overlapping_bus_routes = bus_routes_buffer[bus_routes_buffer.intersects(rail_line_buffer.union_all())]

### Calculate Overlap Length
overlapping_bus_routes['overlap_area(sq metres)'] = overlapping_bus_routes.intersection(rail_line_buffer.union_all()).length
overlapping_bus_routes['pct_overlap'] = (overlapping_bus_routes['overlap_area(sq metres)']/(overlapping_bus_routes['geometry'].area))*100
overlapping_bus_routes.head()

Unnamed: 0,ServiceNo,Direction,geometry,overlap_area(sq metres),pct_overlap
0,10,1,"POLYGON ((11552097.191 144173.432, 11552153.59...",73776.725949,0.245373
1,10,2,"POLYGON ((11552093.14 144191.336, 11552124.357...",73917.256872,0.24616
2,100,1,"POLYGON ((11553792.797 146466.948, 11553832.33...",59761.241469,0.267666
3,100,2,"POLYGON ((11553272.837 145930.669, 11553271.59...",58890.63708,0.268883
4,100A,1,"POLYGON ((11563364.696 149390.713, 11563372.70...",12827.297585,0.249676


In [40]:
## Step 6: Output Results
output_df = overlapping_bus_routes[['ServiceNo', 'Direction', 'overlap_area(sq metres)', 'pct_overlap']]
output_df.sort_values(by=['pct_overlap'], ascending=False, inplace=True)

# Display the output
print(output_df)

    ServiceNo  Direction  overlap_area(sq metres)  pct_overlap
682      973A          1              8720.933342     0.434653
541       84A          1              7456.237675     0.434629
354       372          1             14410.702248     0.418921
610       902          1              5374.287303     0.401229
17        107          1             51453.438892     0.365421
..        ...        ...                      ...          ...
277       253          1              2484.677277     0.046195
269       248          1              5533.704033     0.045913
270      248M          1              5533.704033     0.042856
280       257          1              2482.853526     0.032739
294       272          1               516.434699     0.021018

[722 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  output_df.sort_values(by=['pct_overlap'], ascending=False, inplace=True)
