In [20]:
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import csv
import shapely
from shapely.geometry import Point
import os 
import time

In [21]:
directory = os.getcwd()

os.chdir(directory)

hdbCentroids_df = pd.read_csv(r"../../data/Cluster_data/hdb_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])
mrt_stations_df = pd.read_csv(r"../../data/MRT/mrt_stations.csv",usecols = [1,2,3])

In [22]:
hdbCentroids_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   275 non-null    float64
 1   Longitude  275 non-null    float64
dtypes: float64(2)
memory usage: 4.4 KB


In [23]:
#Plotting the Centroids of HDB Clusters on a Map

fig = go.Figure()

residential_centroid_trace = go.Scattermapbox(lat = hdbCentroids_df['Latitude'],
                                              lon =hdbCentroids_df['Longitude'],
                                              mode = 'markers',
                                              name = 'Residential Centroids'
                                              )
fig.add_trace(residential_centroid_trace)
# fig = px.scatter_mapbox(hdbCentroids_df, 
#                   lat="Latitude", 
#                   lon="Longitude", 
#                   zoom=10)

fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'style': "open-street-map",
        'center': {'lat': 1.36, 'lon': 103.85},
        'zoom': 10.55})

fig.show()

In [24]:
mrt_stations_df.info()

mrt_stations_df.sort_values(by='MRT Name', inplace=True)

mrt_stations_df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MRT Name   215 non-null    object 
 1   Latitude   215 non-null    float64
 2   Longitude  215 non-null    float64
dtypes: float64(2), object(1)
memory usage: 5.2+ KB


Unnamed: 0,MRT Name,Latitude,Longitude
12,ADMIRALTY MRT STATION,1.440589,103.800991
9,ALJUNIED MRT STATION,1.316433,103.882906
54,ANG MO KIO MRT STATION,1.369429,103.849455
132,BAKAU LRT STATION,1.387994,103.905415
57,BANGKIT LRT STATION,1.380022,103.772647
...,...,...,...
186,WOODLANDS SOUTH MRT STATION,1.427488,103.792730
85,WOODLEIGH MRT STATION,1.339190,103.870818
82,YEW TEE MRT STATION,1.397298,103.747358
84,YIO CHU KANG MRT STATION,1.381499,103.845171


In [25]:
mrt_station_trace = go.Scattermapbox(lat = mrt_stations_df['Latitude'],
                                     lon =mrt_stations_df['Longitude'],
                                     mode = 'markers',
                                     hoverinfo = 'text',
                                     text = (mrt_stations_df['MRT Name'] + '<br>' +
                                             'Latitude: ' + mrt_stations_df['Latitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>' +
                                             'Longitude: ' + mrt_stations_df['Longitude'].apply(lambda x: f"{x:.1f}").astype(str)),
                                     name = 'MRT Stations'
                                     )


fig.add_trace(mrt_station_trace)

In [26]:
import openrouteservice as ors

In [27]:
client = ors.Client(key='5b3ce3597851110001cf6248c9f909e4f51141df9bd906bea00e541a')

In [28]:
#create a psuedo index for my residential centroids df
hdbCentroids_df['index'] = hdbCentroids_df.index

hdbCentroids_df['index']

#create a psuedo index for my residential centroids df
mrt_stations_df['index'] = mrt_stations_df.index

mrt_stations_df['index']

##create a dummy variable to cross join on 
hdbCentroids_df['join_key'] = "A"
mrt_stations_df['join_key'] = "A"

#Remove a non existent train station called SUB STATION
mrt_stations_df = mrt_stations_df[~mrt_stations_df['MRT Name'].str.contains('SUB')]

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_df = pd.merge(hdbCentroids_df, mrt_stations_df, on='join_key')

print(combined_df)



       Latitude_x  Longitude_x  index_x join_key                     MRT Name  \
0        1.432477   103.791322        0        A        ADMIRALTY MRT STATION   
1        1.432477   103.791322        0        A         ALJUNIED MRT STATION   
2        1.432477   103.791322        0        A       ANG MO KIO MRT STATION   
3        1.432477   103.791322        0        A            BAKAU LRT STATION   
4        1.432477   103.791322        0        A          BANGKIT LRT STATION   
...           ...          ...      ...      ...                          ...   
58845    1.388649   103.901674      274        A  WOODLANDS SOUTH MRT STATION   
58846    1.388649   103.901674      274        A        WOODLEIGH MRT STATION   
58847    1.388649   103.901674      274        A          YEW TEE MRT STATION   
58848    1.388649   103.901674      274        A     YIO CHU KANG MRT STATION   
58849    1.388649   103.901674      274        A           YISHUN MRT STATION   

       Latitude_y  Longitud

In [29]:
#Defining a function that calculates the Euclidean Distance between two points using Haversine Method?
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in kilometers

    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arcsin(np.sqrt(a))

    distance = R * c
    return distance


#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

#Group by residential centroid, filter out the closest MRT by distance for each centroid into another dataframe and reset its index
result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

result_df

#Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


#create an empty column 'route' to later store query response
result_df['route'] = np.nan

In [30]:
# for i in range(len(result_df)):
#     print(i)
#     try:
#         #result_df['route'][i] = client.directions(result_df['coordinate_pair'][i], profile='cycling-regular', format='geojson', validate=False)
#         result_df['route'][i] = client.directions(result_df.loc[i,'coordinate_pair'], profile='cycling-regular', format='geojson', validate=False)
#     except Exception as e:
#         print(f"Error processing index {i}: {e}")
#         continue
#     time.sleep(2)

In [31]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   index               275 non-null    int64  
 1   Latitude_x          275 non-null    float64
 2   Longitude_x         275 non-null    float64
 3   index_x             275 non-null    int64  
 4   join_key            275 non-null    object 
 5   MRT Name            275 non-null    object 
 6   Latitude_y          275 non-null    float64
 7   Longitude_y         275 non-null    float64
 8   index_y             275 non-null    int64  
 9   euclidean_distance  275 non-null    float64
 10  coordinate_pair     275 non-null    object 
 11  route               0 non-null      float64
dtypes: float64(6), int64(3), object(3)
memory usage: 25.9+ KB


In [32]:
def get_distance(route):
    try:
        return route['features'][0]['properties']['segments'][0]['distance']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None

result_df['distance'] = result_df['route'].apply(get_distance)

def get_time(route):
    try:
        return route['features'][0]['properties']['segments'][0]['duration']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None
    

result_df['duration'] = result_df['route'].apply(get_time)



Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'flo

In [33]:
# new_result_df = result_df. loc[:, result_df. columns != 'route']
# new_result_df.to_csv(r"C:\Users\leoqi\downloads\Centroid_MRT pairing data.csv")
# routes = result_df['route'].copy(deep = True)
# routes.to_json(r'C:\Users\leoqi\downloads\routes.json', orient='records')

In [34]:
result_df = pd.read_csv(r"../../data/Cluster_data/Centroid_MRT pairing data.csv")

import json

# Read the JSON file
with open(r'../../data/Cluster_data/routes.json', 'r') as f:
    geojson_list = json.load(f)

len(geojson_list)

result_df['route'] = pd.Series(geojson_list)

fig = go.Figure()


In [35]:
for i in range(len(result_df)):
    try:
        print(i)
        route_data = result_df.loc[i, 'route']
        route_coordinates = route_data['features'][0]['geometry']['coordinates']
        lons, lats = zip(*route_coordinates)
        fig.add_trace(go.Scattermapbox(
            mode="lines",
            lon=lons,
            lat=lats,
            marker={'size': 10},
            hoverinfo = 'text',
            text = ("Time taken:" + str(round(result_df['duration'][i]/60,2)) + " min" + '<br>' +
                    "Distance:" + str(round(result_df['distance'][i]/1000,3)) + " km"),
            showlegend= False,
        ))
    except Exception as e:
        print(f"Error processing index {i}: {e}")
        continue

fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'style': "open-street-map",
        'center': {'lat': 1.36, 'lon': 103.85},
        'zoom': 10.55})

fig.show()


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
Error processing index 65: 'NoneType' object is not subscriptable
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274


In [36]:
fig.add_trace(residential_centroid_trace)

In [37]:
#Group by MRT, calculate the average distance from MRT to each centroid into another dataframe and reset its index
df = result_df.groupby('MRT Name')['distance'].mean()
df = round(df/1000,3)
df = df.reset_index()

df

mrt_stations_df = pd.merge(mrt_stations_df, df, left_on='MRT Name',right_on = "MRT Name",how = "left")

mrt_stations_df

Unnamed: 0,MRT Name,Latitude,Longitude,index,join_key,distance
0,ADMIRALTY MRT STATION,1.440589,103.800991,12,A,1.003
1,ALJUNIED MRT STATION,1.316433,103.882906,9,A,1.068
2,ANG MO KIO MRT STATION,1.369429,103.849455,54,A,1.028
3,BAKAU LRT STATION,1.387994,103.905415,132,A,0.839
4,BANGKIT LRT STATION,1.380022,103.772647,57,A,0.583
...,...,...,...,...,...,...
209,WOODLANDS SOUTH MRT STATION,1.427488,103.792730,186,A,0.707
210,WOODLEIGH MRT STATION,1.339190,103.870818,85,A,0.907
211,YEW TEE MRT STATION,1.397298,103.747358,82,A,0.757
212,YIO CHU KANG MRT STATION,1.381499,103.845171,84,A,0.860


In [38]:
mrt_station_trace = go.Scattermapbox(lat = mrt_stations_df['Latitude'],
                                     lon =mrt_stations_df['Longitude'],
                                     mode = 'markers',
                                     hoverinfo = 'text',
                                     text = (mrt_stations_df['MRT Name'] + '<br>' +
                                             'Latitude: ' + mrt_stations_df['Latitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>' +
                                             'Longitude: ' + mrt_stations_df['Longitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>'+
                                             'average distance to centroids: ' + mrt_stations_df['distance'].astype(str) + ' km'),
                                     name = 'MRT Stations',
                                     marker_size = 10
                                     )

fig.add_trace(mrt_station_trace)

fig.update_traces(marker_size = 7,
                  selector=dict(name='Residential Centroids'))

fig.show()

In [39]:
hdbCentroids_df

Unnamed: 0,Latitude,Longitude,index,join_key
0,1.432477,103.791322,0,A
1,1.349428,103.875844,1,A
2,1.362619,103.745940,2,A
3,1.348037,103.945650,3,A
4,1.294382,103.810463,4,A
...,...,...,...,...
270,1.445612,103.821101,270,A
271,1.372317,103.890116,271,A
272,1.336826,103.921563,272,A
273,1.356861,103.750336,273,A


In [40]:
from dotenv import load_dotenv
import os
import requests
import json


load_dotenv()
one_map_email = os.getenv("ONE_MAP_EMAIL")
one_map_password = os.getenv("ONE_MAP_PASSWORD")
payload = {
        "email": one_map_email,
        "password": one_map_password
      }
api_key = requests.request("POST", "https://www.onemap.gov.sg/api/auth/post/getToken", json=payload)
api_key = api_key.json()["access_token"]




In [41]:
from pyonemap import OneMap

onemap = OneMap(api_key)

geocode = onemap.reverseGeocode.revGeoCode(hdbCentroids_df['Latitude'][0], hdbCentroids_df['Longitude'][0])

address = geocode['GeocodeInfo'][0]['BLOCK'] + " " + geocode['GeocodeInfo'][0]['ROAD']

address

'503A WOODLANDS DRIVE 14'

In [42]:
def get_centroid_name(row):
    geocode = onemap.reverseGeocode.revGeoCode(row['Latitude'], row['Longitude'])
    block = geocode['GeocodeInfo'][0]['BLOCK'] or ''
    road = geocode['GeocodeInfo'][0]['ROAD'] or ''
    return block + " " + road

hdbCentroids_df['centroid_name'] = hdbCentroids_df.apply(get_centroid_name, axis=1)

In [43]:
fig.update_traces(hoverinfo = 'text',
                  text = (hdbCentroids_df['centroid_name'] + '<br>' +
                          'Latitude: ' +hdbCentroids_df['Latitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>' +
                          'Longitude: ' + hdbCentroids_df['Longitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>'),
                    selector=dict(name='Residential Centroids'))

In [44]:
priv_cluster_centroids_df = pd.read_csv(r"../../data/Cluster_data/priv_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])

In [45]:
priv_cluster_centroids_df

Unnamed: 0,Latitude,Longitude
0,1.366960,103.877984
1,1.330386,103.793159
2,1.317931,103.926300
3,1.371333,103.830336
4,1.307455,103.834089
...,...,...
270,1.323496,103.907747
271,1.292659,103.839734
272,1.388156,103.860770
273,1.250562,103.845572


In [46]:
geocode = onemap.reverseGeocode.revGeoCode(priv_cluster_centroids_df['Latitude'][0], priv_cluster_centroids_df['Longitude'][0])

In [47]:
geocode

{'GeocodeInfo': [{'BUILDINGNAME': 'NOUVELLE PARK',
   'BLOCK': '133',
   'ROAD': 'POH HUAT ROAD WEST',
   'POSTALCODE': '546685',
   'XCOORD': '32956.0634954',
   'YCOORD': '38800.5855034',
   'LATITUDE': '1.3671728174749518',
   'LONGITUDE': '103.87785224302608'},
  {'BUILDINGNAME': 'PARRY PARK',
   'BLOCK': '29',
   'ROAD': 'PARRY TERRACE',
   'POSTALCODE': '547128',
   'XCOORD': '32975.2350244',
   'YCOORD': '38753.3253158',
   'LATITUDE': '1.3667454101163632',
   'LONGITUDE': '103.87802450455956'},
  {'BUILDINGNAME': 'PARRY PARK',
   'BLOCK': '31',
   'ROAD': 'PARRY TERRACE',
   'POSTALCODE': '547130',
   'XCOORD': '32980.9595477',
   'YCOORD': '38754.9478613',
   'LATITUDE': '1.3667600828705526',
   'LONGITUDE': '103.87807594361942'},
  {'BUILDINGNAME': 'PARRY PARK',
   'BLOCK': '25',
   'ROAD': 'PARRY TERRACE',
   'POSTALCODE': '547124',
   'XCOORD': '32962.3878695',
   'YCOORD': '38751.9975199',
   'LATITUDE': '1.3667334041665824',
   'LONGITUDE': '103.87790906379452'},
  {'BUIL

In [48]:
def get_centroid_name(row):
    geocode = onemap.reverseGeocode.revGeoCode(row['Latitude'], row['Longitude'])
    if geocode['GeocodeInfo'][0]['BUILDINGNAME'] != "NIL":
        return geocode['GeocodeInfo'][0]['BUILDINGNAME']
    else:
        if geocode['GeocodeInfo'][0]['BLOCK'] != "NIL":
            return geocode['GeocodeInfo'][0]['BLOCK'] + " " + geocode['GeocodeInfo'][0]['ROAD']
        else:
            return geocode['GeocodeInfo'][0]['ROAD']

priv_cluster_centroids_df['centroid_name'] = priv_cluster_centroids_df.apply(get_centroid_name, axis=1)

In [49]:
priv_cluster_centroids_df

Unnamed: 0,Latitude,Longitude,centroid_name
0,1.366960,103.877984,NOUVELLE PARK
1,1.330386,103.793159,45 LILY AVENUE
2,1.317931,103.926300,OPERA ESTATE
3,1.371333,103.830336,SEMBAWANG HILLS ESTATE
4,1.307455,103.834089,FAR EAST PLAZA
...,...,...,...
270,1.323496,103.907747,KEMBANGAN ESTATE
271,1.292659,103.839734,GAMBIER COURT
272,1.388156,103.860770,CAMELIA PARK
273,1.250562,103.845572,CORAL ISLAND


In [50]:
private_centroid_trace = go.Scattermapbox(lat = priv_cluster_centroids_df['Latitude'],
                                              lon = priv_cluster_centroids_df['Longitude'],
                                              mode = 'markers',
                                              name = 'Private Centroids',
                                              hoverinfo= 'text',
                                              text = (priv_cluster_centroids_df['centroid_name'] + '<br>' +
                                                      'Latitude: ' + priv_cluster_centroids_df['Latitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>' +
                                                      'Longitude: ' + priv_cluster_centroids_df['Longitude'].apply(lambda x: f"{x:.1f}").astype(str))
                                              )

fig.add_trace(private_centroid_trace)

In [56]:
mrt_stations_df2 = pd.read_csv(r"../../data/MRT/mrt_stations.csv",usecols = [1,2,3])

mrt_stations_df2.sort_values(by='MRT Name', inplace=True)

#Remove a non existent train station called SUB STATION
mrt_stations_df2 = mrt_stations_df2[~mrt_stations_df2['MRT Name'].str.contains('SUB')]


In [57]:
#create a psuedo index for my residential centroids df
priv_cluster_centroids_df['index'] = priv_cluster_centroids_df.index

priv_cluster_centroids_df['index']

#create a psuedo index for my residential centroids df
mrt_stations_df2['index'] = mrt_stations_df2.index

mrt_stations_df2['index']

##create a dummy variable to cross join on 
priv_cluster_centroids_df['join_key'] = "A"
mrt_stations_df2['join_key'] = "A"

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_df = pd.merge(priv_cluster_centroids_df, mrt_stations_df2, on='join_key')

print(combined_df)

       Latitude_x  Longitude_x       centroid_name  index_x join_key  \
0        1.366960   103.877984       NOUVELLE PARK        0        A   
1        1.366960   103.877984       NOUVELLE PARK        0        A   
2        1.366960   103.877984       NOUVELLE PARK        0        A   
3        1.366960   103.877984       NOUVELLE PARK        0        A   
4        1.366960   103.877984       NOUVELLE PARK        0        A   
...           ...          ...                 ...      ...      ...   
58845    1.315369   103.792807  OEI TIONG HAM PARK      274        A   
58846    1.315369   103.792807  OEI TIONG HAM PARK      274        A   
58847    1.315369   103.792807  OEI TIONG HAM PARK      274        A   
58848    1.315369   103.792807  OEI TIONG HAM PARK      274        A   
58849    1.315369   103.792807  OEI TIONG HAM PARK      274        A   

                          MRT Name  Latitude_y  Longitude_y  index_y  
0            ADMIRALTY MRT STATION    1.440589   103.800991     

In [60]:
combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58850 entries, 0 to 58849
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude_x          58850 non-null  float64
 1   Longitude_x         58850 non-null  float64
 2   centroid_name       58850 non-null  object 
 3   index_x             58850 non-null  int64  
 4   join_key            58850 non-null  object 
 5   MRT Name            58850 non-null  object 
 6   Latitude_y          58850 non-null  float64
 7   Longitude_y         58850 non-null  float64
 8   index_y             58850 non-null  int64  
 9   euclidean_distance  58850 non-null  float64
dtypes: float64(5), int64(2), object(3)
memory usage: 4.5+ MB


In [58]:
#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

#Group by residential centroid, filter out the closest MRT by distance for each centroid into another dataframe and reset its index
result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

result_df

#Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


#create an empty column 'route' to later store query response
result_df['route'] = np.nan

In [59]:
result_df

Unnamed: 0,index,Latitude_x,Longitude_x,centroid_name,index_x,join_key,MRT Name,Latitude_y,Longitude_y,index_y,euclidean_distance,coordinate_pair,route
0,96,1.366960,103.877984,NOUVELLE PARK,0,A,KOVAN MRT STATION,1.360179,103.885065,153,1.090019,"[[103.87798366999664, 1.3669599975831537], [10...",
1,387,1.330386,103.793159,45 LILY AVENUE,1,A,SIXTH AVENUE MRT STATION,1.330858,103.796907,27,0.419957,"[[103.7931586230639, 1.3303860408530015], [103...",
2,519,1.317931,103.926300,OPERA ESTATE,2,A,KEMBANGAN MRT STATION,1.321038,103.912948,13,1.523988,"[[103.92630025524096, 1.3179313391052505], [10...",
3,762,1.371333,103.830336,SEMBAWANG HILLS ESTATE,3,A,MAYFLOWER MRT STATION,1.371463,103.836568,214,0.692907,"[[103.83033586819448, 1.371333423237411], [103...",
4,981,1.307455,103.834089,FAR EAST PLAZA,4,A,NEWTON MRT STATION,1.312319,103.837985,107,0.692850,"[[103.83408949483868, 1.3074546745785451], [10...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...
270,57871,1.323496,103.907747,KEMBANGAN ESTATE,270,A,KEMBANGAN MRT STATION,1.321038,103.912948,13,0.639467,"[[103.907747221298, 1.3234956754949063], [103....",
271,58063,1.292659,103.839734,GAMBIER COURT,271,A,FORT CANNING MRT STATION,1.292482,103.844331,166,0.511489,"[[103.83973365789451, 1.2926587246321704], [10...",
272,58276,1.388156,103.860770,CAMELIA PARK,272,A,FERNVALE LRT STATION,1.391886,103.876309,128,1.776391,"[[103.86077001200324, 1.3881563822047092], [10...",
273,58611,1.250562,103.845572,CORAL ISLAND,273,A,TANJONG PAGAR MRT STATION,1.276568,103.846007,209,2.892130,"[[103.84557213443625, 1.2505623711772464], [10...",


In [61]:
# for i in range(len(result_df)):
#     print(i)
#     try:
#         #result_df['route'][i] = client.directions(result_df['coordinate_pair'][i], profile='cycling-regular', format='geojson', validate=False)
#         result_df['route'][i] = client.directions(result_df.loc[i,'coordinate_pair'], profile='cycling-regular', format='geojson', validate=False)
#     except Exception as e:
#         print(f"Error processing index {i}: {e}")
#         continue
#     time.sleep(2)

0



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
Error processing index 128: 404 ({'error': {'code': 2010, 'message': 'Could not find routable point within a radius of 350.0 meters of specified coordinate 1: 103.9878836 1.3574790.'}, 'info': {'engine': {'build_date': '2024-01-29T14:41:12Z', 'version': '7.1.1'}, 'timestamp': 1711210100494}})
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
20

In [64]:
result_df['distance'] = result_df['route'].apply(get_distance)
result_df['duration'] = result_df['route'].apply(get_time)

result_df['distance']
result_df['duration']

Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable


0      389.1
1      265.7
2      413.2
3      231.3
4      222.9
       ...  
270    208.5
271    243.3
272    544.9
273      NaN
274    289.9
Name: duration, Length: 275, dtype: float64

In [65]:
# new_result_df2 = result_df. loc[:, result_df. columns != 'route']
# new_result_df2.to_csv(r"C:\Users\leoqi\downloads\PCentroid_MRT pairing data.csv")
# proutes = result_df['route'].copy(deep = True)
# proutes.to_json(r'C:\Users\leoqi\downloads\proutes.json', orient='records')

In [66]:
for i in range(len(result_df)):
    try:
        print(i)
        route_data = result_df.loc[i, 'route']
        route_coordinates = route_data['features'][0]['geometry']['coordinates']
        lons, lats = zip(*route_coordinates)
        fig.add_trace(go.Scattermapbox(
            mode="lines",
            lon=lons,
            lat=lats,
            marker={'size': 10},
            hoverinfo = 'text',
            text = ("Time taken:" + str(round(result_df['duration'][i]/60,2)) + " min" + '<br>' +
                    "Distance:" + str(round(result_df['distance'][i]/1000,3)) + " km"),
            showlegend= False,
        ))
    except Exception as e:
        print(f"Error processing index {i}: {e}")
        continue

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
Error processing index 128: 'float' object is not subscriptable
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
26

In [67]:
fig.show()