In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import csv
import shapely
from shapely.geometry import Point
import os 
import time

In [6]:
directory = os.getcwd()

os.chdir(directory)

hdbCentroids_df = pd.read_csv(r"..\data\hdb_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])
mrt_stations_df = pd.read_csv(r"..\data\mrt_stations.csv",usecols = [1,2,3])

In [4]:
hdbCentroids_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   275 non-null    float64
 1   Longitude  275 non-null    float64
dtypes: float64(2)
memory usage: 4.4 KB


In [7]:
#Plotting the Centroids of HDB Clusters on a Map

fig = go.Figure()
fig.add_trace(
    go.Scattermapbox(lat = hdbCentroids_df['Latitude'],
                     lon =hdbCentroids_df['Longitude'],
                     mode = 'markers'
                     )
)
# fig = px.scatter_mapbox(hdbCentroids_df, 
#                   lat="Latitude", 
#                   lon="Longitude", 
#                   zoom=10)

fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'style': "open-street-map",
        'center': {'lat': 1.36, 'lon': 103.85},
        'zoom': 10.55})

fig.show()

In [6]:
mrt_stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MRT Name   215 non-null    object 
 1   Latitude   215 non-null    float64
 2   Longitude  215 non-null    float64
dtypes: float64(2), object(1)
memory usage: 5.2+ KB


In [8]:
fig.add_trace(
    go.Scattermapbox(lat = mrt_stations_df['Latitude'],
                     lon =mrt_stations_df['Longitude'],
                     mode = 'markers',
                     hoverinfo = 'text',
                     text = (mrt_stations_df['MRT Name'] + '<br>' +
                    'Latitude: ' + mrt_stations_df['Latitude'].apply(lambda x: f"{x:.1f}").astype(str) + '<br>' +
                    'Longitude: ' + mrt_stations_df['Longitude'].apply(lambda x: f"{x:.1f}").astype(str)),
                     name = 'MRT Stations'
                     )
)

In [9]:
import openrouteservice as ors

In [10]:
client = ors.Client(key='5b3ce3597851110001cf6248c9f909e4f51141df9bd906bea00e541a')

In [11]:
#create a psuedo index for my residential centroids df
hdbCentroids_df['index'] = hdbCentroids_df.index

hdbCentroids_df['index']

#create a psuedo index for my residential centroids df
mrt_stations_df['index'] = mrt_stations_df.index

mrt_stations_df['index']

##create a dummy variable to cross join on 
hdbCentroids_df['join_key'] = "A"
mrt_stations_df['join_key'] = "A"

#Remove a non existent train station called SUB STATION
mrt_stations_df = mrt_stations_df[~mrt_stations_df['MRT Name'].str.contains('SUB')]

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_df = pd.merge(hdbCentroids_df, mrt_stations_df, on='join_key')

print(combined_df)



       Latitude_x  Longitude_x  index_x join_key                   MRT Name  \
0        1.432477   103.791322        0        A      ESPLANADE MRT STATION   
1        1.432477   103.791322        0        A     PAYA LEBAR MRT STATION   
2        1.432477   103.791322        0        A    DHOBY GHAUT MRT STATION   
3        1.432477   103.791322        0        A         DAKOTA MRT STATION   
4        1.432477   103.791322        0        A       LAVENDER MRT STATION   
...           ...          ...      ...      ...                        ...   
58845    1.388649   103.901674      274        A    TANAH MERAH MRT STATION   
58846    1.388649   103.901674      274        A    TAN KAH KEE MRT STATION   
58847    1.388649   103.901674      274        A  UPPER THOMSON MRT STATION   
58848    1.388649   103.901674      274        A      CALDECOTT MRT STATION   
58849    1.388649   103.901674      274        A      MAYFLOWER MRT STATION   

       Latitude_y  Longitude_y  index_y  
0        

In [12]:
#Defining a function that calculates the Euclidean Distance between two points using Haversine Method?
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in kilometers

    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arcsin(np.sqrt(a))

    distance = R * c
    return distance


#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

#reset index
result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

result_df

#Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


#create an empty column 'route' to later store query response
result_df['route'] = np.nan

In [12]:
for i in range(len(result_df)):
    print(i)
    try:
        #result_df['route'][i] = client.directions(result_df['coordinate_pair'][i], profile='cycling-regular', format='geojson', validate=False)
        result_df['route'][i] = client.directions(result_df.loc[i,'coordinate_pair'], profile='cycling-regular', format='geojson', validate=False)
    except Exception as e:
        print(f"Error processing index {i}: {e}")
        continue
    time.sleep(2)

0



ChainedAssignmentError: behaviour will change in pandas 3.0!
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy




1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
Error processing index 65: 404 ({'error': {'code': 2010, 'message': 'Could not find routable point within a radius of 350.0 meters of specified coordinate 1: 103.9878836 1.3574790.'}, 'info': {'engine': {'build_date': '2024-01-29T14:41:12Z', 'version': '7.1.1'}, 'timestamp': 1711000885745}})
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204


Rate limit exceeded. Retrying for the 1st time.


Rate limit exceeded. Retrying for the 2nd time.


Rate limit exceeded. Retrying for the 3rd time.


Rate limit exceeded. Retrying for the 4th time.


Rate limit exceeded. Retrying for the 5th time.



Error processing index 248: 403 ({'error': 'Quota exceeded'})
249
Error processing index 249: 403 ({'error': 'Quota exceeded'})
250
Error processing index 250: 403 ({'error': 'Quota exceeded'})
251
Error processing index 251: 403 ({'error': 'Quota exceeded'})
252
Error processing index 252: 403 ({'error': 'Quota exceeded'})
253
Error processing index 253: 403 ({'error': 'Quota exceeded'})
254
Error processing index 254: 403 ({'error': 'Quota exceeded'})
255
Error processing index 255: 403 ({'error': 'Quota exceeded'})
256
Error processing index 256: 403 ({'error': 'Quota exceeded'})
257
Error processing index 257: 403 ({'error': 'Quota exceeded'})
258
Error processing index 258: 403 ({'error': 'Quota exceeded'})
259
Error processing index 259: 403 ({'error': 'Quota exceeded'})
260
Error processing index 260: 403 ({'error': 'Quota exceeded'})
261
Error processing index 261: 403 ({'error': 'Quota exceeded'})
262
Error processing index 262: 403 ({'error': 'Quota exceeded'})
263
Error proc


Rate limit exceeded. Retrying for the 6th time.



Error processing index 271: 403 ({'error': 'Quota exceeded'})
272
Error processing index 272: 403 ({'error': 'Quota exceeded'})
273
Error processing index 273: 403 ({'error': 'Quota exceeded'})
274
Error processing index 274: 403 ({'error': 'Quota exceeded'})


In [21]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Unnamed: 0          275 non-null    int64  
 1   index               275 non-null    int64  
 2   Latitude_x          275 non-null    float64
 3   Longitude_x         275 non-null    float64
 4   index_x             275 non-null    int64  
 5   join_key            275 non-null    object 
 6   MRT Name            275 non-null    object 
 7   Latitude_y          275 non-null    float64
 8   Longitude_y         275 non-null    float64
 9   index_y             275 non-null    int64  
 10  euclidean_distance  275 non-null    float64
 11  coordinate_pair     275 non-null    object 
 12  distance            274 non-null    float64
 13  duration            274 non-null    float64
dtypes: float64(7), int64(4), object(3)
memory usage: 30.2+ KB


In [13]:
def get_distance(route):
    try:
        return route['features'][0]['properties']['segments'][0]['distance']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None

result_df['distance'] = result_df['route'].apply(get_distance)

def get_time(route):
    try:
        return route['features'][0]['properties']['segments'][0]['duration']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None
    

result_df['duration'] = result_df['route'].apply(get_time)



Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'float' object is not subscriptable
Error: 'flo

In [5]:
new_result_df = result_df. loc[:, result_df. columns != 'route']
new_result_df.to_csv(r"C:\Users\leoqi\downloads\result.csv")
routes = result_df['route'].copy(deep = True)
routes.to_json(r'C:\Users\leoqi\downloads\routes.json', orient='records')

In [14]:
result_df = pd.read_csv(r"C:\Users\leoqi\downloads\result.csv")

import json

# Read the JSON file
with open(r'C:\Users\leoqi\downloads\routes.json', 'r') as f:
    geojson_list = json.load(f)

len(geojson_list)

result_df['route'] = pd.Series(geojson_list)


In [15]:
for i in range(len(result_df)):
    try:
        print(i)
        route_data = result_df.loc[i, 'route']
        route_coordinates = route_data['features'][0]['geometry']['coordinates']
        lons, lats = zip(*route_coordinates)
        fig.add_trace(go.Scattermapbox(
            mode="lines",
            lon=lons,
            lat=lats,
            marker={'size': 10},
            hoverinfo = 'text',
                     text = "Time taken:" + str(round(result_df['duration'][i]/60,2)) + " min" + '<br>' +
                    "Distance:" + str(round(result_df['distance'][i]/1000,3)) + " km"
        ))
    except Exception as e:
        print(f"Error processing index {i}: {e}")
        continue


fig.show()


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
Error processing index 65: 'NoneType' object is not subscriptable
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260


In [15]:
result_df.loc[0, 'route']['features'][0]['properties']['segments'][0]['duration']
#result_df['routes'].to_json('routes.json', orient='records', lines=True, default=str)
result_df
Canberra = mrt_stations_df[mrt_stations_df['MRT Name'] == 'CANBERRA MRT STATION'][['Latitude','Longitude']].values[0]
Yishun = mrt_stations_df[mrt_stations_df['MRT Name'] == 'YISHUN MRT STATION'][['Latitude','Longitude']].values[0]

coordinates = [Canberra, Yishun]

#flip the order of the colummns 
coordinates = [list(reversed(coord)) for coord in coordinates]
print(coordinates)

route = client.directions(
    coordinates=coordinates,
    profile='cycling-regular',
    format='geojson',
    validate=False,
)

print(route)

route_coordinates = route['features'][0]['geometry']['coordinates']
lons, lats = zip(*route_coordinates)

# Create the Plotly figure
fig = go.Figure(go.Scattermapbox(
    mode="lines",
    lon=lons,
    lat=lats,
    marker={'size': 10},
))
fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'style': "open-street-map",
        'center': {'lat': 1.36, 'lon': 103.85},
        'zoom': 10.55})
fig.show()
# #create a psuedo index for my residential centroids df
# hdbCentroids_df['index'] = hdbCentroids_df.index

# hdbCentroids_df['index']

# mrt_stations_df['index'] = mrt_stations_df.index

# mrt_stations_df['index']

# hdbCentroids_df['join_key'] = "A"
# mrt_stations_df['join_key'] = "A"

# combined_df = pd.merge(hdbCentroids_df, mrt_stations_df, on='join_key')

# print(combined_df)


# def haversine(lat1, lon1, lat2, lon2):
#     R = 6371.0  # Earth radius in kilometers

#     lat1 = np.radians(lat1)
#     lon1 = np.radians(lon1)
#     lat2 = np.radians(lat2)
#     lon2 = np.radians(lon2)

#     dlat = lat2 - lat1
#     dlon = lon2 - lon1

#     a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
#     c = 2 * np.arcsin(np.sqrt(a))

#     distance = R * c
#     return distance


# combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

# #reset index

# result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

# result_df

# result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
# result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])

# #result_df.to_csv(r"C:\Users\leoqi\downloads\result.csv")

# #create an empty column 'route'
# result_df['route'] = np.nan



# for i in range(len(result_df)):
#     print(i)
#     result_df['route'][i] = client.directions(result_df['coordinate_pair'][i], profile='cycling-regular', format='geojson', validate=False)
#     time.sleep(2)
#result_df.to_csv(r"C:\Users\leoqi\downloads\result.csv")
route_data = result_df.loc[0, 'route']

route_coordinates = route_data['features'][0]['geometry']['coordinates']

route_data['features'][0]['geometry']['coordinates']
route_coordinates = route_data['features'][0]['geometry']['coordinates']
lons, lats = zip(*route_coordinates)
fig.add_trace(go.Scattermapbox(
    mode="lines",
    lon=lons,
    lat=lats,
    marker={'size': 10},
    ) 
)

fig.update_layout(
    margin ={'l':0,'t':0,'b':0,'r':0},
    mapbox = {
        'style': "open-street-map",
        'center': {'lat': 1.36, 'lon': 103.85},
        'zoom': 10.55})

fig.show()

KeyError: 'route'