In [1]:
import pandas as pd
import numpy as np
import openrouteservice as ors
from dotenv import load_dotenv
from pyonemap import OneMap
import os
import requests
import json
import time

In [2]:
load_dotenv()

True

In [3]:
ors_key = os.getenv("ORS_API_KEY")

client = ors.Client(key= ors_key)

In [4]:
directory = os.getcwd()

os.chdir(directory)

PrivateCentroids_df = pd.read_csv(r"..\data\priv_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])
mrt_stations_df = pd.read_csv(r"..\data\mrt_station_final.csv",usecols = [1,2,3])

In [5]:
PrivateCentroids_df.info()
mrt_stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   275 non-null    float64
 1   Longitude  275 non-null    float64
dtypes: float64(2)
memory usage: 4.4 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215 entries, 0 to 214
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MRT Name   215 non-null    object 
 1   Latitude   215 non-null    float64
 2   Longitude  215 non-null    float64
dtypes: float64(2), object(1)
memory usage: 5.2+ KB


In [6]:
mrt_stations_df.sort_values(by='MRT Name', inplace=True)

mrt_stations_df

Unnamed: 0,MRT Name,Latitude,Longitude
12,ADMIRALTY MRT STATION,1.440589,103.800991
9,ALJUNIED MRT STATION,1.316433,103.882906
54,ANG MO KIO MRT STATION,1.369429,103.849455
132,BAKAU LRT STATION,1.387994,103.905415
57,BANGKIT LRT STATION,1.380022,103.772647
...,...,...,...
186,WOODLANDS SOUTH MRT STATION,1.427488,103.792730
85,WOODLEIGH MRT STATION,1.339190,103.870818
82,YEW TEE MRT STATION,1.397298,103.747358
84,YIO CHU KANG MRT STATION,1.381499,103.845171


In [7]:
#create a psuedo index for my residential centroids df
PrivateCentroids_df['index'] = PrivateCentroids_df.index

PrivateCentroids_df['index']

#create a psuedo index for my residential centroids df
mrt_stations_df['index'] = mrt_stations_df.index

mrt_stations_df['index']

##create a dummy variable to cross join on 
PrivateCentroids_df['join_key'] = "A"
mrt_stations_df['join_key'] = "A"

#Remove a non existent train station called SUB STATION
mrt_stations_df = mrt_stations_df[~mrt_stations_df['MRT Name'].str.contains('SUB')]

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_df = pd.merge(PrivateCentroids_df, mrt_stations_df, on='join_key')

print(combined_df)

       Latitude_x  Longitude_x  index_x join_key                     MRT Name  \
0        1.366960   103.877984        0        A        ADMIRALTY MRT STATION   
1        1.366960   103.877984        0        A         ALJUNIED MRT STATION   
2        1.366960   103.877984        0        A       ANG MO KIO MRT STATION   
3        1.366960   103.877984        0        A            BAKAU LRT STATION   
4        1.366960   103.877984        0        A          BANGKIT LRT STATION   
...           ...          ...      ...      ...                          ...   
58845    1.315369   103.792807      274        A  WOODLANDS SOUTH MRT STATION   
58846    1.315369   103.792807      274        A        WOODLEIGH MRT STATION   
58847    1.315369   103.792807      274        A          YEW TEE MRT STATION   
58848    1.315369   103.792807      274        A     YIO CHU KANG MRT STATION   
58849    1.315369   103.792807      274        A           YISHUN MRT STATION   

       Latitude_y  Longitud

In [9]:
#Defining a function that calculates the Euclidean Distance between two points using Haversine Method?
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in kilometers

    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arcsin(np.sqrt(a))

    distance = R * c
    return distance

In [10]:
#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

#Group by residential centroid, filter out the closest MRT by distance for each centroid into another dataframe and reset its index
result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

result_df

#Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


#create an empty column 'route' to later store query response
result_df['route'] = np.nan

In [11]:
result_df

Unnamed: 0,index,Latitude_x,Longitude_x,index_x,join_key,MRT Name,Latitude_y,Longitude_y,index_y,euclidean_distance,coordinate_pair,route
0,96,1.366960,103.877984,0,A,KOVAN MRT STATION,1.360179,103.885065,153,1.090019,"[[103.87798366999664, 1.3669599975831537], [10...",
1,387,1.330386,103.793159,1,A,SIXTH AVENUE MRT STATION,1.330858,103.796907,27,0.419957,"[[103.7931586230639, 1.3303860408530015], [103...",
2,519,1.317931,103.926300,2,A,KEMBANGAN MRT STATION,1.321038,103.912948,13,1.523988,"[[103.92630025524096, 1.3179313391052505], [10...",
3,762,1.371333,103.830336,3,A,MAYFLOWER MRT STATION,1.371463,103.836568,214,0.692907,"[[103.83033586819448, 1.371333423237411], [103...",
4,981,1.307455,103.834089,4,A,NEWTON MRT STATION,1.312319,103.837985,107,0.692850,"[[103.83408949483868, 1.3074546745785451], [10...",
...,...,...,...,...,...,...,...,...,...,...,...,...
270,57871,1.323496,103.907747,270,A,KEMBANGAN MRT STATION,1.321038,103.912948,13,0.639467,"[[103.907747221298, 1.3234956754949063], [103....",
271,58063,1.292659,103.839734,271,A,FORT CANNING MRT STATION,1.292482,103.844331,166,0.511489,"[[103.83973365789451, 1.2926587246321704], [10...",
272,58276,1.388156,103.860770,272,A,FERNVALE LRT STATION,1.391886,103.876309,128,1.776391,"[[103.86077001200324, 1.3881563822047092], [10...",
273,58611,1.250562,103.845572,273,A,TANJONG PAGAR MRT STATION,1.276568,103.846007,209,2.892130,"[[103.84557213443625, 1.2505623711772464], [10...",


In [12]:
def get_route(coordinate_pair):
    time.sleep(2)
    try:
        return client.directions(coordinate_pair, profile='cycling-regular', format='geojson', validate=False)
    except Exception as e:
        print(f"Error: {e}")
        return None

In [13]:
result_df['route'] = result_df['coordinate_pair'].apply(get_route)

Error: 404 ({'error': {'code': 2010, 'message': 'Could not find routable point within a radius of 350.0 meters of specified coordinate 1: 103.9878836 1.3574790.'}, 'info': {'engine': {'build_date': '2024-01-29T14:41:12Z', 'version': '7.1.1'}, 'timestamp': 1711287632735}})
Error: 404 ({'error': {'code': 2010, 'message': 'Could not find routable point within a radius of 350.0 meters of specified coordinate 0: 103.8455721 1.2505624.'}, 'info': {'engine': {'build_date': '2024-01-29T14:41:12Z', 'version': '7.1.1'}, 'timestamp': 1711287966197}})


In [14]:
def get_distance(route):
    try:
        return route['features'][0]['properties']['segments'][0]['distance']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None

result_df['distance'] = result_df['route'].apply(get_distance)

def get_time(route):
    try:
        return route['features'][0]['properties']['segments'][0]['duration']
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None
    

result_df['duration'] = result_df['route'].apply(get_time)

Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable


In [19]:
one_map_email = os.getenv("ONE_MAP_EMAIL")
one_map_password = os.getenv("ONE_MAP_PASSWORD")
payload = {
        "email": one_map_email,
        "password": one_map_password
      }
api_key = requests.request("POST", "https://www.onemap.gov.sg/api/auth/post/getToken", json=payload)
api_key = api_key.json()["access_token"]

In [20]:
onemap = OneMap(api_key)

In [27]:
def get_centroid_name(row):
    geocode = onemap.reverseGeocode.revGeoCode(row['Latitude_x'], row['Longitude_x'])
    if geocode['GeocodeInfo'][0]['BUILDINGNAME'] != "NIL":
        return geocode['GeocodeInfo'][0]['BUILDINGNAME']
    else:
        if geocode['GeocodeInfo'][0]['BLOCK'] != "NIL":
            return geocode['GeocodeInfo'][0]['BLOCK'] + " " + geocode['GeocodeInfo'][0]['ROAD']
        else:
            return geocode['GeocodeInfo'][0]['ROAD']

In [28]:
result_df['centroid_name'] = result_df.apply(get_centroid_name, axis=1)

In [29]:
new_result_df = result_df. loc[:, result_df. columns != 'route']
new_result_df.to_csv(r"..\data\Private_Centroid_MRT pairing data.csv")
routes = result_df['route'].copy(deep = True)
routes.to_json(r'..\data\Private_MRT_routes.json', orient='records')

In [30]:
result_df[result_df['route'].isna()]

Unnamed: 0,index,Latitude_x,Longitude_x,index_x,join_key,MRT Name,Latitude_y,Longitude_y,index_y,euclidean_distance,coordinate_pair,route,distance,duration,centroid_name
128,27430,1.363718,103.972811,128,A,CHANGI AIRPORT MRT STATION,1.357479,103.987884,40,1.813491,"[[103.97281056163416, 1.3637175542882891], [10...",,,,TOH ESTATE
273,58611,1.250562,103.845572,273,A,TANJONG PAGAR MRT STATION,1.276568,103.846007,209,2.89213,"[[103.84557213443625, 1.2505623711772464], [10...",,,,CORAL ISLAND


In [31]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   index               275 non-null    int64  
 1   Latitude_x          275 non-null    float64
 2   Longitude_x         275 non-null    float64
 3   index_x             275 non-null    int64  
 4   join_key            275 non-null    object 
 5   MRT Name            275 non-null    object 
 6   Latitude_y          275 non-null    float64
 7   Longitude_y         275 non-null    float64
 8   index_y             275 non-null    int64  
 9   euclidean_distance  275 non-null    float64
 10  coordinate_pair     275 non-null    object 
 11  route               273 non-null    object 
 12  distance            273 non-null    float64
 13  duration            273 non-null    float64
 14  centroid_name       275 non-null    object 
dtypes: float64(7), int64(3), object(5)
memory usage: 32.4+ KB