In [47]:
import pandas as pd
import numpy as np
import openrouteservice as ors
from dotenv import load_dotenv
from pyonemap import OneMap
import os
import requests
import json
import time
from collections import namedtuple

In [5]:
load_dotenv()

True

In [3]:
# ors_key = os.getenv("ORS_API_KEY")

# client = ors.Client(key= ors_key)

In [6]:
directory = os.getcwd()

os.chdir(directory)

hdbCentroids_df = pd.read_csv(r"..\data\hdb_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])
mrt_stations_df = pd.read_csv(r"..\data\mrt_station_final.csv",usecols = [1,2,3])

In [7]:
hdbCentroids_df.info()
mrt_stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 275 entries, 0 to 274
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   275 non-null    float64
 1   Longitude  275 non-null    float64
dtypes: float64(2)
memory usage: 4.4 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MRT.Name   175 non-null    object 
 1   Latitude   175 non-null    float64
 2   Longitude  175 non-null    float64
dtypes: float64(2), object(1)
memory usage: 4.2+ KB


In [16]:
mrt_stations_df.sort_values(by='MRT.Name', inplace=True)

mrt_stations_df.reset_index(drop = True,inplace=True)

mrt_stations_df.head()

Unnamed: 0,MRT.Name,Latitude,Longitude,index,join_key
0,ADMIRALTY MRT STATION,1.440589,103.80099,12,A
1,ALJUNIED MRT STATION,1.316433,103.882906,9,A
2,ANG MO KIO MRT STATION,1.369429,103.849455,52,A
3,BAKAU LRT STATION,1.387994,103.905415,115,A
4,BANGKIT LRT STATION,1.380022,103.772647,54,A


In [18]:
#create a psuedo index for my residential centroids df
hdbCentroids_df['index'] = hdbCentroids_df.index

hdbCentroids_df['index']

##create a dummy variable to cross join on 
hdbCentroids_df['join_key'] = "A"
mrt_stations_df['join_key'] = "A"

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_df = pd.merge(hdbCentroids_df, mrt_stations_df, on='join_key')

print(combined_df.head(1))

combined_df.info()

   Latitude_x  Longitude_x  index_x join_key               MRT.Name  \
0    1.432477   103.791322        0        A  ADMIRALTY MRT STATION   

   Latitude_y  Longitude_y  index_y  
0    1.440589    103.80099       12  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48125 entries, 0 to 48124
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Latitude_x   48125 non-null  float64
 1   Longitude_x  48125 non-null  float64
 2   index_x      48125 non-null  int64  
 3   join_key     48125 non-null  object 
 4   MRT.Name     48125 non-null  object 
 5   Latitude_y   48125 non-null  float64
 6   Longitude_y  48125 non-null  float64
 7   index_y      48125 non-null  int64  
dtypes: float64(4), int64(2), object(2)
memory usage: 2.9+ MB


In [19]:
#Defining a function that calculates the Euclidean Distance between two points using Haversine Method?
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Earth radius in kilometers

    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arcsin(np.sqrt(a))

    distance = R * c
    return distance

In [49]:
coordinate_pair = namedtuple('coordinate_pair',['lat_x','lon_x','lat_y','lon_y'])

In [50]:
#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

result_df = combined_df.groupby('MRT.Name').apply(lambda group: group.nsmallest(5, 'euclidean_distance'),include_groups = False).reset_index(drop=True)

result_df['MRT_Name'] = pd.merge(result_df, mrt_stations_df, left_on='index_y', right_on='index')['MRT.Name']

result_df['coordinate_pair'] = result_df.apply(lambda x: coordinate_pair(x['Latitude_x'], x['Longitude_x'], x['Latitude_y'], x['Longitude_y']), axis=1)

# #Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
# result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
# result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


#create an empty column 'route' to later store query response
result_df['route'] = np.nan

result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 875 entries, 0 to 874
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude_x          875 non-null    float64
 1   Longitude_x         875 non-null    float64
 2   index_x             875 non-null    int64  
 3   join_key            875 non-null    object 
 4   Latitude_y          875 non-null    float64
 5   Longitude_y         875 non-null    float64
 6   index_y             875 non-null    int64  
 7   euclidean_distance  875 non-null    float64
 8   MRT_Name            875 non-null    object 
 9   coordinate_pair     875 non-null    object 
 10  route               0 non-null      float64
dtypes: float64(6), int64(2), object(3)
memory usage: 75.3+ KB


In [10]:
# #Apply function to dataframe and store distances in new column 'euclidean distance'
# combined_df['euclidean_distance'] = haversine(combined_df['Latitude_x'], combined_df['Longitude_x'], combined_df['Latitude_y'], combined_df['Longitude_y'])

# #Group by residential centroid, filter out the closest MRT by distance for each centroid into another dataframe and reset its index
# result_df = combined_df.loc[combined_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index()

# result_df

# #Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
# result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
# result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])


# #create an empty column 'route' to later store query response
# result_df['route'] = np.nan

In [28]:
one_map_email = os.getenv("ONE_MAP_EMAIL")
one_map_password = os.getenv("ONE_MAP_PASSWORD")
payload = {
        "email": one_map_email,
        "password": one_map_password
      }
api_key = requests.request("POST", "https://www.onemap.gov.sg/api/auth/post/getToken", json=payload)
api_key = api_key.json()["access_token"]

In [29]:
onemap = OneMap(api_key)

In [55]:
route = onemap.routing.route(start_lat=lat_x, start_lon=lon_x, end_lat=lat_y, end_lon=lon_y, routeType="cycle")

In [57]:
def get_route(coordinate_pair):
    time.sleep(1)
    try:
        return onemap.routing.route(start_lat=coordinate_pair.lat_x, 
                                    start_lon=coordinate_pair.lon_x, 
                                    end_lat=coordinate_pair.lat_y, 
                                    end_lon=coordinate_pair.lon_y, 
                                    routeType="cycle")
    except Exception as e:
        print(f"Error: {e}")
        return None

In [58]:
result_df['route'] = result_df['coordinate_pair'].apply(get_route)

In [67]:
def get_distance(route):
    try:
        return route['route_summary']['total_distance']/1000 #convert m to km
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None

result_df['distance'] = result_df['route'].apply(get_distance)

def get_time(route):
    try:
        return route['route_summary']['total_time']/60 #convert second to minutes
    except (KeyError, IndexError,TypeError) as e:
        print(f"Error: {e}")
        return None
    

result_df['duration'] = result_df['route'].apply(get_time)

In [68]:
result_df

Unnamed: 0,Latitude_x,Longitude_x,index_x,join_key,Latitude_y,Longitude_y,index_y,euclidean_distance,MRT_Name,coordinate_pair,route,distance,duration
0,1.438808,103.803051,246,A,1.440589,103.800990,12,0.302808,ADMIRALTY MRT STATION,"(1.4388077562480677, 103.80305139967076, 1.440...",{'status_message': 'Found route between points...,0.436,3.066667
1,1.443728,103.801325,204,A,1.440589,103.800990,12,0.351037,ADMIRALTY MRT STATION,"(1.4437277903544683, 103.80132464715813, 1.440...",{'status_message': 'Found route between points...,1.135,7.216667
2,1.440682,103.797733,32,A,1.440589,103.800990,12,0.362213,ADMIRALTY MRT STATION,"(1.440681641599151, 103.79773334236516, 1.4405...",{'status_message': 'Found route between points...,0.414,2.983333
3,1.435566,103.798470,123,A,1.440589,103.800990,12,0.624828,ADMIRALTY MRT STATION,"(1.435566123626416, 103.7984696563486, 1.44058...",{'status_message': 'Found route between points...,1.422,8.900000
4,1.443142,103.806937,104,A,1.440589,103.800990,12,0.719359,ADMIRALTY MRT STATION,"(1.4431417600860352, 103.80693659397264, 1.440...",{'status_message': 'Found route between points...,0.910,6.883333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
870,1.430078,103.835246,187,A,1.429443,103.835005,78,0.075484,YISHUN MRT STATION,"(1.4300775839166504, 103.835246410066, 1.42944...",{'status_message': 'Found route between points...,0.257,2.166667
871,1.427784,103.829635,234,A,1.429443,103.835005,78,0.624787,YISHUN MRT STATION,"(1.42778398503801, 103.82963501492203, 1.42944...",{'status_message': 'Found route between points...,1.200,8.533333
872,1.433466,103.830143,38,A,1.429443,103.835005,78,0.701545,YISHUN MRT STATION,"(1.4334656524684517, 103.8301430011276, 1.4294...",{'status_message': 'Found route between points...,1.745,11.333333
873,1.421933,103.834261,141,A,1.429443,103.835005,78,0.839180,YISHUN MRT STATION,"(1.4219328516149332, 103.8342614561949, 1.4294...",{'status_message': 'Found route between points...,1.311,9.350000


In [70]:
def get_centroid_name(row):
    geocode = onemap.reverseGeocode.revGeoCode(row['Latitude_x'], row['Longitude_x'])
    if geocode['GeocodeInfo'][0]['BUILDINGNAME'] != "NIL":
        return geocode['GeocodeInfo'][0]['BUILDINGNAME']
    else:
        if geocode['GeocodeInfo'][0]['BLOCK'] != "NIL":
            return geocode['GeocodeInfo'][0]['BLOCK'] + " " + geocode['GeocodeInfo'][0]['ROAD']
        else:
            return geocode['GeocodeInfo'][0]['ROAD']

In [71]:
result_df['centroid_name'] = result_df.apply(get_centroid_name, axis=1)

In [73]:
new_result_df = result_df. loc[:, result_df. columns != 'route']
new_result_df.to_csv(r"..\data\HDB_Centroid_MRT pairing data_5_cluster.csv")
routes = result_df['route'].copy(deep = True)
routes.to_json(r'..\data\HDB_MRT_routes_5_cluster.json', orient='records')

In [74]:
result_df[result_df['route'].isna()]

Unnamed: 0,Latitude_x,Longitude_x,index_x,join_key,Latitude_y,Longitude_y,index_y,euclidean_distance,MRT_Name,coordinate_pair,route,distance,duration,centroid_name


In [75]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 875 entries, 0 to 874
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude_x          875 non-null    float64
 1   Longitude_x         875 non-null    float64
 2   index_x             875 non-null    int64  
 3   join_key            875 non-null    object 
 4   Latitude_y          875 non-null    float64
 5   Longitude_y         875 non-null    float64
 6   index_y             875 non-null    int64  
 7   euclidean_distance  875 non-null    float64
 8   MRT_Name            875 non-null    object 
 9   coordinate_pair     875 non-null    object 
 10  route               875 non-null    object 
 11  distance            875 non-null    float64
 12  duration            875 non-null    float64
 13  centroid_name       875 non-null    object 
dtypes: float64(7), int64(2), object(5)
memory usage: 95.8+ KB


In [61]:
route['route_geometry']

'o_xGa`qxRCBYJmEzAmBn@HXC@yAf@k@ZMEAB??FFDBjAfD'

In [64]:
import polyline

polyline.decode(route['route_geometry'])

[(1.4388, 103.80305),
 (1.43882, 103.80303),
 (1.43895, 103.80297),
 (1.43998, 103.80251),
 (1.44053, 103.80227),
 (1.44048, 103.80214),
 (1.4405, 103.80213),
 (1.44095, 103.80193),
 (1.44117, 103.80179),
 (1.44124, 103.80182),
 (1.44125, 103.8018),
 (1.44125, 103.8018),
 (1.44121, 103.80176),
 (1.44118, 103.80174),
 (1.4408, 103.8009)]

In [11]:
# def get_route(coordinate_pair):
#     time.sleep(2)
#     try:
#         return client.directions(coordinate_pair, profile='cycling-regular', format='geojson', validate=False)
#     except Exception as e:
#         print(f"Error: {e}")
#         return None

In [12]:
# result_df['route'] = result_df['coordinate_pair'].apply(get_route)

Error: 404 ({'error': {'code': 2010, 'message': 'Could not find routable point within a radius of 350.0 meters of specified coordinate 1: 103.9878836 1.3574790.'}, 'info': {'engine': {'build_date': '2024-01-29T14:41:12Z', 'version': '7.1.1'}, 'timestamp': 1711286480021}})


In [72]:
# def get_distance(route):
#     try:
#         return route['features'][0]['properties']['segments'][0]['distance']
#     except (KeyError, IndexError,TypeError) as e:
#         print(f"Error: {e}")
#         return None

# result_df['distance'] = result_df['route'].apply(get_distance)

# def get_time(route):
#     try:
#         return route['features'][0]['properties']['segments'][0]['duration']
#     except (KeyError, IndexError,TypeError) as e:
#         print(f"Error: {e}")
#         return None
    

# result_df['duration'] = result_df['route'].apply(get_time)