In [1]:
import pandas as pd
import numpy as np
import openrouteservice as ors
from dotenv import load_dotenv
from pyonemap import OneMap 
import os
import requests
import json
import time
from collections import namedtuple
import helper_functions as helper
from datetime import datetime,timedelta
import polyline
from rdp import rdp

In [2]:
load_dotenv()

True

In [3]:
directory = os.getcwd()

os.chdir(directory)

In [4]:
HDB_centroid_df = pd.read_csv(r"../../data/Cluster_data/hdb_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])

Private_centroid_df = pd.read_csv(r"../../data/Cluster_data/priv_cluster_centroids.csv",header = None,names = ['Latitude','Longitude'])

mrt_stations_df = pd.read_csv(r"../../data/Cluster_data/mrt_station_final.csv",usecols = [1,2,3])

In [5]:
#HDB_centroid_df.head(5)
#Private_centroid_df.head(5)
HDB_centroid_df.info()
Private_centroid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   550 non-null    float64
 1   Longitude  550 non-null    float64
dtypes: float64(2)
memory usage: 8.7 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   550 non-null    float64
 1   Longitude  550 non-null    float64
dtypes: float64(2)
memory usage: 8.7 KB


In [6]:
mrt_stations_df.sort_values(by='MRT.Name', inplace=True)

mrt_stations_df

Unnamed: 0,MRT.Name,Latitude,Longitude
12,ADMIRALTY MRT STATION,1.440589,103.800990
9,ALJUNIED MRT STATION,1.316433,103.882906
52,ANG MO KIO MRT STATION,1.369429,103.849455
115,BAKAU LRT STATION,1.387994,103.905415
54,BANGKIT LRT STATION,1.380022,103.772647
...,...,...,...
158,WOODLANDS SOUTH MRT STATION,1.427488,103.792730
80,WOODLEIGH MRT STATION,1.339190,103.870818
77,YEW TEE MRT STATION,1.397298,103.747358
79,YIO CHU KANG MRT STATION,1.381499,103.845171


In [7]:
combined_centroid_df = pd.concat([HDB_centroid_df,Private_centroid_df],ignore_index=True)

combined_centroid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   1100 non-null   float64
 1   Longitude  1100 non-null   float64
dtypes: float64(2)
memory usage: 17.3 KB


In [8]:
#create a psuedo index for my residential centroids df
combined_centroid_df['index'] = combined_centroid_df.index

combined_centroid_df['index']

#create a psuedo index for my residential centroids df
mrt_stations_df['index'] = mrt_stations_df.index

mrt_stations_df['index']

##create a dummy variable to cross join on 
combined_centroid_df['join_key'] = "A"
mrt_stations_df['join_key'] = "A"

#Cross join to obtain combinations of all possible pairings between MRTs and Residential Centroids
combined_combinations_df = pd.merge(combined_centroid_df, mrt_stations_df, on='join_key')

print(combined_combinations_df)

        Latitude_x  Longitude_x  index_x join_key  \
0         1.282627   103.826023        0        A   
1         1.282627   103.826023        0        A   
2         1.282627   103.826023        0        A   
3         1.282627   103.826023        0        A   
4         1.282627   103.826023        0        A   
...            ...          ...      ...      ...   
192495    1.318428   103.907190     1099        A   
192496    1.318428   103.907190     1099        A   
192497    1.318428   103.907190     1099        A   
192498    1.318428   103.907190     1099        A   
192499    1.318428   103.907190     1099        A   

                           MRT.Name  Latitude_y  Longitude_y  index_y  
0             ADMIRALTY MRT STATION    1.440589   103.800990       12  
1              ALJUNIED MRT STATION    1.316433   103.882906        9  
2            ANG MO KIO MRT STATION    1.369429   103.849455       52  
3                 BAKAU LRT STATION    1.387994   103.905415      115  
4  

In [9]:
coordinate_pair = namedtuple('coordinate_pair',['lat_x','lon_x','lat_y','lon_y'])

In [10]:
#Apply function to dataframe and store distances in new column 'euclidean distance'
combined_combinations_df['euclidean_distance'] = helper.haversine(combined_combinations_df['Latitude_x'], combined_combinations_df['Longitude_x'], combined_combinations_df['Latitude_y'], combined_combinations_df['Longitude_y'])

#Group by residential centroid, filter out the closest MRT by distance for each centroid into another dataframe and reset its index
result_df = combined_combinations_df.loc[combined_combinations_df.groupby('index_x')['euclidean_distance'].idxmin()].reset_index(drop = True)

result_df

#Create a new column 'coordinate_pair' to store coordinate pairs to pass to openrouteservice API direction query
# result_df['coordinate_pair'] = list(zip(result_df['Longitude_x'], result_df['Latitude_x'], result_df['Longitude_y'], result_df['Latitude_y']))
# result_df['coordinate_pair'] = result_df['coordinate_pair'].apply(lambda x: [[x[0], x[1]], [x[2], x[3]]])
result_df['coordinate_pair'] = result_df.apply(lambda x: coordinate_pair(x['Latitude_x'], x['Longitude_x'], x['Latitude_y'], x['Longitude_y']), axis=1)

#create an empty column 'route' to later store query response
result_df['cycle_route'] = np.nan
result_df['bus_route'] = np.nan

In [11]:
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude_x          1100 non-null   float64
 1   Longitude_x         1100 non-null   float64
 2   index_x             1100 non-null   int64  
 3   join_key            1100 non-null   object 
 4   MRT.Name            1100 non-null   object 
 5   Latitude_y          1100 non-null   float64
 6   Longitude_y         1100 non-null   float64
 7   index_y             1100 non-null   int64  
 8   euclidean_distance  1100 non-null   float64
 9   coordinate_pair     1100 non-null   object 
 10  cycle_route         0 non-null      float64
 11  bus_route           0 non-null      float64
dtypes: float64(7), int64(2), object(3)
memory usage: 103.2+ KB


In [12]:
result_df.head(5)

Unnamed: 0,Latitude_x,Longitude_x,index_x,join_key,MRT.Name,Latitude_y,Longitude_y,index_y,euclidean_distance,coordinate_pair,cycle_route,bus_route
0,1.282627,103.826023,0,A,TIONG BAHRU MRT STATION,1.286103,103.827445,87,0.417554,"(1.28262693083003, 103.8260229912674, 1.286102...",,
1,1.380156,103.87942,1,A,TONGKANG LRT STATION,1.389348,103.885844,84,1.246891,"(1.3801559906311147, 103.87941955476325, 1.389...",,
2,1.380869,103.748413,2,A,KEAT HONG LRT STATION,1.378603,103.749056,125,0.261871,"(1.380868921822081, 103.74841283436977, 1.3786...",,
3,1.3442,103.959075,3,A,UPPER CHANGI MRT STATION,1.34174,103.961473,82,0.381959,"(1.3442002658092747, 103.95907516602809, 1.341...",,
4,1.438353,103.798333,4,A,ADMIRALTY MRT STATION,1.440589,103.80099,12,0.386099,"(1.4383527950723751, 103.79833297097744, 1.440...",,


In [13]:
one_map_email = os.getenv("ONE_MAP_EMAIL")
one_map_password = os.getenv("ONE_MAP_PASSWORD")
payload = {
        "email": one_map_email,
        "password": one_map_password
      }
api_key = requests.request("POST", "https://www.onemap.gov.sg/api/auth/post/getToken", json=payload)
api_key = api_key.json()["access_token"]

In [14]:
onemap = OneMap(api_key)

In [15]:
result_df['cycle_route'] = result_df['coordinate_pair'].apply(lambda x: helper.get_cycle_route(onemap,x))

In [16]:
result_df['cycle_route']

0       {'status_message': 'Found route between points...
1       {'status_message': 'Found route between points...
2       {'status_message': 'Found route between points...
3       {'status_message': 'Found route between points...
4       {'status_message': 'Found route between points...
                              ...                        
1095    {'status_message': 'Found route between points...
1096    {'status_message': 'Found route between points...
1097    {'status_message': 'Found route between points...
1098    {'status_message': 'Found route between points...
1099    {'status_message': 'Found route between points...
Name: cycle_route, Length: 1100, dtype: object

In [16]:
result_df['cycle_route'].isna().sum()

17

In [17]:
#To note for later
result_df[result_df['cycle_route'].isna()]

Unnamed: 0,Latitude_x,Longitude_x,index_x,join_key,MRT.Name,Latitude_y,Longitude_y,index_y,euclidean_distance,coordinate_pair,cycle_route,bus_route
2,1.380869,103.748413,2,A,KEAT HONG LRT STATION,1.378603,103.749056,125,0.261871,"(1.380868921822081, 103.74841283436977, 1.3786...",,
136,1.353336,103.961476,136,A,TAMPINES EAST MRT STATION,1.356191,103.954634,21,0.824152,"(1.3533363711406043, 103.9614762014272, 1.3561...",,
137,1.355172,103.946187,137,A,TAMPINES EAST MRT STATION,1.356191,103.954634,21,0.945892,"(1.3551716913027807, 103.94618687420837, 1.356...",,
138,1.428887,103.850365,138,A,YISHUN MRT STATION,1.429443,103.835005,78,1.708525,"(1.428886869803579, 103.85036484275652, 1.4294...",,
184,1.35752,103.741479,184,A,BUKIT GOMBAK MRT STATION,1.358612,103.751791,45,1.152764,"(1.3575200241024, 103.74147857039812, 1.358611...",,
185,1.361995,103.750331,185,A,BUKIT GOMBAK MRT STATION,1.358612,103.751791,45,0.409739,"(1.3619950869649893, 103.75033095773648, 1.358...",,
187,1.302801,103.908031,187,A,EUNOS MRT STATION,1.319784,103.903226,46,1.962444,"(1.3028012251995535, 103.9080308726372, 1.3197...",,
188,1.376442,103.765994,188,A,PETIR LRT STATION,1.377772,103.766646,123,0.16465,"(1.3764422356991495, 103.76599431387432, 1.377...",,
189,1.389415,103.908278,189,A,RUMBIA LRT STATION,1.391468,103.905974,122,0.343124,"(1.3894154808459367, 103.90827802015868, 1.391...",,
191,1.423224,103.8459,191,A,YISHUN MRT STATION,1.429443,103.835005,78,1.394614,"(1.4232239518697667, 103.84589991674228, 1.429...",,


In [18]:
result_df['distance'] = result_df['cycle_route'].apply(helper.get_distance)

Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable


In [19]:
result_df['cycle_duration'] = result_df['cycle_route'].apply(helper.get_time)

Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable
Error: 'NoneType' object is not subscriptable


In [20]:
result_df['centroid_name'] = result_df.apply(lambda x:helper.get_centroid_name(onemap,x), axis=1)

In [62]:
result_df['suitability'] = np.nan
result_df['steepness'] = np.nan

In [95]:
ors_key = os.getenv("ORS_API_KEY")

client = ors.Client(key= ors_key)


In [29]:
# route = result_df['cycle_route'][0]
# route_coordinates = polyline.decode(route['route_geometry'])

# route_coordinates
# from rdp import rdp

# rdp_route_coordinates = rdp(route_coordinates, epsilon=0.0001)
# rdp_route_coordinates = [[i[1],i[0]] for i in rdp_route_coordinates]
# response = client.directions(coordinates = rdp_route_coordinates, profile = 'cycling-regular', format = 'geojson', validate = False, instructions = False, elevation = True, extra_info = ['steepness','suitability'])
# print(response['features'][0]['properties']['extras']['steepness']['summary'])
# print(response['features'][0]['properties']['extras']['suitability']['summary'])
# def get_path_steepness_and_suitability(client,route):
#     time.sleep(1)
#     try:
#         route_coordinates = polyline.decode(route['route_geometry'])
#         rdp_route_coordinates = rdp(route_coordinates, epsilon=0.0001)
#         rdp_route_coordinates = [[i[1],i[0]] for i in rdp_route_coordinates]
#         response = client.directions(coordinates = rdp_route_coordinates, profile = 'cycling-regular', format = 'geojson', validate = False, instructions = False, elevation = True, extra_info = ['steepness','suitability'])

#         steepness = response['features'][0]['properties']['extras']['steepness']['summary']
#         total_length_steepness = sum([i['distance'] for i in steepness])
#         normalized_weighted_steepness = sum([i['distance']*i['value'] for i in steepness])/total_length_steepness

#         suitability = response['features'][0]['properties']['extras']['suitability']['summary']
#         total_length_suitability = sum([i['distance'] for i in suitability])
#         normalized_weighted_suitability = sum([i['distance']*i['value'] for i in suitability])/total_length_suitability

#         return normalized_weighted_steepness,normalized_weighted_suitability
#     except Exception as e:
#         return None

# steepness, suitability = get_path_steepness_and_suitability(client,route)
# print(steepness)
# print(suitability)

In [79]:
def get_path_steepness_and_suitability(client,route):
    time.sleep(1)
    route_coordinates = polyline.decode(route['route_geometry'])
    rdp_route_coordinates = rdp(route_coordinates, epsilon=0.0001)
    rdp_route_coordinates = [[i[1],i[0]] for i in rdp_route_coordinates]
    response = client.directions(coordinates = rdp_route_coordinates, profile = 'cycling-regular', format = 'geojson', validate = False, instructions = False, elevation = True, extra_info = ['steepness','suitability'])

    steepness = response['features'][0]['properties']['extras']['steepness']['summary']
    total_length_steepness = sum([i['distance'] for i in steepness])
    normalized_weighted_steepness = sum([i['distance']*i['value'] for i in steepness])/total_length_steepness

    suitability = response['features'][0]['properties']['extras']['suitability']['summary']
    total_length_suitability = sum([i['distance'] for i in suitability])
    normalized_weighted_suitability = sum([i['distance']*i['value'] for i in suitability])/total_length_suitability

    return normalized_weighted_steepness,normalized_weighted_suitability

In [106]:
for index, row in result_df.iterrows():
    try:
        steepness, suitability = get_path_steepness_and_suitability(client,row['cycle_route'])
        result_df.at[index,'steepness'] = steepness
        result_df.at[index,'suitability'] = suitability
    except Exception as e:
        print(e)
        continue

1000


0.0 5.921282499229267
1001
0.0 7.503941499844413
1002
0.0 5.126621235436359
1003
-1.0 7.308156563415284
1004
0.0 7.249215548155738
1005
0.0 6.158775592672413
1006
-1.0 7.536568251740703
1007
1.0 7.576018917791069
1008
0.0 7.433474448140308
1009
-1.0 7.305695639276179
1010
0.33233403021577057 6.308101924897443
1011
0.0 8.0
1012
0.0 5.683644656820156
1013
-1.0 7.39404296875
1014
0.0 5.870864681026486
1015
0.0 7.107212573201862
1016
0.0 6.658507664944524
1017
0.0 7.931191615402142
1018
0.0 7.10527239744865
1019
-0.160943178796206 7.431175093964968
1020
0.0 8.0
1021
0.0 7.726740874967641
1022
0.0 7.678987030296817
1023
0.0 5.647380900002444
1024
0.0 7.954085348092049
1025
0.0 6.31047265987025
1026
0.0 7.404216444132114
1027
1.0 6.7829034831161925
1028
0.0 6.250584508113941
1029
0.0 7.215723873441996
1030
0.0 6.932204241632324
1031
-0.25849087939768045 5.302543803803249
1032
0.0 7.39898177920686
1033
0.0 7.357830308597797
1034
0.0 7.012609985676284
1035
0.0 7.187529976019184
1036
0.0 7.8566



0.0 7.491366826333964
1041
0.0 6.414814814814815
1042
0.0 6.596894409937889
1043
0.0 6.703103388658366
1044
0.0 6.261578805290965
1045
0.0 7.012204923088425
1046
0.0 7.369416733102066
1047
0.0 6.8567157617058685
1048
-1.0461619904711488 5.687083112758073
1049
0.0 7.507687405401098
1050
0.0 7.382784195047099
1051
-0.13256990805635407 5.72231593452282
1052
1.0 6.927531837477258
1053
0.0 7.565259025184335
1054
0.6033679818188592 7.268608896505475
1055
0.0 5.876887701199428
1056
0.0 7.330846835830022
1057
0.0 6.2137481184144505
1058
0.0 8.0
1059
0.0 6.4245111078272
1060
0.0 6.442979006594471
1061
0.0 6.975461707348573
1062
0.0 7.49459006745439
1063
0.0 7.859511641113004
1064
-0.11492898156542762 6.0435465836632325
1065
-1.0 7.402790381125228
1066
0.0 7.5486640136441165
1067
0.0 7.955824100120559
1068
0.0 7.365658524759076
1069
0.0 6.058436890212591
1070
0.0 7.594333258376433
1071
0.0 7.104019540168853
1072
0.0 8.0
1073
0.23647899099965572 6.9612116310459955
1074
0.0 7.84411503518
1075
0.0 



0.0 7.796232673853809
1080
0.0 6.848117973692596
1081
0.0 7.3443725969940585
1082
0.0 5.2295475656448955
1083
0.0 6.569381552012903
1084
0.0 5.509990939005197
1085
0.0 6.65792418860396
1086
0.0 6.089217933781803
1087
0.0 6.696022401248593
1088
0.0 6.704069561102567
1089
-0.03491030601477313 6.967023545188734
1090
-0.026662607107128873 5.122901383129875
1091
0.0 7.246264811952602
1092
0.0 6.237395246599809
1093
0.0 6.870213690242239
1094
0.0 7.608690504380771
1095
-1.0 6.193218514531755
1096
0.0 6.5828614946524935
1097
0.0 6.707703566100931
1098
0.0 7.282020953024671
1099
0.0 7.493210928481117


In [110]:
result_df.to_csv(r'C:\Users\leoqi\Downloads\indiv_combined_centroid_data.csv',index = False)