In [1]:
import pandas as pd
import numpy as np
import openrouteservice as ors
from dotenv import load_dotenv
from pyonemap import OneMap
import os
import requests
import json
import time
from collections import namedtuple
import helper_functions as helper
from datetime import datetime,timedelta

In [2]:
# Load variables from a local .env file into the process environment; the API
# credentials read further down with os.getenv() are expected to come from there.
load_dotenv()

True

In [3]:
# NOTE(review): this changes the working directory to the directory the kernel is
# already in, so it has no visible effect. The relative data paths used in this
# notebook resolve against wherever the kernel was started.
directory = os.getcwd()

os.chdir(directory)

In [4]:
# Input tables live in the sibling "data" folder. The paths are assembled with
# os.path.join so the notebook also runs on macOS/Linux, where a backslash is not
# a path separator (on Windows the resulting paths are unchanged).
data_dir = os.path.join("..", "data")

# The two centroid files have no header row; their two columns are named
# Latitude and Longitude here.
hdbCentroids_df = pd.read_csv(
    os.path.join(data_dir, "hdb_cluster_centroids.csv"),
    header=None,
    names=['Latitude', 'Longitude'],
)
priv_centroid_df = pd.read_csv(
    os.path.join(data_dir, "priv_cluster_centroids.csv"),
    header=None,
    names=['Latitude', 'Longitude'],
)

# Only columns 1-3 of the MRT file are read; column 0 is skipped.
mrt_stations_df = pd.read_csv(
    os.path.join(data_dir, "mrt_station_final.csv"),
    usecols=[1, 2, 3],
)

In [5]:
# Print column names, non-null counts, dtypes and memory usage of each input table.
for loaded_df in (hdbCentroids_df, priv_centroid_df, mrt_stations_df):
    loaded_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   550 non-null    float64
 1   Longitude  550 non-null    float64
dtypes: float64(2)
memory usage: 8.7 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 550 entries, 0 to 549
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   550 non-null    float64
 1   Longitude  550 non-null    float64
dtypes: float64(2)
memory usage: 8.7 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   MRT.Name   175 non-null    object 
 1   Latitude   175 non-null    float64
 2   Longitude  175 non-null    float64
dtypes: float64(2), object(1)
memory usage: 4.2+ KB


In [7]:
# Order the stations alphabetically by name. The original row labels are kept
# (the index is not reset), so each station still carries its label from the file.
mrt_stations_df = mrt_stations_df.sort_values(by='MRT.Name')

# Peek at the first station after sorting.
mrt_stations_df.head(1)

Unnamed: 0,MRT.Name,Latitude,Longitude
12,ADMIRALTY MRT STATION,1.440589,103.80099


In [8]:
# Stack the HDB centroids on top of the private-housing centroids and renumber
# the rows 0..n-1 so every centroid has a unique row label.
combined_centroid_df = pd.concat(
    [hdbCentroids_df, priv_centroid_df],
    ignore_index=True,
)

In [9]:
# Sanity check: the combined frame should contain the HDB and private centroid
# rows stacked together, with the same two coordinate columns.
combined_centroid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Latitude   1100 non-null   float64
 1   Longitude  1100 non-null   float64
dtypes: float64(2)
memory usage: 17.3 KB


In [10]:
# Prepare both frames for the cross join:
#   * 'index'    - an explicit copy of the row label, so it survives the merge
#                  (it comes out as index_x for centroids and index_y for MRT stations);
#   * 'join_key' - a constant dummy key; merging on it pairs every row of one
#                  frame with every row of the other.
for side_df in (combined_centroid_df, mrt_stations_df):
    side_df['index'] = side_df.index
    side_df['join_key'] = "A"

# Cross join: one row for every possible (residential centroid, MRT station) pairing.
centroid_station_combination_df = combined_centroid_df.merge(mrt_stations_df, on='join_key')

In [12]:
# Inspect the cross-joined frame: one row per (centroid, MRT station) combination.
# Columns suffixed _x come from the centroid frame, _y from the MRT station frame.
centroid_station_combination_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192500 entries, 0 to 192499
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Latitude_x   192500 non-null  float64
 1   Longitude_x  192500 non-null  float64
 2   index_x      192500 non-null  int64  
 3   join_key     192500 non-null  object 
 4   MRT.Name     192500 non-null  object 
 5   Latitude_y   192500 non-null  float64
 6   Longitude_y  192500 non-null  float64
 7   index_y      192500 non-null  int64  
dtypes: float64(4), int64(2), object(2)
memory usage: 11.7+ MB


In [13]:
# Lightweight record holding the two endpoints of one pairing:
# (lat_x, lon_x) is the residential centroid, (lat_y, lon_y) is the MRT station.
coordinate_pair = namedtuple('coordinate_pair', 'lat_x lon_x lat_y lon_y')

In [16]:
# Distance between every residential centroid (_x columns) and every MRT station
# (_y columns), computed on whole columns at once.
# NOTE(review): the column keeps the name 'euclidean_distance' for compatibility with
# later cells, but the value comes from helper.haversine - presumably a great-circle
# distance; confirm the units in helper_functions.
centroid_station_combination_df['euclidean_distance'] = helper.haversine(
    centroid_station_combination_df['Latitude_x'],
    centroid_station_combination_df['Longitude_x'],
    centroid_station_combination_df['Latitude_y'],
    centroid_station_combination_df['Longitude_y'],
)

# For each MRT station keep the 5 residential centroids with the smallest distance.
# include_groups=False hides the grouping column from each group and
# reset_index(drop=True) discards it from the resulting index, so 'MRT.Name' is
# missing from result_df at this point and is re-attached just below.
result_df = (
    centroid_station_combination_df
    .groupby('MRT.Name')
    .apply(lambda group: group.nsmallest(5, 'euclidean_distance'), include_groups=False)
    .reset_index(drop=True)
)

# Re-attach the station name with a keyed lookup: index_y holds the station's row
# label, which mrt_stations_df also stores in its 'index' column. A keyed map cannot
# misalign rows, unlike assigning a column taken from a separately merged frame.
station_name_by_index = mrt_stations_df.set_index('index')['MRT.Name']
result_df['MRT.Name'] = result_df['index_y'].map(station_name_by_index)

# Bundle both endpoints of each pairing into one coordinate_pair record
# (centroid lat/lon first, then station lat/lon) for the routing helpers.
result_df['coordinate_pair'] = result_df.apply(
    lambda row: coordinate_pair(
        row['Latitude_x'], row['Longitude_x'], row['Latitude_y'], row['Longitude_y']
    ),
    axis=1,
)

# Placeholder columns; both are overwritten by the routing cells further down.
result_df['cycle_route'] = np.nan
result_df['bus_route'] = np.nan

In [22]:
result_df.info()

# Number of retained pairings per MRT station.
counts = (
    result_df
    .groupby('MRT.Name')
    .size()
    .reset_index(name='Count')
)

# Show any station that did not end up with exactly 5 pairings; an empty table
# means every station has 5.
counts.loc[counts['Count'].ne(5)]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 875 entries, 0 to 874
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Latitude_x          875 non-null    float64
 1   Longitude_x         875 non-null    float64
 2   index_x             875 non-null    int64  
 3   join_key            875 non-null    object 
 4   Latitude_y          875 non-null    float64
 5   Longitude_y         875 non-null    float64
 6   index_y             875 non-null    int64  
 7   euclidean_distance  875 non-null    float64
 8   MRT.Name            875 non-null    object 
 9   coordinate_pair     875 non-null    object 
 10  cycle_route         0 non-null      float64
 11  bus_route           0 non-null      float64
dtypes: float64(7), int64(2), object(3)
memory usage: 82.2+ KB


Unnamed: 0,MRT.Name,Count


In [23]:
# Exchange the OneMap account credentials (read from the environment) for an
# access token, then build the OneMap client with it.
one_map_email = os.getenv("ONE_MAP_EMAIL")
one_map_password = os.getenv("ONE_MAP_PASSWORD")
if not one_map_email or not one_map_password:
    # Fail early with a clear message instead of sending empty credentials.
    raise RuntimeError(
        "ONE_MAP_EMAIL and ONE_MAP_PASSWORD must be set in the environment (e.g. via the .env file)"
    )

payload = {
    "email": one_map_email,
    "password": one_map_password,
}

# A timeout keeps the notebook from hanging forever on a stalled connection, and
# raise_for_status surfaces HTTP errors (e.g. bad credentials) here rather than as
# a confusing KeyError on "access_token".
token_response = requests.post(
    "https://www.onemap.gov.sg/api/auth/post/getToken",
    json=payload,
    timeout=30,
)
token_response.raise_for_status()
api_key = token_response.json()["access_token"]

onemap = OneMap(api_key)

In [None]:
# Fill 'cycle_route' by passing the OneMap client and each row's coordinate_pair to
# helper.get_cycle_route.
# NOTE(review): presumably this issues one remote request per row - TODO confirm in
# helper_functions, and consider caching the result so a full re-run stays cheap.
result_df['cycle_route'] = result_df['coordinate_pair'].apply(lambda x:helper.get_cycle_route(onemap,x))

In [None]:
# Derive two values from each stored cycle route via the helper functions.
# NOTE(review): units of 'distance' and 'cycle_duration' are not visible here -
# confirm in helper.get_distance / helper.get_time.
result_df['distance'] = result_df['cycle_route'].apply(helper.get_distance)
result_df['cycle_duration'] = result_df['cycle_route'].apply(helper.get_time)

In [None]:
# Label each pairing's centroid; helper.get_centroid_name receives the whole row.
# NOTE(review): which columns the helper reads is not visible here - TODO confirm.
result_df['centroid_name'] = result_df.apply(helper.get_centroid_name, axis=1)

In [None]:
# Build the openrouteservice client from the ORS_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset, so a missing key
# is passed straight to the client - consider checking for it explicitly.
ors_key = os.getenv("ORS_API_KEY")

client = ors.Client(key= ors_key)

In [None]:
# Score each cycle route with helper.get_path_suitability, which is given the
# openrouteservice client and the stored route.
# NOTE(review): presumably one remote request per row - TODO confirm in helper_functions.
result_df['suitability'] = result_df['cycle_route'].apply(lambda x: helper.get_path_suitability(client,x))

In [None]:
# Score each cycle route with helper.get_path_steepness, which is given the
# openrouteservice client and the stored route.
# NOTE(review): presumably one remote request per row - TODO confirm in helper_functions.
result_df['steepness'] = result_df['cycle_route'].apply(lambda x: helper.get_path_steepness(client,x))

In [None]:
# Fetch the bus itinerary for every pairing. The helper's return value is wrapped in
# a Series and spread over the two target columns.
# NOTE(review): this assumes helper.apply_fetch_itinerary returns exactly two values
# (route, duration) per row - confirm in helper_functions.
result_df[['bus_route', 'bus_duration']] = result_df.apply(lambda row: pd.Series(helper.apply_fetch_itinerary(row)), axis=1)

# Positive values mean cycling takes longer than the bus itinerary.
result_df['time_difference'] = result_df['cycle_duration'] - result_df['bus_duration']

# Build output paths with os.path.join so the export also works on macOS/Linux
# (on Windows the resulting paths are unchanged).
output_dir = os.path.join("..", "data")

# Tabular export without the cycle-route payload. The previous filter compared column
# names against 'route', which matches no column (they are 'cycle_route' and
# 'bus_route'), so nothing was excluded. The cycle routes are written separately
# to JSON below.
new_result_df = result_df.drop(columns=['cycle_route'])
new_result_df.to_csv(os.path.join(output_dir, "five_point_Centroid_MRT pairing data.csv"))

# Cycle routes go to their own JSON file, one record per pairing, in row order.
routes = result_df['cycle_route'].copy(deep = True)
routes.to_json(os.path.join(output_dir, 'five_point_Centroid_MRT_cycle_routes.json'), orient='records')
