In [21]:
from dotenv import load_dotenv
import pandas as pd
import json
import os 
import numpy as np

In [22]:
# Load variables from a local .env file into the process environment.
# NOTE(review): none of the cells visible here read an environment variable —
# confirm this call is still needed.
load_dotenv()

True

In [23]:
# Remember the kernel's current working directory; the relative data paths
# used by the later cells are resolved against it.
directory = os.getcwd()

# `directory` is the value os.getcwd() just returned, so this call leaves the
# working directory where it already was.
os.chdir(directory)

In [24]:
# Load the centroid/station pairing table.
# Forward slashes are used so the relative path resolves on Windows, macOS and
# Linux alike; a backslash-separated path is only understood on Windows.
centroid_pairing_df = pd.read_csv("../data/Cluster_data/five_point_Centroid_MRT pairing data fixed.csv")

In [25]:
# Discard the 'Unnamed: 0' column (presumably a row index written out by an
# earlier to_csv call — confirm against the CSV).
# errors='ignore' keeps the cell re-runnable: once the column is gone, a second
# execution is a no-op instead of raising KeyError.
centroid_pairing_df = centroid_pairing_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [26]:
# Only the magnitude of the steepness is kept; its sign is discarded.
centroid_pairing_df['steepness'] = centroid_pairing_df['steepness'].abs()

# Flip the sign so that time savings are reflected as a positive number.
# NOTE: this negates the stored column, so executing the cell a second time
# flips the sign back again.
centroid_pairing_df['time_difference'] = -centroid_pairing_df['time_difference']

In [27]:
def calculate_weighted_score(dataframe,row1,row2,row3,row4):
    """Return the linear weighted score of the four scoring columns.

    The score is computed element-wise as::

        row1 * time_difference + row2 * distance
            + row3 * suitability + row4 * steepness

    Parameters
    ----------
    dataframe : pandas.DataFrame (or any mapping of column name -> values)
        Must provide the "time_difference", "distance", "suitability" and
        "steepness" columns.
    row1, row2, row3, row4 : numeric
        Weights applied to "time_difference", "distance", "suitability" and
        "steepness" respectively (despite their names they are weights, not
        rows).

    Returns
    -------
    The weighted sum, with the same shape as the input columns. If a required
    column is missing (KeyError) or the operands cannot be combined
    arithmetically (TypeError), a RuntimeWarning is emitted and ``np.nan`` is
    returned.

    Any other exception propagates to the caller.
    """
    import warnings

    try:
        return (
            row1 * dataframe["time_difference"]
            + row2 * dataframe["distance"]
            + row3 * dataframe["suitability"]
            + row4 * dataframe["steepness"]
        )
    except (KeyError, TypeError) as exc:
        # Only the expected data problems are absorbed. Everything else
        # (KeyboardInterrupt, programming errors, ...) must surface instead of
        # silently turning the whole score column into NaN.
        warnings.warn(
            f"calculate_weighted_score could not be evaluated ({exc!r}); returning NaN",
            RuntimeWarning,
            stacklevel=2,
        )
        return np.nan

In [28]:
# Score every centroid/station pair. The weights are spelled out per argument:
# row1 scales time_difference, row2 distance, row3 suitability, row4 steepness.
centroid_pairing_df['weighted_score'] = calculate_weighted_score(
    centroid_pairing_df,
    row1=0.4,
    row2=-0.2,
    row3=0.8,
    row4=-0.1,
)

In [29]:
# Boolean mask selecting the pairs whose 'approximated?' flag equals 1.
cond = centroid_pairing_df['approximated?'].eq(1)

# (rows, columns) of the flagged subset.
centroid_pairing_df.loc[cond].shape

(17, 21)

In [30]:
# Number of missing values in each column of the pairing table.
centroid_pairing_df.isna().sum()

Latitude_x             0
Longitude_x            0
index_x                0
join_key               0
MRT.Name               0
Latitude_y             0
Longitude_y            0
index_y                0
euclidean_distance     0
coordinate_pair        0
cycle_route           17
bus_route              0
distance               0
cycle_duration         0
centroid_name          0
suitability           18
steepness             18
bus_duration           0
time_difference        0
approximated?          0
weighted_score        18
dtype: int64

In [31]:
# Aggregation spec: the four scoring inputs are averaged per station, while
# the 'approximated?' flags are summed per station.
agg_dict = {
    **dict.fromkeys(['time_difference', 'distance', 'steepness', 'suitability'], 'mean'),
    'approximated?': 'sum',
}

# One row per 'MRT.Name', with the station name restored as a regular column.
df = (
    centroid_pairing_df
    .groupby('MRT.Name')
    .agg(agg_dict)
    .reset_index()
)

# Station-level score, computed from the aggregated columns with the same
# weights as the row-level score.
df['Weighted_Score'] = calculate_weighted_score(df, row1=0.4, row2=-0.2, row3=0.8, row4=-0.1)

In [32]:
# Show the per-station aggregates together with their Weighted_Score.
df

Unnamed: 0,MRT.Name,time_difference,distance,steepness,suitability,approximated?,Weighted_Score
0,ADMIRALTY MRT STATION,4.003333,0.8134,0.400000,7.631176,0.0,7.503594
1,ALJUNIED MRT STATION,3.650000,0.7636,0.000000,7.053749,0.0,6.950279
2,ANG MO KIO MRT STATION,3.270000,0.8558,0.666667,6.392720,2.0,6.184349
3,BAKAU LRT STATION,3.533333,0.5630,0.000000,7.506390,0.0,7.305845
4,BANGKIT LRT STATION,2.876667,0.8062,0.200000,6.197938,0.0,5.927777
...,...,...,...,...,...,...,...
170,WOODLANDS SOUTH MRT STATION,6.063333,0.8068,0.000000,6.161326,0.0,7.193034
171,WOODLEIGH MRT STATION,6.610000,0.7284,0.000000,5.764717,0.0,7.110094
172,YEW TEE MRT STATION,2.890000,0.6702,0.800000,7.385922,0.0,6.850698
173,YIO CHU KANG MRT STATION,6.250000,1.3158,0.225020,5.515098,0.0,6.626416


In [33]:
# Load the station table, keeping only the columns at positions 1-3, and order
# it alphabetically by station name.
# Forward slashes are used so the relative path resolves on Windows, macOS and
# Linux alike; a backslash-separated path is only understood on Windows.
mrt_stations_df = (
    pd.read_csv("../data/Cluster_data/mrt_station_final.csv", usecols=[1, 2, 3])
    .sort_values(by='MRT.Name')
)

mrt_stations_df

Unnamed: 0,MRT.Name,Latitude,Longitude
12,ADMIRALTY MRT STATION,1.440589,103.800990
9,ALJUNIED MRT STATION,1.316433,103.882906
52,ANG MO KIO MRT STATION,1.369429,103.849455
115,BAKAU LRT STATION,1.387994,103.905415
54,BANGKIT LRT STATION,1.380022,103.772647
...,...,...,...
158,WOODLANDS SOUTH MRT STATION,1.427488,103.792730
80,WOODLEIGH MRT STATION,1.339190,103.870818
77,YEW TEE MRT STATION,1.397298,103.747358
79,YIO CHU KANG MRT STATION,1.381499,103.845171


In [34]:
# Attach the per-station aggregates to the station table. how="left" keeps
# every station row even when `df` has no matching 'MRT.Name'.
# NOTE: this rebinds mrt_stations_df to the merged frame, so executing the cell
# again merges `df` in a second time.
mrt_stations_df = mrt_stations_df.merge(
    df,
    how="left",
    left_on='MRT.Name',
    right_on="MRT.Name",
)

# NOTE(review): the output saved under this cell shows the merged frame itself
# rather than a describe() summary — the cell appears to have been edited after
# its last run.
mrt_stations_df.describe()

Unnamed: 0,MRT.Name,Latitude,Longitude,time_difference,distance,steepness,suitability,approximated?,Weighted_Score
0,ADMIRALTY MRT STATION,1.440589,103.800990,4.003333,0.8134,0.400000,7.631176,0.0,7.503594
1,ALJUNIED MRT STATION,1.316433,103.882906,3.650000,0.7636,0.000000,7.053749,0.0,6.950279
2,ANG MO KIO MRT STATION,1.369429,103.849455,3.270000,0.8558,0.666667,6.392720,2.0,6.184349
3,BAKAU LRT STATION,1.387994,103.905415,3.533333,0.5630,0.000000,7.506390,0.0,7.305845
4,BANGKIT LRT STATION,1.380022,103.772647,2.876667,0.8062,0.200000,6.197938,0.0,5.927777
...,...,...,...,...,...,...,...,...,...
170,WOODLANDS SOUTH MRT STATION,1.427488,103.792730,6.063333,0.8068,0.000000,6.161326,0.0,7.193034
171,WOODLEIGH MRT STATION,1.339190,103.870818,6.610000,0.7284,0.000000,5.764717,0.0,7.110094
172,YEW TEE MRT STATION,1.397298,103.747358,2.890000,0.6702,0.800000,7.385922,0.0,6.850698
173,YIO CHU KANG MRT STATION,1.381499,103.845171,6.250000,1.3158,0.225020,5.515098,0.0,6.626416


In [35]:
# Summary statistics for the numeric columns of the merged station table.
mrt_stations_df.describe()

Unnamed: 0,Latitude,Longitude,time_difference,distance,steepness,suitability,approximated?,Weighted_Score
count,175.0,175.0,175.0,175.0,175.0,175.0,175.0,175.0
mean,1.345903,103.839643,4.61759,1.22548,0.200003,6.790858,0.097143,7.014627
std,0.045893,0.065953,4.857827,1.332713,0.237551,0.61961,0.450725,2.10242
min,1.265389,103.636991,-14.72,0.4166,0.0,5.434801,0.0,-2.814775
25%,1.307012,103.794565,2.828333,0.7146,0.0,6.362244,0.0,6.410384
50%,1.33919,103.848649,4.073333,0.8708,0.196469,6.764457,0.0,6.950279
75%,1.385059,103.889302,5.776667,1.1594,0.4,7.322598,0.0,7.601777
max,1.449051,103.987884,33.546667,10.7538,1.0,8.202216,3.0,18.825533


In [37]:
# Order the stations from highest to lowest Weighted_Score and renumber the
# rows from 0 so the index reflects the sorted position.
ranking = (
    mrt_stations_df
    .sort_values(by='Weighted_Score', ascending=False)
    .reset_index(drop=True)
)

# The rank is the zero-based position in that ordering (0 = highest score).
ranking['rank'] = ranking.index

ranking

Unnamed: 0,MRT.Name,Latitude,Longitude,time_difference,distance,steepness,suitability,approximated?,Weighted_Score,rank
0,CHANGI AIRPORT MRT STATION,1.357479,103.987884,33.546667,4.8778,0.064148,7.986052,0.0,18.825533,0
1,GARDENS BY THE BAY MRT STATION,1.278487,103.867455,29.670000,3.4940,0.057370,6.797914,0.0,16.601794,1
2,JOO KOON MRT STATION,1.327694,103.678085,17.430000,3.2196,0.000000,6.370842,0.0,11.424753,2
3,PUNGGOL MRT STATION,1.414927,103.910166,12.706667,1.4062,0.192515,8.202216,0.0,11.343948,3
4,MARINA SOUTH PIER MRT STATION,1.271027,103.862448,16.436667,3.5842,0.049921,6.735707,0.0,11.241400,4
...,...,...,...,...,...,...,...,...,...,...
170,DOVER MRT STATION,1.311405,103.778638,-2.440000,2.6326,0.013534,6.781917,0.0,3.921660,170
171,TUAS LINK MRT STATION,1.340882,103.636991,-9.876667,10.7538,0.000000,6.250349,0.0,-1.101148,171
172,TUAS CRESCENT MRT STATION,1.321027,103.649078,-12.996667,7.8296,0.000000,5.987545,0.0,-1.974551,172
173,GUL CIRCLE MRT STATION,1.319471,103.660530,-14.503333,6.8710,0.000000,5.742133,0.0,-2.581827,173


In [39]:
# Persist the ranked station table; index=False leaves the row index out of
# the written file.
ranking.to_csv(
    path_or_buf="../data/Cluster_data/5_cluster_mrt_ranking.csv",
    index=False,
)