### Calculate distances for POI
Purpose: Calculate the distance from each HDB address to points of interest (POI):
- distance2cbd

In [54]:
import pandas as pd


class Coordinate:
    """A geographic point stored as a longitude/latitude pair.

    Note the argument order: the constructor takes ``(lon, lat)``, while
    ``get_lat_lon`` returns ``(lat, lon)``. Passing keyword arguments at
    construction time avoids mixing the two up.
    """

    def __init__(self, lon: float, lat: float) -> None:
        """Store the longitude and latitude of the point."""
        self.lon, self.lat = lon, lat

    def get_lon(self) -> float:
        """Return the stored longitude."""
        return self.lon

    def get_lat(self) -> float:
        """Return the stored latitude."""
        return self.lat

    def get_lat_lon(self) -> tuple:
        """Return the point as a ``(latitude, longitude)`` tuple."""
        return self.lat, self.lon

In [55]:
# Load the HDB address coordinates and discard the 'Unnamed: 0' column
# (presumably a row index written by an earlier to_csv call - TODO confirm).
df_hdb = (
    pd.read_csv('../data/modified/hdb_coordinates.csv')
    .drop(columns=['Unnamed: 0'])
)
df_hdb

Unnamed: 0,address,latitude,longitude
0,174 ANG MO KIO AVE 4,1.375097,103.837619
1,541 ANG MO KIO AVE 10,1.373922,103.855621
2,163 ANG MO KIO AVE 4,1.373552,103.838169
3,446 ANG MO KIO AVE 10,1.367761,103.855357
4,557 ANG MO KIO AVE 10,1.371626,103.857736
...,...,...,...
9571,605D TAMPINES ST 61,1.361790,103.935872
9572,601A TAMPINES AVE 9,1.360759,103.936833
9573,604B TAMPINES AVE 9,1.361407,103.935627
9574,26B ST. GEORGE'S LANE,1.321669,103.862718


##### distance2cbd

In [56]:
from geopy.distance import geodesic

def calculate_distance(start: Coordinate, end: Coordinate) -> float:
    """Return the geodesic distance between two coordinates, in kilometres.

    Args:
        start: First point.
        end: Second point.

    Returns:
        The ``.km`` value of geopy's ``geodesic`` distance between the points.
    """
    # Both points are handed to geopy as (latitude, longitude) tuples.
    origin = start.get_lat_lon()
    destination = end.get_lat_lon()
    return geodesic(origin, destination).km

In [57]:
# Reference point used as the CBD location when computing distance2cbd.
CENTRAL_BUSINESS_DISTRICT = Coordinate(lat=1.287953, lon=103.851784)
# Sample point (174 ANG MO KIO AVE 4) used to sanity-check calculate_distance.
A_174AMKAVE4 = Coordinate(lat=1.37509746867904, lon=103.83761896123)

sample_distance_km = calculate_distance(start=A_174AMKAVE4, end=CENTRAL_BUSINESS_DISTRICT)
print(sample_distance_km)

9.764086519543062


In [58]:
def _distance_to_cbd(row):
    """Return the distance (km) from one df_hdb row's coordinates to the CBD."""
    address_point = Coordinate(lat=row['latitude'], lon=row['longitude'])
    return calculate_distance(CENTRAL_BUSINESS_DISTRICT, address_point)


# Row-wise apply: one geodesic calculation per address.
df_hdb['distance2cbd'] = df_hdb.apply(_distance_to_cbd, axis=1)
df_hdb.head(10)

Unnamed: 0,address,latitude,longitude,distance2cbd
0,174 ANG MO KIO AVE 4,1.375097,103.837619,9.764087
1,541 ANG MO KIO AVE 10,1.373922,103.855621,9.515643
2,163 ANG MO KIO AVE 4,1.373552,103.838169,9.585589
3,446 ANG MO KIO AVE 10,1.367761,103.855357,8.833708
4,557 ANG MO KIO AVE 10,1.371626,103.857736,9.275781
5,603 ANG MO KIO AVE 5,1.380201,103.835756,10.355083
6,709 ANG MO KIO AVE 8,1.371137,103.847662,9.209514
7,333 ANG MO KIO AVE 1,1.361343,103.851699,8.115046
8,109 ANG MO KIO AVE 4,1.370097,103.837688,9.21749
9,564 ANG MO KIO AVE 3,1.369848,103.859404,9.095193


In [64]:
# Drop the coordinate columns so that only the remaining columns (address and
# distance2cbd) are carried into the merge with the working data, which
# already has its own latitude/longitude columns.
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises a KeyError.
df_hdb = df_hdb.drop(columns=['latitude', 'longitude'], errors='ignore')
df_hdb.head()

Unnamed: 0,address,distance2cbd
0,174 ANG MO KIO AVE 4,9.764087
1,541 ANG MO KIO AVE 10,9.515643
2,163 ANG MO KIO AVE 4,9.585589
3,446 ANG MO KIO AVE 10,8.833708
4,557 ANG MO KIO AVE 10,9.275781


#### Add distance2cbd to training dataset

In [61]:
# Load the working (training) dataset that distance2cbd will be added to,
# then show its dimensions as (rows, columns).
df_working_data = pd.read_csv(
    filepath_or_buffer='../data/modified/hdb_working_data.csv',
)
df_working_data.shape


(211395, 21)

In [62]:
# Preview the first five rows of the working data before merging.
df_working_data.head(n=5)

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,address,latitude,longitude,sora,mrt_stations_within_1km,nearest_mrt_station,bto_within_4km,bto_supply_within_4km,pri_schs_within_1km,count_pri_schs_within_1km
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,07 TO 09,60.0,Improved,1986,70.0,...,174 ANG MO KIO AVE 4,1.375097,103.837619,0.129019,[],Yio Chu Kang MRT Station (1099.56m),12,6587,"['Ang Mo Kio Primary School (676.95m)', ""CHIJ ...",3
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1981,65.0,...,541 ANG MO KIO AVE 10,1.373922,103.855621,0.129019,['Ang Mo Kio MRT Station (811.53m)'],Ang Mo Kio MRT Station (811.53m),39,23252,['Jing Shan Primary School (431.03m)'],1
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,01 TO 03,69.0,New Generation,1980,64.0,...,163 ANG MO KIO AVE 4,1.373552,103.838169,0.129019,[],Yio Chu Kang MRT Station (1183.8m),10,4941,"['Ang Mo Kio Primary School (495.36m)', ""CHIJ ...",3
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1979,63.0,...,446 ANG MO KIO AVE 10,1.367761,103.855357,0.129019,['Ang Mo Kio MRT Station (703.32m)'],Ang Mo Kio MRT Station (703.32m),34,20043,"['Jing Shan Primary School (611.18m)', 'Teck G...",3
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,07 TO 09,68.0,New Generation,1980,64.0,...,557 ANG MO KIO AVE 10,1.371626,103.857736,0.129019,['Ang Mo Kio MRT Station (939.42m)'],Ang Mo Kio MRT Station (939.42m),45,26356,['Jing Shan Primary School (627.43m)'],1


In [65]:
# Attach distance2cbd to every transaction by looking it up on 'address'.
#
# * Any distance2cbd column already present in the working data is dropped
#   first. The final cell of this notebook writes back to the same CSV that
#   was loaded, so on a second run the input already contains the column and
#   a plain merge would produce distance2cbd_x / distance2cbd_y instead.
# * Only the join key and the new feature are taken from df_hdb, so the merge
#   does not depend on whether the coordinate columns were dropped beforehand.
# * The lookup is de-duplicated on 'address' so the left join can never
#   multiply rows of the working data.
distance_lookup = df_hdb[['address', 'distance2cbd']].drop_duplicates(subset='address')
df_working_data = pd.merge(
    df_working_data.drop(columns=['distance2cbd'], errors='ignore'),
    distance_lookup,
    on='address',
    how='left',
)
df_working_data.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,latitude,longitude,sora,mrt_stations_within_1km,nearest_mrt_station,bto_within_4km,bto_supply_within_4km,pri_schs_within_1km,count_pri_schs_within_1km,distance2cbd
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,07 TO 09,60.0,Improved,1986,70.0,...,1.375097,103.837619,0.129019,[],Yio Chu Kang MRT Station (1099.56m),12,6587,"['Ang Mo Kio Primary School (676.95m)', ""CHIJ ...",3,9.764087
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1981,65.0,...,1.373922,103.855621,0.129019,['Ang Mo Kio MRT Station (811.53m)'],Ang Mo Kio MRT Station (811.53m),39,23252,['Jing Shan Primary School (431.03m)'],1,9.515643
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,01 TO 03,69.0,New Generation,1980,64.0,...,1.373552,103.838169,0.129019,[],Yio Chu Kang MRT Station (1183.8m),10,4941,"['Ang Mo Kio Primary School (495.36m)', ""CHIJ ...",3,9.585589
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1979,63.0,...,1.367761,103.855357,0.129019,['Ang Mo Kio MRT Station (703.32m)'],Ang Mo Kio MRT Station (703.32m),34,20043,"['Jing Shan Primary School (611.18m)', 'Teck G...",3,8.833708
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,07 TO 09,68.0,New Generation,1980,64.0,...,1.371626,103.857736,0.129019,['Ang Mo Kio MRT Station (939.42m)'],Ang Mo Kio MRT Station (939.42m),45,26356,['Jing Shan Primary School (627.43m)'],1,9.275781


In [67]:
# Count addresses that found no match in the distance lookup (left join -> NaN).
# Fail fast rather than relying on a visual check, so a dataset with missing
# distances is never written back to disk by the following cell.
missing_distance_count = df_working_data['distance2cbd'].isna().sum()
assert missing_distance_count == 0, (
    f"{missing_distance_count} rows have no distance2cbd after the merge"
)
missing_distance_count

0

In [None]:
# Persist the enriched working data without the DataFrame index.
# NOTE: this is the same path the working data was loaded from, so the
# input file is overwritten in place.
df_working_data.to_csv(
    path_or_buf='../data/modified/hdb_working_data.csv',
    index=False,
)