# Imports

In [2]:
import requests
import pandas as pd
import numpy as np
from tqdm import tqdm
from geopy.distance import geodesic
import time
import ast
tqdm.pandas()

import warnings
warnings.filterwarnings("ignore")

# Request API from data.gov.sg
Link: https://data.gov.sg/dataset/resale-flat-prices

In [3]:
url = 'https://data.gov.sg/api/action/datastore_search?resource_id=f1765b54-a209-4718-8d38-a39237f502b3&limit=200000'
r = requests.get(url)
data = r.json()
# data

In [4]:
len(data['result']['records'])

149326

In [5]:
df = pd.DataFrame(data['result']['records'])
df.head()

Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,remaining_lease,lease_commence_date,storey_range,_id,block
0,ANG MO KIO,2 ROOM,Improved,44,ANG MO KIO AVE 10,232000,2017-01,61 years 04 months,1979,10 TO 12,1,406
1,ANG MO KIO,3 ROOM,New Generation,67,ANG MO KIO AVE 4,250000,2017-01,60 years 07 months,1978,01 TO 03,2,108
2,ANG MO KIO,3 ROOM,New Generation,67,ANG MO KIO AVE 5,262000,2017-01,62 years 05 months,1980,01 TO 03,3,602
3,ANG MO KIO,3 ROOM,New Generation,68,ANG MO KIO AVE 10,265000,2017-01,62 years 01 month,1980,04 TO 06,4,465
4,ANG MO KIO,3 ROOM,New Generation,67,ANG MO KIO AVE 5,265000,2017-01,62 years 05 months,1980,01 TO 03,5,601


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149326 entries, 0 to 149325
Data columns (total 12 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   town                 149326 non-null  object
 1   flat_type            149326 non-null  object
 2   flat_model           149326 non-null  object
 3   floor_area_sqm       149326 non-null  object
 4   street_name          149326 non-null  object
 5   resale_price         149326 non-null  object
 6   month                149326 non-null  object
 7   remaining_lease      149326 non-null  object
 8   lease_commence_date  149326 non-null  object
 9   storey_range         149326 non-null  object
 10  _id                  149326 non-null  int64 
 11  block                149326 non-null  object
dtypes: int64(1), object(11)
memory usage: 13.7+ MB


In [7]:
df['storey_range'].unique()

array(['10 TO 12', '01 TO 03', '04 TO 06', '07 TO 09', '13 TO 15',
       '19 TO 21', '22 TO 24', '16 TO 18', '34 TO 36', '37 TO 39',
       '49 TO 51', '28 TO 30', '25 TO 27', '40 TO 42', '31 TO 33',
       '46 TO 48', '43 TO 45'], dtype=object)

In [8]:
# Data manipulation
float_cols = ['floor_area_sqm', 'resale_price']
df[float_cols] = df[float_cols].astype(float)

int_cols = ['lease_commence_date']
df[int_cols] = df[int_cols].astype(float)

def get_mths_remaining_lease(lease_str):
    split_lst = lease_str.split(' ')
    if len(split_lst) == 4:
        return int(split_lst[0]) * 12 + int(split_lst[2])
    else:
        return int(split_lst[0]) * 12
    
df['remaining_lease_mth'] = df['remaining_lease'].apply(get_mths_remaining_lease)
df['storey_range_lower'] = df['storey_range'].apply(lambda x: int(x.split(' ')[0]))

df.drop(['remaining_lease'], axis = 1, inplace = True)
df.head()

Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,lease_commence_date,storey_range,_id,block,remaining_lease_mth,storey_range_lower
0,ANG MO KIO,2 ROOM,Improved,44.0,ANG MO KIO AVE 10,232000.0,2017-01,1979.0,10 TO 12,1,406,736,10
1,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 4,250000.0,2017-01,1978.0,01 TO 03,2,108,727,1
2,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,262000.0,2017-01,1980.0,01 TO 03,3,602,749,1
3,ANG MO KIO,3 ROOM,New Generation,68.0,ANG MO KIO AVE 10,265000.0,2017-01,1980.0,04 TO 06,4,465,745,4
4,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,265000.0,2017-01,1980.0,01 TO 03,5,601,749,1


In [9]:
df['address_str'] = df['block'] + " " + df['street_name']

# Feature Engineering

In [10]:
# Get subset to test here first
# test_df = df[:100]
final_df = df.copy()

## Coordinates

In [11]:
# Download lat long from onemap API
def get_coordinates(address):
    for i in range(10):
        try:
            req = requests.get('https://developers.onemap.sg/commonapi/search?searchVal='+address+'&returnGeom=Y&getAddrDetails=Y&pageNum=1')
            resultsdict = eval(req.text)
            if len(resultsdict['results']) > 0:
                return (resultsdict['results'][0]['LATITUDE'], resultsdict['results'][0]['LONGITUDE'])
            else:
                return np.nan
            break
        except:
            time.sleep(10)

final_df['lat_lon'] = final_df['address_str'].progress_apply(get_coordinates)

100%|██████████| 149326/149326 [4:27:58<00:00,  9.29it/s]  


In [12]:
# Save checkpoint 1 (4h 30mins for 149,326 rows)
final_df.to_csv('checkpoint_1.csv', index=False)


In [3]:
# Read in checkpoint 1
final_df = pd.read_csv('checkpoint_1.csv')
# df_read_in.info()
final_df['lat_lon'] = final_df['lat_lon'].apply(ast.literal_eval).apply(lambda x: (float(x[0]), float(x[1]))) # Change to tuple of floats

In [6]:
def get_coordinates(address):
    for i in range(10):
        try:
            req = requests.get('https://developers.onemap.sg/commonapi/search?searchVal='+address+'&returnGeom=Y&getAddrDetails=Y&pageNum=1')
            resultsdict = eval(req.text)
            if len(resultsdict['results']) > 0:
                return (resultsdict['results'][0]['LATITUDE'], resultsdict['results'][0]['LONGITUDE'])
            else:
                return np.nan
            break
        except:
            time.sleep(10)

## MRT Stations
List of MRT stations obtained on 3/3/2023 from https://en.wikipedia.org/wiki/List_of_Singapore_MRT_stations

In [7]:
mrt_df = pd.read_excel('MRT_List.xlsx')
mrt_df['MRT Stations'] = mrt_df['MRT Stations'] + ' MRT Station'
mrt_df['MRT_stn_lat_long'] = mrt_df['MRT Stations'].progress_apply(get_coordinates) # Download lat long from onemap API
mrt_df = mrt_df.dropna()
mrt_df = mrt_df.reset_index(drop = True)
mrt_df

100%|██████████| 146/146 [00:12<00:00, 11.44it/s]


Unnamed: 0,MRT Stations,MRT_stn_lat_long
0,Admiralty MRT Station,"(1.44058856161847, 103.800990519771)"
1,Aljunied MRT Station,"(1.3164326118157, 103.882906044385)"
2,Ang Mo Kio MRT Station,"(1.36942855699191, 103.849455226442)"
3,Bartley MRT Station,"(1.34250117805245, 103.880177899184)"
4,Bayfront MRT Station,"(1.28187378879209, 103.859079764874)"
...,...,...
138,Woodleigh MRT Station,"(1.33919004519388, 103.87081830915)"
139,Xilin MRT Station,"(1.32890694938858, 103.964902569595)"
140,Yew Tee MRT Station,"(1.39747594171731, 103.747418249132)"
141,Yio Chu Kang MRT Station,"(1.38168259989517, 103.844991053696)"


In [8]:
def get_mrt_shortest_and_1km_radius(place_coord):
    if pd.isna(place_coord) == True:
        return np.nan
    else:
        dist_df = mrt_df['MRT_stn_lat_long'].apply(lambda x: geodesic(place_coord, x).meters)
        min_dist = dist_df.min()
        mrt_stn = mrt_df.loc[dist_df.idxmin(),'MRT Stations']
        num_within_1km_radius = sum(dist_df <= 1000)
        return (min_dist, mrt_stn, num_within_1km_radius)

output = final_df['lat_lon'].progress_apply(get_mrt_shortest_and_1km_radius)
final_df['closest_mrt_dist'] = output.apply(lambda x: x[0])
final_df['closest_mrt'] = output.apply(lambda x: x[1])
final_df['num_mrt_1km_radius'] = output.apply(lambda x: x[2])
final_df.head()

100%|██████████| 149326/149326 [45:57<00:00, 54.15it/s] 


Unnamed: 0.1,Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,lease_commence_date,storey_range,_id,block,remaining_lease_mth,storey_range_lower,address_str,lat_lon,closest_mrt_dist,closest_mrt,num_mrt_1km_radius
0,0,ANG MO KIO,2 ROOM,Improved,44.0,ANG MO KIO AVE 10,232000.0,2017-01,1979.0,10 TO 12,1,406,736,10,406 ANG MO KIO AVE 10,"(1.36200453938712, 103.853879910407)",957.269782,Ang Mo Kio MRT Station,1
1,1,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 4,250000.0,2017-01,1978.0,01 TO 03,2,108,727,1,108 ANG MO KIO AVE 4,"(1.37094273993861, 103.837974822369)",166.828293,Mayflower MRT Station,2
2,2,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,262000.0,2017-01,1980.0,01 TO 03,3,602,749,1,602 ANG MO KIO AVE 5,"(1.38070883044887, 103.835368226602)",532.154773,Lentor MRT Station,1
3,3,ANG MO KIO,3 ROOM,New Generation,68.0,ANG MO KIO AVE 10,265000.0,2017-01,1980.0,04 TO 06,4,465,745,4,465 ANG MO KIO AVE 10,"(1.3662010408294, 103.857200967235)",932.964388,Ang Mo Kio MRT Station,1
4,4,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,265000.0,2017-01,1980.0,01 TO 03,5,601,749,1,601 ANG MO KIO AVE 5,"(1.38104134784496, 103.835131744823)",498.418981,Lentor MRT Station,1


## Schools

In [9]:
school_df_dic = pd.read_excel('Schools.xlsx', sheet_name = None)
school_df = pd.DataFrame()
for sch_type, df in school_df_dic.items():
    sub_df = pd.DataFrame({'Type': sch_type, 'School': df.iloc[:,0]})
    school_df = pd.concat([school_df, sub_df], ignore_index = True, axis = 0)
    
school_df['lat_long'] = school_df['School'].progress_apply(get_coordinates)
school_df = school_df.dropna()
school_df

100%|██████████| 373/373 [00:41<00:00,  8.89it/s]


Unnamed: 0,Type,School,lat_long
0,Pri,Admiralty Primary School,"(1.4426347903311, 103.800040119743)"
1,Pri,Ahmad Ibrahim Primary School,"(1.43315271543517, 103.832942401086)"
2,Pri,Ai Tong School,"(1.3605834338904, 103.833020333986)"
3,Pri,Alexandra Primary School,"(1.29133439161334, 103.824424680531)"
4,Pri,Anchor Green Primary School,"(1.39036998654612, 103.887165375933)"
...,...,...,...
368,Uni,Nanyang Technological University,"(1.35154257621121, 103.687214243669)"
369,Uni,Singapore Management University,"(1.29509045190873, 103.850566200283)"
370,Uni,Singapore University of Technology and Design,"(1.34078410232498, 103.962542022484)"
371,Uni,Singapore Institute of Technology,"(1.34342966240204, 103.932354333699)"


In [10]:
sch_df_dic = {}
for sch_type in school_df['Type'].unique():
    sub_df = school_df[school_df['Type'] == sch_type]
    sub_df = sub_df.reset_index(drop = True)
    sch_df_dic[sch_type] = sub_df

In [11]:
def get_sch_shortest_and_1km_radius(place_coord, sch_type):
    if pd.isna(place_coord) == True:
        return np.nan
    else:
        sub_df = sch_df_dic[sch_type]
        dist_df = sub_df['lat_long'].apply(lambda x: geodesic(place_coord, x).meters)
        min_dist = dist_df.min()
        nearest_sch = sub_df.loc[dist_df.idxmin(), 'School']
        num_within_1km_radius = sum(dist_df <= 1000)
        return (min_dist, nearest_sch, num_within_1km_radius)

for sch_type in sch_df_dic.keys():
    output = final_df['lat_lon'].progress_apply(lambda x: get_sch_shortest_and_1km_radius(x, sch_type))
    final_df[f'closest_{sch_type}_sch_dist'] = output.apply(lambda x: x[0])
    final_df[f'closest_{sch_type}_sch'] = output.apply(lambda x: x[1])
    final_df[f'num_{sch_type}_sch_1km_radius'] = output.apply(lambda x: x[2])
final_df.head()

100%|██████████| 149326/149326 [54:49<00:00, 45.39it/s]
100%|██████████| 149326/149326 [46:32<00:00, 53.47it/s] 
100%|██████████| 149326/149326 [06:01<00:00, 412.62it/s]
100%|██████████| 149326/149326 [04:01<00:00, 618.97it/s]
100%|██████████| 149326/149326 [02:24<00:00, 1032.31it/s]


Unnamed: 0.1,Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,lease_commence_date,storey_range,...,num_Sec_sch_1km_radius,closest_JC_sch_dist,closest_JC_sch,num_JC_sch_1km_radius,closest_MI_sch_dist,closest_MI_sch,num_MI_sch_1km_radius,closest_Uni_sch_dist,closest_Uni_sch,num_Uni_sch_1km_radius
0,0,ANG MO KIO,2 ROOM,Improved,44.0,ANG MO KIO AVE 10,232000.0,2017-01,1979.0,10 TO 12,...,1,1401.612589,Eunoia Junior College,0,1779.216231,ITE College Central,0,7408.201434,Singapore Management University,0
1,1,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 4,250000.0,2017-01,1978.0,01 TO 03,...,5,1041.037313,Eunoia Junior College,0,1726.056136,Nanyang Polytechnic,0,8371.125772,Singapore University of Social Sciences,0
2,2,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,262000.0,2017-01,1980.0,01 TO 03,...,3,1172.923989,Anderson Serangoon Junior College,0,1599.601569,Nanyang Polytechnic,0,8802.805943,Singapore University of Social Sciences,0
3,3,ANG MO KIO,3 ROOM,New Generation,68.0,ANG MO KIO AVE 10,265000.0,2017-01,1980.0,04 TO 06,...,1,1827.233537,Eunoia Junior College,0,1295.710648,ITE College Central,0,7897.637286,Singapore Management University,0
4,4,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,265000.0,2017-01,1980.0,01 TO 03,...,3,1205.413198,Anderson Serangoon Junior College,0,1625.299098,Nanyang Polytechnic,0,8807.296192,Singapore University of Social Sciences,0


In [13]:
# Save checkpoint 2
final_df.to_csv('checkpoint_2.csv', index=False)

In [8]:
# Read in checkpoint 2
final_df = pd.read_csv('checkpoint_2.csv')
# df_read_in.info()
final_df['lat_lon'] = final_df['lat_lon'].apply(ast.literal_eval).apply(lambda x: (float(x[0]), float(x[1]))) # Change to tuple of floats

## Malls

In [7]:
malls_df_dic = pd.read_excel('Malls.xlsx', sheet_name = None)

malls_df = pd.DataFrame()
for mall_type, df in malls_df_dic.items():
    sub_df = df.iloc[:,0]
    malls_df = pd.concat([malls_df, sub_df], ignore_index = True, axis = 0)
    
malls_df.columns = ['Malls']

print(malls_df.shape)
malls_df['lat_long'] = malls_df['Malls'].progress_apply(get_coordinates) # progress_apply
malls_df = malls_df.dropna()
malls_df = malls_df.reset_index(drop = True)

malls_df

(167, 1)


100%|█████████████████████████████████████████| 167/167 [00:13<00:00, 12.70it/s]


Unnamed: 0,Malls,lat_long
0,100 AM,"(1.27468281482263, 103.843488359469)"
1,313@Somerset,"(1.30101436404056, 103.838360664485)"
2,Aperia,"(1.3097112065077, 103.864326436447)"
3,Balestier Hill Shopping Centre,"(1.32559594839311, 103.842571612968)"
4,Bugis Cube,"(1.2981408343975, 103.855635339249)"
...,...,...
155,Gek Poh Shopping Centre,"(1.34874357136408, 103.697732091001)"
156,Rochester Mall,"(1.30540765569962, 103.788446680148)"
157,Taman Jurong Shopping Centre,"(1.33484487471259, 103.720462024278)"
158,West Coast Plaza,"(1.30369748971099, 103.766131294678)"


In [9]:
def get_mall_shortest_and_1km_radius(place_coord):
    if pd.isna(place_coord) == True:
        return np.nan
    else:
        dist_df = malls_df['lat_long'].apply(lambda x: geodesic(place_coord, x).meters)
        min_dist = dist_df.min()
        mall = malls_df.loc[dist_df.idxmin(), 'Malls']
        num_within_1km_radius = sum(dist_df <= 1000)
        return (min_dist, mall, num_within_1km_radius)

output = final_df['lat_lon'].progress_apply(get_mall_shortest_and_1km_radius)
final_df['closest_mall_dist'] = output.apply(lambda x: x[0])
final_df['closest_mall'] = output.apply(lambda x: x[1])
final_df['num_mall_1km_radius'] = output.apply(lambda x: x[2])
final_df.head()

100%|███████████████████████████████████| 149326/149326 [28:21<00:00, 87.76it/s]


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,lease_commence_date,...,num_JC_sch_1km_radius,closest_MI_sch_dist,closest_MI_sch,num_MI_sch_1km_radius,closest_Uni_sch_dist,closest_Uni_sch,num_Uni_sch_1km_radius,closest_mall_dist,closest_mall,num_mall_1km_radius
0,0,0,ANG MO KIO,2 ROOM,Improved,44.0,ANG MO KIO AVE 10,232000.0,2017-01,1979.0,...,0,1779.216231,ITE College Central,0,7408.201434,Singapore Management University,0,1013.992058,AMK Hub,0
1,1,1,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 4,250000.0,2017-01,1978.0,...,0,1726.056136,Nanyang Polytechnic,0,8371.125772,Singapore University of Social Sciences,0,894.265975,Broadway Plaza,1
2,2,2,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,262000.0,2017-01,1980.0,...,0,1599.601569,Nanyang Polytechnic,0,8802.805943,Singapore University of Social Sciences,0,1525.573038,Broadway Plaza,0
3,3,3,ANG MO KIO,3 ROOM,New Generation,68.0,ANG MO KIO AVE 10,265000.0,2017-01,1980.0,...,0,1295.710648,ITE College Central,0,7897.637286,Singapore Management University,0,893.795634,myVillage At Serangoon Garden,1
4,4,4,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,265000.0,2017-01,1980.0,...,0,1625.299098,Nanyang Polytechnic,0,8807.296192,Singapore University of Social Sciences,0,1569.306143,Broadway Plaza,0


## Parks

In [10]:
parks_df = pd.read_excel('Parks.xlsx')
parks_df = pd.DataFrame({'Parks': parks_df.iloc[:,0]})
parks_df['lat_long'] = parks_df['Parks'].progress_apply(get_coordinates)
parks_df = parks_df.dropna()
parks_df = parks_df.reset_index(drop = True)
parks_df

100%|███████████████████████████████████████████| 73/73 [00:05<00:00, 12.51it/s]


Unnamed: 0,Parks,lat_long
0,Admiralty Park,"(1.45399059016631, 103.795875774662)"
1,Ang Mo Kio Town Garden East,"(1.37069218596975, 103.850477038504)"
2,Ang Mo Kio Town Garden West,"(1.37416758787361, 103.842948749695)"
3,Bedok Town Park,"(1.33439933330273, 103.922348727352)"
4,Bishan-Ang Mo Kio Park,"(1.36517069591469, 103.83629822663)"
...,...,...
65,Yishun Neighbourhood Park,"(1.43774180535158, 103.835167452882)"
66,Yishun Park,"(1.44144025471558, 103.835602638346)"
67,Yishun Pond Park,"(1.42782773147436, 103.840061845933)"
68,Youth Olympic Park,"(1.28895335576015, 103.860431957375)"


In [11]:
def get_park_shortest_and_1km_radius(place_coord):
    if pd.isna(place_coord) == True:
        return np.nan
    else:
        dist_df = parks_df['lat_long'].apply(lambda x: geodesic(place_coord, x).meters)
        min_dist = dist_df.min()
        park = parks_df.loc[dist_df.idxmin(), 'Parks']
        num_within_1km_radius = sum(dist_df <= 1000)
        return (min_dist, park, num_within_1km_radius)

output = final_df['lat_lon'].progress_apply(get_park_shortest_and_1km_radius)
final_df['closest_park_dist'] = output.apply(lambda x: x[0])
final_df['closest_park'] = output.apply(lambda x: x[1])
final_df['num_park_1km_radius'] = output.apply(lambda x: x[2])
final_df.head()

100%|██████████████████████████████████| 149326/149326 [12:50<00:00, 193.91it/s]


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,town,flat_type,flat_model,floor_area_sqm,street_name,resale_price,month,lease_commence_date,...,num_MI_sch_1km_radius,closest_Uni_sch_dist,closest_Uni_sch,num_Uni_sch_1km_radius,closest_mall_dist,closest_mall,num_mall_1km_radius,closest_park_dist,closest_park,num_park_1km_radius
0,0,0,ANG MO KIO,2 ROOM,Improved,44.0,ANG MO KIO AVE 10,232000.0,2017-01,1979.0,...,0,7408.201434,Singapore Management University,0,1013.992058,AMK Hub,0,1032.586022,Ang Mo Kio Town Garden East,0
1,1,1,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 4,250000.0,2017-01,1978.0,...,0,8371.125772,Singapore University of Social Sciences,0,894.265975,Broadway Plaza,1,658.451197,Ang Mo Kio Town Garden West,2
2,2,2,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,262000.0,2017-01,1980.0,...,0,8802.805943,Singapore University of Social Sciences,0,1525.573038,Broadway Plaza,0,1111.238041,Ang Mo Kio Town Garden West,0
3,3,3,ANG MO KIO,3 ROOM,New Generation,68.0,ANG MO KIO AVE 10,265000.0,2017-01,1980.0,...,0,7897.637286,Singapore Management University,0,893.795634,myVillage At Serangoon Garden,1,898.087299,Ang Mo Kio Town Garden East,1
4,4,4,ANG MO KIO,3 ROOM,New Generation,67.0,ANG MO KIO AVE 5,265000.0,2017-01,1980.0,...,0,8807.296192,Singapore University of Social Sciences,0,1569.306143,Broadway Plaza,0,1155.199842,Ang Mo Kio Town Garden West,0


# Save Dataset into checkpoint3.csv

In [12]:
# Save checkpoint 3
final_df.to_csv('checkpoint_3.csv', index=False)

In [13]:
# Read in checkpoint 3
final_df = pd.read_csv('checkpoint_3.csv')
# df_read_in.info()
final_df['lat_lon'] = final_df['lat_lon'].apply(ast.literal_eval).apply(lambda x: (float(x[0]), float(x[1]))) # Change to tuple of floats

In [14]:
final_df.columns

Index(['Unnamed: 0.2', 'Unnamed: 0.1', 'Unnamed: 0', 'town', 'flat_type',
       'flat_model', 'floor_area_sqm', 'street_name', 'resale_price', 'month',
       'lease_commence_date', 'storey_range', '_id', 'block',
       'remaining_lease_mth', 'storey_range_lower', 'address_str', 'lat_lon',
       'closest_mrt_dist', 'closest_mrt', 'num_mrt_1km_radius',
       'closest_Pri_sch_dist', 'closest_Pri_sch', 'num_Pri_sch_1km_radius',
       'closest_Sec_sch_dist', 'closest_Sec_sch', 'num_Sec_sch_1km_radius',
       'closest_JC_sch_dist', 'closest_JC_sch', 'num_JC_sch_1km_radius',
       'closest_MI_sch_dist', 'closest_MI_sch', 'num_MI_sch_1km_radius',
       'closest_Uni_sch_dist', 'closest_Uni_sch', 'num_Uni_sch_1km_radius',
       'closest_mall_dist', 'closest_mall', 'num_mall_1km_radius',
       'closest_park_dist', 'closest_park', 'num_park_1km_radius'],
      dtype='object')