# Extract HDB Lat-Long data

This notebook uses OneMap's API to generate the properties' Lat and Long coordinates.

In [1]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics

import re

In [2]:
df = pd.read_csv("resale-flat-prices 2/resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv")

In [4]:
df['remaining_lease'] = df['remaining_lease'].str.slice(stop=2).astype(int)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148277 entries, 0 to 148276
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   month                148277 non-null  object 
 1   town                 148277 non-null  object 
 2   flat_type            148277 non-null  object 
 3   block                148277 non-null  object 
 4   street_name          148277 non-null  object 
 5   storey_range         148277 non-null  object 
 6   floor_area_sqm       148277 non-null  float64
 7   flat_model           148277 non-null  object 
 8   lease_commence_date  148277 non-null  int64  
 9   remaining_lease      148277 non-null  int64  
 10  resale_price         148277 non-null  float64
dtypes: float64(2), int64(2), object(7)
memory usage: 12.4+ MB


### Using OneMap's API to generate the corresponding geographical information

In [9]:
import json
import requests
import urllib.parse

address = df['street_name'][4]+" "+df['block'][4]
query_address = urllib.parse.quote(address)
query_string = 'https://developers.onemap.sg/commonapi/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y&pageNum=1'
resp = requests.get(query_string)
        
#Convert JSON into Python Object 
data=json.loads(resp.content)

In [10]:
data

{'found': 2,
 'totalNumPages': 1,
 'pageNum': 1,
 'results': [{'SEARCHVAL': 'MY FIRST SKOOL',
   'BLK_NO': '601',
   'ROAD_NAME': 'ANG MO KIO AVENUE 5',
   'BUILDING': 'MY FIRST SKOOL',
   'ADDRESS': '601 ANG MO KIO AVENUE 5 MY FIRST SKOOL SINGAPORE 560601',
   'POSTAL': '560601',
   'X': '28201.7824866865',
   'Y': '40334.0512122996',
   'LATITUDE': '1.38104134784496',
   'LONGITUDE': '103.835131744823',
   'LONGTITUDE': '103.835131744823'},
  {'SEARCHVAL': 'YIO CHU KANG GREEN',
   'BLK_NO': '601',
   'ROAD_NAME': 'ANG MO KIO AVENUE 5',
   'BUILDING': 'YIO CHU KANG GREEN',
   'ADDRESS': '601 ANG MO KIO AVENUE 5 YIO CHU KANG GREEN SINGAPORE 560601',
   'POSTAL': '560601',
   'X': '28201.7822445426',
   'Y': '40334.0520295352',
   'LATITUDE': '1.38104135523576',
   'LONGITUDE': '103.835131742647',
   'LONGTITUDE': '103.835131742647'}]}

#### Take note that the run-time expected from the cell below is about 6h

In [16]:
import time

for i in range(df.shape[0]):
    if (df['Latitude'].isnull().iloc[i]):
        address = df['street_name'][i]+" "+df['block'][i]
        query_address = urllib.parse.quote(address)
        query_string = 'https://developers.onemap.sg/commonapi/search?searchVal='+str(query_address)+'&returnGeom=Y&getAddrDetails=Y&pageNum=1'
        resp = requests.get(query_string)

        #Convert JSON into Python Object 
        data=json.loads(resp.content)

        if(data['found'] == 0): 
            print("no data in row:", i)
        else:
            df.loc[i, 'Longitude'] = data['results'][-1]['LONGITUDE']
            df.loc[i, 'Latitude'] = data['results'][-1]['LATITUDE']
            df.loc[i, 'Address'] = data['results'][-1]['ADDRESS']
            df.loc[i, 'Postal code'] = data['results'][-1]['POSTAL']

        if((i+1)%250==0):
            print("Sleeping...", i)
            time.sleep(15)
    else: continue

Sleeping... 124499
Sleeping... 124749
Sleeping... 124999
Sleeping... 125249
Sleeping... 125499
Sleeping... 125749
Sleeping... 125999
Sleeping... 126249
Sleeping... 126499
Sleeping... 126749
Sleeping... 126999
Sleeping... 127249
Sleeping... 127499
Sleeping... 127749
Sleeping... 127999
Sleeping... 128249
Sleeping... 128499
Sleeping... 128749
Sleeping... 128999
Sleeping... 129249
Sleeping... 129499
Sleeping... 129749
Sleeping... 129999
Sleeping... 130249
Sleeping... 130499
Sleeping... 130749
Sleeping... 130999
Sleeping... 131249
Sleeping... 131499
Sleeping... 131749
Sleeping... 131999
Sleeping... 132249
Sleeping... 132499
Sleeping... 132749
Sleeping... 132999
Sleeping... 133249
Sleeping... 133499
Sleeping... 133749
Sleeping... 133999
Sleeping... 134249
Sleeping... 134499
Sleeping... 134749
Sleeping... 134999
Sleeping... 135249
Sleeping... 135499
Sleeping... 135749
Sleeping... 135999
Sleeping... 136249
Sleeping... 136499
Sleeping... 136749
Sleeping... 136999
Sleeping... 137249
Sleeping... 

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148277 entries, 0 to 148276
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   month                148277 non-null  object 
 1   town                 148277 non-null  object 
 2   flat_type            148277 non-null  object 
 3   block                148277 non-null  object 
 4   street_name          148277 non-null  object 
 5   storey_range         148277 non-null  object 
 6   floor_area_sqm       148277 non-null  float64
 7   flat_model           148277 non-null  object 
 8   lease_commence_date  148277 non-null  int64  
 9   remaining_lease      148277 non-null  int64  
 10  resale_price         148277 non-null  float64
 11  Longitude            148277 non-null  object 
 12  Latitude             148277 non-null  object 
 13  Address              148277 non-null  object 
 14  Postal code          148277 non-null  object 
dtypes: float64(2), in

In [19]:
df = df[df['Latitude'].isnull()==False]
df.to_csv('df_with_latlong.csv',index=False)