In [176]:
#import statements

import csv
import pandas as pd
import numpy as np


import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.dummy import DummyClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
# Load scikit's random forest classifier library
from sklearn.ensemble import RandomForestClassifier

# Section 1: Data Cleaning and Exploration 
### In this section, we will:
- Import the data
- Clean the data
- Combine the data
- Create and/or generalise relevant variables 
- Find relevant details about the data worth exploring

In [177]:
'''Processing the data'''

#Source: https://data.gov.sg/dataset/resale-flat-prices?resource_id=f1765b54-a209-4718-8d38-a39237f502b3
flatPrice1990=pd.read_csv('resale-flat-prices-based-on-approval-date-1990-1999.csv')
flatPrice2000=pd.read_csv('resale-flat-prices-based-on-approval-date-2000-feb-2012.csv')
flatPrice2012=pd.read_csv('resale-flat-prices-based-on-registration-date-from-mar-2012-to-dec-2014.csv')
flatPrice2015=pd.read_csv('resale-flat-prices-based-on-registration-date-from-jan-2015-to-dec-2016.csv')
flatPrice2017=pd.read_csv('resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv')

#Source: https://data.gov.sg/dataset/consumer-price-index-monthly
CPI2019Base = pd.read_csv('consumer-price-index-2019-as-base-year-monthly.csv')

desc1990 = flatPrice1990.isnull().sum()
desc2000 = flatPrice2000.isnull().sum()
desc2012 = flatPrice2012.isnull().sum()
desc2015 = flatPrice2015.isnull().sum()
desc2017 = flatPrice2017.isnull().sum()

flatInfo = pd.concat([desc1990, desc2000, desc2012, desc2015, desc2017], axis=1)
flatInfo.columns = ["1990 to 1999", "2000 to 2012", "2012 to 2014", "2015 to 2016", "2017 onwards" ]
flatInfo

Unnamed: 0,1990 to 1999,2000 to 2012,2012 to 2014,2015 to 2016,2017 onwards
month,0.0,0.0,0.0,0,0
town,0.0,0.0,0.0,0,0
flat_type,0.0,0.0,0.0,0,0
block,0.0,0.0,0.0,0,0
street_name,0.0,0.0,0.0,0,0
storey_range,0.0,0.0,0.0,0,0
floor_area_sqm,0.0,0.0,0.0,0,0
flat_model,0.0,0.0,0.0,0,0
lease_commence_date,0.0,0.0,0.0,0,0
resale_price,0.0,0.0,0.0,0,0


In [178]:
# Making the collated dataframe and verifying its datatypes
flatPriceCollated = pd.concat([flatPrice1990, flatPrice2000, flatPrice2012, flatPrice2015, flatPrice2017], join = "inner")
display(flatPriceCollated.sample(10))
flatPriceCollated.dtypes

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
3398,2012-04,JURONG WEST,5 ROOM,625,JURONG WEST ST 61,01 TO 05,110.0,Improved,2001,466000.0
167969,1997-08,WOODLANDS,3 ROOM,22,MARSILING DR,01 TO 03,67.0,NEW GENERATION,1977,159000.0
141078,2004-02,BUKIT BATOK,3 ROOM,227,BT BATOK CTRL,10 TO 12,67.0,New Generation,1985,155000.0
3551,2000-02,JURONG WEST,4 ROOM,909,JURONG WEST ST 91,04 TO 06,104.0,Model A,1989,227000.0
15908,2015-11,SEMBAWANG,5 ROOM,352A,CANBERRA RD,10 TO 12,110.0,Improved,2001,465000.0
49704,1993-09,ANG MO KIO,4 ROOM,226,ANG MO KIO AVE 1,10 TO 12,92.0,NEW GENERATION,1978,162000.0
134780,1996-09,ANG MO KIO,3 ROOM,639,ANG MO KIO AVE 6,07 TO 09,82.0,NEW GENERATION,1980,255000.0
218697,1998-10,WOODLANDS,3 ROOM,103,WOODLANDS ST 13,04 TO 06,67.0,NEW GENERATION,1985,109000.0
197235,1998-06,BUKIT PANJANG,4 ROOM,249,BANGKIT RD,01 TO 03,103.0,MODEL A,1989,238000.0
178325,1998-01,ANG MO KIO,3 ROOM,212,ANG MO KIO AVE 3,07 TO 09,67.0,NEW GENERATION,1977,186000.0


month                   object
town                    object
flat_type               object
block                   object
street_name             object
storey_range            object
floor_area_sqm         float64
flat_model              object
lease_commence_date      int64
resale_price           float64
dtype: object

In [179]:
#Attempting to understand what "remaining_lease" refers to
display(flatPrice2015.sample(3))
display(flatPrice2017.sample(3))

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
6022,2015-05,JURONG EAST,3 ROOM,304,JURONG EAST ST 32,13 TO 15,67.0,New Generation,1983,67,320000.0
19575,2016-02,JURONG EAST,4 ROOM,306,JURONG EAST ST 32,04 TO 06,94.0,New Generation,1983,66,395000.0
28218,2016-07,KALLANG/WHAMPOA,4 ROOM,7,BOON KENG RD,31 TO 33,94.0,DBSS,2011,93,818000.0


Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
87874,2021-01,BISHAN,5 ROOM,163,BISHAN ST 13,01 TO 03,121.0,Improved,1988,66 years 06 months,628000.0
97935,2021-05,KALLANG/WHAMPOA,4 ROOM,116A,JLN TENTERAM,19 TO 21,93.0,Model A,2017,95 years 06 months,736000.0
62669,2019-12,BUKIT MERAH,3 ROOM,52,HAVELOCK RD,10 TO 12,69.0,Model A,2013,92 years 06 months,630000.0


### **<u>Subsection 1.1: Generalising `remaining_lease` for the entire dataset</u>**
From the above, it seems like "remaining_lease" was a statistic which was introduced starting from the 2015-2016 csv. However, one may also compute an estimation for this variable via taking:

$$ \text{lease\_commence\_date} + 99 \text{ years} - \text{month (or pretty much, the date when the transaction was made)}$$


However, a shortcoming of this method of manual computation is the lack of precision; namely that we may only evaluate the remaining lease to the nearest year. Furthermore, it seems as if the method in which the data was stored differs between the `2015-2016` csv and the `2017 onwards` csvs, with the former storing the data to the nearest year, and the latter storing the data to the nearest month.  

These inconsistencies prove to be a problem, but oh well, what can we do about it. 

As such, noting that most of the data could only compute the `remaining_lease` to the nearest year ($\pm$ 1 year for the data before 2015), we will proceed by using the aforementioned; crude method. Sure, it does not provide a high degree of accuracy, but at least there is consistency if we apply this formula for all rows. 

In [180]:
# Adding columns representing the columns for the month and year
flatPriceCollated["month_no"] = (flatPriceCollated.month.str[5:]).astype(int)
flatPriceCollated["year_no"] = (flatPriceCollated.month.str[:4]).astype(int)

display(flatPriceCollated.sample(10))

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no
179855,1998-01,JURONG WEST,4 ROOM,447,JURONG WEST ST 42,01 TO 03,103.0,MODEL A,1984,250000.0,1,1998
238329,2007-07,CHOA CHU KANG,EXECUTIVE,133,TECK WHYE LANE,10 TO 12,145.0,Apartment,1993,348000.0,7,2007
177265,2005-04,TOA PAYOH,4 ROOM,142,LOR 2 TOA PAYOH,22 TO 24,85.0,Model A,2001,355000.0,4,2005
78753,2002-02,WOODLANDS,5 ROOM,9,MARSILING DR,16 TO 18,120.0,Standard,1976,233000.0,2,2002
80804,1994-11,BUKIT MERAH,3 ROOM,40,BEO CRES,04 TO 06,59.0,STANDARD,1971,77000.0,11,1994
123432,1996-05,BUKIT BATOK,4 ROOM,536,BT BATOK ST 52,01 TO 03,107.0,MODEL A,1986,246000.0,5,1996
254443,1999-05,KALLANG/WHAMPOA,3 ROOM,86,WHAMPOA DR,13 TO 15,66.0,IMPROVED,1974,149000.0,5,1999
52942,2001-06,WOODLANDS,5 ROOM,36,MARSILING DR,13 TO 15,124.0,Standard,1978,254000.0,6,2001
220970,1998-11,CHOA CHU KANG,4 ROOM,422,CHOA CHU KANG AVE 4,01 TO 03,104.0,MODEL A,1992,242000.0,11,1998
89673,1995-03,YISHUN,4 ROOM,225,YISHUN ST 21,04 TO 06,93.0,NEW GENERATION,1985,169000.0,3,1995


In [181]:
# This is the aforementioned calculation for the remaining lease of a house with respect to when the house was resold. We 
# first implement the creation of the column "remaining_lease_manual"
def calcYearsLeft(row):
  row.remaining_lease_manual = row.lease_commence_date + 99 - (row.year_no + row.month_no/12)
  return row

flatPriceCollated["remaining_lease_manual"] = 0
flatPriceCollated = flatPriceCollated.apply(calcYearsLeft, axis='columns')
display(flatPriceCollated.sample(10))
flatPriceCollated.describe()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual
286574,1999-12,TAMPINES,4 ROOM,402,TAMPINES ST 41,01 TO 03,94.0,SIMPLIFIED,1985,258000.0,12,1999,84.0
350748,2011-04,GEYLANG,3 ROOM,58,CIRCUIT RD,07 TO 09,56.0,Standard,1969,255000.0,4,2011,56.666667
8527,2017-06,CLEMENTI,4 ROOM,420,CLEMENTI AVE 1,37 TO 39,91.0,Model A,2013,735000.0,6,2017,94.5
56255,1993-12,BUKIT BATOK,4 ROOM,207,BT BATOK ST 21,01 TO 03,104.0,MODEL A,1983,155000.0,12,1993,88.0
31832,2016-09,PUNGGOL,4 ROOM,193,EDGEFIELD PLAINS,01 TO 03,90.0,Premium Apartment,2003,355000.0,9,2016,85.25
63265,1994-03,ANG MO KIO,3 ROOM,577,ANG MO KIO AVE 10,10 TO 12,67.0,NEW GENERATION,1980,90000.0,3,1994,84.75
313572,2010-02,PASIR RIS,EXECUTIVE,238,PASIR RIS ST 21,04 TO 06,146.0,Apartment,1993,455000.0,2,2010,81.833333
7219,2000-04,ANG MO KIO,3 ROOM,202,ANG MO KIO AVE 3,07 TO 09,67.0,New Generation,1977,177000.0,4,2000,75.666667
10122,2000-05,BEDOK,5 ROOM,605,BEDOK RESERVOIR RD,07 TO 09,125.0,Improved,1982,305000.0,5,2000,80.583333
38948,1992-12,BUKIT MERAH,1 ROOM,33,TAMAN HO SWEE,04 TO 06,29.0,IMPROVED,1969,10200.0,12,1992,75.0


Unnamed: 0,floor_area_sqm,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual
count,864616.0,864616.0,864616.0,864616.0,864616.0,864616.0
mean,95.678732,1987.590754,302883.5,6.593214,2004.966207,81.075112
std,25.966473,9.939253,155576.2,3.410459,8.467052,10.146425
min,28.0,1966.0,5000.0,1.0,1990.0,43.166667
25%,73.0,1980.0,185000.0,4.0,1998.0,74.5
50%,93.0,1986.0,282000.0,7.0,2004.0,82.75
75%,113.0,1995.0,395000.0,10.0,2011.0,89.333333
max,307.0,2019.0,1360000.0,12.0,2022.0,100.583333


In [182]:
# verifying the validity of my proposed computation

# adding month_no and year_no columns for the 2015, 2017 dataframes
flatPrice2015["month_no"] = (flatPrice2015.month.str[5:]).astype(int)
flatPrice2015["year_no"] = (flatPrice2015.month.str[:4]).astype(int)

flatPrice2017["month_no"] = (flatPrice2017.month.str[5:]).astype(int)
flatPrice2017["year_no"] = (flatPrice2017.month.str[:4]).astype(int)

# Finding the difference between our calculated value and the provided value
flatPrice2015["remaining_lease_manual"] = 0
flatPrice2015New = flatPrice2015.apply(calcYearsLeft, axis='columns')
flatPrice2015New["remaining_lease_difference"] = abs(flatPrice2015New.remaining_lease_manual - flatPrice2015New.remaining_lease)
display(flatPrice2015New.describe())

def calcYearsDiff2017(row):
  #       72 years 06 months	
  #index: 0123456789
  years = int(str(row.remaining_lease)[:2])
  months = int(str(row.remaining_lease)[9:11]) if (len(row.remaining_lease) > 12) else 0
  row.remaining_lease_difference = abs(row.remaining_lease_manual - ( years + months/12 ) )
  return row

flatPrice2017["remaining_lease_manual"] = 0
flatPrice2017New = flatPrice2017.apply(calcYearsLeft, axis='columns')
flatPrice2017New["remaining_lease_difference"] = 0
flatPrice2017New = flatPrice2017New.apply(calcYearsDiff2017, axis='columns')
display(flatPrice2017New.describe())

Unnamed: 0,floor_area_sqm,lease_commence_date,remaining_lease,resale_price,month_no,year_no,remaining_lease_manual,remaining_lease_difference
count,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0
mean,97.020386,1990.920195,73.913116,436862.8,6.63548,2015.521438,73.8458,0.347312
std,24.19836,10.86233,10.885456,135805.2,3.307996,0.499547,10.84662,0.235746
min,31.0,1966.0,48.0,190000.0,1.0,2015.0,48.083333,0.0
25%,74.0,1984.0,66.0,340000.0,4.0,2015.0,66.25,0.166667
50%,96.0,1989.0,72.0,408000.0,7.0,2016.0,71.75,0.333333
75%,111.0,2000.0,83.0,495000.0,9.0,2016.0,82.75,0.5
max,280.0,2013.0,97.0,1150000.0,12.0,2016.0,96.666667,1.166667


Unnamed: 0,floor_area_sqm,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,remaining_lease_difference
count,118413.0,118413.0,118413.0,118413.0,118413.0,118413.0,118413.0
mean,97.837401,1995.101568,460645.8,6.736912,2019.203702,74.336457,0.5653616
std,24.120851,13.444761,159298.3,3.407568,1.464333,13.332818,0.2899117
min,31.0,1966.0,140000.0,1.0,2017.0,43.166667,7.105427e-14
25%,82.0,1985.0,345000.0,4.0,2018.0,63.666667,0.3333333
50%,94.0,1996.0,430000.0,7.0,2019.0,74.416667,0.5833333
75%,113.0,2005.0,540000.0,10.0,2021.0,84.833333,0.8333333
max,249.0,2019.0,1360000.0,12.0,2022.0,97.333333,1.583333


As we may see from the results, the mean of `remaining_lease_difference` hovers around 0.3 to 0.6, which is expected. This shows that our method of computing the `remaining_lease` has a very low deviation from the ones provided in the government dataset. As such, this variable is valid to use in our analysis of the dataset. 

### **<u>Subsection 1.2: Creating a variable `resale_price_adjusted` to adjust for inflation</u>**

One notes that due to the substantial inflation which occured in the last 30 years, data regarding the resale price is likely inaccurate due to the lack of adjustment for inflation. As such, we will make a column `resale_price_adjusted` which adjusts the `resale_price` to the equivalent price today. 
To do this, we will refer to the `CPI2019Base` dataframe. 

In [183]:
display(CPI2019Base)


Unnamed: 0,month,level_1,value
0,1961-01,All Items,24.542
1,1961-01,All Items Less Imputed Rentals On Owner-occupi...,na
2,1961-01,All Items Less Accommodation,na
3,1961-02,All Items,24.565
4,1961-02,All Items Less Imputed Rentals On Owner-occupi...,na
...,...,...,...
2188,2021-10,All Items Less Imputed Rentals On Owner-occupi...,102.927
2189,2021-10,All Items Less Accommodation,103.31
2190,2021-11,All Items,103.959
2191,2021-11,All Items Less Imputed Rentals On Owner-occupi...,104.079


The `CPI`, or Consumer Price Index, "is a measure that examines the weighted average of prices of a basket of consumer goods and services, such as transportation, food, and medical care." (Source: Investopedia). Notably, the `value` column in the dataframe may be caluclated as follows: 

$$
\frac{\text{Total Cost of All Products in an arbituary year}}{\text{Total Cost of All Products in 2019}}
$$

For the sake of getting using a metric which represents the overall inflation of Singapore, we choose to use `level_1 = "All Items"`



In [184]:
mask = CPI2019Base.level_1.str.endswith("All Items") == True

CPI2019AllItems = CPI2019Base[mask]
CPI2019AllItems["month_no"] = (CPI2019AllItems.month.str[5:]).astype(int)
CPI2019AllItems["year_no"] = (CPI2019AllItems.month.str[:4]).astype(int)
CPI2019AllItems = CPI2019AllItems.reset_index()

CPI2019AllItems.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPI2019AllItems["month_no"] = (CPI2019AllItems.month.str[5:]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPI2019AllItems["year_no"] = (CPI2019AllItems.month.str[:4]).astype(int)


index       0
month       0
level_1     0
value       0
month_no    0
year_no     0
dtype: int64

Perhaps out of coincidence, when `level_1 = "All Items"`, the CPI metric exists for all months. Coincidence? Maybe. But Ill take it. Lets make a column `multiplier` which stores a metric representing how much we need to scale up the `resale_price` to accomodate for inflation in respect to today. 

In [185]:
currCPI = CPI2019AllItems.value.iloc[-1]

CPI2019AllItems.value = CPI2019AllItems.value.astype("float")

CPI2019AllItems["multiplier"] = float(currCPI) / CPI2019AllItems.value

CPI2019AllItems


Unnamed: 0,index,month,level_1,value,month_no,year_no,multiplier
0,0,1961-01,All Items,24.542,1,1961,4.235963
1,3,1961-02,All Items,24.565,2,1961,4.231997
2,6,1961-03,All Items,24.585,3,1961,4.228554
3,9,1961-04,All Items,24.187,4,1961,4.298135
4,12,1961-05,All Items,24.053,5,1961,4.322080
...,...,...,...,...,...,...,...
726,2178,2021-07,All Items,101.672,7,2021,1.022494
727,2181,2021-08,All Items,102.231,8,2021,1.016903
728,2184,2021-09,All Items,102.657,9,2021,1.012683
729,2187,2021-10,All Items,102.950,10,2021,1.009801


In [186]:
CPI2019AllItems = CPI2019AllItems[["month","multiplier"]]
flatPriceCollated = pd.merge(flatPriceCollated,CPI2019AllItems,on="month", how="outer")
flatPriceCollated.dropna(inplace= True)
flatPriceCollated["resale_price_adjusted"] = flatPriceCollated.resale_price * flatPriceCollated.multiplier
flatPriceCollated

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,multiplier
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,9000.0,1.0,1990.0,85.916667,1.677678
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,8000.0,1.0,1990.0,85.916667,1.677678
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976.0,47200.0,1.0,1990.0,84.916667,1.677678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860467,2021-11,YISHUN,EXECUTIVE,792,YISHUN RING RD,10 TO 12,144.0,Apartment,1987.0,690000.0,11.0,2021.0,64.083333,1.000000
860468,2021-11,YISHUN,EXECUTIVE,611,YISHUN ST 61,10 TO 12,142.0,Apartment,1987.0,680000.0,11.0,2021.0,64.083333,1.000000
860469,2021-11,YISHUN,EXECUTIVE,614,YISHUN ST 61,01 TO 03,142.0,Apartment,1987.0,632000.0,11.0,2021.0,64.083333,1.000000
860470,2021-11,YISHUN,EXECUTIVE,837,YISHUN ST 81,01 TO 03,145.0,Maisonette,1988.0,755000.0,11.0,2021.0,65.083333,1.000000


Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,multiplier,resale_price_adjusted
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,9000.0,1.0,1990.0,85.916667,1.677678,15099.102734
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678,10066.068489
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,8000.0,1.0,1990.0,85.916667,1.677678,13421.424652
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678,10066.068489
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976.0,47200.0,1.0,1990.0,84.916667,1.677678,79186.405448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860467,2021-11,YISHUN,EXECUTIVE,792,YISHUN RING RD,10 TO 12,144.0,Apartment,1987.0,690000.0,11.0,2021.0,64.083333,1.000000,690000.000000
860468,2021-11,YISHUN,EXECUTIVE,611,YISHUN ST 61,10 TO 12,142.0,Apartment,1987.0,680000.0,11.0,2021.0,64.083333,1.000000,680000.000000
860469,2021-11,YISHUN,EXECUTIVE,614,YISHUN ST 61,01 TO 03,142.0,Apartment,1987.0,632000.0,11.0,2021.0,64.083333,1.000000,632000.000000
860470,2021-11,YISHUN,EXECUTIVE,837,YISHUN ST 81,01 TO 03,145.0,Maisonette,1988.0,755000.0,11.0,2021.0,65.083333,1.000000,755000.000000


### **<u>(ozy) Subsection 1.3: Converting the Locations of the HDB blocks into coordinates</u>**

In this assignment, we also plan to explore the data based on its geographical attributes (ie. latitude and longitude). As such, we need to retrieve the approximate coordinates of each HDB block based on their address by making use of OneMapAPI.

In [187]:
def getcoordinates(block, street_name):
    address = block + street_name
    req = requests.get('https://developers.onemap.sg/commonapi/search?searchVal='+address+'&returnGeom=Y&getAddrDetails=Y&pageNum=1')
    resultsdict = eval(req.text)
    if len(resultsdict['results'])>0:
        return resultsdict['results'][0]['LATITUDE'], resultsdict['results'][0]['LONGITUDE']
    else:
        pass


'''

'''

'\n\n'

### **<u>Subsection 1.4: The actual data exploration</u>**