In [29]:
#import statements

import csv
import pandas as pd
import numpy as np
import requests
import time

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.dummy import DummyClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
# Load scikit's random forest classifier library
from sklearn.ensemble import RandomForestClassifier

# Section 1: Data Cleaning and Exploration 
### In this section, we will:
- Import the data
- Clean the data
- Combine the data
- Create and/or generalise relevant variables 
- Find relevant details about the data worth exploring

In [3]:
'''Processing the data'''

#Source: https://data.gov.sg/dataset/resale-flat-prices?resource_id=f1765b54-a209-4718-8d38-a39237f502b3
flatPrice1990=pd.read_csv('resale-flat-prices-based-on-approval-date-1990-1999.csv')
flatPrice2000=pd.read_csv('resale-flat-prices-based-on-approval-date-2000-feb-2012.csv')
flatPrice2012=pd.read_csv('resale-flat-prices-based-on-registration-date-from-mar-2012-to-dec-2014.csv')
flatPrice2015=pd.read_csv('resale-flat-prices-based-on-registration-date-from-jan-2015-to-dec-2016.csv')
flatPrice2017=pd.read_csv('resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv')

#Source: https://data.gov.sg/dataset/consumer-price-index-monthly
CPI2019Base = pd.read_csv('consumer-price-index-2019-as-base-year-monthly.csv')

desc1990 = flatPrice1990.isnull().sum()
desc2000 = flatPrice2000.isnull().sum()
desc2012 = flatPrice2012.isnull().sum()
desc2015 = flatPrice2015.isnull().sum()
desc2017 = flatPrice2017.isnull().sum()

flatInfo = pd.concat([desc1990, desc2000, desc2012, desc2015, desc2017], axis=1)
flatInfo.columns = ["1990 to 1999", "2000 to 2012", "2012 to 2014", "2015 to 2016", "2017 onwards" ]
flatInfo

Unnamed: 0,1990 to 1999,2000 to 2012,2012 to 2014,2015 to 2016,2017 onwards
month,0.0,0.0,0.0,0,0
town,0.0,0.0,0.0,0,0
flat_type,0.0,0.0,0.0,0,0
block,0.0,0.0,0.0,0,0
street_name,0.0,0.0,0.0,0,0
storey_range,0.0,0.0,0.0,0,0
floor_area_sqm,0.0,0.0,0.0,0,0
flat_model,0.0,0.0,0.0,0,0
lease_commence_date,0.0,0.0,0.0,0,0
resale_price,0.0,0.0,0.0,0,0


In [4]:
# Making the collated dataframe and verifying its datatypes
flatPriceCollated = pd.concat([flatPrice1990, flatPrice2000, flatPrice2012, flatPrice2015, flatPrice2017], join = "inner")
display(flatPriceCollated.sample(10))
flatPriceCollated.dtypes

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price
69199,2001-11,TAMPINES,EXECUTIVE,941,TAMPINES AVE 5,04 TO 06,146.0,Maisonette,1988,447000.0
275602,2008-11,JURONG WEST,5 ROOM,415,JURONG WEST ST 42,01 TO 03,136.0,Model A,1985,320000.0
260005,1999-06,JURONG WEST,5 ROOM,221,BOON LAY PL,04 TO 06,109.0,STANDARD,1979,215000.0
279108,2009-01,BUKIT BATOK,3 ROOM,227,BT BATOK CTRL,01 TO 03,67.0,New Generation,1985,235000.0
6026,2017-05,ANG MO KIO,3 ROOM,635,ANG MO KIO AVE 6,10 TO 12,68.0,New Generation,1980,290000.0
113348,1996-01,JURONG EAST,4 ROOM,329,JURONG EAST AVE 1,07 TO 09,104.0,MODEL A,1983,255000.0
33335,2013-10,PUNGGOL,5 ROOM,105C,EDGEFIELD PLAINS,10 TO 12,110.0,Improved,2003,545888.0
338725,2010-10,KALLANG/WHAMPOA,5 ROOM,56,GEYLANG BAHRU,01 TO 03,117.0,Standard,1974,475000.0
335360,2010-09,HOUGANG,5 ROOM,310,HOUGANG AVE 5,07 TO 09,121.0,Improved,1984,403000.0
298909,2009-09,PASIR RIS,4 ROOM,738,PASIR RIS DR 10,01 TO 03,104.0,Model A,1996,308000.0


month                   object
town                    object
flat_type               object
block                   object
street_name             object
storey_range            object
floor_area_sqm         float64
flat_model              object
lease_commence_date      int64
resale_price           float64
dtype: object

In [5]:
#Attempting to understand what "remaining_lease" refers to
display(flatPrice2015.sample(3))
display(flatPrice2017.sample(3))

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
35105,2016-11,PUNGGOL,4 ROOM,615C,EDGEFIELD PLAINS,07 TO 09,92.0,Model A,2012,95,420000.0
1332,2015-02,BEDOK,3 ROOM,532,BEDOK NTH ST 3,13 TO 15,68.0,New Generation,1980,63,305000.0
12927,2015-09,TOA PAYOH,4 ROOM,138,POTONG PASIR AVE 3,01 TO 03,111.0,Model A,1984,67,550000.0


Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price
97637,2021-05,GEYLANG,3 ROOM,99,OLD AIRPORT RD,04 TO 06,56.0,Standard,1969,46 years 09 months,275000.0
79938,2020-09,TAMPINES,5 ROOM,726,TAMPINES ST 71,10 TO 12,124.0,Improved,1997,76 years,538000.0
38593,2018-10,WOODLANDS,5 ROOM,897A,WOODLANDS DR 50,10 TO 12,118.0,Improved,1997,77 years 11 months,400000.0


### **<u>Subsection 1.1: Generalising `remaining_lease` for the entire dataset</u>**
From the above, it seems like "remaining_lease" was a statistic which was introduced starting from the 2015-2016 csv. However, one may also compute an estimation for this variable via taking:

$$ \text{lease\_commence\_date} + 99 \text{ years} - \text{month (or pretty much, the date when the transaction was made)}$$


However, a shortcoming of this method of manual computation is the lack of precision; namely that we may only evaluate the remaining lease to the nearest year. Furthermore, it seems as if the method in which the data was stored differs between the `2015-2016` csv and the `2017 onwards` csvs, with the former storing the data to the nearest year, and the latter storing the data to the nearest month.  

These inconsistencies prove to be a problem, but oh well, what can we do about it. 

As such, noting that most of the data could only compute the `remaining_lease` to the nearest year ($\pm$ 1 year for the data before 2015), we will proceed by using the aforementioned; crude method. Sure, it does not provide a high degree of accuracy, but at least there is consistency if we apply this formula for all rows. 

In [6]:
# Adding columns representing the columns for the month and year
flatPriceCollated["month_no"] = (flatPriceCollated.month.str[5:]).astype(int)
flatPriceCollated["year_no"] = (flatPriceCollated.month.str[:4]).astype(int)

display(flatPriceCollated.sample(10))

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no
357931,2011-07,YISHUN,4 ROOM,314,YISHUN RING RD,07 TO 09,108.0,Model A,1995,400000.0,7,2011
100890,2021-06,TAMPINES,5 ROOM,518B,TAMPINES CTRL 7,04 TO 06,114.0,DBSS,2008,723000.0,6,2021
63015,1994-02,YISHUN,4 ROOM,323,YISHUN CTRL,10 TO 12,108.0,MODEL A,1988,175000.0,2,1994
299584,2009-09,TAMPINES,3 ROOM,849,TAMPINES ST 83,01 TO 03,70.0,Simplified,1987,230000.0,9,2009
52735,2019-07,BUKIT BATOK,4 ROOM,536,BT BATOK ST 52,07 TO 09,107.0,Model A,1986,352000.0,7,2019
204067,1998-07,YISHUN,3 ROOM,287,YISHUN AVE 6,01 TO 03,74.0,MODEL A,1985,143000.0,7,1998
247857,1999-04,HOUGANG,3 ROOM,528,HOUGANG AVE 6,04 TO 06,74.0,MODEL A,1987,149000.0,4,1999
191980,2005-10,TAMPINES,4 ROOM,488B,TAMPINES ST 45,04 TO 06,103.0,Model A,1989,238000.0,10,2005
182192,2005-06,TOA PAYOH,5 ROOM,45,LOR 5 TOA PAYOH,04 TO 06,123.0,Improved,1993,360000.0,6,2005
3835,2015-04,ANG MO KIO,5 ROOM,354,ANG MO KIO ST 32,10 TO 12,110.0,Improved,2001,695000.0,4,2015


In [7]:
# This is the aforementioned calculation for the remaining lease of a house with respect to when the house was resold. We 
# first implement the creation of the column "remaining_lease_manual"
def calcYearsLeft(row):
  row.remaining_lease_manual = row.lease_commence_date + 99 - (row.year_no + row.month_no/12)
  return row

flatPriceCollated["remaining_lease_manual"] = 0
flatPriceCollated = flatPriceCollated.apply(calcYearsLeft, axis='columns')
display(flatPriceCollated.sample(10))
flatPriceCollated.describe()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual
198523,1998-06,PASIR RIS,4 ROOM,213,PASIR RIS ST 21,04 TO 06,106.0,MODEL A,1993,285000.0,6,1998,93.5
29732,2013-07,WOODLANDS,4 ROOM,627,WOODLANDS AVE 6,01 TO 03,90.0,Model A2,1998,385000.0,7,2013,83.416667
309954,2010-01,BUKIT PANJANG,4 ROOM,174,LOMPANG RD,04 TO 06,101.0,Model A,1997,353000.0,1,2010,85.916667
154502,2004-07,TAMPINES,4 ROOM,811,TAMPINES AVE 4,10 TO 12,91.0,New Generation,1985,225000.0,7,2004,79.416667
58240,2001-08,KALLANG/WHAMPOA,3 ROOM,463,CRAWFORD LANE,13 TO 15,60.0,Improved,1982,163000.0,8,2001,79.333333
189801,1998-04,CLEMENTI,4 ROOM,348,CLEMENTI AVE 5,10 TO 12,92.0,NEW GENERATION,1979,260000.0,4,1998,79.666667
51948,2019-06,SERANGOON,3 ROOM,1,LOR LEW LIAN,10 TO 12,64.0,Improved,1978,301000.0,6,2019,57.5
129842,2003-10,BUKIT BATOK,5 ROOM,110,BT BATOK WEST AVE 6,22 TO 24,132.0,Model A,1985,365000.0,10,2003,80.166667
31427,1992-06,JURONG EAST,5 ROOM,309,JURONG EAST ST 32,10 TO 12,131.0,MODEL A,1983,127000.0,6,1992,89.5
278158,1999-10,BUKIT PANJANG,4 ROOM,432,BT PANJANG RING RD,01 TO 03,104.0,MODEL A,1989,225000.0,10,1999,88.166667


Unnamed: 0,floor_area_sqm,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual
count,864616.0,864616.0,864616.0,864616.0,864616.0,864616.0
mean,95.678732,1987.590754,302883.5,6.593214,2004.966207,81.075112
std,25.966473,9.939253,155576.2,3.410459,8.467052,10.146425
min,28.0,1966.0,5000.0,1.0,1990.0,43.166667
25%,73.0,1980.0,185000.0,4.0,1998.0,74.5
50%,93.0,1986.0,282000.0,7.0,2004.0,82.75
75%,113.0,1995.0,395000.0,10.0,2011.0,89.333333
max,307.0,2019.0,1360000.0,12.0,2022.0,100.583333


In [8]:
# verifying the validity of my proposed computation

# adding month_no and year_no columns for the 2015, 2017 dataframes
flatPrice2015["month_no"] = (flatPrice2015.month.str[5:]).astype(int)
flatPrice2015["year_no"] = (flatPrice2015.month.str[:4]).astype(int)

flatPrice2017["month_no"] = (flatPrice2017.month.str[5:]).astype(int)
flatPrice2017["year_no"] = (flatPrice2017.month.str[:4]).astype(int)

# Finding the difference between our calculated value and the provided value
flatPrice2015["remaining_lease_manual"] = 0
flatPrice2015New = flatPrice2015.apply(calcYearsLeft, axis='columns')
flatPrice2015New["remaining_lease_difference"] = abs(flatPrice2015New.remaining_lease_manual - flatPrice2015New.remaining_lease)
display(flatPrice2015New.describe())

def calcYearsDiff2017(row):
  #       72 years 06 months	
  #index: 0123456789
  years = int(str(row.remaining_lease)[:2])
  months = int(str(row.remaining_lease)[9:11]) if (len(row.remaining_lease) > 12) else 0
  row.remaining_lease_difference = abs(row.remaining_lease_manual - ( years + months/12 ) )
  return row

flatPrice2017["remaining_lease_manual"] = 0
flatPrice2017New = flatPrice2017.apply(calcYearsLeft, axis='columns')
flatPrice2017New["remaining_lease_difference"] = 0
flatPrice2017New = flatPrice2017New.apply(calcYearsDiff2017, axis='columns')
display(flatPrice2017New.describe())

Unnamed: 0,floor_area_sqm,lease_commence_date,remaining_lease,resale_price,month_no,year_no,remaining_lease_manual,remaining_lease_difference
count,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0,37153.0
mean,97.020386,1990.920195,73.913116,436862.8,6.63548,2015.521438,73.8458,0.347312
std,24.19836,10.86233,10.885456,135805.2,3.307996,0.499547,10.84662,0.235746
min,31.0,1966.0,48.0,190000.0,1.0,2015.0,48.083333,0.0
25%,74.0,1984.0,66.0,340000.0,4.0,2015.0,66.25,0.166667
50%,96.0,1989.0,72.0,408000.0,7.0,2016.0,71.75,0.333333
75%,111.0,2000.0,83.0,495000.0,9.0,2016.0,82.75,0.5
max,280.0,2013.0,97.0,1150000.0,12.0,2016.0,96.666667,1.166667


Unnamed: 0,floor_area_sqm,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,remaining_lease_difference
count,118413.0,118413.0,118413.0,118413.0,118413.0,118413.0,118413.0
mean,97.837401,1995.101568,460645.8,6.736912,2019.203702,74.336457,0.5653616
std,24.120851,13.444761,159298.3,3.407568,1.464333,13.332818,0.2899117
min,31.0,1966.0,140000.0,1.0,2017.0,43.166667,7.105427e-14
25%,82.0,1985.0,345000.0,4.0,2018.0,63.666667,0.3333333
50%,94.0,1996.0,430000.0,7.0,2019.0,74.416667,0.5833333
75%,113.0,2005.0,540000.0,10.0,2021.0,84.833333,0.8333333
max,249.0,2019.0,1360000.0,12.0,2022.0,97.333333,1.583333


As we may see from the results, the mean of `remaining_lease_difference` hovers around 0.3 to 0.6, which is expected. This shows that our method of computing the `remaining_lease` has a very low deviation from the ones provided in the government dataset. As such, this variable is valid to use in our analysis of the dataset. 

### **<u>Subsection 1.2: Creating a variable `resale_price_adjusted` to adjust for inflation</u>**

One notes that due to the substantial inflation which occured in the last 30 years, data regarding the resale price is likely inaccurate due to the lack of adjustment for inflation. As such, we will make a column `resale_price_adjusted` which adjusts the `resale_price` to the equivalent price today. 
To do this, we will refer to the `CPI2019Base` dataframe. 

In [9]:
display(CPI2019Base)


Unnamed: 0,month,level_1,value
0,1961-01,All Items,24.542
1,1961-01,All Items Less Imputed Rentals On Owner-occupi...,na
2,1961-01,All Items Less Accommodation,na
3,1961-02,All Items,24.565
4,1961-02,All Items Less Imputed Rentals On Owner-occupi...,na
...,...,...,...
2188,2021-10,All Items Less Imputed Rentals On Owner-occupi...,102.927
2189,2021-10,All Items Less Accommodation,103.31
2190,2021-11,All Items,103.959
2191,2021-11,All Items Less Imputed Rentals On Owner-occupi...,104.079


The `CPI`, or Consumer Price Index, "is a measure that examines the weighted average of prices of a basket of consumer goods and services, such as transportation, food, and medical care." (Source: Investopedia). Notably, the `value` column in the dataframe may be caluclated as follows: 

$$
\frac{\text{Total Cost of All Products in an arbituary year}}{\text{Total Cost of All Products in 2019}}
$$

For the sake of getting using a metric which represents the overall inflation of Singapore, we choose to use `level_1 = "All Items"`



In [10]:
mask = CPI2019Base.level_1.str.endswith("All Items") == True

CPI2019AllItems = CPI2019Base[mask]
CPI2019AllItems["month_no"] = (CPI2019AllItems.month.str[5:]).astype(int)
CPI2019AllItems["year_no"] = (CPI2019AllItems.month.str[:4]).astype(int)
CPI2019AllItems = CPI2019AllItems.reset_index()

CPI2019AllItems.isnull().sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPI2019AllItems["month_no"] = (CPI2019AllItems.month.str[5:]).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CPI2019AllItems["year_no"] = (CPI2019AllItems.month.str[:4]).astype(int)


index       0
month       0
level_1     0
value       0
month_no    0
year_no     0
dtype: int64

Perhaps out of coincidence, when `level_1 = "All Items"`, the CPI metric exists for all months. Coincidence? Maybe. But Ill take it. Lets make a column `multiplier` which stores a metric representing how much we need to scale up the `resale_price` to accomodate for inflation in respect to today. 

In [11]:
currCPI = CPI2019AllItems.value.iloc[-1]

CPI2019AllItems.value = CPI2019AllItems.value.astype("float")

CPI2019AllItems["multiplier"] = float(currCPI) / CPI2019AllItems.value

CPI2019AllItems


Unnamed: 0,index,month,level_1,value,month_no,year_no,multiplier
0,0,1961-01,All Items,24.542,1,1961,4.235963
1,3,1961-02,All Items,24.565,2,1961,4.231997
2,6,1961-03,All Items,24.585,3,1961,4.228554
3,9,1961-04,All Items,24.187,4,1961,4.298135
4,12,1961-05,All Items,24.053,5,1961,4.322080
...,...,...,...,...,...,...,...
726,2178,2021-07,All Items,101.672,7,2021,1.022494
727,2181,2021-08,All Items,102.231,8,2021,1.016903
728,2184,2021-09,All Items,102.657,9,2021,1.012683
729,2187,2021-10,All Items,102.950,10,2021,1.009801


In [12]:
CPI2019AllItems = CPI2019AllItems[["month","multiplier"]]
flatPriceCollated = pd.merge(flatPriceCollated,CPI2019AllItems,on="month", how="outer")
flatPriceCollated.dropna(inplace= True)
flatPriceCollated["resale_price_adjusted"] = flatPriceCollated.resale_price * flatPriceCollated.multiplier
flatPriceCollated

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,multiplier,resale_price_adjusted
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,9000.0,1.0,1990.0,85.916667,1.677678,15099.102734
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678,10066.068489
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977.0,8000.0,1.0,1990.0,85.916667,1.677678,13421.424652
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,IMPROVED,1977.0,6000.0,1.0,1990.0,85.916667,1.677678,10066.068489
4,1990-01,ANG MO KIO,3 ROOM,216,ANG MO KIO AVE 1,04 TO 06,73.0,NEW GENERATION,1976.0,47200.0,1.0,1990.0,84.916667,1.677678,79186.405448
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860467,2021-11,YISHUN,EXECUTIVE,792,YISHUN RING RD,10 TO 12,144.0,Apartment,1987.0,690000.0,11.0,2021.0,64.083333,1.000000,690000.000000
860468,2021-11,YISHUN,EXECUTIVE,611,YISHUN ST 61,10 TO 12,142.0,Apartment,1987.0,680000.0,11.0,2021.0,64.083333,1.000000,680000.000000
860469,2021-11,YISHUN,EXECUTIVE,614,YISHUN ST 61,01 TO 03,142.0,Apartment,1987.0,632000.0,11.0,2021.0,64.083333,1.000000,632000.000000
860470,2021-11,YISHUN,EXECUTIVE,837,YISHUN ST 81,01 TO 03,145.0,Maisonette,1988.0,755000.0,11.0,2021.0,65.083333,1.000000,755000.000000


### **<u>Subsection 1.3: Converting the Locations of the HDB blocks into coordinates</u>**

In this assignment, we also plan to explore the data based on its geographical attributes (ie. latitude and longitude). As such, we need to retrieve the approximate coordinates of each HDB block based on their address by making use of OneMapAPI.

In [56]:
def getcoordinates(address):
    link = 'https://geocode.xyz/?locate='+address+'&region=SG&geoit=json&auth=100583375187098e15877067x52396'
    req = requests.get(link)
    result = eval(req.text)
    return result['latt'], result['longt']


fPC = flatPriceCollated.copy()
fPC = fPC.sample(5).reset_index()
latitudes, longitudes = [], []
fPC['address'] = fPC['block'] + ' ' + fPC['street_name']
for i in range(len(fPC)):
    latitude, longitude = getcoordinates(fPC.iloc[i]['address'])
    latitudes.append(latitude)
    longitudes.append(longitude)
fPC['Latitude'], fPC['Longitude'] = pd.Series(latitudes), pd.Series(longitudes)
fPC

Unnamed: 0,index,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,month_no,year_no,remaining_lease_manual,multiplier,resale_price_adjusted,address,Latitude,Longitude
0,432777,2004-03,TAMPINES,5 ROOM,898A,TAMPINES ST 81,13 TO 15,121.0,Improved,1988.0,310000.0,3.0,2004.0,82.75,1.366964,423758.924932,898A TAMPINES ST 81,1.36413,103.94057
1,749745,2017-03,SEMBAWANG,EXECUTIVE,360C,ADMIRALTY DR,07 TO 09,131.0,Premium Apartment,2001.0,468000.0,3.0,2017.0,82.75,1.049413,491125.050472,360C ADMIRALTY DR,1.44867,103.81469
2,578489,2009-06,SEMBAWANG,4 ROOM,318,SEMBAWANG VISTA,01 TO 03,100.0,Model A,1999.0,312000.0,6.0,2009.0,88.5,1.233452,384836.894747,318 SEMBAWANG VISTA,1.44623,103.81999
3,223714,1998-11,TOA PAYOH,5 ROOM,12,JOO SENG RD,10 TO 12,132.0,MODEL A,1984.0,395000.0,11.0,1998.0,84.083333,1.418596,560345.578101,12 JOO SENG RD,1.33563,103.88144
4,330991,2001-04,ANG MO KIO,4 ROOM,627,ANG MO KIO AVE 9,04 TO 06,112.0,Model A,1993.0,288000.0,4.0,2001.0,90.666667,1.374919,395976.670061,627 ANG MO KIO AVE 9,1.38357,103.83826


### **<u>Subsection 1.4: The actual data exploration</u>**