# Data Pre-Processing Code for final_hdb_resale_price.csv

This is a separate notebook containing some of the code for data pre-processing of the final_hdb_resale_price.csv file.
The code has been separated from the main notebook to make it easier to read and understand, and allow the main notebook to run more efficiently.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# import folium 
import requests
import json
from geopy.distance import geodesic

# Raw resale transactions; the relative path is resolved against the notebook's working directory.
hdb_resale_prices = pd.read_csv("resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv")

### Merging Latitude and Longitude Data into Resale Data

In [6]:

# Load lat_long.csv and rename its columns in one pass: the coordinate columns become
# the integer labels 0 / 1 and the saved row-number column becomes 'index'.
df_coordinates = pd.read_csv('lat_long.csv').rename(
    columns={'Latitude': 0, 'Longitude': 1, 'Unnamed: 0': 'index'}
)
df_coordinates.head()

Unnamed: 0,index,month,town,flat_type,storey_range,floor_area_sqm,flat_model,resale_price,remaining_lease_months,address,0,1
0,1,2017-01,ANG MO KIO,3 ROOM,01 TO 03,67.0,New Generation,250000.0,727.0,108 ANG MO KIO AVE 4,1.370943,103.837975
1,2,2017-01,ANG MO KIO,3 ROOM,01 TO 03,67.0,New Generation,262000.0,749.0,602 ANG MO KIO AVE 5,1.380709,103.835368
2,3,2017-01,ANG MO KIO,3 ROOM,04 TO 06,68.0,New Generation,265000.0,744.0,465 ANG MO KIO AVE 10,1.366201,103.857201
3,4,2017-01,ANG MO KIO,3 ROOM,01 TO 03,67.0,New Generation,265000.0,749.0,601 ANG MO KIO AVE 5,1.381041,103.835132
4,5,2017-01,ANG MO KIO,3 ROOM,01 TO 03,68.0,New Generation,275000.0,756.0,150 ANG MO KIO AVE 5,1.376807,103.842018


In [7]:
# Left-join the coordinate columns onto every resale record using the shared descriptive columns,
# then drop the helper 'index' column and restore readable coordinate names.
# NOTE(review): these keys may not be unique per transaction; confirm the join does not duplicate rows.
df_combined = (
    hdb_resale_prices
    .merge(
        df_coordinates,
        how='left',
        on=["month", "flat_type", "storey_range", "floor_area_sqm", "flat_model", "resale_price", "town"],
    )
    .drop(columns="index")
    .rename(columns={0: 'Latitude', 1: 'Longitude'})
)
df_combined.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price,remaining_lease_months,address,Latitude,Longitude
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0,,,,
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0,727.0,108 ANG MO KIO AVE 4,1.370943,103.837975
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0,749.0,602 ANG MO KIO AVE 5,1.380709,103.835368
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0,744.0,465 ANG MO KIO AVE 10,1.366201,103.857201
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0,749.0,601 ANG MO KIO AVE 5,1.381041,103.835132


### Incorporating Inflation Rate into Resale Prices

In [8]:
# Read only the file rows listed in valid_rows from the CPI table, then transpose so that
# each year becomes a row and each CPI series becomes a column.
valid_rows = [10,11,14]
cpi_df = pd.read_csv(
    "cpi-2012-to-2022.csv",
    skiprows=lambda row_number: row_number not in valid_rows,
    index_col=0,
).transpose()
cpi_df

Unnamed: 0,All Items,Housing & Utilities
2022,108.37,106.406
2021,102.119,101.1
2020,99.818,99.714
2019,100.0,100.0
2018,99.438,101.003
2017,99.004,102.372
2016,98.436,105.034
2015,98.963,109.499
2014,99.483,113.517
2013,98.474,113.388


In [9]:
# Turn the year labels (text taken from the CSV header) into integer years.
# Converting to str first keeps this cell safe to re-run: integers handed straight to
# DatetimeIndex are read as nanoseconds since the epoch, which would turn every year into 1970.
cpi_df.index = pd.to_datetime(cpi_df.index.astype(str).str.strip(), format="%Y").year

In [10]:
# Derive the transaction year from the 'month' column (e.g. '2017-01' -> 2017).
df_combined['year'] = pd.to_datetime(df_combined['month']).dt.year

In [11]:
# Add a df_combined column holding the Housing & Utilities CPI of each row's year, scaled to a
# multiplier (index / 100). Years beyond latest_year reuse the latest_year value.
latest_year = 2022
df_combined['cpi_multiplier'] = [
    cpi_df["Housing & Utilities"].loc[min(txn_year, latest_year)] / 100
    for txn_year in df_combined["year"]
]

In [12]:
# Add a df_combined column with the resale price scaled by that row's CPI multiplier.
# NOTE(review): multiplying by CPI/100 raises prices in high-CPI years; expressing prices in
# base-year dollars would divide instead - confirm which direction is intended.
df_combined['cpi_adjusted_price'] = df_combined["resale_price"].mul(df_combined["cpi_multiplier"])

In [13]:
# Display the merged frame, now including the year, cpi_multiplier and cpi_adjusted_price columns.
df_combined

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price,remaining_lease_months,address,Latitude,Longitude,year,cpi_multiplier,cpi_adjusted_price
0,2017-01,ANG MO KIO,2 ROOM,406,ANG MO KIO AVE 10,10 TO 12,44.0,Improved,1979,61 years 04 months,232000.0,,,,,2017,1.02372,237503.04
1,2017-01,ANG MO KIO,3 ROOM,108,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,60 years 07 months,250000.0,727.0,108 ANG MO KIO AVE 4,1.370943,103.837975,2017,1.02372,255930.00
2,2017-01,ANG MO KIO,3 ROOM,602,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,262000.0,749.0,602 ANG MO KIO AVE 5,1.380709,103.835368,2017,1.02372,268214.64
3,2017-01,ANG MO KIO,3 ROOM,465,ANG MO KIO AVE 10,04 TO 06,68.0,New Generation,1980,62 years 01 month,265000.0,744.0,465 ANG MO KIO AVE 10,1.366201,103.857201,2017,1.02372,271285.80
4,2017-01,ANG MO KIO,3 ROOM,601,ANG MO KIO AVE 5,01 TO 03,67.0,New Generation,1980,62 years 05 months,265000.0,749.0,601 ANG MO KIO AVE 5,1.381041,103.835132,2017,1.02372,271285.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150108,2023-01,YISHUN,5 ROOM,511B,YISHUN ST 51,07 TO 09,113.0,Improved,2017,93 years 07 months,680000.0,1123.0,511B YISHUN ST 51,1.415600,103.842414,2023,1.06406,723560.80
150109,2023-01,YISHUN,5 ROOM,760,YISHUN ST 72,04 TO 06,122.0,Improved,1987,63 years 03 months,548000.0,759.0,760 YISHUN ST 72,1.425795,103.832725,2023,1.06406,583104.88
150110,2023-01,YISHUN,EXECUTIVE,344,YISHUN AVE 11,01 TO 03,145.0,Maisonette,1988,64 years 07 months,755000.0,775.0,344 YISHUN AVE 11,1.429031,103.844304,2023,1.06406,803365.30
150111,2023-01,YISHUN,EXECUTIVE,387,YISHUN RING RD,10 TO 12,145.0,Apartment,1988,64 years 06 months,770000.0,774.0,387 YISHUN RING RD,1.429547,103.847279,2023,1.06406,819326.20


### No. of BTO per year

In [None]:
# BTO count per calendar year.
# NOTE(review): the source of these figures is not recorded in this notebook; the round 2023
# value looks like an estimate - confirm.
dictionary ={2017:14464, 2018 : 17556, 2019 : 12114, 2020 : 7314, 2021: 13756,2022:20064,2023: 20000}

# Add a column holding the BTO count of each row's transaction year
df_combined['no.of bto'] = df_combined['year'].map(dictionary)

# Preview the result with rich display instead of printing the entire frame
df_combined.head()

### Demand for resale

In [None]:
# Resale application count per year.
# NOTE(review): 2020 and 2021 carry the same value and 2023 has no entry (rows from 2023 will
# map to NaN) - confirm against the data source.
resale_application = {
    2017: 20894,
    2018: 22005,
    2019: 24772,
    2020: 26436,
    2021: 26436,
    2022: 30370,
}

In [None]:
# Attach the yearly application count to every row; years missing from the dict become NaN.
df_combined['resale application'] = df_combined['year'].map(resale_application)

In [None]:
# Bar chart of the number of df_combined rows per year.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(x=df_combined['year'], data=df_combined, ax=ax)
# Label each bar with its own height, so the labels never depend on the frequency-sorted
# order of value_counts() happening to match the order in which the bars are drawn.
abs_values = [int(bar.get_height()) for bar in ax.containers[0]]
ax.bar_label(container=ax.containers[0], labels=abs_values)
ax.set_title('Number of Resale Applications Per Year')
plt.show()

In [None]:
# Number of rows recorded for 2022.
df_combined['year'].eq(2022).sum()

In [None]:
# 2017 application count divided by the number of 2017 rows (the same value repeats for every 2017 row).
df_combined.loc[df_combined['year'].eq(2017), 'resale application'] / df_combined['year'].eq(2017).sum()

In [None]:
# 2019 application count divided by the number of 2019 rows (the same value repeats for every 2019 row).
df_combined.loc[df_combined['year'].eq(2019), 'resale application'] / df_combined['year'].eq(2019).sum()

In [None]:
# 2020 application count divided by the number of 2020 rows (the same value repeats for every 2020 row).
df_combined.loc[df_combined['year'].eq(2020), 'resale application'] / df_combined['year'].eq(2020).sum()

In [None]:
# 2021 application count divided by the number of 2021 rows (the same value repeats for every 2021 row).
df_combined.loc[df_combined['year'].eq(2021), 'resale application'] / df_combined['year'].eq(2021).sum()

In [None]:
# 2022 application count divided by the number of 2022 rows (the same value repeats for every 2022 row).
df_combined.loc[df_combined['year'].eq(2022), 'resale application'] / df_combined['year'].eq(2022).sum()

In [None]:
# Demand ratio per year, entered by hand.
# NOTE(review): presumably rounded from the per-year ratio cells in this notebook; no such cell
# exists for 2018 and 2023 has no entry - confirm.
demand_ratio = {
    2017: 1.02,
    2018: 1.02,
    2019: 1.12,
    2020: 1.13,
    2021: 0.91,
    2022: 1.14,
}

In [None]:
# Attach the yearly demand ratio to every row; years missing from demand_ratio become NaN.
df_combined['demand ratio'] = df_combined['year'].map(demand_ratio)

In [None]:
# hdb_resale_prices.to_excel(r'resaleWithDemandAndBtoSupply.xlsx', index=False)

<h3>Remaining Lease Months</h3>

In [None]:
# Convert 'remaining_lease' text such as '61 years 04 months' into a total number of months.
# The pattern accepts singular and plural units, so '62 years 01 month' keeps its extra month
# (a plain "months" substring test would drop it) and a bare 'NN years' counts as zero months.
# Vectorised parsing also replaces the per-row .loc loop.
lease_parts = df_combined["remaining_lease"].astype(str).str.extract(
    r"(?P<years>\d+)\s*years?(?:\s*(?P<months>\d+)\s*months?)?"
)
df_combined["remaining_lease_months"] = (
    pd.to_numeric(lease_parts["years"]) * 12
    + pd.to_numeric(lease_parts["months"]).fillna(0)
).astype(float)  # stay float, matching the column produced by the merge

In [None]:
# Drop the raw lease columns. Not re-runnable: a second run raises KeyError because the
# columns are already gone.
df_combined= df_combined.drop(columns=["lease_commence_date","remaining_lease"])

<h3>Storey Range</h3>

In [None]:
# Replace each storey band such as '01 TO 03' with its midpoint (2.0).
# Vectorised parsing gives a float column and avoids a per-row .loc loop; the dtype guard
# makes the cell a no-op when the column has already been converted.
if not pd.api.types.is_numeric_dtype(df_combined['storey_range']):
    storey_bounds = df_combined['storey_range'].astype(str).str.extract(r"(\d+)\s*TO\s*(\d+)")
    df_combined['storey_range'] = (
        pd.to_numeric(storey_bounds[0]) + pd.to_numeric(storey_bounds[1])
    ) / 2

<h3>Geo Coding</h3>

In [None]:
# Build the address as '<block> <street_name>'; this overwrites the 'address' column that came from the merge.
df_combined['address'] = df_combined['block'] + " " + df_combined['street_name']

In [None]:
# block and street_name are now combined in 'address'. Not re-runnable once they are dropped.
df_combined= df_combined.drop(columns=["block","street_name"])

In [None]:
# Plain Python list of every address, in df_combined row order.
addressList = list(df_combined['address'])

<h3>Distance from nearest mrt</h3>

In [None]:
# Station names used as OneMap search terms. Every station is listed exactly once
# (interchanges are not repeated per line), so each name is geocoded only once.
list_of_mrt = [
    "Jurong East MRT",
    "Bukit Batok MRT",
    "Bukit Gombak MRT",
    "Choa Chu Kang MRT",
    "Yew Tee MRT",
    "Kranji MRT",
    "Marsiling MRT",
    "Woodlands MRT",
    "Admiralty MRT",
    "Sembawang MRT",
    "Canberra MRT",
    "Yishun MRT",
    "Khatib MRT",
    "Yio Chu Kang MRT",
    "Ang Mo Kio MRT",
    "Bishan MRT",
    "Braddell MRT",
    "Toa Payoh MRT",
    "Novena MRT",
    "Newton MRT",
    "Orchard MRT",
    "Somerset MRT",
    "Dhoby Ghaut MRT",
    "City Hall MRT",
    "Raffles Place MRT",
    "Marina Bay MRT",
    "Marina South Pier MRT",
    "Tuas Link MRT",
    "Tuas West Road MRT",
    "Tuas Crescent MRT",
    "Gul Circle MRT",
    "Joo Koon MRT",
    "Pioneer MRT",
    "Boon Lay MRT",
    "Lakeside MRT",
    "Chinese Garden MRT",
    "Clementi MRT",
    "Dover MRT",
    "Buona Vista MRT",
    "Commonwealth MRT",
    "Queenstown MRT",
    "Redhill MRT",
    "Tiong Bahru MRT",
    "Outram Park MRT",
    "Tanjong Pagar MRT",
    "Bugis MRT",
    "Lavender MRT",
    "Kallang MRT",
    "Aljunied MRT",
    "Paya Lebar MRT",
    "Eunos MRT",
    "Kembangan MRT",
    "Bedok MRT",
    "Tanah Merah MRT",
    "Simei MRT",
    "Tampines MRT",
    "Pasir Ris MRT",
    "Expo MRT",
    "Changi Airport MRT",
    "HarbourFront MRT",
    "Chinatown MRT",
    "Clarke Quay MRT",
    "Little India MRT",
    "Farrer Park MRT",
    "Boon Keng MRT",
    "Potong Pasir MRT",
    "Woodleigh MRT",
    "Serangoon MRT",
    "Kovan MRT",
    "Hougang MRT",
    "Buangkok MRT",
    "Sengkang MRT",
    "Punggol MRT",
    "Punggol Coast MRT",
    "Haw Par Villa MRT",
    "Holland Village MRT",
    "Kent Ridge MRT",
    "one-north MRT",
    "Farrer Road MRT",
    "Botanic Gardens MRT",
    "Caldecott MRT",
    "Marymount MRT",
    "Lorong Chuan MRT",
    "Bartley MRT",
    "Tai Seng MRT",
    "MacPherson MRT",
    "Dakota MRT",
    "Mountbatten MRT",
    "Stadium MRT",
    "Nicoll Highway MRT",
    "Promenade MRT",
    "Esplanade MRT",
    "Bras Basah MRT",
    "Pasir Panjang MRT",
    "Labrador Park MRT",
    "Telok Blangah MRT",
    "Bayfront MRT",
    "Bukit Panjang MRT",
    "Cashew MRT",
    "Hillview MRT",
    "Beauty World MRT",
    "King Albert Park MRT",
    "Sixth Avenue MRT",
    "Tan Kah Kee MRT",
    "Stevens MRT",
    "Rochor MRT",
    "Downtown MRT",
    "Telok Ayer MRT",
    "Fort Canning MRT",
    "Bencoolen MRT",
    "Jalan Besar MRT",
    "Bendemeer MRT",
    "Geylang Bahru MRT",
    "Mattar MRT",
    "Ubi MRT",
    "Kaki Bukit MRT",
    "Bedok North MRT",
    "Bedok Reservoir MRT",
    "Tampines West MRT",
    "Tampines East MRT",
    "Upper Changi MRT",
    "Woodlands North MRT",
    "Woodlands South MRT",
    "Springleaf MRT",
    "Lentor MRT",
    "Mayflower MRT",
    "Bright Hill MRT",
    "Upper Thomson MRT",
    "Napier MRT",
    "Orchard Boulevard MRT",
    "Great World MRT",
    "Havelock MRT",
    "Maxwell MRT",
    "Shenton Way MRT",
    "Gardens by the Bay MRT",
]


In [None]:

# Geocode every station through the OneMap search endpoint, keeping the first hit per name.
# mrt_lat / mrt_long get exactly one entry per station so they line up with list_of_mrt.
# NOTE(review): confirm this endpoint is still served by OneMap.
mrt_lat = []
mrt_long = []

for query_address in list_of_mrt:
    # `params` lets requests encode the search value; the timeout stops one stalled call
    # from hanging the whole cell.
    resp = requests.get(
        'https://developers.onemap.sg/commonapi/search',
        params={'searchVal': str(query_address), 'returnGeom': 'Y', 'getAddrDetails': 'Y'},
        timeout=30,
    )
    resp.raise_for_status()

    data_mrt=json.loads(resp.content)

    if data_mrt['found'] != 0:
        mrt_lat.append(data_mrt["results"][0]["LATITUDE"])
        mrt_long.append(data_mrt["results"][0]["LONGITUDE"])

        print (str(query_address)+",Lat: "+data_mrt['results'][0]['LATITUDE'] +" Long: "+data_mrt['results'][0]['LONGITUDE'])

    else:
        # One placeholder in EACH list; appending twice to mrt_lat would leave the two lists
        # with different lengths.
        mrt_lat.append('NotFound')
        mrt_long.append('NotFound')
        print ("No Results")

In [None]:
# One row per station: search term plus the latitude / longitude returned for it.
mrt_location = pd.DataFrame(
    dict(MRT=list_of_mrt, latitude=mrt_lat, longitude=mrt_long)
)

mrt_location

In [None]:
# Flat coordinates as (lat, long) tuples, in df_coordinates row order.
# NOTE(review): distances derived from this list are later assigned to df_combined; confirm
# df_coordinates and df_combined hold the same rows in the same order.
list_of_lat = df_coordinates[0]
list_of_long = df_coordinates[1]
list_of_coordinates = list(zip(list_of_lat, list_of_long))

# Station coordinates as (lat, long) tuples. Stations the geocoder could not resolve carry
# the 'NotFound' placeholder, which is not a usable coordinate, so they are left out.
mrt_found = mrt_location[
    (mrt_location['latitude'] != 'NotFound') & (mrt_location['longitude'] != 'NotFound')
]
mrt_lat = mrt_found['latitude']
mrt_long = mrt_found['longitude']
list_of_mrt_coordinates = list(zip(mrt_lat, mrt_long))

In [None]:
# Distance in metres from each flat to its nearest MRT station.
# Requires geopy (pip install geopy).
from geopy.distance import geodesic

list_of_dist_mrt = []
min_dist_mrt = [
    min(geodesic(origin, station).meters for station in list_of_mrt_coordinates)
    for origin in list_of_coordinates
]

In [None]:
# NOTE(review): min_dist_mrt has one entry per row of df_coordinates; this assignment needs
# that to equal len(df_combined) with rows in the same order - confirm.
df_combined["Distance to nearest MRT"] = min_dist_mrt

<h3>Distance to CBD</h3>

In [None]:
# Central Business District (CBD): Singapore's business and financial district, home to
# leading international businesses and financial institutions. It spans from Raffles Place
# along Shenton Way / Robinson Road / Cecil Street to the Tanjong Pagar and Anson subzones.
# The names below are used as search terms to obtain CBD reference coordinates.
list_of_cbd=[
    "Shenton Way",
    "Raffles Place",
    "Robinson Road",
    "Cecil Street",
    "Tanjong Pagar"
]

In [None]:
# Geocode each CBD reference name through the OneMap search endpoint (first hit only).
cbd_lat = []
cbd_long = []

for query_address in list_of_cbd:
    query_string = (
        'https://developers.onemap.sg/commonapi/search?searchVal='
        + str(query_address)
        + '&returnGeom=Y&getAddrDetails=Y'
    )
    resp = requests.get(query_string)
    data_cbd = json.loads(resp.content)

    if data_cbd['found'] == 0:
        # Placeholder in both lists keeps them aligned with list_of_cbd.
        cbd_lat.append('NotFound')
        cbd_long.append('NotFound')
        print ("No Results")
        continue

    cbd_lat.append(data_cbd["results"][0]["LATITUDE"])
    cbd_long.append(data_cbd["results"][0]["LONGITUDE"])

    print (str(query_address)+",Lat: "+data_cbd['results'][0]['LATITUDE'] +" Long: "+data_cbd['results'][0]['LONGITUDE'])


In [None]:
# One row per CBD reference point with the coordinates returned for it.
cbd_location = pd.DataFrame(dict(latitude=cbd_lat, longitude=cbd_long))

In [None]:
# Coordinate columns for the flats and for the CBD reference points
list_of_lat, list_of_long = df_coordinates[0], df_coordinates[1]
cbd_lat, cbd_long = cbd_location['latitude'], cbd_location['longitude']
# Pair each latitude with its longitude as (lat, long) tuples
list_of_coordinates = list(zip(list_of_lat, list_of_long))
list_of_cbd_coordinates = list(zip(cbd_lat, cbd_long))

In [None]:


# Distance in metres from each flat to the closest CBD reference point.
list_of_dist_cbd = []
min_dist_cbd = []

for origin in list_of_coordinates:
    # Start a fresh distance list for every flat. Without the reset, distances from earlier
    # flats stay in the list and min() returns a running global minimum. The list of CBD
    # names (list_of_cbd) is left untouched.
    list_of_dist_cbd = [
        geodesic(origin, cbd_point).meters for cbd_point in list_of_cbd_coordinates
    ]
    min_dist_cbd.append(min(list_of_dist_cbd))

In [None]:
# NOTE(review): min_dist_cbd has one entry per row of df_coordinates; this assignment needs
# that to equal len(df_combined) with rows in the same order - confirm.
df_combined["Distance to CBD"] = min_dist_cbd

In [None]:
# Shopping mall names used as OneMap search terms; each mall is listed exactly once.
list_of_malls = [
    "100 AM",
    "313@Somerset",
    "Aperia",
    "Balestier Hill Shopping Centre",
    "Bugis Cube",
    "Bugis Junction",
    "Bugis+",
    "Capitol Piazza",
    "Cathay Cineleisure Orchard",
    "Clarke Quay Central",
    "The Centrepoint",
    "City Square Mall",
    "City Gate Mall",
    "CityLink Mall",
    "Duo",
    "Far East Plaza",
    "Funan",
    "Great World City",
    "HDB Hub",
    "Holland Village Shopping Mall",
    "ION Orchard",
    "Junction 8",
    "Knightsbridge",
    "Liat Towers",
    "Lucky Plaza",
    "Marina Bay Sands",
    "The Shoppes at Marina Bay Sands",
    "Marina Bay Link Mall",
    "Marina Square",
    "Millenia Walk",
    "Mustafa Shopping Centre",
    "Ngee Ann City",
    "Orchard Central",
    "Orchard Gateway",
    "Orchard Plaza",
    "Midpoint Orchard",
    "Palais Renaissance",
    "People's Park Centre",
    "People's Park Complex",
    "Plaza Singapura",
    "PoMo",
    "Raffles City",
    "Scotts Square",
    "Shaw House and Centre",
    "Sim Lim Square",
    "Singapore Shopping Centre",
    "The South Beach",
    "Square 2",
    "Sunshine Plaza",
    "Suntec City",
    "Tanglin Mall",
    "Tanjong Pagar Centre",
    "Tekka Centre",
    "The Adelphi",
    "The Paragon",
    "Tiong Bahru Plaza",
    "The Poiz",
    "Thomson Plaza",
    "United Square",
    "Thomson V",
    "Velocity@Novena Square",
    "Wheelock Place",
    "Wisma Atria",
    "Zhongshan Mall",
    "Bedok Mall",
    "Century Square",
    "Our Tampines Hub",
    "Changi City Point",
    "Downtown East",
    "Djitsun Mall Bedok",
    "Eastpoint Mall",
    "Jewel Changi Airport",
    "KINEX",
    "Katong Shopping Centre",
    "Katong Square",
    "Kallang Wave Mall",
    "Leisure Park Kallang",
    "i12 Katong",
    "Parkway Parade",
    "Paya Lebar Square",
    "Paya Lebar Quarter",
    "Roxy Square",
    "Singpost Centre",
    "Tampines 1",
    "Tampines Mall",
    "White Sands",
    "City Plaza",
    "Elias Mall",
    "Loyang Point",
    "888 Plaza",
    "Admiralty Place",
    "AMK Hub",
    "Canberra Plaza",
    "Causeway Point",
    "Woodlands Civic Centre",
    "Broadway Plaza",
    "Djitsun Mall",
    "Jubilee Square",
    "Junction Nine",
    "Marsiling Mall",
    "Northpoint City",
    "Sembawang Shopping Centre",
    "Sun Plaza",
    "Vista Point",
    "Wisteria Mall",
    "Woodlands Mart",
    "Woodlands North Plaza",
    "Waterway Point",
    "Compass One",
    "Hougang Mall",
    "Heartland Mall",
    "NEX",
    "Buangkok Square",
    "Greenwich V",
    "Hougang",
    "Hougang Green Shopping Mall",
    "Hougang Rivercourt",
    "myVillage At Serangoon Garden",
    "Northshore Plaza",
    "Oasis Terraces",
    "Punggol Plaza",
    "Rivervale Mall",
    "Rivervale Plaza",
    "The Seletar Mall",
    "Upper Serangoon Shopping Centre",
    "Beauty World Centre",
    "Beauty World Plaza",
    "Bukit Panjang Plaza",
    "Bukit Timah Plaza",
    "Fajar Shopping Centre",
    "Greenridge Shopping Centre",
    "Hillion Mall",
    "HillV2",
    "Junction 10",
    "Keat Hong Shopping Centre",
    "Limbang Shopping Centre",
    "Lot One",
    "Rail Mall",
    "Sunshine Place",
    "Teck Whye Shopping Centre",
    "West Mall",
    "Yew Tee Point",
    "Yew Tee Square",
    "VivoCity",
    "HarbourFront Centre",
    "Alexandra Retail Centre",
    "321 Clementi",
    "The Clementi Mall",
    "IMM",
    "JCube",
    "Jem",
    "Westgate",
    "Jurong Point",
    "Pioneer Mall",
    "The Star Vista",
    "Alexandra Central",
    "Anchorpoint",
    "OD Mall",
    "Boon Lay Shopping Centre",
    "Grantral Mall",
    "Fairprice Hub",
    "Gek Poh Shopping Centre",
    "Rochester Mall",
    "Taman Jurong Shopping Centre",
    "West Coast Plaza",
    "Queensway Shopping Centre",
]

In [None]:
# Geocode every mall through the OneMap search endpoint, keeping the first hit per name.
# NOTE(review): confirm this endpoint is still served by OneMap.
mall_lat = []
mall_long = []

for query_address in list_of_malls:
    # Send the name via `params` so requests percent-encodes it: names such as "Bugis+",
    # "313@Somerset" or "People's Park Centre" contain characters that are not taken
    # literally inside a hand-built query string. The timeout keeps one stalled call from
    # hanging the whole cell.
    resp = requests.get(
        'https://developers.onemap.sg/commonapi/search',
        params={'searchVal': str(query_address), 'returnGeom': 'Y', 'getAddrDetails': 'Y'},
        timeout=30,
    )
    resp.raise_for_status()

    data_mall=json.loads(resp.content)

    if data_mall['found'] != 0:
        mall_lat.append(data_mall["results"][0]["LATITUDE"])
        mall_long.append(data_mall["results"][0]["LONGITUDE"])

        print (str(query_address)+",Lat: "+data_mall['results'][0]['LATITUDE'] +" Long: "+data_mall['results'][0]['LONGITUDE'])

    else:
        # Placeholder in both lists keeps them aligned with list_of_malls.
        mall_lat.append('NotFound')
        mall_long.append('NotFound')
        print ("No Results ," + query_address)
# Author's note from an earlier run - no results for: Clarke Quay Central, OD Mall,
# City Gate Mall, Holland Village Shopping Mall, Mustafa Shopping Centre, PoMo,
# Shaw House and Centre.

In [None]:
# One row per mall: search term plus the latitude / longitude returned for it.
mall_location = pd.DataFrame(
    dict(mall=list_of_malls, latitude=mall_lat, longitude=mall_long)
)

In [None]:
# Flat coordinates as (lat, long) tuples, in df_coordinates row order.
list_of_lat = df_coordinates[0]
list_of_long = df_coordinates[1]
list_of_coordinates = list(zip(list_of_lat, list_of_long))

# Mall coordinates as (lat, long) tuples, built from the MALL columns (not the CBD ones).
# Malls the geocoder could not resolve carry the 'NotFound' placeholder, which is not a
# usable coordinate, so they are left out.
mall_found = mall_location[
    (mall_location['latitude'] != 'NotFound') & (mall_location['longitude'] != 'NotFound')
]
mall_lat = mall_found['latitude']
mall_long = mall_found['longitude']
list_of_mall_coordinates = list(zip(mall_lat, mall_long))

In [None]:
# Distance in metres from each flat to its nearest mall.
list_of_dist_mall = []
min_dist_mall = []

for origin in list_of_coordinates:
    list_of_dist_mall = [
        geodesic(origin, mall_point).meters for mall_point in list_of_mall_coordinates
    ]
    # Minimum over this flat's MALL distances (not the CBD distance list).
    min_dist_mall.append(min(list_of_dist_mall))

In [None]:
# NOTE(review): min_dist_mall has one entry per row of df_coordinates; this assignment needs
# that to equal len(df_combined) with rows in the same order - confirm.
df_combined["Distance to nearest mall"] = min_dist_mall