In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%reload_ext autoreload

In [3]:
%%html
<style>
  table {margin-left: 0 !important;}
</style>

In [4]:
import sys
sys.path.append('../')

In [5]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline

from utils.processing import *
from task2_recommender import * 


In [6]:
pd.set_option('display.max_columns', 100) 

## Load and Preprocess Data

In [7]:
def drop_cols_for_q2(df):
    drop_columns = [
        'address',
        'title',
        'listing_id',
        'property_name',
        'total_num_units',
        'available_unit_types',
        'property_details_url',
        'elevation',
        'tenure',
        'property_type',
        'floor_level',
        'furnishing',
        'built_year',
        'subzone', 
        'planning_area', 
        'region'
    ]
    return df.drop(columns=drop_columns)

In [8]:
df = pd.read_csv('../data/train.csv')
df = preprocess(df)

adfs = read_aux_csv('../data')
df = join_aux(df, adfs)

df_with_listing_info = df.copy()
df = drop_cols_for_q2(df)
df.head(1)

Unnamed: 0,num_beds,num_baths,size_sqft,lat,lng,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density
0,3.0,2.0,1184,1.2819,103.825948,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326


In [9]:
# Standardisation of all dimensions
pipe = Pipeline([('scaler', StandardScaler())])
X_transformed = pipe.fit_transform(df)

----------

## Top k Recommendations

In Task 2, we fit a K-Nearest Neighbour model to the dataset to retrieve listings that are similar to the input listing, and output them as recommendations. There are mainly 3 variations of the model in this notebook. Before we move on to explore the different variations of the model, feel free to refer to the task2_recommender.py file to look at the model class and the input parameters needed for better understanding. 

### 1. Algorithm and Metric Setting

In this setting, users have control over the type of algorithm and metric in the model. The following are some algorithms and metrics that users can specify in the model. Do note that the metric list is not exhaustive and more metrics can be found here: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.distance_metrics.html#sklearn.metrics.pairwise.distance_metrics 

**Algorithm**:
* ball_tree
* kd_tree
* brute
* auto (model will decide the most appropriate algorithm to use based on the values passed during the fit method)

**Metric**:
* euclidean
* cosine
* manhattan


Given the relatively small dataset (less than 20,000 records), we have used brute-force algorithm and opted for one of the more popular euclidean distance metric below. Feel free to change the algorithm and metric in the cell below. All available features in the above dataframe (df) are used in this model variation.

In [10]:
# Explore parameters here
row_idx = 0 # row index of listing user wants to find similar recommendations to
num_recommendations = 5 # number of recommendations user wants to receive
algorithm = 'brute'
metric = 'euclidean'

In [11]:
model1 = knn(df_with_listing_info, X_transformed, pipe)
reco1 = model1.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, algorithm=algorithm, metric=metric)
reco1

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
19103,970335,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,1013200.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
3500,264414,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,974400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
15107,786786,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,990,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,982700.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c


In [12]:
# We can try another example with a different listing (e.g. row_idx = 1000). 
# Since we only need to find the nearest neighbours with existing model, we can set refit_model = False.
row_idx = 1000
model1.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, refit_model=False, algorithm=algorithm, metric=metric)

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
18499,943201,4 bed condo for sale in the lilium,31 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1292,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2730000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
1515,170990,4 bed condo for sale in the lilium,29 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1291,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2791200.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
3495,264218,4 bed condo for sale in the lilium,31 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1291,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2812200.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
19022,967076,4 bed condo for sale in the lilium,31 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1291,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2602200.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
3465,262948,4 bed condo for sale in the lilium,29 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1915,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,3897900.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne


### 2. User-specified Features Setting

Each user has their own definitions as to what properties are considered similar. Some users consider prices or location as the topmost priority when looking for similar properties, others may place more importance on floor level or type of housing (private vs public). In the previous variation, all features are used during model fitting. In this variation, user can specify features in user_preferences below to receive customised recommendations based on features that they consider as most important when deciding if a property is similar to another. The KNN model is fitted with only these input features.

In [13]:
print('These are the available features that user can specify as their preferences: \n'
      '\n'
      f'{[x for x in df.columns]}')

These are the available features that user can specify as their preferences: 

['num_beds', 'num_baths', 'size_sqft', 'lat', 'lng', 'price', 'property_type_private', 'property_type_public', 'tenure_high_year', 'tenure_low_year', 'floor_level_ground', 'floor_level_high', 'floor_level_low', 'floor_level_mid', 'floor_level_penthouse', 'floor_level_top', 'furnishing_partial', 'furnishing_unfurnished', 'furnishing_unspecified', 'nearest_mrt_distance_in_km', 'line_cc', 'line_ce', 'line_cg', 'line_dt', 'line_ew', 'line_ne', 'line_ns', 'line_te', 'nearest_pri_sch_distance_in_km', 'nearest_gep_pri_sch_distance_in_km', 'gep_pri_sch_within_1km', 'gep_pri_sch_within_1km_2km', 'gep_pri_sch_outside_2km', 'pri_sch_within_500m', 'pri_sch_outside_500m', 'nearest_com_centre_distance_in_km', 'cc_type_BN', 'cc_type_CR', 'cc_type_IEBP', 'cc_type_IEPB', 'cc_type_IHL', 'nearest_mall_distance_in_km', 'area_size', 'population', 'density']


In [14]:
# Explore parameters here
row_idx = 0
num_recommendations = 5
algorithm = 'brute'
metric = 'euclidean'

# New parameter in this variation
user_preferences = ['price', 'property_type_private', 'property_type_public', 'line_ew', 'size_sqft'] # feature names that user can specify as their preferences

**DO NOT NEED TO DO ANYTHING HERE**

Get feature index based on feature names indicated by user

In [15]:
feature_dict = dict(zip([x for x in df.columns], range(len(df.columns))))
feature_idx = []
for col in user_preferences:
    feature_idx.append(feature_dict[col])

In [16]:
model2 = knn(df_with_listing_info, X_transformed, pipe)
reco2 = model2.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, feature_idx=feature_idx, refit_model=True, algorithm=algorithm, metric=metric)
reco2[['listing_id', 'title', 'address', 'property_name'] + user_preferences]

Unnamed: 0,listing_id,title,address,property_name,price,property_type_private,property_type_public,line_ew,size_sqft
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,1081500.0,0,1,1,1184
2580,221487,hdb flat for sale in 55 strathmore avenue,alexandra / commonwealth (d3),hdb-queenstown,1102500.0,0,1,1,1184
13875,730422,hdb flat for sale in 55 strathmore avenue,alexandra / commonwealth (d3),hdb-queenstown,1102500.0,0,1,1,1184
1360,164375,hdb flat for sale in 48 strathmore avenue,alexandra / commonwealth (d3),forfar heights,1047900.0,0,1,1,1184
16865,867529,hdb flat for sale in 3d upper boon keng road,eunos / geylang / paya lebar (d14),kallang heights,1037400.0,0,1,1,1184


In [17]:
# We can try another example with a different listing (e.g. row_idx = 1000). 
# Since we only need to find the nearest neighbours with existing model, we can set refit_model = False.
row_idx = 1000
model2.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, feature_idx=feature_idx, refit_model=False, algorithm=algorithm, metric=metric) \
    [['listing_id', 'title', 'address', 'property_name', 'property_type_private', 'price'] + user_preferences]

Unnamed: 0,listing_id,title,address,property_name,property_type_private,price,price.1,property_type_private.1,property_type_public,line_ew,size_sqft
9526,536912,4 bed condo for sale in lentor modern,lentor central,lentor modern,1,2730000.0,2730000.0,1,0,0,1352
9470,533951,3 bed condo for sale in sunstone hill,250n pasir panjang road,sunstone hill,1,2754000.0,2754000.0,1,0,0,1356
9337,528211,4 bed condo for sale in piccadilly grand / pic...,1 northumberland road,piccadilly grand / piccadilly galleria,1,2717400.0,2717400.0,1,0,0,1378
14672,767061,2 bed condo for sale in waterscape at cavenagh,65c cavenagh road,waterscape at cavenagh,1,2709000.0,2709000.0,1,0,0,1324
5505,350218,4 bed condo for sale in caribbean at keppel bay,12 keppel bay drive,caribbean at keppel bay,1,2782500.0,2782500.0,1,0,0,1335


### 3. Controlled-Randomness Setting

Sometimes, users would like to rely on the recommender system to explore recommendations that are not so boring or expected. This means that we may not always want to recommend listings that are too similar to what they have input. In this variation, we introduce a controlled-randomness element in the recommendations users receive. 

The aim is to recommend a listing that is not obviously very similar to the input listing, nor should it be completely random and unsuitable to the user's profile/preferences. Hence, in this variation, users will receive some recommendations that are nearest neighbours to the input listing, and some recommendations that are randomly selected from neighbours that are further (but not too far). This controlled randomness element is affected by these 2 parameters: max_k and degree_of_randomisation.

* max_k: Upper limit of number of nearest neighbours of input listing that the user can receive
* degree_of_randomisation: Between 0 to 1. Determines how many of the recommendations will come from further neighbours. The higher the value, the more recommendations come from further neighbours.

Example:
* num_recommendations = 10
* max_k = 50
* degree_of_randomisation = 0.4

User will receive 10 recommendations in total. Out of the 10 recommendations, 4 recommendations (0.4 * 10) will come from random sampling of neighbours that are outside of top 10 but within top 50 (max_k) nearest neighbours. The remaining 6 recommendations will come from the 6 nearest neighbours. 


In [18]:
# Parameters
row_idx = 0
num_recommendations = 5
algorithm = 'brute'
metric = 'euclidean'

# New parameters in this variation
max_k = 50
degree_of_randomisation = 0.5

In [19]:
model3 = knn(df_with_listing_info, X_transformed, pipe)
reco3 = model3.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, 
                                       max_k=max_k, degree_of_randomisation=degree_of_randomisation, 
                                       refit_model=True, algorithm=algorithm, metric=metric)
reco3

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
2982,239845,hdb flat for sale in 25a jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2008.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.283496,103.826046,0,tiong bahru station,bukit merah,945000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.318191,0,0,0,0,1,0,0,0,0.083865,3.248113,0,0,1,1,0,2.452032,1,0,0,0,0,0.349091,0.3508,15120,43101.482326,c
98,104837,hdb flat for sale in 101 henderson crescent,alexandra / commonwealth (d3),hdb-bukit merah,property_type_public,tenure_low_year,1970.0,3.0,1.0,792,,unspecified,"studio, 1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/hdbbukit-merah...,1.289096,103.822009,0,henderson hill,bukit merah,462000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.588723,0,0,0,0,1,0,0,0,0.333798,3.436883,0,0,1,1,0,1.971473,1,0,0,0,0,0.665455,0.5953,13320,22375.272972,c
10201,566091,hdb flat for sale in 124a bukit merah view,alexandra / commonwealth (d3),hdb-bukit merah,property_type_public,tenure_low_year,1996.0,3.0,2.0,1173,,unspecified,"studio, 1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/hdbbukit-merah...,1.28598,103.823017,0,henderson hill,bukit merah,839000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.443269,0,0,0,0,1,0,0,0,0.391265,3.439137,0,0,1,1,0,2.081823,1,0,0,0,0,0.481622,0.5953,13320,22375.272972,c


In [20]:
# We can try another example with a different listing (e.g. row_idx = 1000). 
# Since we only need to find the nearest neighbours with existing model, we can set refit_model = False.
row_idx = 1000
model3.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, max_k=max_k, 
                               degree_of_randomisation=degree_of_randomisation, refit_model=False, algorithm=algorithm, metric=metric)

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
18499,943201,4 bed condo for sale in the lilium,31 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1292,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2730000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
1515,170990,4 bed condo for sale in the lilium,29 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,4.0,4.0,1291,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2791200.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
2688,226400,3 bed condo for sale in the lilium,33 how sun road,the lilium,property_type_private,tenure_high_year,2021.0,3.0,3.0,1227,,unspecified,"studio, 2, 3, 4, 5 br",80.0,https://www.99.co/singapore/condos-apartments/...,1.345552,103.881514,0,upper paya lebar,serangoon,2684600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.370288,1,0,0,0,0,0,0,0,0.61717,3.166211,0,0,1,0,1,1.205022,1,0,0,0,0,0.949835,0.8986,16920,18829.290007,ne
14619,764649,4 bed house for sale in paya lebar gardens,upper paya lebar road,paya lebar gardens,property_type_private,tenure_high_year,1963.0,4.0,4.0,1850,,unspecified,"3, 4, 5 br",,https://www.99.co/singapore/houses/paya-lebar-...,1.344092,103.882801,0,tai seng,hougang,3360000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.341059,1,0,0,0,0,0,0,0,0.644079,3.35946,0,0,1,0,1,1.4134,1,0,0,0,0,1.151982,1.7962,13830,7699.588019,ne
13371,710306,2 bed condo for sale in paradise palms,505 dunman road,paradise palms,property_type_private,tenure_high_year,2003.0,2.0,2.0,915,,unspecified,"2, 3, 4 br",56.0,https://www.99.co/singapore/condos-apartments/...,1.30915,103.892274,0,geylang east,geylang,1590800.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.362947,1,0,0,0,0,0,0,0,0.510165,2.176674,0,0,1,0,1,1.011564,1,0,0,0,0,0.631921,2.5784,29920,11604.095563,c


---------------------

## Result Evaluation

In this section, we will evaluate
1. results of the 3 model variations implemented in the previous section
2. results of using different algorithms and metrics in the KNN model

### 1. Evaluation of all 3 Model Variations

In this sub-section, we compare and evaluate the top 5 recommendations from each of the above model variation. We will use row index 0 as the input listing. 

In [22]:
# row_idx = 0
# We first look at the information of the input listing
df_with_listing_info.iloc[[0]]

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
0,100043,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c


In [23]:
# model1
reco1

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
19103,970335,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,1013200.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
3500,264414,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,974400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
15107,786786,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,990,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,982700.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c


**Model 1**: As recommendations from model 1 come from the 5 nearest neighbours of input listing in the euclidean space of all features, the recommendations are naturally quite similar to the input listing. All of them share the same address with the input listing at Alexandra / Commonwealth. Although some recommendations have different property names, but they are all located in the vicnity of the input listing with less than 5 minutes walking distance. Furthermore, in these recommendations, all categorical variables that are used in the model share similar value with the input listing (e.g. pri_sch_within_500m, line_ew, property_type_public, etc.). Even for continuous variables (e.g. price, size_sqft, nearest_mrt_distance_in_km), the values are largely similar to those in the input listing. 

In [24]:
# model2
reco2[['listing_id', 'title', 'address', 'property_name', 'nearest_pri_sch_distance_in_km', 'nearest_com_centre_distance_in_km'] + user_preferences]

Unnamed: 0,listing_id,title,address,property_name,nearest_pri_sch_distance_in_km,nearest_com_centre_distance_in_km,price,property_type_private,property_type_public,line_ew,size_sqft
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,0.261718,2.47968,1081500.0,0,1,1,1184
2580,221487,hdb flat for sale in 55 strathmore avenue,alexandra / commonwealth (d3),hdb-queenstown,0.346509,0.870604,1102500.0,0,1,1,1184
13875,730422,hdb flat for sale in 55 strathmore avenue,alexandra / commonwealth (d3),hdb-queenstown,0.346509,0.870604,1102500.0,0,1,1,1184
1360,164375,hdb flat for sale in 48 strathmore avenue,alexandra / commonwealth (d3),forfar heights,0.246214,0.803303,1047900.0,0,1,1,1184
16865,867529,hdb flat for sale in 3d upper boon keng road,eunos / geylang / paya lebar (d14),kallang heights,1.304347,2.374375,1037400.0,0,1,1,1184


**Model 2**: In model 2, user was able to specify the features that they would like their recommendations to be based on. To recap, these are the preferences that user has stated: price, property_type, MRT east-west line, size sqft. As such, in comparison to model 1's recommendations, model 2's recommendations are more similar to input listing in these features. For example, some of the recommendations in model 2 are not as near to Membina 118 (property name of input listing) as the recommendations in model 1. In fact, those listings at Kallang Heights and Fofar Heights are more than 10 minutes drive from Membina 118. 

| Features | Input Listing        | Model 1 (Mean)    | Model2 (Mean)  |
|----------|----------------------|---------------------|------------------|
|price     | 1,081,500 | 996,840 | 1,074,360 |
|size_sqft | 1184 | 1102.2 | 1184 |
|line_ew | 1 | 1 | 1 |
|property_type_public | 1 | 1 | 1 
|nearest_pri_sch_distance_in_km| 0.261718 | 0.297758 | 0.501059 |
|nearest_com_centre_distance_in_km| 2.47968 | 2.58746 | 1.479713 |

In the above table, the first 4 rows are features that are stated by user in model 2 as input parameters. As described above, the table show that for these 4 features (especially for continuous features like price and size_sqft), the average value in model2's recommendations are closer to the actual feature values in input listing. The opposite is true too. In the last 2 rows, these features were not used in model 2 and indeed, the average value of these features in model 2's recommendations are much further from the actual values in input listing. Whereas model 1, which considers these features, still has average values that are quite similar to the actual values in input listing.

In [25]:
# model3
reco3

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
2982,239845,hdb flat for sale in 25a jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2008.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.283496,103.826046,0,tiong bahru station,bukit merah,945000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.318191,0,0,0,0,1,0,0,0,0.083865,3.248113,0,0,1,1,0,2.452032,1,0,0,0,0,0.349091,0.3508,15120,43101.482326,c
98,104837,hdb flat for sale in 101 henderson crescent,alexandra / commonwealth (d3),hdb-bukit merah,property_type_public,tenure_low_year,1970.0,3.0,1.0,792,,unspecified,"studio, 1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/hdbbukit-merah...,1.289096,103.822009,0,henderson hill,bukit merah,462000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.588723,0,0,0,0,1,0,0,0,0.333798,3.436883,0,0,1,1,0,1.971473,1,0,0,0,0,0.665455,0.5953,13320,22375.272972,c
10201,566091,hdb flat for sale in 124a bukit merah view,alexandra / commonwealth (d3),hdb-bukit merah,property_type_public,tenure_low_year,1996.0,3.0,2.0,1173,,unspecified,"studio, 1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/hdbbukit-merah...,1.28598,103.823017,0,henderson hill,bukit merah,839000.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.443269,0,0,0,0,1,0,0,0,0.391265,3.439137,0,0,1,1,0,2.081823,1,0,0,0,0,0.481622,0.5953,13320,22375.272972,c


**Model 3**: In model 3, we have set max_k = 50 and degree_of_randomisation = 0.5. Therefore, the first 2 recommendations (listing 776886 and 288001) overlapped with top 2 recommendations in model 1. Whereas the remaining 3 recommendations (listing 563769, 190807, 801055) are randomly selected listings that are between the 6th to 50th closest neighbours. In the table below, we compare the values of some of the features in these 2 types of recommendations

| Features | Input Listing        | Model 3 - 1st & 2nd recommendations (Mean)    | Model 3 - 3rd to 5th recommendations (Mean)  |
|----------|----------------------|---------------------|------------------|
|price     | 1,081,500 | 1,006,950 | 462,000 |
|size_sqft | 1184 | 1076.5 | 791 |
|num_beds  | 3 | 3 | 2.5|
|line_ew | 1 | 1 | 1 |
|property_type_public | 1 | 1 | 1 
|nearest_mall_distance_in_km| 0.51927 | 0.51982 | 0.89821 |
|nearest_pri_sch_distance_in_km| 0.261718 | 0.254905 | 0.31334 |
|nearest_com_centre_distance_in_km| 2.47968 | 2.43010 | 2.09488 |

Based on the table, the 3 recommendations that are randomly sampled in a larger neighbourhood of the input listing are obviously less similar to the input listing than the first 2 recommendations. 

**Which of the 3 variations performs best?**

Now that we understand how each variation and the confirmed the results shown by each variation, we now move on to evaluate which model works best. The answer largely depends on the intention of the user. If the intention of a user is to casually browse and explore properties, as perhaps the user has only very recently started their journey in looking for properties or has no specific preferences at the earlier stage, then model 1 is likely to be suitable. If a user has clear preferences, and aims to minimise time wastage while looking for properties with specific characteristics, then model 2 is most suitable. If an experienced user frequents the property listings platform regularly and finds it difficult to look for listings that interest them, that may mean that the user may need to expand their scope of search. In this scenario, model 3 would be more suitable in achieving this intention. 

### 2. Evaluation of Algorithms and Metrics in KNN Model

In the previous section, we introduced the various algorithms and metrics that can be used in KNN model. In the above reco1, we have opted for brute-force algorithm and euclidean distance metric. In this sub-section, we will compare the recommendations by varying the choice of algorithms and metrics. 

In [26]:
# Parameters
metric = 'euclidean'
row_idx = 0
num_recommendations = 5

In [27]:
# Init model
model4 = knn(df_with_listing_info, X_transformed, pipe)

In [28]:
%%time
# Algorithm: Ball Tree
algorithm = 'ball_tree'

ball_tree_reco = model4.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, refit_model=True, algorithm=algorithm, metric=metric)
ball_tree_reco

CPU times: user 55.9 ms, sys: 3.04 ms, total: 59 ms
Wall time: 58.3 ms


Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
19103,970335,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,1013200.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
3500,264414,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,974400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
15107,786786,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,990,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,982700.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c


In [29]:
%%time
# Algorithm: KD Tree
algorithm = 'kd_tree'

kd_tree_reco = model4.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, refit_model=True, algorithm=algorithm, metric=metric)
kd_tree_reco

CPU times: user 54.8 ms, sys: 4.03 ms, total: 58.8 ms
Wall time: 58.4 ms


Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
19103,970335,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,1013200.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
3500,264414,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,974400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
15107,786786,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,990,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,982700.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c


In [30]:
%%time
# Algorithm: Brute force
algorithm = 'brute'

brute_reco = model4.get_top_recommendations(df.iloc[[row_idx]], k=num_recommendations, refit_model=True, algorithm=algorithm, metric=metric)
brute_reco

CPU times: user 57.7 ms, sys: 8.17 ms, total: 65.9 ms
Wall time: 11.6 ms


Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IEPB,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
14895,776886,hdb flat for sale in 118b jalan membina,alexandra / commonwealth (d3),membina 118,property_type_public,tenure_low_year,2003.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/membina-118-de...,1.2819,103.825948,0,tiong bahru station,bukit merah,1081500.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.49163,0,0,0,0,1,0,0,0,0.261718,3.344506,0,0,1,1,0,2.47968,1,0,0,0,0,0.519278,0.3508,15120,43101.482326,c
4094,288001,hdb flat for sale in 26d jalan membina,alexandra / commonwealth (d3),membina court,property_type_public,tenure_low_year,2009.0,3.0,2.0,969,,unspecified,"1, 2, 3, 4, 5 br",,https://www.99.co/singapore/hdb/membina-court-...,1.282239,103.825114,0,tiong bahru station,bukit merah,932400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.487173,0,0,0,0,1,0,0,0,0.248094,3.406006,0,0,1,1,0,2.380521,1,0,0,0,0,0.520372,0.3508,15120,43101.482326,c
19103,970335,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,1013200.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
3500,264414,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,1184,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,974400.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c
15107,786786,hdb flat for sale in 119d kim tian road,alexandra / commonwealth (d3),kim tian 119,property_type_public,tenure_low_year,2002.0,3.0,2.0,990,,unspecified,3 br,,https://www.99.co/singapore/hdb/kim-tian-119-d...,1.281975,103.827937,0,tiong bahru station,bukit merah,982700.0,0,1,0,1,0,0,0,0,0,0,0,0,1,0.480625,0,0,0,0,1,0,0,0,0.326326,3.151773,0,0,1,1,0,2.692371,1,0,0,0,0,0.492161,0.3508,15120,43101.482326,c


**Comparison of algorithms**: For the input listing row index 0, all algorithms return the same set of recommendations, i.e., the 5 nearest neighbours are exactly similar. 
~ more to add on