In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%reload_ext autoreload

In [3]:
%%html
<style>
  table {margin-left: 0 !important;}
</style>

In [4]:
import sys
sys.path.append('../')

In [5]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.impute import KNNImputer

from utils.processing import *
from task2_recommender import * 


In [6]:
pd.set_option('display.max_columns', 100) 

## Load and Preprocess Data

In [7]:
def drop_cols_for_q2(df):
    """Return a copy of ``df`` without the columns excluded from the Task 2 feature set.

    The removed columns are identifiers, free-text fields, the raw
    (un-encoded) categorical columns, the location labels and ``elevation``.
    The input frame is not modified.

    Args:
        df: listing DataFrame that contains every column named below.

    Returns:
        A new DataFrame with the listed columns removed.

    Raises:
        KeyError: if any of the listed columns is missing from ``df``.
    """
    excluded = (
        # identifiers and free-text descriptors
        'address',
        'title',
        'listing_id',
        'property_name',
        'property_details_url',
        'available_unit_types',
        'elevation',
        # raw categorical columns
        'tenure',
        'property_type',
        'floor_level',
        'furnishing',
        # location labels
        'subzone',
        'planning_area',
        'region',
    )
    return df.drop(columns=list(excluded))

In [8]:
# Load the training listings and run them through the project's preprocessing helper
# (preprocess is presumably provided by utils.processing via the wildcard import - verify).
df = pd.read_csv('../data/train.csv')
df = preprocess(df)

# Read the auxiliary CSVs from the data folder and join them onto the listings.
adfs = read_aux_csv('../data')
df = join_aux(df, adfs)

# Keep a full copy (titles, addresses, etc.) that is later handed to the recommender
# for display, then strip the columns that are excluded from the Task 2 feature set.
df_with_listing_info = df.copy()
df = drop_cols_for_q2(df)

In [9]:
def transform_data(df):
    """Standardise every column of ``df`` and fill missing values by KNN imputation.

    A two-step scikit-learn pipeline is fitted on ``df`` itself:

    1. ``StandardScaler`` rescales each column (missing values are ignored
       when fitting and left as NaN in the output).
    2. ``KNNImputer`` (default settings) fills the remaining NaNs using the
       already-scaled features.

    Args:
        df: DataFrame (or array-like) containing only numeric columns.

    Returns:
        A DataFrame of the transformed values. Columns are named by the
        pipeline's ``get_feature_names_out()``. When ``df`` has an index, it
        is carried over so each transformed row stays aligned with its
        source row; otherwise a default RangeIndex is used.
    """
    pipe = Pipeline(
        [
            ('standard_scaler', StandardScaler()),
            ('knn', KNNImputer()),
        ]
    )
    transformed = pipe.fit_transform(df)

    # Preserve the caller's row labels; the pipeline returns a bare ndarray, so without
    # this the result would silently fall back to 0..n-1 and could lose alignment
    # with the original frame. Array-like inputs have no index, hence the getattr.
    return pd.DataFrame(
        data=transformed,
        columns=pipe.get_feature_names_out(),
        index=getattr(df, 'index', None),
    )


In [10]:
# Preprocessing
df_transformed = transform_data(df)
X_transformed = df_transformed.values

In [11]:
df_transformed.head()

Unnamed: 0,built_year,num_beds,num_baths,size_sqft,total_num_units,lat,lng,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density
0,-1.437729,-0.096413,-0.436932,-0.302759,-0.753828,1.602844,-0.109367,-0.539401,-1.320956,1.477317,-0.718749,-1.160403,-0.095482,-0.302025,-0.192802,-0.190715,-0.069559,-0.076103,-0.417402,-0.332735,0.614818,-0.387375,-0.524097,-0.052798,-0.039887,-0.546662,-0.514928,-0.476923,1.622855,-0.211686,-0.870002,0.992342,-0.402533,-0.546126,0.764998,1.27011,-1.27011,1.332502,-0.87939,2.087855,-0.596672,-0.360066,-0.322523,-0.162035,0.851992,1.39252
1,-1.185214,0.683956,-0.436932,-0.074573,-0.878901,0.703158,0.60124,-0.436656,-1.320956,1.477317,-0.718749,0.86177,-0.095482,-0.302025,-0.192802,-0.190715,-0.069559,-0.076103,-0.417402,-0.332735,0.614818,1.698473,-0.524097,-0.052798,-0.039887,-0.546662,-0.514928,2.096775,-0.616198,-0.211686,-1.154203,-1.187419,2.484269,-0.546126,-1.307193,1.27011,-1.27011,0.454867,1.137151,-0.47896,-0.596672,-0.360066,-0.451613,-0.41023,-0.264521,0.704546
2,0.70865,0.683956,2.279106,0.667033,-0.928348,-0.885707,0.974276,1.163514,0.757028,-0.676903,1.391306,-1.160403,-0.095482,-0.302025,5.186681,-0.190715,-0.069559,-0.076103,2.39577,-0.332735,-1.626497,0.953139,1.908045,-0.052798,-0.039887,-0.546662,-0.514928,-0.476923,-0.616198,-0.211686,0.2766,-0.559189,-0.402533,1.83108,-1.307193,-0.787333,0.787333,0.238947,1.137151,-0.47896,-0.596672,-0.360066,0.058179,-0.021297,-0.51754,-0.752887
3,0.771778,-0.096413,-0.436932,-0.38064,0.764496,-0.593195,-0.736686,-0.088274,0.757028,-0.676903,1.391306,-1.160403,-0.095482,-0.302025,-0.192802,-0.190715,-0.069559,-0.076103,2.39577,-0.332735,-1.626497,-0.114437,1.908045,-0.052798,-0.039887,-0.546662,-0.514928,-0.476923,-0.616198,-0.211686,0.646883,-0.840072,-0.402533,1.83108,-1.307193,-0.787333,0.787333,-0.290565,1.137151,-0.47896,-0.596672,-0.360066,0.213661,-0.4579,-0.678862,-0.316117
4,0.961165,-0.876783,-1.115941,-0.492749,-0.070292,-1.419758,0.009711,-0.272442,0.757028,-0.676903,-0.718749,0.86177,-0.095482,-0.302025,-0.192802,-0.190715,-0.069559,-0.076103,-0.417402,-0.332735,0.614818,-0.752526,-0.524097,-0.052798,-0.039887,-0.546662,1.942018,-0.476923,-0.616198,-0.211686,-0.517673,-0.246889,-0.402533,-0.546126,0.764998,1.27011,-1.27011,-0.043654,-0.87939,2.087855,-0.596672,-0.360066,-0.680793,-0.630405,-0.937824,-1.175036


----------

## Top k Recommendations

In Task 2, we fit a K-Nearest Neighbour model to the dataset to retrieve listings that are similar to the input listing, and output them as recommendations. There are 4 variations of the recommender system in this notebook. Before we move on to explore the different variations of the model, feel free to refer to the task2_recommender.py file to look at the model class and the input parameters needed for better understanding. 

### 1. Default Setting

In this default setting, we have used all available features in the above dataframe (df) as dimensions in the vector space where the KNN algorithm is employed. However, users have control over the type of algorithm and metric in the model. The following are some algorithms and metrics that users can specify in the model. Do note that the metric list is not exhaustive and more metrics can be found here: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.distance_metrics.html#sklearn.metrics.pairwise.distance_metrics 

**Algorithm**:
* ball_tree
* kd_tree
* brute
* auto (model will decide the most appropriate algorithm to use based on the values passed during the fit method)

**Metric**:
* euclidean
* cosine
* manhattan

By default, the KNN algorithm in the sklearn package decides which of the following algorithms is most appropriate for the neighbour search: 1. BallTree, 2. KDTree, or 3. brute force. It uses Euclidean distance as the default distance metric.

In [12]:
# Explore parameters here
row_idx = 20141 # row index of listing user wants to find similar recommendations to
num_recommendations = 5 # number of recommendations user wants to receive
algorithm = 'brute'
metric = 'euclidean'

In [13]:
# Variation 1: build the recommender from the full listing frame and the transformed
# feature matrix, then request recommendations for the chosen row using all features.
model1 = knn(df_with_listing_info, X_transformed)
reco1 = model1.get_top_recommendations(df_with_listing_info.iloc[[row_idx]], k=num_recommendations, algorithm=algorithm, metric=metric)
reco1

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
3009,862265,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,883.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1680000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
10318,360279,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1560300.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
14926,347958,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1502600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
7196,546727,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1419600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
8155,636916,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1419600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c


### 2. Similar Property Setting

In this second variation, the algorithm works the same way as in the first variation, except that users can pass an additional parameter to the `get_top_recommendations` function. This parameter, `return_different_property`, accepts a boolean value and defaults to `False`. Setting it to `True` ensures that the k recommendations returned are the k nearest neighbours of the input listing after excluding neighbours that belong to the same property as the input listing. This variation is known as the “similar property setting” because the recommender system recommends properties that are similar to the input listing, but not the very same property.

In [15]:
# Variation 2: same inputs as variation 1, but return_different_property=True asks the
# recommender to leave out listings that belong to the same property as the input listing.
model2 = knn(df_with_listing_info, X_transformed)
reco2 = model2.get_top_recommendations(df_with_listing_info.iloc[[row_idx]], k=num_recommendations, return_different_property=True, 
                                       algorithm=algorithm, metric=metric)
reco2

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
15995,118782,2 bed condo for sale in 8 farrer suites,8 sing joo walk,8 farrer suites,property_type_private,tenure_high_year,2015.0,2.0,2.0,893.0,,unspecified,"studio, 1, 2, 3 br",34.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,1687800.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c
18775,896559,1 bed condo for sale in jool suites,2 sing joo walk,jool suites,property_type_private,tenure_high_year,2014.0,1.0,1.0,388.0,,unspecified,"1, 2 br",52.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,756000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c
16725,282431,1 bed condo for sale in vibes @ upper serangoon,488 upper serangoon road,vibes @ upper serangoon,property_type_private,tenure_high_year,2016.0,1.0,1.0,398.0,,unspecified,"1, 2 br",60.0,https://www.99.co/singapore/condos-apartments/...,1.327089,103.868218,0,geylang bahru,kallang,661500.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.493582,0,0,0,0,0,1,0,0,0.570186,3.787752,0,0,1,0,1,2.305314,1,0,0,0,0.503352,0.7355,11280,15336.505778,c
9789,893611,1 bed condo for sale in sunshine lodge,510 upper serangoon road,sunshine lodge,property_type_private,tenure_high_year,1998.0,1.0,1.0,592.0,,unspecified,"1, 3, 4 br",30.0,https://www.99.co/singapore/condos-apartments/...,1.327089,103.868218,0,geylang bahru,kallang,879900.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.493582,0,0,0,0,0,1,0,0,0.570186,3.787752,0,0,1,0,1,2.305314,1,0,0,0,0.503352,0.7355,11280,15336.505778,c
16504,343022,2 bed condo for sale in 8 farrer suites,8 sing joo walk,8 farrer suites,property_type_private,tenure_high_year,2015.0,2.0,3.0,560.0,,fully,"studio, 1, 2, 3 br",34.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,1039500.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c


### 3. User-specified Features Setting

Each user has their own definition of what makes two properties similar. Some users treat price or location as the top priority when looking for similar properties, while others place more importance on floor level or type of housing (private vs. public). In the previous variations, all features are used during model fitting. In this variation, users can list features in `user_preferences` below to receive customised recommendations based on the features they consider most important when deciding whether one property is similar to another. The KNN model is fitted with only these input features.

In [16]:
# List every column of the modelling frame; these are the names accepted in user_preferences.
print('These are the available features that user can specify as their preferences: \n'
      '\n'
      f'{[x for x in df.columns]}')

These are the available features that user can specify as their preferences: 

['built_year', 'num_beds', 'num_baths', 'size_sqft', 'total_num_units', 'lat', 'lng', 'price', 'property_type_private', 'property_type_public', 'tenure_high_year', 'tenure_low_year', 'floor_level_ground', 'floor_level_high', 'floor_level_low', 'floor_level_mid', 'floor_level_penthouse', 'floor_level_top', 'furnishing_partial', 'furnishing_unfurnished', 'furnishing_unspecified', 'nearest_mrt_distance_in_km', 'line_cc', 'line_ce', 'line_cg', 'line_dt', 'line_ew', 'line_ne', 'line_ns', 'line_te', 'nearest_pri_sch_distance_in_km', 'nearest_gep_pri_sch_distance_in_km', 'gep_pri_sch_within_1km', 'gep_pri_sch_within_1km_2km', 'gep_pri_sch_outside_2km', 'pri_sch_within_500m', 'pri_sch_outside_500m', 'nearest_com_centre_distance_in_km', 'cc_type_BN', 'cc_type_CR', 'cc_type_IEBP', 'cc_type_IHL', 'nearest_mall_distance_in_km', 'area_size', 'population', 'density']


In [17]:
# Explore parameters here
row_idx = 20141
num_recommendations = 5
algorithm = 'brute'
metric = 'euclidean'

# New parameter in this variation
user_preferences = ['price', 'nearest_pri_sch_distance_in_km', 'tenure_high_year', 'line_ne'] # feature names that user can specify as their preferences

**NO ACTION NEEDED HERE**

Get feature index based on feature names indicated by user

In [18]:
# Map each feature name to its column position in df, then translate the user's
# chosen feature names into those positions.
# NOTE(review): assumes X_transformed keeps the same column order as df - confirm.
feature_dict = {name: position for position, name in enumerate(df.columns)}
feature_idx = [feature_dict[name] for name in user_preferences]

In [19]:
# Variation 3: pass feature_idx so that only the user-selected feature columns are used,
# and keep return_different_property=True as in variation 2.
model3 = knn(df_with_listing_info, X_transformed)
reco3 = model3.get_top_recommendations(df_with_listing_info.iloc[[row_idx]], k=num_recommendations, feature_idx=feature_idx,
                                       return_different_property=True, algorithm=algorithm, metric=metric)
# Show only the identifying columns plus the features the user asked to match on.
reco3[['listing_id', 'title', 'address', 'property_name'] + user_preferences]

Unnamed: 0,listing_id,title,address,property_name,price,nearest_pri_sch_distance_in_km,tenure_high_year,line_ne
9740,473098,2 bed condo for sale in parc sophia,10 adis road,parc sophia,1417500.0,0.669447,1,1
1987,341828,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,1942500.0,0.582438,1,1
2202,188481,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,1942500.0,0.582438,1,1
9186,499790,3 bed condo for sale in nouvelle park,131 poh huat road west,nouvelle park,2079000.0,0.743337,1,1
15396,195498,3 bed condo for sale in the florentine,70 florence road,the florentine,1680000.0,0.734674,1,1


### 4. Controlled-Randomness Setting

Sometimes, users would like to rely on the recommender system to explore recommendations that are not so boring or expected. This means that we may not always want to recommend listings that are too similar to what they have input. In this variation, we introduce a controlled-randomness element in the recommendations users receive. 

The aim is to recommend a listing that is not obviously very similar to the input listing, nor should it be completely random and unsuitable to the user's profile/preferences. Hence, in this variation, users will receive some recommendations that are nearest neighbours to the input listing, and some recommendations that are randomly selected from neighbours that are further (but not too far). This controlled randomness element is affected by these 2 parameters: max_k and degree_of_randomisation.

* max_k: Upper limit on how far down the ranked list of nearest neighbours a recommendation may come from (e.g. with max_k = 50, only the 50 nearest neighbours of the input listing are eligible)
* degree_of_randomisation: A value between 0 and 1 that determines how many of the recommendations will come from further neighbours. The higher the value, the more recommendations come from further neighbours.

Example:
* num_recommendations = 10
* max_k = 50
* degree_of_randomisation = 0.4

User will receive 10 recommendations in total. Out of the 10 recommendations, 4 recommendations (0.4 * 10) will come from random sampling of neighbours that are outside of top 10 but within top 50 (max_k) nearest neighbours. The remaining 6 recommendations will come from the 6 nearest neighbours. 


In [20]:
# Parameters
row_idx = 20141
num_recommendations = 5
algorithm = 'brute'
metric = 'euclidean'

# New parameters in this variation
max_k = 100
degree_of_randomisation = 0.5

# Set random seed
np.random.seed(42)

In [21]:
# Variation 4: adds max_k and degree_of_randomisation on top of the variation 3 call.
# Note that feature_idx is reused from the user-specified features section above, so the
# neighbours here are still computed on the features listed in user_preferences.
model4 = knn(df_with_listing_info, X_transformed)
reco4 = model4.get_top_recommendations(df_with_listing_info.iloc[[row_idx]], k=num_recommendations, feature_idx=feature_idx,
                                       return_different_property=True, max_k=max_k, degree_of_randomisation=degree_of_randomisation, algorithm=algorithm, metric=metric)
reco4

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
9740,473098,2 bed condo for sale in parc sophia,10 adis road,parc sophia,property_type_private,tenure_high_year,2013.0,2.0,1.0,732.0,,partial,"1, 2 br",152.0,https://www.99.co/singapore/condos-apartments/...,1.301446,103.847551,0,mount emily,rochor,1417500.0,1,0,1,0,0,0,0,0,0,0,1,0,0,0.314673,1,0,0,0,0,1,1,0,0.669447,0.669447,1,0,0,0,1,0.672457,0,0,0,1,0.212993,0.194,1630,8402.061856,c
1987,341828,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,property_type_private,tenure_high_year,1997.0,3.0,2.0,1453.0,low,unfurnished,3 br,17.0,https://www.99.co/singapore/condos-apartments/...,1.317401,103.853037,0,kampong java,kallang,1942500.0,1,0,1,0,0,0,1,0,0,0,0,1,0,0.510789,0,0,0,0,0,1,0,0,0.582438,2.002171,0,0,1,0,1,1.110582,1,0,0,0,0.789777,1.052,10530,10009.505703,c
2202,188481,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,property_type_private,tenure_high_year,1997.0,3.0,2.0,1453.0,,unfurnished,3 br,17.0,https://www.99.co/singapore/condos-apartments/...,1.317401,103.853037,0,kampong java,kallang,1942500.0,1,0,1,0,0,0,0,0,0,0,0,1,0,0.510789,0,0,0,0,0,1,0,0,0.582438,2.002171,0,0,1,0,1,1.110582,1,0,0,0,0.789777,1.052,10530,10009.505703,c
14257,456825,3 bed condo for sale in trilive,111 tampines road,trilive,property_type_private,tenure_high_year,2018.0,3.0,2.0,883.0,high,partial,"studio, 1, 2, 3, 4 br",217.0,https://www.99.co/singapore/condos-apartments/...,1.361795,103.891552,0,lorong ah soo,hougang,1764000.0,1,0,1,0,0,1,0,0,0,0,1,0,0,0.743204,0,0,0,0,0,1,0,0,0.437168,2.26241,0,0,1,1,0,0.806184,0,0,1,0,0.339494,1.5155,32320,21326.294952,ne
10638,889617,2 bed condo for sale in mackenzie 88,88 mackenzie road,mackenzie 88,property_type_private,tenure_high_year,2009.0,2.0,2.0,849.0,,partial,"1, 2 br",55.0,https://www.99.co/singapore/condos-apartments/...,1.306466,103.847541,0,mackenzie,rochor,1575000.0,1,0,1,0,0,0,0,0,0,0,1,0,0,0.269969,0,0,0,1,0,1,0,0,0.780942,1.133781,0,1,0,0,1,1.196624,0,0,0,1,0.343036,0.0557,110,1974.86535,c


---------------------

## Result Evaluation

In this section, we will evaluate the top 5 recommendations from each of the 4 model variations implemented in the previous section, using row index 20141 as the input listing.

In [23]:
# We first look at the information of the input listing
row_idx = 20141
df_with_listing_info.iloc[[row_idx]]

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
20141,868680,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,883.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,2087400.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c


In [24]:
# model1
reco1

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
3009,862265,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,883.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1680000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
10318,360279,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1560300.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
14926,347958,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1502600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
7196,546727,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1419600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c
8155,636916,2 bed condo for sale in mori,223 guillemard road,mori,property_type_private,tenure_high_year,2026.0,2.0,2.0,710.0,,unspecified,"1, 2, 3, 4 br",137.0,https://www.99.co/singapore/condos-apartments/...,1.315948,103.857589,0,lavender,kallang,1419600.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.443603,0,0,0,0,0,1,0,0,0.660929,2.178475,0,0,1,0,1,1.641446,1,0,0,0,0.535185,0.7569,9690,12802.21958,c


**Model 1**: All 5 recommendations are different listings of 2-bed units in the same condo as the input listing (Mori). This is not surprising, as there are multiple listings of the same condo in the dataset, and therefore almost all features of the recommendations are identical to those of the input listing, such as latitude, longitude, nearest MRT distance, etc. The only differences between these 5 recommendations and the input listing are apartment-specific features like price and size, as different sellers ask for different selling prices and the sizes of different apartments in the same condo can still differ. This variation of the recommender system is most suitable for would-be homeowners who are set on buying a specific property that they are interested in. Since they are able to view listings from the same property with only slight apartment-specific differences, they can reach out to the sellers of the recommended listings to negotiate for the best deal. For example, if users are price-sensitive, they can easily find the lowest-priced listing among these recommendations.

In [25]:
# Model 2: display the recommendations obtained after excluding listings
# that belong to the same property as the input listing.
reco2

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price,property_type_private,property_type_public,tenure_high_year,tenure_low_year,floor_level_ground,floor_level_high,floor_level_low,floor_level_mid,floor_level_penthouse,floor_level_top,furnishing_partial,furnishing_unfurnished,furnishing_unspecified,nearest_mrt_distance_in_km,line_cc,line_ce,line_cg,line_dt,line_ew,line_ne,line_ns,line_te,nearest_pri_sch_distance_in_km,nearest_gep_pri_sch_distance_in_km,gep_pri_sch_within_1km,gep_pri_sch_within_1km_2km,gep_pri_sch_outside_2km,pri_sch_within_500m,pri_sch_outside_500m,nearest_com_centre_distance_in_km,cc_type_BN,cc_type_CR,cc_type_IEBP,cc_type_IHL,nearest_mall_distance_in_km,area_size,population,density,region
15995,118782,2 bed condo for sale in 8 farrer suites,8 sing joo walk,8 farrer suites,property_type_private,tenure_high_year,2015.0,2.0,2.0,893.0,,unspecified,"studio, 1, 2, 3 br",34.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,1687800.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c
18775,896559,1 bed condo for sale in jool suites,2 sing joo walk,jool suites,property_type_private,tenure_high_year,2014.0,1.0,1.0,388.0,,unspecified,"1, 2 br",52.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,756000.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c
16725,282431,1 bed condo for sale in vibes @ upper serangoon,488 upper serangoon road,vibes @ upper serangoon,property_type_private,tenure_high_year,2016.0,1.0,1.0,398.0,,unspecified,"1, 2 br",60.0,https://www.99.co/singapore/condos-apartments/...,1.327089,103.868218,0,geylang bahru,kallang,661500.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.493582,0,0,0,0,0,1,0,0,0.570186,3.787752,0,0,1,0,1,2.305314,1,0,0,0,0.503352,0.7355,11280,15336.505778,c
9789,893611,1 bed condo for sale in sunshine lodge,510 upper serangoon road,sunshine lodge,property_type_private,tenure_high_year,1998.0,1.0,1.0,592.0,,unspecified,"1, 3, 4 br",30.0,https://www.99.co/singapore/condos-apartments/...,1.327089,103.868218,0,geylang bahru,kallang,879900.0,1,0,1,0,0,0,0,0,0,0,0,0,1,0.493582,0,0,0,0,0,1,0,0,0.570186,3.787752,0,0,1,0,1,2.305314,1,0,0,0,0.503352,0.7355,11280,15336.505778,c
16504,343022,2 bed condo for sale in 8 farrer suites,8 sing joo walk,8 farrer suites,property_type_private,tenure_high_year,2015.0,2.0,3.0,560.0,,fully,"studio, 1, 2, 3 br",34.0,https://www.99.co/singapore/condos-apartments/...,1.315224,103.855321,0,kampong java,kallang,1039500.0,1,0,1,0,0,0,0,0,0,0,0,0,0,0.240052,0,0,0,0,0,1,0,0,0.561541,2.039068,0,0,1,0,1,1.428804,1,0,0,0,0.464164,1.052,10530,10009.505703,c


**Model 2**: In the second variation, where the algorithm recommends nearest neighbors after excluding listings that belong to the same property as the input listing, the top 5 recommendations are 1-2 bed condos located in Kallang – the same planning area in which the input listing is located. A few more similarities between the 5 recommendations and the input listing are the MRT line nearest to them (north-east line), the high number of years in tenure, and a nearest mall that is around 0.5km away. There are also some dissimilarities: 3 out of 5 recommendations are 1 bed instead of 2 bed condos, and have sizes much smaller than the 883 sqft input listing. These differences likely have an effect on the prices, which results in the prices of the recommendations being far below the $2 million price of the input listing.

In [26]:
# Model 3: show identifying and location columns, followed by the
# user-selected preference features.
reco3.loc[
    :,
    [
        'listing_id',
        'title',
        'address',
        'property_name',
        'planning_area',
        'subzone',
        'num_beds',
        'nearest_mall_distance_in_km',
    ] + user_preferences,
]

Unnamed: 0,listing_id,title,address,property_name,planning_area,subzone,num_beds,nearest_mall_distance_in_km,price,nearest_pri_sch_distance_in_km,tenure_high_year,line_ne
9740,473098,2 bed condo for sale in parc sophia,10 adis road,parc sophia,rochor,mount emily,2.0,0.212993,1417500.0,0.669447,1,1
1987,341828,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,kallang,kampong java,3.0,0.789777,1942500.0,0.582438,1,1
2202,188481,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,kallang,kampong java,3.0,0.789777,1942500.0,0.582438,1,1
9186,499790,3 bed condo for sale in nouvelle park,131 poh huat road west,nouvelle park,hougang,kovan,3.0,0.975097,2079000.0,0.743337,1,1
15396,195498,3 bed condo for sale in the florentine,70 florence road,the florentine,hougang,kovan,3.0,0.620523,1680000.0,0.734674,1,1


**Model 3**: In model 3, the user is able to specify the features that they would like their recommendations to be based on.

| Features | Input Listing        | Model 2 (Mean)    | Model 3 (Mean)  |
|----------|----------------------|---------------------|------------------|
|price     | 2,087,400 | 1,004,940 | 1,812,300 |
|nearest_pri_sch_distance_in_km | 0.6609 | 0.5649 | 0.6624 |
|tenure_high_year | 1 | 1 | 1 |
|line_ne | 1 | 1 | 1 |
|density| 12,802 | 12,140 | 10,777 |
|nearest_mall_distance_in_km | 0.5352 | 0.4798 | 0.6776 |
|nearest_com_centre_distance_in_km| 1.6414 | 1.7794| 1.260 |

In the above table, we compare the mean values of some features of the 5 recommendations from variations 2 and 3 with those of the input listing. The first 4 features are the user-specified features in variation 3, and for these we observe that the variation 3 recommendations are more similar to the input listing. However, since variation 3 does not consider any other features, the mean values of the recommendations from variation 2 are more similar to those of the input listing on the non-user-specified features such as density, distance to the nearest mall and distance to the nearest community centre. It is also interesting to note that since variation 3 does not consider latitude and longitude, only 2 out of 5 recommendations are located in Kallang, while the rest are in Hougang and Rochor, where users still have access to the north-east MRT line.


In [27]:
# Model 4: show identifying columns plus community-centre distance and size,
# followed by the user-selected preference features.
reco4.loc[
    :,
    [
        'listing_id',
        'title',
        'address',
        'property_name',
        'nearest_com_centre_distance_in_km',
        'size_sqft',
    ] + user_preferences,
]

Unnamed: 0,listing_id,title,address,property_name,nearest_com_centre_distance_in_km,size_sqft,price,nearest_pri_sch_distance_in_km,tenure_high_year,line_ne
9740,473098,2 bed condo for sale in parc sophia,10 adis road,parc sophia,0.672457,732.0,1417500.0,0.669447,1,1
1987,341828,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,1.110582,1453.0,1942500.0,0.582438,1,1
2202,188481,3 bed condo for sale in rangoon apartments,190 rangoon road,rangoon apartments,1.110582,1453.0,1942500.0,0.582438,1,1
14257,456825,3 bed condo for sale in trilive,111 tampines road,trilive,0.806184,883.0,1764000.0,0.437168,1,1
10638,889617,2 bed condo for sale in mackenzie 88,88 mackenzie road,mackenzie 88,1.196624,849.0,1575000.0,0.780942,1,1


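**Model 4**: In model 4, we have set max_k = 100 and degree_of_randomisation = 0.5. Therefore, the first 2 recommendations overlap with the top 2 recommendations in model 3, whereas the remaining 3 recommendations are randomly selected listings that are between the 6th and 50th closest neighbours. Note that in the output above the third row (listing 188481) also coincides with model 3's third recommendation; on the displayed columns it is identical to the second row.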

| Features | Input Listing        | Model 3 (Mean)  | Model 4 (Mean) |
|----------|----------------------|---------------------|------------------|
|price     | 2,087,400 | 1,812,300 | 1,728,300 |
|nearest_pri_sch_distance_in_km | 0.6609 | 0.6624  | 0.6104 |
|tenure_high_year | 1 | 1 | 1 |
|line_ne | 1 | 1 | 1 |
|density| 12,802 | 10,777 | 10,344 |
|nearest_mall_distance_in_km | 0.5352 | 0.6776 | 0.4950 |
|nearest_com_centre_distance_in_km| 1.6414 | 1.260 | 0.9792 |

Based on the above table, we compare the recommendations between models 3 and 4. For the first 4 features, we expect the values of the variation 3 recommendations to be closer to the input listing since the variation 4 recommendations include some “randomness”. However, since neither variation considers the last 3 features when fitting the KNN algorithm, their values are generally far from those of the input listing, except for the distance to the nearest mall for variation 4.

**Which of the 4 variations performs best?**

We have evaluated 4 variations of the recommender system. The best variation depends largely on the users’ intentions. The first default variation works best when users are very keen on buying a particular property and would like to view different apartments within that property. The second similar property variation is suitable for users who want to view similar properties and are not particular about any specific features. The third user-specific feature variation is aimed at users who are clear on what apartment features are important to them when buying a home. The last variation that includes “randomness” is suitable for users who are already bored of very similar recommendations and want to explore potentially interesting listings. As demonstrated in the above evaluation, we are also able to mix different variations together depending on our intentions.
