# Task 2: Recommendation Engine

### Setting up the Notebook

In [1]:
import numpy as np
import pandas as pd

from src.utils import read_csv
from src.data_preprocessor import DataPreprocessor
from src.preprocessor_utils import remove_columns, convert_to_lowercase

from src.recommendation_utils import get_recommendation_weights

In [2]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules (the `src.*` helpers above) are picked up without restarting the kernel;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

### Load the Data

In [3]:
# Read the training listings; the `price` column is split off as the target.
trainX, trainY = read_csv('data/train.csv', ylabel='price')

# Fit the preprocessor and transform the listings for the recommendation task.
# The fitted instance is kept because later cells call `inverse_transform` on it.
data_preprocessor = DataPreprocessor()
trainX, trainY = data_preprocessor.fit_transform_for_recommendations(trainX, trainY)

# Give both objects a fresh 0..n-1 index so that they line up row-by-row in the
# concat below and so that positional lookups (`df.iloc[row_id]`) and index labels
# coincide. Rebinding (instead of `inplace=True`) avoids mutating the objects
# handed back by the preprocessor.
trainX = trainX.reset_index(drop=True)
trainY = trainY.reset_index(drop=True)

# Single frame holding the features and the price for every listing.
df = pd.concat([trainX, trainY], axis=1)

## Recommendation Setup

### Features for Property Similarity

- `subzone` - Boolean variable to represent if the properties have the same subzone
- `planning_area` - Boolean variable to represent if the properties have the same planning area
- `price` - Boolean variable to represent if the property prices are in the same discrete bin
- ...many more

### User Input and Preferences

<img src="images/99co_recommendation.jpg" style="width: 700px;"/>

[99.co](https://www.99.co) shows four possible priorities that the user can choose from to sort 'similar listings'. We adopt the same idea in our setup. While `get_top_recommendations` can handle a highly nuanced weightage for each feature, we provide a few pre-defined settings below. Uncomment any one of them to continue, or make no changes to see the universal recommendations.

In [4]:
# Features used to judge property similarity.
feature_list = [
    'subzone', 'planning_area', 'price',
    'property_type', 'num_beds', 'size_sqft',
    'floor_level', 'furnishing', 'tenure_left',
]

# Universal setting: every feature starts with the same weight of 1.
feature_weightage = dict.fromkeys(feature_list, 1)

########## Prioritize 'nearby' property ##########
# feature_weightage['subzone'] = 10
# feature_weightage['planning_area'] = 10

########## Prioritize units with similar 'price' ##########
# feature_weightage['price'] = 10

########## And a few more ##########

## Computing the Top Recommendations

In [41]:
def get_top_recommendations(row, df, feature_weightage, k=3, pool_factor=5, random_state=None):
    """Recommend up to ``k`` listings from ``df`` that are similar to ``row``.

    The candidates are scored with ``get_recommendation_weights``; the
    ``pool_factor * k`` highest-scoring ones form a pool, and ``k`` of them are
    drawn without replacement with probability proportional to their score, so
    repeated calls can surface different (but still highly ranked) listings.

    Parameters
    ----------
    row : pandas.Series
        The query listing. Its ``name`` is treated as its index label in ``df``
        and that label is excluded from the candidates; a row whose label is not
        present in ``df`` is accepted as well.
    df : pandas.DataFrame
        All listings that may be recommended.
    feature_weightage : dict
        Per-feature weights, passed through to ``get_recommendation_weights``.
    k : int, default 3
        Number of recommendations wanted. Fewer are returned when there are
        fewer candidates than ``k``.
    pool_factor : int, default 5
        Size of the sampling pool as a multiple of ``k`` (values below 1 are
        treated as 1).
    random_state : int, numpy.random.Generator or None, default None
        Seed or generator for a reproducible draw. ``None`` keeps using NumPy's
        global random state (``np.random.choice``).

    Returns
    -------
    numpy.ndarray
        Index *labels* of the recommended listings (use ``df.loc[...]``).

    Notes
    -----
    Assumes ``get_recommendation_weights`` returns one non-negative score per
    candidate row, in row order -- TODO confirm against
    ``src.recommendation_utils``.
    """
    ######## Remove input from dataframe (To stop recommending the input itself)
    # errors='ignore' lets callers pass a query row that is not part of `df`.
    candidates = df.drop(index=row.name, errors='ignore')
    labels = candidates.index.to_numpy()

    ######## Get Recommendation Weights For Each Property
    # Coerce to a flat float array so the positional indexing below behaves the
    # same whether the helper returns a NumPy array or a pandas Series.
    weights = np.asarray(
        get_recommendation_weights(row, candidates, feature_weightage), dtype=float
    ).ravel()

    # Never ask for more recommendations than there are candidates.
    k = min(k, len(weights), len(labels))
    if k <= 0:
        return labels[:0]

    ######## Filter Out Top Properties (More Than Requested)
    pool_size = min(max(int(pool_factor), 1) * k, len(weights))
    top_positions = np.argsort(weights)[-pool_size:]
    pool_weights = weights[top_positions]

    ######## Introduce Randomness in Choice From The Top Recommendations
    total = pool_weights.sum()
    cannot_sample = (
        not total > 0                            # all-zero (or NaN) weights
        or (pool_weights < 0).any()              # not valid probabilities
        or np.count_nonzero(pool_weights) < k    # too few non-zero entries to draw k
    )
    if cannot_sample:
        # Weighted sampling is impossible here; fall back to the k best-ranked
        # candidates (highest weight first) instead of raising.
        return labels[top_positions[::-1][:k]]

    prob = pool_weights / total
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    rec_indices = rng.choice(labels[top_positions], size=k, replace=False, p=prob)
    return np.array(rec_indices)

## Testing the Recommendation Engine

### Pick a Sample Listing as Input

In [42]:
# Pick a row id of choice (a 0-based row position in `df`, since it is used with `.iloc`)
#row_id = 10
#row_id = 20
#row_id = 30
#row_id = 40
row_id = 50

# Get the row from the dataframe (invalid row ids will throw an error)
row = df.iloc[row_id]

# Just for printing it nicely, we create a new dataframe from this single row
# and map it back through the preprocessor's inverse transform
data_preprocessor.inverse_transform(pd.DataFrame([row]))
# pd.DataFrame([row])

Unnamed: 0,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,total_num_units,lat,lng,subzone,planning_area,tenure_duration,is_freehold,price
50,8 lorong 25a geylang,zyanya,condo,freehold,2025.0,4.0,3.0,1313,,unspecified,34.0,1.313628,103.883109,aljunied,geylang,10000,True,2274800.0


### Compute and Display the recommendations

In [43]:
# Number of recommendations to display.
k = 3

recommendation_list = get_top_recommendations(row, df, feature_weightage, k=k)

# `get_top_recommendations` returns index *labels* (it samples from `df.index`),
# so the rows are selected with label-based `.loc`. Positional `.iloc` only gave
# the right rows because `df` happens to carry a 0..n-1 index.
data_preprocessor.inverse_transform(df.loc[recommendation_list])
# df.loc[recommendation_list]

15925    lentor modern
Name: property_name, dtype: object
169
15925    lentor modern
Name: property_name, dtype: object
169
13555    thomson 800
Name: property_name, dtype: object
1
[15924, 15924, 13554]


Unnamed: 0,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,floor_level,furnishing,total_num_units,lat,lng,subzone,planning_area,tenure_duration,is_freehold,price
15924,8 lorong 25a geylang,zyanya,condo,freehold,2025.0,4.0,3.0,1227,high,unfurnished,34.0,1.313628,103.883109,aljunied,geylang,10000,True,2301100.0
15924,8 lorong 25a geylang,zyanya,condo,freehold,2025.0,4.0,3.0,1227,high,unfurnished,34.0,1.313628,103.883109,aljunied,geylang,10000,True,2301100.0
13554,8 lorong 25a geylang,zyanya,condo,freehold,2025.0,4.0,3.0,1302,,partial,34.0,1.313628,103.883109,aljunied,geylang,10000,True,2100000.0
