# Task 2: Recommendation Engine

### Setting up the Notebook

In [1]:
import numpy as np
import pandas as pd

from src.utils import read_csv
from src.data_preprocessor import DataPreprocessor
from src.preprocessor_utils import remove_columns, convert_to_lowercase

from src.recommendation_utils import get_recommendation_weights

In [2]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's helper modules take effect without restarting the kernel;
# see https://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

### Load the Data

In [3]:
# Read the training listings. `ylabel='price'` is passed to the project helper,
# which hands back two objects (presumably features and the price target --
# confirm against src.utils.read_csv).
trainX, trainY = read_csv('data/train.csv', ylabel='price')

# Optional preprocessing step, currently disabled (uncomment both lines to use it):
# data_preprocessor = DataPreprocessor()
# trainX, trainY = data_preprocessor.fit_transform_for_recommendations(trainX, trainY)

# Put both parts side by side (column-wise) in a single frame for the recommender.
df = pd.concat((trainX, trainY), axis='columns')

## Recommendation Setup

### Features for Property Similarity

- `subzone` - Boolean variable to represent if the properties have the same subzone
- `planning_area` - Boolean variable to represent if the properties have the same planning area
- `price` - Boolean variable to represent if the property prices are in the same discrete bin
- ...many more

### User Input and Preferences

<img src="images/99co_recommendation.jpg" style="width: 700px;"/>

[99.co](https://www.99.co) shows four possible priorities that the user can provide to sort 'similar listings'. We adapt the same idea in our setup. While `get_top_recommendations` can handle a highly nuanced weightage for each feature, we provide a few pre-defined settings below. Uncomment any one of them to continue, or make no changes to see the universal recommendations.

In [4]:
# Features taken into account when scoring the similarity of two listings.
feature_list = ['subzone', 'planning_area', 'price']

# Baseline: every feature starts with the same weight of 1.
feature_weightage = dict.fromkeys(feature_list, 1)

# ---------- Preset: prioritize 'nearby' properties ----------
# feature_weightage['subzone'] = 10
# feature_weightage['planning_area'] = 10

# ---------- Preset: prioritize units with a similar 'price' ----------
# feature_weightage['price'] = 10

# ---------- Further presets can be added here ----------

## Computing the Top Recommendations

In [24]:
def get_top_recommendations(row, df, feature_weightage, k=3, random_state=None):
    """Sample up to ``k`` recommended listings for ``row`` out of ``df``.

    The query listing is removed from the candidates, every remaining listing
    is scored with ``get_recommendation_weights``, the ``5 * k`` highest-scoring
    candidates are shortlisted, and ``k`` of them are drawn without replacement
    with probability proportional to their weight.

    Parameters
    ----------
    row : pandas.Series
        The query listing. ``row.name`` must be an index label present in ``df``.
    df : pandas.DataFrame
        All listings. It is not modified; the query row is dropped from a copy.
    feature_weightage : dict
        Passed through unchanged to ``get_recommendation_weights``.
    k : int, default 3
        Number of recommendations requested.
    random_state : None, int or numpy.random.RandomState, default None
        ``None`` draws from NumPy's global random state (so ``np.random.seed``
        still controls the result); an int or ``RandomState`` makes this one
        call reproducible on its own.

    Returns
    -------
    numpy.ndarray
        Index *labels* (not positions) of the recommended rows; look them up
        with ``df.loc``. Fewer than ``k`` labels are returned when fewer than
        ``k`` candidates exist.

    Raises
    ------
    ValueError
        If ``k`` is negative or the shortlisted weights are not finite.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    # Never recommend the query listing itself. `drop` returns a new frame.
    candidates = df.drop(row.name)

    # Assumes one weight per candidate row, in row order -- TODO confirm against
    # get_recommendation_weights. Converting to a plain array keeps the
    # positional indexing below positional even if a pandas Series comes back.
    weights = np.asarray(
        get_recommendation_weights(row, candidates, feature_weightage), dtype=float
    )

    n_pick = min(k, len(weights))
    if n_pick == 0:
        return candidates.index[:0].to_numpy()

    # Shortlist more candidates than requested (5x) so the draw below has
    # some variety; argsort is ascending, so the best ones sit at the end.
    shortlist = weights.argsort()[-5 * k:]
    shortlist_weights = weights[shortlist]
    total = shortlist_weights.sum()
    if not np.isfinite(total):
        raise ValueError("recommendation weights must be finite")

    # Weighted sampling without replacement needs at least `n_pick` candidates
    # with a non-zero probability (and a non-zero total to normalise by).
    # Otherwise fall back to simply returning the best-scoring candidates.
    if total <= 0 or np.count_nonzero(shortlist_weights > 0) < n_pick:
        return candidates.index[shortlist[::-1][:n_pick]].to_numpy()

    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    return rng.choice(
        candidates.index[shortlist],
        size=n_pick,
        replace=False,
        p=shortlist_weights / total,
    )

## Testing the Recommendation Engine

### Pick a Sample Listing as Input

In [25]:
# Pick a row id of choice (a position, since it is passed to `df.iloc` below)
row_id = 10
#row_id = 20
#row_id = 30
#row_id = 40
#row_id = 50

# Get the row from the dataframe (invalid row ids will throw an error)
row = df.iloc[row_id]

# Just for printing it nicely, we create a new dataframe from this single row
# data_preprocessor.inverse_transform(pd.DataFrame([row]))
pd.DataFrame([row])

Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,...,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price
10,397777,2 bed condo for sale in bedok court,299 bedok south avenue 3,bedok court,condo,99-year leasehold,1985.0,2.0,,1733,...,unspecified,"studio, 1, 2, 3, 4 br",280.0,https://www.99.co/singapore/condos-apartments/...,1.322153,103.945223,0,bedok south,bedok,2205000.0


### Compute and Display the recommendations

In [26]:
k = 3

recommendation_list = get_top_recommendations(row, df, feature_weightage, k=k)

# `get_top_recommendations` returns index *labels* (it samples from `df.index`),
# so the rows are looked up with label-based `.loc`. Positional `.iloc` only
# yields the same rows while `df` happens to carry the default 0..n-1 index.
# data_preprocessor.inverse_transform(df.loc[recommendation_list])
df.loc[recommendation_list]

10


Unnamed: 0,listing_id,title,address,property_name,property_type,tenure,built_year,num_beds,num_baths,size_sqft,...,furnishing,available_unit_types,total_num_units,property_details_url,lat,lng,elevation,subzone,planning_area,price
1297,384899,4 bed condo for sale in bedok residences,26 bedok north drive,bedok residences,condo,99-year leasehold,2015.0,4.0,5.0,2637,...,unspecified,"studio, 1, 2, 3, 4 br",583.0,https://www.99.co/singapore/condos-apartments/...,1.321972,103.946825,0,bedok south,bedok,3864000.0
20128,126696,2 bed condo for sale in the glades,20 bedok rise,the glades,condo,99-year leasehold,2017.0,2.0,1.0,762,...,unspecified,"studio, 1, 2, 3, 4, 5 br",726.0,https://www.99.co/singapore/condos-apartments/...,1.326567,103.947897,0,bedok south,bedok,1260000.0
17275,102625,3 bed house for sale in bedok ria,bedok ria crescent,bedok ria,terraced house,freehold,1993.0,3.0,3.0,2240,...,partial,"2, 3, 4, 5, 6, 7 br",178.0,https://www.99.co/singapore/houses/bedok-ria-d...,1.327233,103.953518,0,bedok south,bedok,3990000.0
