In [2]:
import numpy as np
import math
import pandas as pd
import matplotlib as plt
import datetime
%matplotlib inline

In [3]:
subset = pd.read_csv("scored_housing/full_housing.csv")
subset.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Listing ID,Location,Lat_Long,Monthly Rental Price,Bedrooms,Bathrooms,Square Footage,Latitude,...,num_restaurants,num_subways,num_trees,grocers_normalized,nightlife_normalized,noise_normalized,restaurants_normalized,subways_normalized,trees_normalized,id
0,0,0,3021099,211 North End Avenue #12A,"['40.71628546,-74.01546103']",4600,1.0,1.0,860.0,40.716285,...,26,0,430,-1.116985,-0.961782,-1.153283,-0.909475,-0.765301,0.183035,3021099
1,1,1,3020988,150 East 39th Street #601,"['40.74890137,-73.97660065']",2650,0.0,1.0,0.0,40.748901,...,108,3,514,0.304876,-0.137721,-0.217637,0.343502,-0.321111,0.661842,3020988
2,2,2,3026635,257 West 136th Street #4AA,"['40.81700815,-73.94508554']",2195,2.0,1.0,0.0,40.817008,...,12,6,561,0.399667,-0.373167,0.366263,-1.123398,0.123079,0.929746,3026635
3,3,3,3026633,272 West 139th Street #3A,"['40.81876179,-73.9446072']",2650,2.0,1.0,0.0,40.818762,...,13,2,554,0.020504,-0.608613,0.795394,-1.108118,-0.469174,0.889845,3026633
4,4,4,3026626,408 West 34th Street #6E,"['40.75329123,-73.9971017']",1975,0.0,1.0,0.0,40.753291,...,71,0,210,-0.832613,-0.49089,0.387368,-0.221866,-0.765301,-1.070983,3026626


## Getting User Data

* User fills out survey, ranking the features 1-6
* User also provides constraints, (e.g. number of bedrooms, number of bathrooms, max rental price)

## Initial Recommendation

* Filter listings by the constraints, ONLY show those that satisfy the constraints.
    * Do **NOT** include those that are already shown or those that are disliked.
* Shift all the norm values up by 10 to get rid of negative values
* Matrix multiply user rankings with the new norm values to create a new score for that user's weights
* Show the top listing
    * If **LIKE:**
        * Add to _Likes_ dictionary, add to _Shown_ dictionary
        * Prompt user _"Do you want to see more like these?"_
            * If **YES:**
                * Move into the next code chunk, show similar listings, return to the top of the loop.
            * If **NO:**
                * Encounter finished, exit.
    * If **DISLIKE:**
        * Add to _Dislikes_ dictionary, add to _Shown_ dictionary
        * Add similar to this listing to the _Dislikes_ dictionary
        * Generate a new initial recommendation, return to the top of the loop.

In [39]:
dislikes = []
def initial_rec(preferences, subset):
    #preferences is a list with the ranked features of
    # grocers_normalized
    # nightlife_normalized
    # noise_normalized
    # restaurants_normalized
    # subways_normalized
    # trees_normalized
    
    if len(preferences) != 6:
        print("error")
        return
    preferences = np.array(preferences)
    subset["User Score"] = 0.0
    for i in range(subset.shape[0]):
        score = preferences * np.array(subset.iloc[i, 18:24] + 10)
        subset["User Score"][i] = sum(score)
    subset = subset.sort_values(by = ['User Score'], ascending = False)
    counter = 0
    
    while True:
        potential = subset.iloc[counter, :]
        if potential not in dislikes:
            break
        else:
            counter = counter + 1
            
    return potential
    
    
    

In [None]:
initial_rec([1,2,5,4,1,5], subset)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Similar Listings after Initial Recommendation (Content Based Recommender)
https://towardsdatascience.com/how-to-build-from-scratch-a-content-based-movie-recommender-with-natural-language-processing-25ad400eb243

In [34]:
vals = subset.iloc[:, 18:24] + 10
vals.head()

Unnamed: 0,grocers_normalized,nightlife_normalized,noise_normalized,restaurants_normalized,subways_normalized,trees_normalized
0,8.883015,9.038218,8.846717,9.090525,9.234699,10.183035
1,10.304876,9.862279,9.782363,10.343502,9.678889,10.661842
2,10.399667,9.626833,10.366263,8.876602,10.123079,10.929746
3,10.020504,9.391387,10.795394,8.891882,9.530826,10.889845
4,9.167387,9.50911,10.387368,9.778134,9.234699,8.929017


In [35]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
cosine_sim = cosine_similarity(vals)
cosine_sim

array([[1.        , 0.99924818, 0.99820424, ..., 0.99954346, 0.999533  ,
        0.99395805],
       [0.99924818, 1.        , 0.99769486, ..., 0.99971877, 0.99953334,
        0.99297043],
       [0.99820424, 0.99769486, 1.        , ..., 0.99861665, 0.99835162,
        0.99603691],
       ...,
       [0.99954346, 0.99971877, 0.99861665, ..., 1.        , 0.99990587,
        0.9947071 ],
       [0.999533  , 0.99953334, 0.99835162, ..., 0.99990587, 1.        ,
        0.99475874],
       [0.99395805, 0.99297043, 0.99603691, ..., 0.9947071 , 0.99475874,
        1.        ]])

In [37]:
indices = pd.Series(subset["Location"])

# Defining the function that takes in listing name
# as input and returns the top 10 recommended listings
def recommendations(address, cosine_sim = cosine_sim):
    
    # initializing the empty list of recommended listings
    recommended_addresses = []
    
    # gettin the index of the listing that matches the title
    idx = indices[indices == address].index[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)

    # getting the indexes of the 10 most similar listings
    top_10_indexes = list(score_series.iloc[1:11].index)
    
    # populating the list with the titles of the best 10 matching listings
    for i in top_10_indexes:
        recommended_addresses.append(list(vals.index)[i])
        
    return recommended_addresses

In [38]:
[indices[x] for x in recommendations("112 Stanton Street #4")]

['173 Ludlow Street #1A',
 '169 Allen Street #1B',
 '25 Clinton Street #304A',
 '18 Avenue A #5A',
 '171 Suffolk Street #2B',
 '171 Suffolk Street #11D',
 '171 Suffolk Street #6J',
 '94 Orchard Street #3D',
 '76 E 1st Street #1AD',
 '191 Avenue A #2']

## Visualizations

When users receive a recommendation, they should be able to see visually **where the listing is on a map.**
Ideally, they should be able to click on a listing and a pop-up should appear with the following:
* Apartment name
* Beds
* Bathrooms
* Square footage (if available)
* Price
* Some generic description of why we recommended
    * _You prioritized **X**, there are **Y** instances of this within a 0.25 mile radius of this listing._

Users should also be able to see things they marked as _**Liked**_ in a similar visualization as mentioned above.