In [1]:
import numpy as np
import pickle
import pandas as pd
import streamlit as st
import joblib
import time
from numpy.linalg import norm


In [2]:
# Read the four input tables in one pass; each file stores its row index in
# the first column. Order of the targets matches the order of the paths:
#   sd_trans        - dataframe to be transformed
#   sd_listings_url - listing urls, joined onto the other frames below
#   sd_pp           - preprocessed frame the selected listing's row is taken from
#   sd_clustered    - clustered listings
sd_trans, sd_listings_url, sd_pp, sd_clustered = (
    pd.read_csv(path, index_col=0)
    for path in ('sd_trans', 'url_listings', 'sd_pp', 'sd_clustered')
)

# attach sd_trans to the url listings (join on the index)
sd_merged = sd_listings_url.join(sd_trans)

# attach the urls to the clustered listings (join on the index) so a listing
# can be identified by its url
sd_clustered = sd_clustered.join(sd_listings_url)

# let the user pick one of the listing urls from sd_merged
selected_listing = st.selectbox("Choose a listing", sd_merged.listing_url)


In [4]:
# pin the listing url used by the following cells
# (replaces whatever value selected_listing held before this cell ran)
selected_listing = "https://www.airbnb.com/rooms/21898446"

In [5]:
# based on selected listing, find its index label in sd_merged
matching_labels = sd_merged.index[sd_merged.listing_url == str(selected_listing)]
if matching_labels.empty:
    raise ValueError(
        "listing {!r} not found in sd_merged.listing_url".format(selected_listing)
    )
index_value = matching_labels[0]

# Fetch the row from sd_pp by *label* (.loc), not by position (.iloc):
# index_value is an index label, and the frames in this notebook are combined
# with index-label joins. The two lookups only coincide when the index is 0..n-1.
# NOTE(review): assumes sd_pp carries the same index labels as sd_merged -- confirm.
# The double brackets return a one-row DataFrame and keep each column's dtype.
selected_listing_df = sd_pp.loc[[index_value]]

# unpickle and load in column transformer
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files from a trusted source.
ct = joblib.load("column_transformer.pkl")

In [6]:
# show the single sd_pp row extracted for the selected listing
selected_listing_df

Unnamed: 0,Allied Gardens,Alta Vista,Amphitheater And Water Park,Balboa Park,Bario Logan,Bay Ho,Bay Park,Bay Terrace,Bird Land,Bonita Long Canyon,...,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,instant_bookable,is_business_travel_ready,require_guest_profile_picture,require_guest_phone_verification
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.464525,0.272019,0.278668,0.342562,0.598182,0.0,0.9703,0.0,-0.16778,-0.183109


In [18]:
# get a recommendation based on url selection

# column order of the recommendations table
RECOMMENDATION_COLUMNS = [
    'listing_url', 'similarity', 'cluster_label', 'latitude', 'longitude',
    'neighbourhood_cleansed', 'zipcode', 'property_type', 'room_type',
    'accommodates', 'bathrooms', 'bedrooms', 'beds', 'bed_type',
    'nightly_price', 'price_per_stay', 'security_deposit', 'cleaning_fee',
    'guests_included', 'extra_people', 'minimum_nights', 'maximum_nights',
    'host_response_time', 'host_response_rate', 'host_is_superhost',
    'host_total_listings_count', 'host_has_profile_pic',
    'host_identity_verified', 'number_of_reviews', 'number_of_stays',
    'review_scores_rating', 'review_scores_accuracy',
    'review_scores_cleanliness', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_location',
    'review_scores_value', 'requires_license', 'instant_bookable',
    'is_business_travel_ready', 'cancellation_policy',
    'require_guest_profile_picture', 'require_guest_phone_verification',
]


def get_recommendations(df, listing, top_n=5, listings=None):
    """
    Rank listings by cosine similarity to a selected listing.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric feature matrix, one row per listing. Rows are matched to
        ``listings`` by position (both are re-indexed 0..n-1), not by label.
    listing : pandas.DataFrame or pandas.Series
        Feature vector of the selected listing (a single row), holding the
        same features in the same order as the columns of ``df``.
    top_n : int, default 5
        Number of rows to return in addition to the best match, i.e.
        ``top_n + 1`` rows are returned. When the selected listing is itself
        a row of ``df`` it normally ranks first with a similarity of 1.0, so
        the default yields the listing plus its 5 closest neighbours.
    listings : pandas.DataFrame, optional
        Descriptive listing table with one row per row of ``df``; must contain
        every column in ``RECOMMENDATION_COLUMNS`` except ``similarity``.
        Defaults to the notebook-level ``sd_clustered`` frame.

    Returns
    -------
    pandas.DataFrame
        The ``top_n + 1`` most similar rows of ``listings`` with an added
        ``similarity`` column, sorted by similarity in descending order.
        Rows whose feature vector has zero norm have an undefined similarity
        (NaN) and are sorted last.

    Raises
    ------
    ValueError
        If ``top_n`` is negative, if ``listing`` does not have one value per
        column of ``df``, or if ``listings`` and ``df`` differ in row count.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative, got {}".format(top_n))

    if listings is None:
        # fall back to the frame loaded at the top of the notebook
        listings = sd_clustered

    # Keep the feature matrix 2-D even when it has a single row; squeezing it
    # would turn one row into a 1-D vector and break the row-wise norm below.
    features = np.atleast_2d(np.asarray(df, dtype=float))
    # flatten the selected listing to a 1-D vector
    query = np.asarray(listing, dtype=float).ravel()

    if features.shape[1] != query.shape[0]:
        raise ValueError(
            "listing has {} values but df has {} feature columns".format(
                query.shape[0], features.shape[1]
            )
        )
    if len(listings) != features.shape[0]:
        raise ValueError(
            "listings has {} rows but df has {} rows".format(
                len(listings), features.shape[0]
            )
        )

    # cosine similarity of every row with the selected listing; a zero-norm
    # vector gives 0/0 -> NaN, which is kept (undefined) without a warning
    with np.errstate(divide='ignore', invalid='ignore'):
        cosine = (features @ query) / (norm(features, axis=1) * norm(query))

    # attach the similarity to a positionally re-indexed copy of the listings
    rec = listings.reset_index(drop=True)
    rec['similarity'] = cosine

    # reorder column names
    rec = rec[RECOMMENDATION_COLUMNS]

    # best match first; NaN similarities are placed last by sort_values
    return rec.sort_values(by='similarity', ascending=False).head(top_n + 1)

In [19]:
# rank the listings in sd_pp against the selected listing and show the best matches
get_recommendations(df=sd_pp, listing=selected_listing_df)

Unnamed: 0,listing_url,similarity,cluster_label,latitude,longitude,neighbourhood_cleansed,zipcode,property_type,room_type,accommodates,...,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,requires_license,instant_bookable,is_business_travel_ready,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification
2,https://www.airbnb.com/rooms/21898446,1.0,3,32.79797,-117.2425,Pacific Beach,92109,Townhouse,Private room,1,...,10.0,10.0,10.0,10.0,0.0,1.0,0.0,flexible,0.0,0.0
747,https://www.airbnb.com/rooms/28494961,0.888938,3,32.57711,-117.0653,Mission Bay,92154,House,Private room,2,...,10.0,10.0,10.0,9.0,0.0,1.0,0.0,flexible,0.0,0.0
103,https://www.airbnb.com/rooms/28260942,0.874711,3,32.823,-117.18029,Clairemont Mesa,92117,Condominium,Private room,1,...,10.0,10.0,10.0,10.0,0.0,1.0,0.0,flexible,0.0,0.0
3,https://www.airbnb.com/rooms/25948680,0.867435,3,32.77545,-117.05923,College Area,92120,Apartment,Entire home/apt,1,...,10.0,10.0,10.0,10.0,0.0,0.0,0.0,flexible,0.0,0.0
116,https://www.airbnb.com/rooms/12591014,0.847913,3,32.80027,-117.1362,Serra Mesa,92123,House,Private room,1,...,10.0,10.0,10.0,10.0,0.0,1.0,0.0,flexible,0.0,0.0
736,https://www.airbnb.com/rooms/6707650,0.843769,3,32.71139,-117.15816,East Village,92101,Apartment,Entire home/apt,2,...,10.0,10.0,10.0,9.0,0.0,1.0,0.0,flexible,0.0,0.0
