In [10]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sentence_transformers import SentenceTransformer
import pandas as pd
from tqdm import tqdm

data = pd.read_csv('seattle_airbnb_listings_cleaned.csv')
model = SentenceTransformer('all-MiniLM-L6-v2')

data['description'] = data['neighborhood'].fillna('') + ' ' + data['property_type'].fillna('') + ' ' + data['room_type'].fillna('')
data['description'] = data['description'].astype(str)

tqdm.pandas(desc="Generating embeddings")
data['embedding'] = data['description'].progress_apply(lambda x: model.encode(x).tolist())

def recommend_listings(user_input, data, model, top_n=5):
    user_embedding = model.encode(user_input).tolist()
    data['similarity'] = data['embedding'].apply(lambda x: cosine_similarity([user_embedding], [x])[0][0])
    top_listings = data.sort_values(by='similarity', ascending=False).head(top_n)

    return top_listings.assign(similarity=top_listings['similarity'])


user_input = "I am looking for an apartment in Wallingford that has 2 bedrooms and is around 100 dollars and has a review score above 90"
top_n = 7
recommended_listings = recommend_listings(user_input, data, model, top_n)
recommended_listings

Generating embeddings: 100%|██████████████████| 232/232 [00:07<00:00, 32.08it/s]


Unnamed: 0,price,review_scores_rating,number_of_reviews,security_deposit,cleaning_fee,neighborhood,property_type,room_type,accommodates,bathrooms,...,beds,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_has_profile_pic,host_identity_verified,description,embedding,similarity
46,100,98,12,500,50,Wallingford,Apartment,Entire home/apt,4,1.0,...,3,within an hour,1.0,1,f,t,f,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
11,135,97,71,200,30,Wallingford,Apartment,Entire home/apt,6,1.0,...,3,within a few hours,0.9,1,t,t,t,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
37,90,93,52,200,40,Wallingford,Apartment,Entire home/apt,2,1.0,...,1,within a few hours,0.9,1,f,t,t,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
31,100,97,6,100,75,Wallingford,Apartment,Entire home/apt,4,1.0,...,2,within a few hours,1.0,1,f,t,t,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
30,70,95,12,250,75,Wallingford,Apartment,Entire home/apt,4,1.0,...,2,within an hour,1.0,1,f,t,t,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
38,85,88,5,100,8,Wallingford,Apartment,Entire home/apt,2,1.0,...,1,within a day,0.71,1,f,t,f,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
42,109,99,36,100,60,Wallingford,Apartment,Entire home/apt,4,1.0,...,3,within an hour,1.0,1,t,t,t,Wallingford Apartment Entire home/apt,"[0.0432128831744194, -0.016597870737314224, 0....",0.641273
