In [20]:
# Mount Google Drive at /content/drive; every data and model path read in this
# notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
import pickle
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

In [22]:
# Listing/review table, read from CSV.
merged = pd.read_csv("/content/drive/MyDrive/listings_reviews_final.csv")
# Embedding table, read from Parquet; later cells key it by its "listing_id" column.
embed_df = pd.read_parquet("/content/drive/MyDrive/listing_embedding_store/listing_embeddings.parquet")
# NOTE(review): listing_ids is reassigned from embed_df["listing_id"] two cells
# further down before anything reads this array, so this load looks redundant —
# confirm before removing.
listing_ids = np.load("/content/drive/MyDrive/listing_embedding_store/listing_ids.npy")

In [23]:
# Drop the id and scrape_id columns. errors="ignore" keeps this cell re-runnable:
# without it a second execution raises KeyError because the columns are already gone.
merged = merged.drop(columns=["id", "scrape_id"], errors="ignore")

# Use an explicit 64-bit key on both frames. Listing ids in this dataset run to
# 19 digits, and plain `int` is only 32 bits wide on some platforms/NumPy versions.
embed_df["listing_id"] = embed_df["listing_id"].astype("int64")
merged["listing_id"] = merged["listing_id"].astype("int64")

# Keep only the embeddings whose listing also has a row in merged.
embed_df = embed_df[embed_df["listing_id"].isin(merged["listing_id"])]

In [24]:
# Both arrays come from the same filtered frame, so row i of X is the embedding
# of listing_ids[i].
listing_ids = embed_df["listing_id"].values
# Every column except listing_id is used as an embedding dimension.
X = embed_df.drop(columns=["listing_id"]).values.astype("float32")

In [25]:
# Embedding ids that have no row in merged; the isin() filter applied to
# embed_df earlier should leave none.
missing = set(embed_df["listing_id"]).difference(merged["listing_id"])
missing

set()

In [26]:
# Cosine-distance nearest-neighbour index over the embedding matrix X.
# fit() returns the estimator itself, so construction and fitting are chained;
# the bare name on the last line keeps the estimator repr as the cell output.
nn_model = NearestNeighbors(metric="cosine").fit(X)
nn_model

In [27]:
# Sentence encoder; recommend() uses it to embed the user's prompt before the
# nearest-neighbour lookup.
# NOTE(review): presumably the stored listing embeddings were produced with this
# same model — confirm, otherwise the query and index vectors are not comparable.
text_model = SentenceTransformer("all-mpnet-base-v2")

In [28]:
# Reload both tables from Drive so this cell can be run on its own.
# NOTE(review): this replaces the `merged` prepared in the earlier cleaning cell,
# so the id / scrape_id columns dropped there are present again from here on.
merged = pd.read_csv("/content/drive/MyDrive/listings_reviews_final.csv")
embed_df = pd.read_parquet("/content/drive/MyDrive/listing_embedding_store/listing_embeddings.parquet")

# Explicit 64-bit key on both frames: listing ids here run to 19 digits, and
# plain `int` is only 32 bits wide on some platforms/NumPy versions.
merged["listing_id"] = merged["listing_id"].astype("int64")
embed_df["listing_id"] = embed_df["listing_id"].astype("int64")

# Keep only the embeddings whose listing also has a row in merged.
valid_ids = set(merged["listing_id"])
embed_df = embed_df[embed_df["listing_id"].isin(valid_ids)]

# Rebuild the id vector and the feature matrix from the same filtered frame so
# that row i of X always belongs to listing_ids[i].
listing_ids = embed_df["listing_id"].values
X = embed_df.drop(columns=["listing_id"]).values.astype("float32")

# Displayed as the cell output: the row counts of the two must agree.
embed_df.shape, len(listing_ids)


((4182, 769), 4182)

In [29]:
# Refit the cosine nearest-neighbour index on the X rebuilt in the cell above.
# fit() returns the estimator itself; the bare name on the last line keeps the
# estimator repr as the cell output.
nn_model = NearestNeighbors(metric="cosine").fit(X)
nn_model

In [30]:
# Sentence encoder used by recommend() to embed the prompt.
# NOTE(review): identical to the SentenceTransformer call in an earlier cell;
# running both loads the model twice.
text_model = SentenceTransformer("all-mpnet-base-v2")

In [31]:
def recommend(prompt, n=5):
    """Return the listings whose stored embeddings are closest to a text prompt.

    Reads the notebook globals ``text_model``, ``nn_model``, ``listing_ids``
    and ``merged``.

    Parameters
    ----------
    prompt : str
        Free-text description to search for.
    n : int, default 5
        Number of neighbours to request. Capped at the number of indexed
        listings, because ``kneighbors`` raises when asked for more.

    Returns
    -------
    recommendations : pandas.DataFrame
        Rows of ``merged`` for the matched listings, ordered nearest first.
        Rows containing a missing value in any column are dropped, so fewer
        than ``n`` listings can come back.
    recommendation_ids : list
        ``listing_id`` of every returned row, in the same order.
    """
    query_vec = text_model.encode([prompt], convert_to_numpy=True)

    # listing_ids is built from the same frame as the fitted matrix, so its
    # length is the number of indexed samples.
    k = min(n, len(listing_ids))
    _, neighbor_idx = nn_model.kneighbors(query_vec, n_neighbors=k)
    ranked_ids = listing_ids[neighbor_idx[0]]

    # NOTE(review): dropna() with no subset discards a listing if *any* column
    # is missing; kept as-is, but consider restricting it to required columns.
    recommendations = merged[merged["listing_id"].isin(ranked_ids)].dropna()

    # isin() yields rows in merged's own order, which throws away the
    # similarity ranking. Restore nearest-first order (sorted() is stable, so
    # several rows for one listing keep their relative order).
    rank_of = {int(listing_id): rank for rank, listing_id in enumerate(ranked_ids)}
    row_ids = recommendations["listing_id"].tolist()
    order = sorted(range(len(row_ids)), key=lambda pos: rank_of[int(row_ids[pos])])
    recommendations = recommendations.iloc[order]

    recommendation_ids = recommendations["listing_id"].tolist()
    return recommendations, recommendation_ids

In [32]:
#r, rs = recommend("calm peaceful stay")
#r

In [None]:
#rs

In [None]:
# Load the estimator that predict_prices() calls .predict() on, plus the
# metadata dict that holds its expected input columns.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file —
# only load pickles that come from a trusted source.
with open("/content/drive/MyDrive/model_emotion.pkl", "rb") as f:
    model = pickle.load(f)

with open("/content/drive/MyDrive/model_emotion_columns.pkl", "rb") as f:
    col_info = pickle.load(f)

In [None]:
# Column list that predict_prices() aligns the model input to: missing columns
# are zero-filled and the frame is then reordered to exactly this list.
cols = col_info["columns"]

In [None]:
def _clean_price(prices):
    """Turn a price column such as "$1,234.00" into floats; unparseable entries become NaN."""
    text = prices.astype(str)
    # regex=False: "$" is a regex anchor and the default of `regex` differs
    # between pandas versions, so make the literal replacement explicit.
    text = text.str.replace("$", "", regex=False).str.replace(",", "", regex=False)
    return pd.to_numeric(text, errors="coerce")


def predict_prices(ids, merged, estimator=None, feature_columns=None):
    """Predict prices for the given listings and return them next to the actual prices.

    Parameters
    ----------
    ids : iterable
        ``listing_id`` values to score.
    merged : pandas.DataFrame
        Listing table. Must contain ``listing_id``, ``price``, ``name`` and the
        numeric feature columns used below.
    estimator : object with ``predict``, optional
        Defaults to the notebook global ``model``.
    feature_columns : sequence of str, optional
        Exact input columns, in order, expected by the estimator. Defaults to
        the notebook global ``cols``.

    Returns
    -------
    pandas.DataFrame
        Columns ``listing_id``, ``price``, ``predicted_price``, ``name`` for
        every selected listing with a parseable price. An empty DataFrame is
        returned when no listing qualifies.

    NOTE(review): the ``segment`` feature is derived from the listing's actual
    price, which is also the value being predicted — confirm this matches how
    the estimator was trained.
    """
    if estimator is None:
        estimator = model
    if feature_columns is None:
        feature_columns = cols
    feature_columns = list(feature_columns)

    rows = merged[merged["listing_id"].isin(ids)].copy()
    if rows.empty:
        return pd.DataFrame()

    rows["price"] = _clean_price(rows["price"])
    rows = rows.dropna(subset=["price"]).copy()
    if rows.empty:
        # Every selected listing had an unparseable price; do not hand an
        # empty frame to the estimator.
        return pd.DataFrame()

    # Thresholds come from the whole table, cleaned the same way as the rows
    # they are compared against (the raw column may hold "$1,234.00" strings).
    all_prices = _clean_price(merged["price"])
    q33 = all_prices.quantile(0.33)
    q67 = all_prices.quantile(0.67)

    def _segment_for(price):
        if price <= q33:
            return "Budget"
        if price <= q67:
            return "Standard"
        return "Luxury"

    rows["segment"] = rows["price"].apply(_segment_for)

    base_features = [
        "accommodates",
        "bathrooms",
        "bedrooms",
        "review_scores_rating",
        "review_scores_accuracy",
        "segment",
    ]
    features = pd.get_dummies(rows[base_features].copy(), columns=["segment"], drop_first=False)

    # A small selection rarely contains all three segments; add the absent
    # dummy columns as zeros, then put the columns in the estimator's order.
    for column in feature_columns:
        if column not in features.columns:
            features[column] = 0
    features = features[feature_columns]

    rows["predicted_price"] = estimator.predict(features)

    return rows[["listing_id", "price", "predicted_price", "name"]]

In [46]:
# End-to-end example: retrieve listings for a free-text prompt, then show the
# actual price next to the model's predicted price for each of them.
recs, rec_ids = recommend("newly renovated")
preds = predict_prices(rec_ids, merged)
preds

Unnamed: 0,listing_id,price,predicted_price,name
2716,52859106,193.0,161.974564,The Addison by Kasa | Traditional Queen Studio
2717,52859379,880.0,913.987,The Addison by Kasa | Premium Queen Loft
3171,736397045443196784,143.0,158.91275,"32W - Humble & Cozy 2B1B near Zoo, GGP, Park, ..."
3664,1040099353824783630,186.0,145.326065,Newly remodeled Condo with easy public transport
3863,1182646584388613044,118.0,74.992134,Newly Built Modern Suite
