# Create Embeddings

Create embeddings from the generated data.

In [1]:
import lancedb
import warnings
warnings.filterwarnings('ignore')
import pandas as pd

# Read the generated listings CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../data/chatgpt_house_match_data.csv")
df.head(n=5)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,name,year,location,layout,price,description
0,Modern and Spacious House in Kawasaki,2015,"Kawasaki City, Takatsu Ward",3LDK,"¥200,000/month",This beautiful house was built in 2015 and is ...
1,Cozy House in Fujisawa,1995,"Fujisawa, Kanagawa Prefecture",2LDK,"¥150,000/month",This lovely two-bedroom house is located in th...
2,Saitama Green House,2014,"Saitama City, Omiya Ward",1LDK,"¥120,000/month",This cozy house is located in the quiet reside...
3,Spacious Yokohama House,2010,"Yokohama, Kanagawa",3LDK,"¥200,000/month",This modern house is located in a quiet neighb...
4,Modern House in Saitama,2018,"Saitama City, Omiya Ward",2LDK,"¥170,000/month",This modern house is perfect for individuals o...


In [2]:
import dotenv
from typing import List
from langchain_openai.embeddings import OpenAIEmbeddings
# Load environment variables through dotenv before building the client.
# NOTE(review): presumably the OpenAI API key lives in a local .env file — confirm.
dotenv.load_dotenv()

# Shared embedding client; both embed() and semantic_search() call it.
model = OpenAIEmbeddings()

def embed(texts: List[str]):
    """Embed a batch of texts with the shared ``model``.

    A bare string is treated as a batch of one; any other input is passed to
    ``model.embed_documents`` unchanged. Whatever that call returns is
    returned as-is.
    """
    batch = [texts] if isinstance(texts, str) else texts
    return model.embed_documents(batch)

# Smoke test: two inputs must produce two results, i.e. embed() handles a batch.
# NOTE(review): this presumably issues a live embedding request — confirm before running offline.
output = embed(["hello", "world"])
assert len(output) == 2, "Not Prepared For Batch Processing."
    

In [3]:
import lancedb
from lancedb.embeddings import with_embeddings

# Run embed() over the "description" column of df, showing a progress bar.
# The preview output shows the result carries the original columns plus "vector".
data = with_embeddings(embed, df, column="description", show_progress=True)
# Convert to pandas and display only the first row as a sanity check.
data.to_pandas().head(1)

  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


Unnamed: 0,name,year,location,layout,price,description,vector
0,Modern and Spacious House in Kawasaki,2015,"Kawasaki City, Takatsu Ward",3LDK,"¥200,000/month",This beautiful house was built in 2015 and is ...,"[0.011045732, 0.0205944, -0.022403896, 0.00356..."


In [4]:
# Start from an empty database directory so this cell can be re-run safely.
# shutil.rmtree is the pure-Python equivalent of `rm -rf`: with
# ignore_errors=True a missing directory is not an error, and the cell no
# longer depends on a POSIX shell being available.
import shutil

DB_URI = "../tmp/lancedb"  # single source of truth for the database location
shutil.rmtree(DB_URI, ignore_errors=True)

# Connect to the (now empty) database and store the embedded rows as the
# "house_match" table.
db = lancedb.connect(DB_URI)
tbl = db.create_table("house_match", data)

A simple semantic search function follows. It is implemented by hand here, but in the app it will be provided by LangChain.

In [10]:
# semantic similarity search function
import numpy as np

def semantic_search(query: str, limit: int = 10):
    """Return the rows of ``tbl`` whose stored vectors are closest to ``query``.

    The query is embedded with the same ``embed`` helper that produced the
    stored vectors, so query and document vectors share one code path.

    Args:
        query: Free-text description of what the user is looking for.
        limit: Maximum number of rows to return. Defaults to 10, which is
            LanceDB's own default result count, so existing calls that omit
            it return the same number of rows as before.

    Returns:
        The search result converted to a pandas DataFrame; it includes a
        ``_distance`` column next to the table's own columns.
    """
    # embed() wraps a bare string into a one-item batch; take that single vector.
    query_vector = np.asarray(embed(query)[0])
    return tbl.search(query_vector).limit(limit).to_pandas()

# Example query: free-text preferences are embedded and compared with the
# vectors built from each listing's description.
user_prefs = "I want to find a house in Yokohama, near the subway station in 8 minutes, and allowed pet and smoke."
semantic_search(user_prefs)

Unnamed: 0,name,year,location,layout,price,description,vector,_distance
0,Spacious Yokohama House,2010,"Yokohama, Kanagawa",3LDK,"¥200,000/month",This modern house is located in a quiet neighb...,"[-0.0068428, 0.013918216, -0.013931139, -0.001...",0.171217
1,Yokohama Seaside House,2018,"Yokohama City, Kanagawa Ward",2LDK,"¥200,000/month",This spacious 2LDK house is located in the bea...,"[-0.00041430985, 0.013863211, -0.006179868, 0....",0.191367
2,Cozy Yokohama House,2007,"Yokohama, Naka-ku",1LDK,"¥150,000/month","Located in the heart of Yokohama, this cozy 1L...","[-0.004016443, 0.013934999, -0.0051537273, -0....",0.199738
3,Peaceful House in Yokohama,2015,"Yokohama City, Tsurumi Ward",2LDK,"¥150,000/month",This cozy and modern 2LDK house is located in ...,"[-0.004344028, 0.020817315, -0.0069739968, 0.0...",0.206322
4,Yokohama Cozy House,2010,"Yokohama City, Nishi Ward",1LDK,"¥150,000/month",This cozy 1LDK house is located in the quiet a...,"[0.009084539, 0.011742461, -0.014109466, -0.00...",0.206712
5,Cozy Yokohama House,2005,"Yokohama, Kanagawa",2LDK,"¥200,000/month",This charming 2LDK house is perfect for a fami...,"[-0.006130764, 0.024798742, -0.015491009, -0.0...",0.209058
6,Yokohama Cozy House,2014,"Yokohama City, Naka Ward",2LDK,"¥150,000/month",This cozy house is located in the heart of Yok...,"[0.002706505, 0.0006733615, -0.020372244, 0.00...",0.211336
7,Modern Ocean-View House in Yokohama,2018,"Yokohama, Kanagawa Prefecture, Naka Ward",2LDK,"¥180,000/month",This newly built house offers stunning ocean v...,"[-0.003068515, 0.0068445164, -0.012227992, -0....",0.211899
8,Yokohama Seaside House,2015,"Yokohama-shi, Naka-ku",2LDK,"¥180,000/month",This modern and spacious house is located in t...,"[-0.010894447, 0.0043512746, -0.0066992715, 0....",0.213166
9,Cozy Townhouse in Yokohama,2016,"Kanagawa Prefecture, Yokohama City, Naka Ward",2LDK,"¥180,000/month",This modern townhouse was built in 2016 and is...,"[-0.0034293109, 0.01870413, -0.006825552, -0.0...",0.215313
